evaluation: Minor corrections and text

2021-10-11 04:01:08 +02:00
parent d5c39c4e42
commit 6080efece9
6 changed files with 72 additions and 21 deletions
--- a/code/expt/lmdk_sel_cmp.py
+++ b/code/expt/lmdk_sel_cmp.py
@ -20,7 +20,15 @@ def main(args):
  # Distribution type
  dist_type = np.array(range(0, 4))
  # Number of landmarks
-  lmdk_n = np.array(range(int(.2*args.time), args.time, int(args.time/5)))
+  lmdk_n = np.array(range(0, args.time + 1, int(args.time/5)))
+
+  markers = [
+    '^', # Symmetric
+    'v', # Skewed
+    'D', # Bimodal
+    's'  # Uniform
+  ]
+
  # Initialize plot
  lmdk_lib.plot_init()
  # Width of bars
@ -30,11 +38,13 @@ def main(args):
  x_margin = bar_width*(len(dist_type)/2 + 1)
  plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
  plt.xlabel('Landmarks (%)')  # Set x axis label.
-  plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
+  # plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
+  plt.xlim(x_i.min(), x_i.max())
  # The y axis
  # plt.yscale('log')
-  plt.ylabel('Euclidean distance')  # Set y axis label.
-  # plt.ylabel('Wasserstein distance')  # Set y axis label.
+  plt.ylim(0, 1)
+  plt.ylabel('Normalized Euclidean distance')  # Set y axis label.
+  # plt.ylabel('Normalized Wasserstein distance')  # Set y axis label.
  # Bar offset
  x_offset = -(bar_width/2)*(len(dist_type) - 1)
  for d_i, d in enumerate(dist_type):
@ -47,27 +57,41 @@ def main(args):
    print('(%d/%d) %s... ' %(d_i + 1, len(dist_type), title), end='', flush=True)
    mae = np.zeros(len(lmdk_n))
    for n_i, n in enumerate(lmdk_n):
-      for r in range(args.reps):
+      if n == lmdk_n[-1]:
+        break
+      for r in range(args.iter):
        lmdks = lmdk_lib.get_lmdks(seq, n, d)
        hist, h = lmdk_lib.get_hist(seq, lmdks)
        opts = lmdk_sel.get_opts_from_top_h(seq, lmdks)
        delta = 1.0
        res, _ = exp_mech.exponential(hist, opts, exp_mech.score, delta, epsilon)
-        mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps  # Euclidean
-        # mae[n_i] += lmdk_lib.get_emd(hist, res)/args.reps  # Wasserstein
+        mae[n_i] += lmdk_lib.get_norm(hist, res)/args.iter  # Euclidean
+        # mae[n_i] += lmdk_lib.get_emd(hist, res)/args.iter  # Wasserstein
+    mae = mae/21  # Euclidean
+    # mae = mae/11.75  # Wasserstein
    print('[OK]', flush=True)
-    # Plot bar for current distribution
-    plt.bar(
-      x_i + x_offset,
+    # # Plot bar for current distribution
+    # plt.bar(
+    #   x_i + x_offset,
+    #   mae,
+    #   bar_width,
+    #   label=label,
+    #   linewidth=lmdk_lib.line_width
+    # )
+    # # Change offset for next bar
+    # x_offset += bar_width
+    # Plot line
+    plt.plot(
+      x_i,
      mae,
-      bar_width,
      label=label,
+      marker=markers[d_i],
+      markersize=lmdk_lib.marker_size,
+      markeredgewidth=0,
      linewidth=lmdk_lib.line_width
    )
-    # Change offset for next bar
-    x_offset += bar_width
-  path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-norm')
-  # path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-emd')
+  path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-norm-l')
+  # path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-emd-l')
  # Plot legend
  lmdk_lib.plot_legend()
  # Show plot
@ -81,7 +105,7 @@ def main(args):
  Parse arguments.

  Optional:
-    reps - The number of repetitions.
+    iter - The number of iterations.
    time - The time limit of the sequence.
 '''
 def parse_args():
@ -91,7 +115,7 @@ def parse_args():
  # Mandatory arguments.

  # Optional arguments.
-  parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1)
+  parser.add_argument('-i', '--iter', help='The number of iterations.', type=int, default=1)
  parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100)

  # Parse arguments.
--- a/graphics/evaluation/lmdk-sel-dist-emd.pdf
+++ b/graphics/evaluation/lmdk-sel-dist-emd.pdf
--- a/graphics/evaluation/lmdk-sel-dist-norm.pdf
+++ b/graphics/evaluation/lmdk-sel-dist-norm.pdf
--- a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd-l.pdf
+++ b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd-l.pdf
--- a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm-l.pdf
+++ b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm-l.pdf
--- a/text/evaluation/theotherthing.tex
+++ b/text/evaluation/theotherthing.tex
@ -1,9 +1,36 @@
 \section{Selection of events}
 \label{sec:lmdk-sel-eval}

-In this section we present the experiments that we performed, to test the methodology that we presented in Section~\ref{subsec:lmdk-sel-sol}, on real and synthetic data sets. 
-% With the experiments on the real data sets (Section~\ref{subsec:lmdk-expt-bgt}), we show the performance in terms of utility of our three {\thething} mechanisms.
-% With the experiments on the synthetic data sets (Section~\ref{subsec:lmdk-expt-cor}) we show the privacy loss by our framework when tuning the size and statistical characteristics of the input {\thething} set $L$ with special emphasis on how the privacy loss under temporal correlation is affected by the number and distribution of the {\thethings}.
+In this section, we present the experiments that we performed on real and synthetic data sets to test the methodology that we presented in Section~\ref{subsec:lmdk-sel-sol}.
+With the experiments on the synthetic data sets (Section~\ref{subsec:sel-utl}), we show the normalized distances for various {\thething} percentages.
+% FIXME: leftover fragment from the previous draft (kept for reference): privacy loss by our framework when tuning the size and statistical characteristics of the input {\thething} set $L$ with special emphasis on how the privacy loss under temporal correlation is affected by the number and distribution of the {\thethings}.
+With the experiments on the real data sets (Section~\ref{subsec:sel-prv}), we show the performance in terms of utility of our three {\thething} mechanisms in combination with the privacy-preserving {\thething} selection, which can possibly be applied to humans.
+
+
+\subsection{{\Thething} selection utility metrics}
+\label{subsec:sel-utl}
+
+Figure~\ref{fig:sel-dist} demonstrates the normalized distance that we obtain when we utilize either (a)~the Euclidean or (b)~the Wasserstein distance metric to obtain a set of {\thethings} including regular events.
+
+\begin{figure}[htp]
+  \centering
+  \subcaptionbox{Euclidean\label{fig:sel-dist-norm}}{%
+    \includegraphics[width=.5\linewidth]{evaluation/lmdk-sel-dist-norm}%
+  }%
+  \subcaptionbox{Wasserstein\label{fig:sel-dist-emd}}{%
+    \includegraphics[width=.5\linewidth]{evaluation/lmdk-sel-dist-emd}%
+  }%
+  \caption{The normalized (a)~Euclidean, and (b)~Wasserstein distance of the generated {\thething} sets for different {\thething} percentages.}
+  \label{fig:sel-dist}
+\end{figure}
+
+Comparing the results of the Euclidean distance in Figure~\ref{fig:sel-dist-norm} with those of the Wasserstein distance in Figure~\ref{fig:sel-dist-emd}, we conclude that the Euclidean distance provides more consistent results for all possible distributions.
+The maximum difference between the bimodal and skewed {\thething} distributions is approximately $0.4$ for the former and $0.7$ for the latter.
+Therefore, we choose to utilize the Euclidean distance metric for the implementation of the privacy-preserving {\thething} selection.
+
+
+\subsection{Budget allocation and {\thething} selection}
+\label{subsec:sel-prv}

 Figure~\ref{fig:real-sel} exhibits the performance of Skip, Uniform, and Adaptive (see Section~\ref{subsec:lmdk-mechs}) in combination with the {\thething} selection component.

@ -19,7 +46,7 @@ Figure~\ref{fig:real-sel} exhibits the performance of Skip, Uniform, and Adaptiv
  \subcaptionbox{T-drive\label{fig:t-drive-sel}}{%
    \includegraphics[width=.5\linewidth]{evaluation/t-drive-sel}%
  }%
-  \caption{The mean absolute error (a)~as a percentage, (b)~in kWh, and (c)~in meters of the released data for different {\thethings} percentages.}
+  \caption{The mean absolute error (a)~as a percentage, (b)~in kWh, and (c)~in meters of the released data for different {\thething} percentages.}
  \label{fig:real-sel}
 \end{figure}