diff --git a/code/expt/lmdk_sel_cmp.py b/code/expt/lmdk_sel_cmp.py index a7d9d71..ce466bc 100644 --- a/code/expt/lmdk_sel_cmp.py +++ b/code/expt/lmdk_sel_cmp.py @@ -20,7 +20,15 @@ def main(args): # Distribution type dist_type = np.array(range(0, 4)) # Number of landmarks - lmdk_n = np.array(range(int(.2*args.time), args.time, int(args.time/5))) + lmdk_n = np.array(range(0, args.time + 1, int(args.time/5))) + + markers = [ + '^', # Symmetric + 'v', # Skewed + 'D', # Bimodal + 's' # Uniform + ] + # Initialize plot lmdk_lib.plot_init() # Width of bars @@ -30,11 +38,13 @@ def main(args): x_margin = bar_width*(len(dist_type)/2 + 1) plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int)) plt.xlabel('Landmarks (%)') # Set x axis label. - plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) + # plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) + plt.xlim(x_i.min(), x_i.max()) # The y axis # plt.yscale('log') - plt.ylabel('Euclidean distance') # Set y axis label. - # plt.ylabel('Wasserstein distance') # Set y axis label. + plt.ylim(0, 1) + plt.ylabel('Normalized Euclidean distance') # Set y axis label. + # plt.ylabel('Normalized Wasserstein distance') # Set y axis label. # Bar offset x_offset = -(bar_width/2)*(len(dist_type) - 1) for d_i, d in enumerate(dist_type): @@ -47,27 +57,41 @@ def main(args): print('(%d/%d) %s... 
' %(d_i + 1, len(dist_type), title), end='', flush=True) mae = np.zeros(len(lmdk_n)) for n_i, n in enumerate(lmdk_n): - for r in range(args.reps): + if n == lmdk_n[-1]: + break + for r in range(args.iter): lmdks = lmdk_lib.get_lmdks(seq, n, d) hist, h = lmdk_lib.get_hist(seq, lmdks) opts = lmdk_sel.get_opts_from_top_h(seq, lmdks) delta = 1.0 res, _ = exp_mech.exponential(hist, opts, exp_mech.score, delta, epsilon) - mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps # Euclidean - # mae[n_i] += lmdk_lib.get_emd(hist, res)/args.reps # Wasserstein + mae[n_i] += lmdk_lib.get_norm(hist, res)/args.iter # Euclidean + # mae[n_i] += lmdk_lib.get_emd(hist, res)/args.iter # Wasserstein + mae = mae/21 # Euclidean + # mae = mae/11.75 # Wasserstein print('[OK]', flush=True) - # Plot bar for current distribution - plt.bar( - x_i + x_offset, + # # Plot bar for current distribution + # plt.bar( + # x_i + x_offset, + # mae, + # bar_width, + # label=label, + # linewidth=lmdk_lib.line_width + # ) + # # Change offset for next bar + # x_offset += bar_width + # Plot line + plt.plot( + x_i, mae, - bar_width, label=label, + marker=markers[d_i], + markersize=lmdk_lib.marker_size, + markeredgewidth=0, linewidth=lmdk_lib.line_width ) - # Change offset for next bar - x_offset += bar_width - path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-norm') - # path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-emd') + path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-norm-l') + # path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-emd-l') # Plot legend lmdk_lib.plot_legend() # Show plot @@ -81,7 +105,7 @@ def main(args): Parse arguments. Optional: - reps - The number of repetitions. + iter - The number of iterations. time - The time limit of the sequence. ''' def parse_args(): @@ -91,7 +115,7 @@ def parse_args(): # Mandatory arguments. # Optional arguments. 
- parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1) + parser.add_argument('-i', '--iter', help='The number of iterations.', type=int, default=1) parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100) # Parse arguments. diff --git a/graphics/evaluation/lmdk-sel-dist-emd.pdf b/graphics/evaluation/sel-dist-emd.pdf similarity index 70% rename from graphics/evaluation/lmdk-sel-dist-emd.pdf rename to graphics/evaluation/sel-dist-emd.pdf index b49ce11..c6f24ae 100644 Binary files a/graphics/evaluation/lmdk-sel-dist-emd.pdf and b/graphics/evaluation/sel-dist-emd.pdf differ diff --git a/graphics/evaluation/lmdk-sel-dist-norm.pdf b/graphics/evaluation/sel-dist-norm.pdf similarity index 71% rename from graphics/evaluation/lmdk-sel-dist-norm.pdf rename to graphics/evaluation/sel-dist-norm.pdf index dddda0c..e54f62d 100644 Binary files a/graphics/evaluation/lmdk-sel-dist-norm.pdf and b/graphics/evaluation/sel-dist-norm.pdf differ diff --git a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd-l.pdf b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd-l.pdf new file mode 100644 index 0000000..c6f24ae Binary files /dev/null and b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd-l.pdf differ diff --git a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm-l.pdf b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm-l.pdf new file mode 100644 index 0000000..e54f62d Binary files /dev/null and b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm-l.pdf differ diff --git a/text/evaluation/theotherthing.tex b/text/evaluation/theotherthing.tex index 66dcb85..a44f756 100644 --- a/text/evaluation/theotherthing.tex +++ b/text/evaluation/theotherthing.tex @@ -1,9 +1,36 @@ \section{Selection of events} \label{sec:lmdk-sel-eval} -In this section we present the experiments that we performed, to test the methodology that we presented in Section~\ref{subsec:lmdk-sel-sol}, on real and synthetic data sets. 
-% With the experiments on the real data sets (Section~\ref{subsec:lmdk-expt-bgt}), we show the performance in terms of utility of our three {\thething} mechanisms. -% With the experiments on the synthetic data sets (Section~\ref{subsec:lmdk-expt-cor}) we show the privacy loss by our framework when tuning the size and statistical characteristics of the input {\thething} set $L$ with special emphasis on how the privacy loss under temporal correlation is affected by the number and distribution of the {\thethings}. +In this section we present the experiments that we performed, to test the methodology that we presented in Section~\ref{subsec:lmdk-sel-sol}, on real and synthetic data sets. +With the experiments on the synthetic data sets (Section~\ref{subsec:sel-utl}), we show the normalized distances for various {\thething} percentages. +privacy loss by our framework when tuning the size and statistical characteristics of the input {\thething} set $L$ with special emphasis on how the privacy loss under temporal correlation is affected by the number and distribution of the {\thethings}. +With the experiments on the real data sets (Section~\ref{subsec:sel-prv}), we show the performance in terms of utility of our three {\thething} mechanisms in combination with privacy-preserving {\thething} that can be possibly applied to humans. + + +\subsection{{\Thething} selection utility metrics} +\label{subsec:sel-utl} + +Figure~\ref{fig:sel-dist} demonstrates the normalized distance that we obtain when we utilize either (a)~the Euclidean or (b)~the Wasserstein distance metric to obtain a set of {\thethings} including regular events. 
+ +\begin{figure}[htp] + \centering + \subcaptionbox{Euclidean\label{fig:sel-dist-norm}}{% + \includegraphics[width=.5\linewidth]{evaluation/sel-dist-norm}% + }% + \subcaptionbox{Wasserstein\label{fig:sel-dist-emd}}{% + \includegraphics[width=.5\linewidth]{evaluation/sel-dist-emd}% + }% + \caption{The normalized (a)~Euclidean, and (b)~Wasserstein distance of the generated {\thething} sets for different {\thething} percentages.} + \label{fig:sel-dist} +\end{figure} + +Comparing the results of the Euclidean distance in Figure~\ref{fig:sel-dist-norm} with those of the Wasserstein in Figure~\ref{fig:sel-dist-emd}, we conclude that the Euclidean distance provides more consistent results for all possible distributions. +The maximum difference is approximately $0.4$ for the former and $0.7$ for the latter between the bimodal and skewed {\thething} distributions. +Therefore, we choose to utilize the Euclidean distance metric for the implementation of the privacy-preserving {\thething} selection. + + +\subsection{Budget allocation and {\thething} selection} +\label{subsec:sel-prv} Figure~\ref{fig:real-sel} exhibits the performance of Skip, Uniform, and Adaptive (see Section~\ref{subsec:lmdk-mechs}) in combination with the {\thething} selection component. @@ -19,7 +46,7 @@ Figure~\ref{fig:real-sel} exhibits the performance of Skip, Uniform, and Adaptiv \subcaptionbox{T-drive\label{fig:t-drive-sel}}{% \includegraphics[width=.5\linewidth]{evaluation/t-drive-sel}% }% - \caption{The mean absolute error (a)~as a percentage, (b)~in kWh, and (c)~in meters of the released data for different {\thethings} percentages.} + \caption{The mean absolute error (a)~as a percentage, (b)~in kWh, and (c)~in meters of the released data for different {\thething} percentages.} \label{fig:real-sel} \end{figure}