evaluation: Minor corrections and text

This commit is contained in:
Manos Katsomallos 2021-10-11 04:01:08 +02:00
parent d5c39c4e42
commit 6080efece9
6 changed files with 72 additions and 21 deletions

View File

@ -20,7 +20,15 @@ def main(args):
# Distribution type
dist_type = np.array(range(0, 4))
# Number of landmarks
lmdk_n = np.array(range(int(.2*args.time), args.time, int(args.time/5)))
lmdk_n = np.array(range(0, args.time + 1, int(args.time/5)))
markers = [
'^', # Symmetric
'v', # Skewed
'D', # Bimodal
's' # Uniform
]
# Initialize plot
lmdk_lib.plot_init()
# Width of bars
@ -30,11 +38,13 @@ def main(args):
x_margin = bar_width*(len(dist_type)/2 + 1)
plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
plt.xlabel('Landmarks (%)') # Set x axis label.
plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
# plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
plt.xlim(x_i.min(), x_i.max())
# The y axis
# plt.yscale('log')
plt.ylabel('Euclidean distance') # Set y axis label.
# plt.ylabel('Wasserstein distance') # Set y axis label.
plt.ylim(0, 1)
plt.ylabel('Normalized Euclidean distance') # Set y axis label.
# plt.ylabel('Normalized Wasserstein distance') # Set y axis label.
# Bar offset
x_offset = -(bar_width/2)*(len(dist_type) - 1)
for d_i, d in enumerate(dist_type):
@ -47,27 +57,41 @@ def main(args):
print('(%d/%d) %s... ' %(d_i + 1, len(dist_type), title), end='', flush=True)
mae = np.zeros(len(lmdk_n))
for n_i, n in enumerate(lmdk_n):
for r in range(args.reps):
if n == lmdk_n[-1]:
break
for r in range(args.iter):
lmdks = lmdk_lib.get_lmdks(seq, n, d)
hist, h = lmdk_lib.get_hist(seq, lmdks)
opts = lmdk_sel.get_opts_from_top_h(seq, lmdks)
delta = 1.0
res, _ = exp_mech.exponential(hist, opts, exp_mech.score, delta, epsilon)
mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps # Euclidean
# mae[n_i] += lmdk_lib.get_emd(hist, res)/args.reps # Wasserstein
mae[n_i] += lmdk_lib.get_norm(hist, res)/args.iter # Euclidean
# mae[n_i] += lmdk_lib.get_emd(hist, res)/args.iter # Wasserstein
mae = mae/21 # Euclidean
# mae = mae/11.75 # Wasserstein
print('[OK]', flush=True)
# Plot bar for current distribution
plt.bar(
x_i + x_offset,
# # Plot bar for current distribution
# plt.bar(
# x_i + x_offset,
# mae,
# bar_width,
# label=label,
# linewidth=lmdk_lib.line_width
# )
# # Change offset for next bar
# x_offset += bar_width
# Plot line
plt.plot(
x_i,
mae,
bar_width,
label=label,
marker=markers[d_i],
markersize=lmdk_lib.marker_size,
markeredgewidth=0,
linewidth=lmdk_lib.line_width
)
# Change offset for next bar
x_offset += bar_width
path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-norm')
# path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-emd')
path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-norm-l')
# path = str('../../rslt/lmdk_sel_cmp/' + 'lmdk_sel_cmp-emd-l')
# Plot legend
lmdk_lib.plot_legend()
# Show plot
@ -81,7 +105,7 @@ def main(args):
Parse arguments.
Optional:
reps - The number of repetitions.
iter - The number of iterations.
time - The time limit of the sequence.
'''
def parse_args():
@ -91,7 +115,7 @@ def parse_args():
# Mandatory arguments.
# Optional arguments.
parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1)
parser.add_argument('-i', '--iter', help='The number of iterations.', type=int, default=1)
parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100)
# Parse arguments.

Binary file not shown.

Binary file not shown.

View File

@ -1,9 +1,36 @@
\section{Selection of events}
\label{sec:lmdk-sel-eval}
In this section we present the experiments that we performed, to test the methodology that we presented in Section~\ref{subsec:lmdk-sel-sol}, on real and synthetic data sets.
% With the experiments on the real data sets (Section~\ref{subsec:lmdk-expt-bgt}), we show the performance in terms of utility of our three {\thething} mechanisms.
% With the experiments on the synthetic data sets (Section~\ref{subsec:lmdk-expt-cor}) we show the privacy loss by our framework when tuning the size and statistical characteristics of the input {\thething} set $L$ with special emphasis on how the privacy loss under temporal correlation is affected by the number and distribution of the {\thethings}.
In this section, we present the experiments that we performed on real and synthetic data sets to test the methodology that we presented in Section~\ref{subsec:lmdk-sel-sol}.
With the experiments on the synthetic data sets (Section~\ref{subsec:sel-utl}) we show the normalized distances for various {\thething} percentages.
% FIXME: leftover fragment of the previous introduction; integrate into a full sentence or remove: privacy loss by our framework when tuning the size and statistical characteristics of the input {\thething} set $L$ with special emphasis on how the privacy loss under temporal correlation is affected by the number and distribution of the {\thethings}.
With the experiments on the real data sets (Section~\ref{subsec:sel-prv}), we show the performance, in terms of utility, of our three {\thething} mechanisms in combination with privacy-preserving {\thething} that can possibly be applied to humans.
\subsection{{\Thething} selection utility metrics}
\label{subsec:sel-utl}
Figure~\ref{fig:sel-dist} demonstrates the normalized distance that we obtain when we utilize either (a)~the Euclidean or (b)~the Wasserstein distance metric to obtain a set of {\thethings} including regular events.
\begin{figure}[htp]
\centering
\subcaptionbox{Euclidean\label{fig:sel-dist-norm}}{%
\includegraphics[width=.5\linewidth]{evaluation/sel-dist-norm}%
}%
\subcaptionbox{Wasserstein\label{fig:sel-dist-emd}}{%
\includegraphics[width=.5\linewidth]{evaluation/sel-dist-emd}%
}%
\caption{The normalized (a)~Euclidean, and (b)~Wasserstein distance of the generated {\thething} sets for different {\thething} percentages.}
\label{fig:sel-dist}
\end{figure}
Comparing the results of the Euclidean distance in Figure~\ref{fig:sel-dist-norm} with those of the Wasserstein distance in Figure~\ref{fig:sel-dist-emd}, we conclude that the Euclidean distance provides more consistent results for all possible distributions.
The maximum difference, observed between the bimodal and the skewed {\thething} distributions, is approximately $0.4$ for the former and $0.7$ for the latter.
Therefore, we choose to utilize the Euclidean distance metric for the implementation of the privacy-preserving {\thething} selection.
\subsection{Budget allocation and {\thething} selection}
\label{subsec:sel-prv}
Figure~\ref{fig:real-sel} exhibits the performance of Skip, Uniform, and Adaptive (see Section~\ref{subsec:lmdk-mechs}) in combination with the {\thething} selection component.
@ -19,7 +46,7 @@ Figure~\ref{fig:real-sel} exhibits the performance of Skip, Uniform, and Adaptiv
\subcaptionbox{T-drive\label{fig:t-drive-sel}}{%
\includegraphics[width=.5\linewidth]{evaluation/t-drive-sel}%
}%
\caption{The mean absolute error (a)~as a percentage, (b)~in kWh, and (c)~in meters of the released data for different {\thethings} percentages.}
\caption{The mean absolute error (a)~as a percentage, (b)~in kWh, and (c)~in meters of the released data for different {\thething} percentages.}
\label{fig:real-sel}
\end{figure}