lmdk-expt: Reviewed all graphs for synthetic
This commit is contained in:
parent
edb98f736d
commit
b03b510f02
121
code/expt/avg_dist.py
Normal file
121
code/expt/avg_dist.py
Normal file
@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
sys.path.insert(1, '../lib')
|
||||
import argparse
|
||||
import gdp
|
||||
import lmdk_lib
|
||||
import math
|
||||
from matplotlib import pyplot as plt
|
||||
import numpy as np
|
||||
import os
|
||||
import time
|
||||
|
||||
|
||||
def main(args):
    """
    Plot the normalized average temporal distance of every event from its
    nearest landmark (or the sequence edge) for several landmark
    distributions and landmark percentages, and save the figure as a PDF.

    Parameters:
        args - The parsed arguments (reps, time).
    """
    # Number of timestamps
    seq = lmdk_lib.get_seq(1, args.time)
    # Distribution type (indices map to the markers below)
    dist_type = np.array(range(0, 4))
    # Number of landmarks: 0%, 20%, ..., 100% of the sequence length
    lmdk_n = np.array(range(0, args.time + 1, int(args.time/5)))

    markers = [
        '^',  # Symmetric
        'v',  # Skewed
        'D',  # Bimodal
        's'   # Uniform
    ]

    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis: landmark counts shown as percentages of the sequence
    x_i = np.arange(len(lmdk_n))
    plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
    plt.xlabel('Landmarks (%)')  # Set x axis label.
    plt.xlim(x_i.min(), x_i.max())
    # The y axis
    plt.ylabel('Normalized average distance')  # Set y axis label.
    plt.yscale('log')
    plt.ylim(.001, 1)
    # Logging
    print('Average distance', end='', flush=True)
    for d_i, d in enumerate(dist_type):
        avg_dist = np.zeros(len(lmdk_n))
        # Logging
        print('.', end='', flush=True)
        for i, n in enumerate(lmdk_n):
            for r in range(args.reps):
                # Generate landmarks
                lmdks = lmdk_lib.get_lmdks(seq, n, d)
                # Calculate average distance
                avg_cur = 0
                for t in seq:
                    t_prv, t_nxt = gdp.get_limits(t, seq, lmdks)
                    avg_cur += (abs(t - t_prv) - 1 + abs(t - t_nxt) - 1)/len(seq)
                # Normalized average based on repetitions
                avg_dist[i] += avg_cur/args.reps
        # Rescaling (min-max normalization)
        # https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
        # Guard against a zero range: dividing by (max - min) == 0 would
        # fill the curve with NaNs and draw nothing.
        dist_range = avg_dist.max() - avg_dist.min()
        if dist_range > 0:
            avg_dist = (avg_dist - avg_dist.min())/dist_range
        else:
            avg_dist = np.zeros(len(lmdk_n))
        # Normalize for log scale: zero cannot be drawn on a log axis,
        # so clip the final point to the axis minimum.
        if avg_dist[-1] == 0:
            avg_dist[-1] = .001
        # Set label
        label = lmdk_lib.dist_type_to_str(d_i)
        if d_i == 1:
            label = 'Skewed'
        # Plot line
        plt.plot(
            x_i,
            avg_dist,
            label=label,
            marker=markers[d_i],
            markersize=lmdk_lib.marker_size,
            markeredgewidth=0,
            linewidth=lmdk_lib.line_width
        )
    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    # plt.show()
    # Save plot
    lmdk_lib.save_plot(str('../../rslt/avg_dist/' + 'avg-dist' + '.pdf'))
    print(' [OK]', flush=True)
|
||||
|
||||
|
||||
'''
Parse arguments.

Optional:
    reps - The number of repetitions.
    time - The time limit of the sequence.
'''
def parse_args(argv=None):
    """
    Parse the command line arguments.

    Parameters:
        argv - The argument list to parse; defaults to sys.argv[1:]
               (argparse's behavior when None is passed), which keeps
               existing callers working while making the parser testable.

    Returns:
        The parsed arguments namespace.
    """
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1)
    parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100)

    # Parse arguments.
    args = parser.parse_args(argv)

    return args
|
||||
|
||||
|
||||
# Script entry point: parse arguments, run the experiment, and report
# the wall-clock time elapsed.
if __name__ == '__main__':
    try:
        args = parse_args()
        start_time = time.time()
        main(args)
        end_time = time.time()
        # Report elapsed time as HH:MM:SS.
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
        print('##############################')
    except KeyboardInterrupt:
        # Allow a clean Ctrl-C abort without a traceback.
        print('Interrupted by user.')
        exit()
|
131
code/expt/dist_cor.py
Normal file
131
code/expt/dist_cor.py
Normal file
@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
sys.path.insert(1, '../lib')
|
||||
import argparse
|
||||
import gdp
|
||||
import itertools
|
||||
import lmdk_bgt
|
||||
import lmdk_lib
|
||||
import numpy as np
|
||||
import os
|
||||
from matplotlib import pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
def main(args):
    """
    For each temporal-correlation degree, plot the overall privacy loss
    per landmark distribution and landmark percentage as grouped bars,
    with a reference line for the correlation-free loss, and save one
    PDF per correlation degree.

    Parameters:
        args - The parsed arguments (reps, time).
    """
    # Privacy goal
    epsilon = 1.0
    # Number of timestamps
    seq = lmdk_lib.get_seq(1, args.time)
    # Correlation degree (higher values means weaker correlations)
    cor_deg = np.array([.01, .1, 1.0])
    cor_lbl = ['Strong correlation', 'Moderate correlation', 'Weak correlation']
    # Distribution type
    dist_type = np.array(range(0, 4))
    # Number of landmarks: 0%, 20%, ..., 100% of the sequence length
    lmdk_n = np.array(range(0, args.time + 1, int(args.time/5)))
    # Width of bars (one group of len(dist_type) bars per x tick)
    bar_width = 1/(len(dist_type) + 1)
    # For each correlation degree
    for c_i, c in enumerate(cor_deg):
        # Logging
        title = cor_lbl[c_i]
        print('(%d/%d) %s' %(c_i + 1, len(cor_deg), title), end='', flush=True)
        # The transition matrix
        p = gdp.gen_trans_mt(2, c)
        # Bar offset: start half a group to the left so bars center on the tick
        x_offset = -(bar_width/2)*(len(dist_type) - 1)
        # Initialize plot
        lmdk_lib.plot_init()
        # The x axis: landmark counts shown as percentages of the sequence
        x_i = np.arange(len(lmdk_n))
        plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
        plt.xlabel('Landmarks (%)') # Set x axis label.
        x_margin = bar_width*(len(dist_type)/2 + 1)
        plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
        # The y axis
        plt.ylabel('Privacy loss') # Set y axis label.
        plt.yscale('log')
        plt.ylim(epsilon/10, 100*len(seq))
        # plt.ylim(0, 10000)
        for d_i, d in enumerate(dist_type):
            print('.', end='', flush=True)
            # Initialization
            e = np.zeros(len(lmdk_n))
            a = np.zeros(len(lmdk_n))
            for i, n in enumerate(lmdk_n):
                for r in range(args.reps):
                    # Generate landmarks
                    lmdks = lmdk_lib.get_lmdks(seq, n, d)
                    # Uniform budget allocation
                    e_cur = lmdk_bgt.uniform(seq, lmdks, epsilon)
                    _, _, a_cur = gdp.tpl_lmdk_mem(e_cur, p, p, seq, lmdks)
                    # Save privacy loss (averaged over repetitions)
                    e[i] += np.sum(e_cur)/args.reps
                    a[i] += np.sum(a_cur)/args.reps
            # Set label
            label = lmdk_lib.dist_type_to_str(d_i)
            if d_i == 1:
                label = 'Skewed'
            # Plot bar for current distribution
            plt.bar(
                x_i + x_offset,
                a,
                bar_width,
                label=label,
                linewidth=lmdk_lib.line_width
            )
            # Change offset for next bar
            x_offset += bar_width
        # Plot line for no correlation.
        # NOTE(review): e here is the value from the LAST distribution
        # iteration; presumably the uniform allocation makes e identical
        # across distributions, so one line suffices — confirm in lmdk_bgt.
        plt.plot(
            x_i,
            e,
            linewidth=lmdk_lib.line_width,
            color='#e0e0e0',
        )
        # Plot legend
        lmdk_lib.plot_legend()
        # Show plot
        # plt.show()
        # Save plot (one file per correlation degree, named after its label)
        lmdk_lib.save_plot(str('../../rslt/dist_cor/' + title + '.pdf'))
        print(' [OK]', flush=True)
|
||||
|
||||
|
||||
'''
Parse arguments.

Optional:
    reps - The number of repetitions.
    time - The time limit of the sequence.
'''
def parse_args(argv=None):
    """
    Parse the command line arguments.

    Parameters:
        argv - The argument list to parse; defaults to sys.argv[1:]
               (argparse's behavior when None is passed), which keeps
               existing callers working while making the parser testable.

    Returns:
        The parsed arguments namespace.
    """
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1)
    parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100)

    # Parse arguments.
    args = parser.parse_args(argv)

    return args
|
||||
|
||||
|
||||
# Script entry point: parse arguments, run the experiment, and report
# the wall-clock time elapsed.
if __name__ == '__main__':
    try:
        args = parse_args()
        start_time = time.time()
        main(args)
        end_time = time.time()
        # Report elapsed time as HH:MM:SS.
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
        print('##############################')
    except KeyboardInterrupt:
        # Allow a clean Ctrl-C abort without a traceback.
        print('Interrupted by user.')
        exit()
|
1576
code/lib/gdp.py
Normal file
1576
code/lib/gdp.py
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
BIN
graphics/evaluation/dist-cor-mod.pdf
Normal file
BIN
graphics/evaluation/dist-cor-mod.pdf
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
rslt/avg_dist/avg-dist.pdf
Normal file
BIN
rslt/avg_dist/avg-dist.pdf
Normal file
Binary file not shown.
BIN
rslt/dist_cor/dist-cor-mod.pdf
Normal file
BIN
rslt/dist_cor/dist-cor-mod.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
rslt/dist_cor/dist-cor-wk.pdf
Normal file
BIN
rslt/dist_cor/dist-cor-wk.pdf
Normal file
Binary file not shown.
@ -51,7 +51,8 @@ In general, we can claim that the Adaptive is the most reliable and best perform
|
||||
Moreover, designing a data-dependent sampling scheme would possibly result in better results for Adaptive.
|
||||
|
||||
|
||||
\paragraph{Temporal distance and correlation}
|
||||
\subsubsection{Temporal distance and correlation}
|
||||
|
||||
Figure~\ref{fig:avg-dist} shows a comparison of the average temporal distance of the events from the previous/next {\thething} or the start/end of the time series for various distributions in synthetic data.
|
||||
More particularly, we count for every event the total number of events between itself and the nearest {\thething} or the series edge.
|
||||
We observe that the uniform and bimodal distributions tend to limit the regular event--{\thething} distance.
|
||||
@ -61,33 +62,33 @@ On the contrary, distributing the {\thethings} at one part of the sequence, as i
|
||||
|
||||
\begin{figure}[htp]
|
||||
\centering
|
||||
\includegraphics[width=.5\linewidth]{avg-dist}%
|
||||
\includegraphics[width=.5\linewidth]{evaluation/avg-dist}%
|
||||
\caption{Average temporal distance of the events from the {\thethings} for different {\thethings} percentages within a time series in various {\thethings} distributions.}
|
||||
\label{fig:avg-dist}
|
||||
\end{figure}
|
||||
|
||||
Figure~\ref{fig:dist-cor} illustrates a comparison among the aforementioned distributions regarding the overall privacy loss under moderate (Figure~\ref{fig:dist-cor-mod}), and strong (Figure~\ref{fig:dist-cor-stg}) correlation degrees.
|
||||
Figure~\ref{fig:dist-cor} illustrates a comparison among the aforementioned distributions regarding the overall privacy loss under (a)~weak, (b)~moderate, and (c)~strong temporal correlation degrees.
|
||||
The line shows the overall privacy loss---for all cases of {\thethings} distribution---without temporal correlation.
|
||||
We skip the presentation of the results under a weak correlation degree, since they converge in this case.
|
||||
In combination with Figure~\ref{fig:avg-dist}, we conclude that a greater average event-{\thething} distance in a distribution can result into greater overall privacy loss under moderate and strong temporal correlation.
|
||||
This is due to the fact that the backward/forward privacy loss accumulates more over time in wider spaces without {\thethings} (see Section~\ref{subsec:correlations}).
|
||||
In combination with Figure~\ref{fig:avg-dist}, we conclude that a greater average event--{\thething} distance in a distribution can result in greater overall privacy loss under moderate and strong temporal correlation.
|
||||
This is due to the fact that the backward/forward privacy loss accumulates more over time in wider spaces without {\thethings} (see Section~\ref{sec:correlation}).
|
||||
Furthermore, the behavior of the privacy loss is as expected regarding the temporal correlation degree.
|
||||
Predictably, a stronger correlation degree generates higher privacy loss while widening the gap between the different distribution cases.
|
||||
On the contrary, a weaker correlation degree makes it harder to differentiate among the {\thethings} distributions.
|
||||
The privacy loss under a weak correlation degree converges.
|
||||
|
||||
\begin{figure}[htp]
|
||||
\centering
|
||||
\subcaptionbox{Weak correlation\label{fig:dist-cor-wk}}{%
|
||||
\includegraphics[width=.5\linewidth]{dist-cor-wk}%
|
||||
\includegraphics[width=.5\linewidth]{evaluation/dist-cor-wk}%
|
||||
}%
|
||||
\hspace{\fill}
|
||||
\subcaptionbox{Moderate correlation\label{fig:dist-cor-mod}}{%
|
||||
\includegraphics[width=.5\linewidth]{dist-cor-mod}%
|
||||
\includegraphics[width=.5\linewidth]{evaluation/dist-cor-mod}%
|
||||
}%
|
||||
\subcaptionbox{Strong correlation\label{fig:dist-cor-stg}}{%
|
||||
\includegraphics[width=.5\linewidth]{dist-cor-stg}%
|
||||
\includegraphics[width=.5\linewidth]{evaluation/dist-cor-stg}%
|
||||
}%
|
||||
\caption{Privacy loss for different {\thethings} percentages and distributions, under weak, moderate, and strong degrees of temporal correlation.
|
||||
\caption{Privacy loss for different {\thethings} percentages and distributions, under (a)~weak, (b)~moderate, and (c)~strong degrees of temporal correlation.
|
||||
The line shows the overall privacy loss without temporal correlation.}
|
||||
\label{fig:dist-cor}
|
||||
\end{figure}
|
||||
|
Loading…
Reference in New Issue
Block a user