lmdk-expt: Reviewed all graphs for synthetic
This commit is contained in:
parent
edb98f736d
commit
b03b510f02
121
code/expt/avg_dist.py
Normal file
121
code/expt/avg_dist.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1, '../lib')
|
||||||
|
import argparse
|
||||||
|
import gdp
|
||||||
|
import lmdk_lib
|
||||||
|
import math
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Plot the normalized average temporal distance of regular events
    from the nearest landmark (or the sequence edge) for four landmark
    distributions and several landmark percentages, and save the figure.

    Args:
        args: Parsed CLI arguments; uses args.time (the time limit of
            the sequence) and args.reps (repetitions per configuration).
    """
    # Number of timestamps
    seq = lmdk_lib.get_seq(1, args.time)
    # Distribution type (indices 0-3; see markers below)
    dist_type = np.array(range(0, 4))
    # Number of landmarks: 0%, 20%, ..., 100% of the sequence.
    # Guard the step: for args.time < 5 the original int(args.time/5)
    # is 0 and range() raises ValueError on a zero step.
    step = max(1, args.time//5)
    lmdk_n = np.array(range(0, args.time + 1, step))

    markers = [
        '^',  # Symmetric
        'v',  # Skewed
        'D',  # Bimodal
        's'   # Uniform
    ]

    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis: landmark percentage of the sequence
    x_i = np.arange(len(lmdk_n))
    plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
    plt.xlabel('Landmarks (%)')  # Set x axis label.
    plt.xlim(x_i.min(), x_i.max())
    # The y axis
    plt.ylabel('Normalized average distance')  # Set y axis label.
    plt.yscale('log')
    plt.ylim(.001, 1)
    # Logging
    print('Average distance', end='', flush=True)
    for d_i, d in enumerate(dist_type):
        avg_dist = np.zeros(len(lmdk_n))
        # Logging
        print('.', end='', flush=True)
        for i, n in enumerate(lmdk_n):
            for _ in range(args.reps):
                # Generate landmarks
                lmdks = lmdk_lib.get_lmdks(seq, n, d)
                # Calculate average distance over all events
                avg_cur = 0
                for t in seq:
                    t_prv, t_nxt = gdp.get_limits(t, seq, lmdks)
                    avg_cur += (abs(t - t_prv) - 1 + abs(t - t_nxt) - 1)/len(seq)
                # Normalized average based on repetitions
                avg_dist[i] += avg_cur/args.reps
        # Rescaling (min-max normalization)
        # https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
        # Guard against a constant array: the original expression divided
        # by zero and filled avg_dist with NaNs when max == min.
        dist_range = avg_dist.max() - avg_dist.min()
        if dist_range > 0:
            avg_dist = (avg_dist - avg_dist.min())/dist_range
        else:
            avg_dist = np.zeros(len(lmdk_n))
        # Normalize for log scale: a zero last point would vanish on the
        # log-scaled y axis, so clamp it to the axis minimum.
        if avg_dist[-1] == 0:
            avg_dist[-1] = .001
        # Set label
        label = lmdk_lib.dist_type_to_str(d_i)
        if d_i == 1:
            label = 'Skewed'
        # Plot line
        plt.plot(
            x_i,
            avg_dist,
            label=label,
            marker=markers[d_i],
            markersize=lmdk_lib.marker_size,
            markeredgewidth=0,
            linewidth=lmdk_lib.line_width
        )
    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    # plt.show()
    # Save plot
    lmdk_lib.save_plot('../../rslt/avg_dist/' + 'avg-dist' + '.pdf')
    print(' [OK]', flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Build the command-line parser and parse sys.argv.

    Optional:
        reps - The number of repetitions.
        time - The time limit of the sequence.

    Returns:
        argparse.Namespace holding the parsed arguments.
    """
    # Create argument parser (no mandatory arguments).
    arg_parser = argparse.ArgumentParser()
    # Optional arguments.
    arg_parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1)
    arg_parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100)
    # Parse and return the arguments.
    return arg_parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        args = parse_args()
        start_time = time.time()
        main(args)
        end_time = time.time()
        # Report the wall-clock duration of the experiment.
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # Use sys.exit() instead of the site-provided exit() builtin,
        # which is intended for interactive sessions and is not
        # guaranteed to exist in all runtime configurations.
        sys.exit()
|
131
code/expt/dist_cor.py
Normal file
131
code/expt/dist_cor.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.insert(1, '../lib')
|
||||||
|
import argparse
|
||||||
|
import gdp
|
||||||
|
import itertools
|
||||||
|
import lmdk_bgt
|
||||||
|
import lmdk_lib
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
def main(args):
    """Plot, for each temporal-correlation degree, the overall privacy
    loss of the uniform budget allocation for four landmark distributions
    and several landmark percentages, and save one figure per degree.

    Args:
        args: Parsed CLI arguments; uses args.time (the time limit of
            the sequence) and args.reps (repetitions per configuration).
    """
    # Privacy goal
    epsilon = 1.0
    # Number of timestamps
    seq = lmdk_lib.get_seq(1, args.time)
    # Correlation degree (higher values means weaker correlations)
    cor_deg = np.array([.01, .1, 1.0])
    cor_lbl = ['Strong correlation', 'Moderate correlation', 'Weak correlation']
    # Distribution type
    dist_type = np.array(range(0, 4))
    # Number of landmarks: 0%, 20%, ..., 100% of the sequence.
    # Guard the step: for args.time < 5 the original int(args.time/5)
    # is 0 and range() raises ValueError on a zero step.
    step = max(1, args.time//5)
    lmdk_n = np.array(range(0, args.time + 1, step))
    # Width of bars
    bar_width = 1/(len(dist_type) + 1)
    # For each correlation degree
    for c_i, c in enumerate(cor_deg):
        # Logging
        title = cor_lbl[c_i]
        print('(%d/%d) %s' %(c_i + 1, len(cor_deg), title), end='', flush=True)
        # The transition matrix
        p = gdp.gen_trans_mt(2, c)
        # Bar offset
        x_offset = -(bar_width/2)*(len(dist_type) - 1)
        # Initialize plot
        lmdk_lib.plot_init()
        # The x axis: landmark percentage of the sequence
        x_i = np.arange(len(lmdk_n))
        plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
        plt.xlabel('Landmarks (%)')  # Set x axis label.
        x_margin = bar_width*(len(dist_type)/2 + 1)
        plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
        # The y axis
        plt.ylabel('Privacy loss')  # Set y axis label.
        plt.yscale('log')
        plt.ylim(epsilon/10, 100*len(seq))
        # plt.ylim(0, 10000)
        for d_i, d in enumerate(dist_type):
            print('.', end='', flush=True)
            # Initialization: per-percentage budget sum and privacy loss
            e = np.zeros(len(lmdk_n))
            a = np.zeros(len(lmdk_n))
            for i, n in enumerate(lmdk_n):
                for _ in range(args.reps):
                    # Generate landmarks
                    lmdks = lmdk_lib.get_lmdks(seq, n, d)
                    # Uniform budget allocation
                    e_cur = lmdk_bgt.uniform(seq, lmdks, epsilon)
                    _, _, a_cur = gdp.tpl_lmdk_mem(e_cur, p, p, seq, lmdks)
                    # Save privacy loss (averaged over repetitions)
                    e[i] += np.sum(e_cur)/args.reps
                    a[i] += np.sum(a_cur)/args.reps
            # Set label
            label = lmdk_lib.dist_type_to_str(d_i)
            if d_i == 1:
                label = 'Skewed'
            # Plot bar for current distribution
            plt.bar(
                x_i + x_offset,
                a,
                bar_width,
                label=label,
                linewidth=lmdk_lib.line_width
            )
            # Change offset for next bar
            x_offset += bar_width
        # Plot line for no correlation.
        # NOTE(review): e holds the values of the LAST distribution
        # iteration only; presumably the budget sums coincide across
        # distributions — confirm against lmdk_bgt.uniform.
        plt.plot(
            x_i,
            e,
            linewidth=lmdk_lib.line_width,
            color='#e0e0e0',
        )
        # Plot legend
        lmdk_lib.plot_legend()
        # Show plot
        # plt.show()
        # Save plot
        lmdk_lib.save_plot('../../rslt/dist_cor/' + title + '.pdf')
        print(' [OK]', flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args():
    """Build the command-line parser and parse sys.argv.

    Optional:
        reps - The number of repetitions.
        time - The time limit of the sequence.

    Returns:
        argparse.Namespace holding the parsed arguments.
    """
    # Create argument parser (no mandatory arguments).
    arg_parser = argparse.ArgumentParser()
    # Optional arguments.
    arg_parser.add_argument('-r', '--reps', help='The number of repetitions.', type=int, default=1)
    arg_parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=int, default=100)
    # Parse and return the arguments.
    return arg_parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    try:
        args = parse_args()
        start_time = time.time()
        main(args)
        end_time = time.time()
        # Report the wall-clock duration of the experiment.
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # Use sys.exit() instead of the site-provided exit() builtin,
        # which is intended for interactive sessions and is not
        # guaranteed to exist in all runtime configurations.
        sys.exit()
|
1576
code/lib/gdp.py
Normal file
1576
code/lib/gdp.py
Normal file
File diff suppressed because it is too large
Load Diff
Binary file not shown.
BIN
graphics/evaluation/dist-cor-mod.pdf
Normal file
BIN
graphics/evaluation/dist-cor-mod.pdf
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
rslt/avg_dist/avg-dist.pdf
Normal file
BIN
rslt/avg_dist/avg-dist.pdf
Normal file
Binary file not shown.
BIN
rslt/dist_cor/dist-cor-mod.pdf
Normal file
BIN
rslt/dist_cor/dist-cor-mod.pdf
Normal file
Binary file not shown.
Binary file not shown.
BIN
rslt/dist_cor/dist-cor-wk.pdf
Normal file
BIN
rslt/dist_cor/dist-cor-wk.pdf
Normal file
Binary file not shown.
@ -51,7 +51,8 @@ In general, we can claim that the Adaptive is the most reliable and best perform
|
|||||||
Moreover, designing a data-dependent sampling scheme would possibly result in better results for Adaptive.
|
Moreover, designing a data-dependent sampling scheme would possibly result in better results for Adaptive.
|
||||||
|
|
||||||
|
|
||||||
\paragraph{Temporal distance and correlation}
|
\subsubsection{Temporal distance and correlation}
|
||||||
|
|
||||||
Figure~\ref{fig:avg-dist} shows a comparison of the average temporal distance of the events from the previous/next {\thething} or the start/end of the time series for various distributions in synthetic data.
|
Figure~\ref{fig:avg-dist} shows a comparison of the average temporal distance of the events from the previous/next {\thething} or the start/end of the time series for various distributions in synthetic data.
|
||||||
More particularly, we count for every event the total number of events between itself and the nearest {\thething} or the series edge.
|
More particularly, we count for every event the total number of events between itself and the nearest {\thething} or the series edge.
|
||||||
We observe that the uniform and bimodal distributions tend to limit the regular event--{\thething} distance.
|
We observe that the uniform and bimodal distributions tend to limit the regular event--{\thething} distance.
|
||||||
@ -61,33 +62,33 @@ On the contrary, distributing the {\thethings} at one part of the sequence, as i
|
|||||||
|
|
||||||
\begin{figure}[htp]
|
\begin{figure}[htp]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=.5\linewidth]{avg-dist}%
|
\includegraphics[width=.5\linewidth]{evaluation/avg-dist}%
|
||||||
\caption{Average temporal distance of the events from the {\thethings} for different {\thethings} percentages within a time series in various {\thethings} distributions.}
|
\caption{Average temporal distance of the events from the {\thethings} for different {\thethings} percentages within a time series in various {\thethings} distributions.}
|
||||||
\label{fig:avg-dist}
|
\label{fig:avg-dist}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
Figure~\ref{fig:dist-cor} illustrates a comparison among the aforementioned distributions regarding the overall privacy loss under moderate (Figure~\ref{fig:dist-cor-mod}), and strong (Figure~\ref{fig:dist-cor-stg}) correlation degrees.
|
Figure~\ref{fig:dist-cor} illustrates a comparison among the aforementioned distributions regarding the overall privacy loss under (a)~weak, (b)~moderate, and (c)~strong temporal correlation degrees.
|
||||||
The line shows the overall privacy loss---for all cases of {\thethings} distribution---without temporal correlation.
|
The line shows the overall privacy loss---for all cases of {\thethings} distribution---without temporal correlation.
|
||||||
We skip the presentation of the results under a weak correlation degree, since they converge in this case.
|
In combination with Figure~\ref{fig:avg-dist}, we conclude that a greater average event--{\thething} distance in a distribution can result into greater overall privacy loss under moderate and strong temporal correlation.
|
||||||
In combination with Figure~\ref{fig:avg-dist}, we conclude that a greater average event--{\thething} distance in a distribution can result into greater overall privacy loss under moderate and strong temporal correlation.
|
This is due to the fact that the backward/forward privacy loss accumulates more over time in wider spaces without {\thethings} (see Section~\ref{sec:correlation}).
|
||||||
This is due to the fact that the backward/forward privacy loss accumulates more over time in wider spaces without {\thethings} (see Section~\ref{subsec:correlations}).
|
|
||||||
Furthermore, the behavior of the privacy loss is as expected regarding the temporal correlation degree.
|
Furthermore, the behavior of the privacy loss is as expected regarding the temporal correlation degree.
|
||||||
Predictably, a stronger correlation degree generates higher privacy loss while widening the gap between the different distribution cases.
|
Predictably, a stronger correlation degree generates higher privacy loss while widening the gap between the different distribution cases.
|
||||||
On the contrary, a weaker correlation degree makes it harder to differentiate among the {\thethings} distributions.
|
On the contrary, a weaker correlation degree makes it harder to differentiate among the {\thethings} distributions.
|
||||||
|
The privacy loss under a weak correlation degree converges.
|
||||||
|
|
||||||
\begin{figure}[htp]
|
\begin{figure}[htp]
|
||||||
\centering
|
\centering
|
||||||
\subcaptionbox{Weak correlation\label{fig:dist-cor-wk}}{%
|
\subcaptionbox{Weak correlation\label{fig:dist-cor-wk}}{%
|
||||||
\includegraphics[width=.5\linewidth]{dist-cor-wk}%
|
\includegraphics[width=.5\linewidth]{evaluation/dist-cor-wk}%
|
||||||
}%
|
}%
|
||||||
\hspace{\fill}
|
\hspace{\fill}
|
||||||
\subcaptionbox{Moderate correlation\label{fig:dist-cor-mod}}{%
|
\subcaptionbox{Moderate correlation\label{fig:dist-cor-mod}}{%
|
||||||
\includegraphics[width=.5\linewidth]{dist-cor-mod}%
|
\includegraphics[width=.5\linewidth]{evaluation/dist-cor-mod}%
|
||||||
}%
|
}%
|
||||||
\subcaptionbox{Strong correlation\label{fig:dist-cor-stg}}{%
|
\subcaptionbox{Strong correlation\label{fig:dist-cor-stg}}{%
|
||||||
\includegraphics[width=.5\linewidth]{dist-cor-stg}%
|
\includegraphics[width=.5\linewidth]{evaluation/dist-cor-stg}%
|
||||||
}%
|
}%
|
||||||
\caption{Privacy loss for different {\thethings} percentages and distributions, under weak, moderate, and strong degrees of temporal correlation.
|
\caption{Privacy loss for different {\thethings} percentages and distributions, under (a)~weak, (b)~moderate, and (c)~strong degrees of temporal correlation.
|
||||||
The line shows the overall privacy loss without temporal correlation.}
|
The line shows the overall privacy loss without temporal correlation.}
|
||||||
\label{fig:dist-cor}
|
\label{fig:dist-cor}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
Loading…
Reference in New Issue
Block a user