lmdk-expt: Reviewed all graphs for synthetic
This commit is contained in:
		
							
								
								
									
										121
									
								
								code/expt/avg_dist.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								code/expt/avg_dist.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,121 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
sys.path.insert(1, '../lib')
 | 
			
		||||
import argparse
 | 
			
		||||
import gdp
 | 
			
		||||
import lmdk_lib
 | 
			
		||||
import math
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
import numpy as np
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(args):
  '''
    Plot, for every landmark distribution type, the min-max normalized
    average distance of the timestamps from their surrounding limits, as
    the number of landmarks grows, and save the plot as a PDF.

    Parameters:
      args - The parsed arguments; `args.time` (the time limit of the
             sequence) and `args.reps` (the number of repetitions) are read.
  '''
  # Number of timestamps
  seq = lmdk_lib.get_seq(1, args.time)
  # Distribution type
  dist_type = np.array(range(0, 4))
  # Number of landmarks, in steps of a fifth of the time limit.
  # The step is forced to be at least 1 because `range` raises a ValueError
  # for a zero step, which is what a time limit below 5 used to produce.
  lmdk_n = np.array(range(0, args.time + 1, max(1, int(args.time/5))))

  markers = [
    '^', # Symmetric
    'v', # Skewed
    'D', # Bimodal
    's'  # Uniform
  ]

  # Lowest value that is drawn; it is also the lower limit of the y axis.
  y_min = .001

  # Initialize plot
  lmdk_lib.plot_init()
  # The x axis
  x_i = np.arange(len(lmdk_n))
  plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
  plt.xlabel('Landmarks (%)')  # Set x axis label.
  plt.xlim(x_i.min(), x_i.max())
  # The y axis
  plt.ylabel('Normalized average distance')  # Set y axis label.
  plt.yscale('log')
  plt.ylim(y_min, 1)
  # Logging
  print('Average distance', end='', flush=True)
  for d_i, d in enumerate(dist_type):
    avg_dist = np.zeros(len(lmdk_n))
    # Logging
    print('.', end='', flush=True)
    for i, n in enumerate(lmdk_n):
      for _ in range(args.reps):
        # Generate landmarks
        lmdks = lmdk_lib.get_lmdks(seq, n, d)
        # Calculate average distance
        avg_cur = 0
        for t in seq:
          # NOTE(review): presumably the previous/next landmark or the
          # sequence edge around t; confirm against gdp.get_limits.
          t_prv, t_nxt = gdp.get_limits(t, seq, lmdks)
          avg_cur += (abs(t - t_prv) - 1 + abs(t - t_nxt) - 1 )/len(seq)
        # Normalized average based on repetitions
        avg_dist[i] += avg_cur/args.reps
    # Rescaling (min-max normalization)
    # https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
    span = avg_dist.max() - avg_dist.min()
    if span > 0:
      avg_dist = (avg_dist - avg_dist.min())/span
    else:
      # All the averages are equal: dividing by the zero span would only
      # produce NaN values, so map everything to the minimum instead.
      avg_dist = np.zeros(len(lmdk_n))
    # Normalize for log scale: a zero cannot be placed on a logarithmic
    # axis, so every zero (wherever the minimum is) goes to the axis floor.
    avg_dist[avg_dist == 0] = y_min
    # Set label
    label = lmdk_lib.dist_type_to_str(d_i)
    if d_i == 1:
      # Shorter name for the legend.
      label = 'Skewed'
    # Plot line
    plt.plot(
      x_i,
      avg_dist,
      label=label,
      marker=markers[d_i],
      markersize=lmdk_lib.marker_size,
      markeredgewidth=0,
      linewidth=lmdk_lib.line_width
    )
  # Plot legend
  lmdk_lib.plot_legend()
  # Show plot
  # plt.show()
  # Save plot
  lmdk_lib.save_plot(str('../../rslt/avg_dist/' + 'avg-dist' + '.pdf'))
  print(' [OK]', flush=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_args():
  '''
    Parse arguments.

    Optional:
      reps - The number of repetitions (positive integer, default 1).
      time - The time limit of the sequence (positive integer, default 100).

    Returns:
      The namespace holding the parsed `reps` and `time` values.
  '''
  def positive_int(value):
    # Reject zero and negative values up front: they would otherwise only
    # surface later as an invalid range step or as NaN averages.
    number = int(value)
    if number < 1:
      raise argparse.ArgumentTypeError('%r is not a positive integer' % value)
    return number

  # Create argument parser.
  parser = argparse.ArgumentParser()

  # Mandatory arguments.

  # Optional arguments.
  parser.add_argument('-r', '--reps', help='The number of repetitions.', type=positive_int, default=1)
  parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=positive_int, default=100)

  # Parse arguments.
  args = parser.parse_args()

  return args
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
  # Script entry point: parse the arguments, run the experiment, and report
  # the elapsed wall-clock time.
  try:
    args = parse_args()
    start_time = time.time()
    main(args)
    end_time = time.time()
    print('##############################')
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # `sys.exit` instead of the bare `exit`: the latter is a helper that the
    # `site` module injects for interactive use and is not always available.
    sys.exit()
 | 
			
		||||
							
								
								
									
										131
									
								
								code/expt/dist_cor.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								code/expt/dist_cor.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,131 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
sys.path.insert(1, '../lib')
 | 
			
		||||
import argparse
 | 
			
		||||
import gdp
 | 
			
		||||
import itertools
 | 
			
		||||
import lmdk_bgt
 | 
			
		||||
import lmdk_lib
 | 
			
		||||
import numpy as np
 | 
			
		||||
import os
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(args):
  '''
    Plot, for every correlation degree, the privacy loss of each landmark
    distribution type as the number of landmarks grows, and save one PDF
    per correlation degree.

    Parameters:
      args - The parsed arguments; `args.time` (the time limit of the
             sequence) and `args.reps` (the number of repetitions) are read.
  '''
  # Privacy goal
  epsilon = 1.0
  # Number of timestamps
  seq = lmdk_lib.get_seq(1, args.time)
  # Correlation degree (higher values means weaker correlations)
  cor_deg = np.array([.01, .1, 1.0])
  cor_lbl = ['Strong correlation', 'Moderate correlation', 'Weak correlation']
  # Distribution type
  dist_type = np.array(range(0, 4))
  # Number of landmarks, in steps of a fifth of the time limit.
  # The step is forced to be at least 1 because `range` raises a ValueError
  # for a zero step, which is what a time limit below 5 used to produce.
  lmdk_n = np.array(range(0, args.time + 1, max(1, int(args.time/5))))
  # Width of bars
  bar_width = 1/(len(dist_type) + 1)
  # For each correlation degree
  for c_i, c in enumerate(cor_deg):
    # Logging
    title = cor_lbl[c_i]
    print('(%d/%d) %s' %(c_i + 1, len(cor_deg), title), end='', flush=True)
    # The transition matrix
    p = gdp.gen_trans_mt(2, c)
    # Bar offset (the group of bars is centered on its x tick)
    x_offset = -(bar_width/2)*(len(dist_type) - 1)
    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    x_i = np.arange(len(lmdk_n))
    plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
    plt.xlabel('Landmarks (%)')  # Set x axis label.
    x_margin = bar_width*(len(dist_type)/2 + 1)
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Privacy loss')  # Set y axis label.
    plt.yscale('log')
    plt.ylim(epsilon/10, 100*len(seq))
    for d_i, d in enumerate(dist_type):
      print('.', end='', flush=True)
      # Initialization
      e = np.zeros(len(lmdk_n))  # Summed allocated budget
      a = np.zeros(len(lmdk_n))  # Summed third output of gdp.tpl_lmdk_mem
      for i, n in enumerate(lmdk_n):
        for _ in range(args.reps):
          # Generate landmarks
          lmdks = lmdk_lib.get_lmdks(seq, n, d)
          # Uniform budget allocation
          e_cur = lmdk_bgt.uniform(seq, lmdks, epsilon)
          # The same matrix is passed twice.
          # NOTE(review): presumably as the backward and the forward
          # correlation; confirm against gdp.tpl_lmdk_mem.
          _, _, a_cur = gdp.tpl_lmdk_mem(e_cur, p, p, seq, lmdks)
          # Save privacy loss (averaged over the repetitions)
          e[i] += np.sum(e_cur)/args.reps
          a[i] += np.sum(a_cur)/args.reps
      # Set label
      label = lmdk_lib.dist_type_to_str(d_i)
      if d_i == 1:
        # Shorter name for the legend.
        label = 'Skewed'
      # Plot bar for current distribution
      plt.bar(
        x_i + x_offset,
        a,
        bar_width,
        label=label,
        linewidth=lmdk_lib.line_width
      )
      # Change offset for next bar
      x_offset += bar_width
    # Plot line for no correlation.
    # `e` holds the values of the last distribution of the loop above.
    plt.plot(
      x_i,
      e,
      linewidth=lmdk_lib.line_width,
      color='#e0e0e0',
    )
    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    # plt.show()
    # Save plot
    lmdk_lib.save_plot(str('../../rslt/dist_cor/' + title + '.pdf'))
    print(' [OK]', flush=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_args():
  '''
    Parse arguments.

    Optional:
      reps - The number of repetitions (positive integer, default 1).
      time - The time limit of the sequence (positive integer, default 100).

    Returns:
      The namespace holding the parsed `reps` and `time` values.
  '''
  def positive_int(value):
    # Reject zero and negative values up front: they would otherwise only
    # surface later as an invalid range step or as empty results.
    number = int(value)
    if number < 1:
      raise argparse.ArgumentTypeError('%r is not a positive integer' % value)
    return number

  # Create argument parser.
  parser = argparse.ArgumentParser()

  # Mandatory arguments.

  # Optional arguments.
  parser.add_argument('-r', '--reps', help='The number of repetitions.', type=positive_int, default=1)
  parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=positive_int, default=100)

  # Parse arguments.
  args = parser.parse_args()

  return args
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
  # Script entry point: parse the arguments, run the experiment, and report
  # the elapsed wall-clock time.
  try:
    args = parse_args()
    start_time = time.time()
    main(args)
    end_time = time.time()
    print('##############################')
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # `sys.exit` instead of the bare `exit`: the latter is a helper that the
    # `site` module injects for interactive use and is not always available.
    sys.exit()
 | 
			
		||||
							
								
								
									
										1576
									
								
								code/lib/gdp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1576
									
								
								code/lib/gdp.py
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								graphics/evaluation/dist-cor-mod.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								graphics/evaluation/dist-cor-mod.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rslt/avg_dist/avg-dist.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rslt/avg_dist/avg-dist.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rslt/dist_cor/dist-cor-mod.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rslt/dist_cor/dist-cor-mod.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								rslt/dist_cor/dist-cor-wk.pdf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								rslt/dist_cor/dist-cor-wk.pdf
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@ -51,7 +51,8 @@ In general, we can claim that the Adaptive is the most reliable and best perform
 | 
			
		||||
Moreover, designing a data-dependent sampling scheme could yield better results for Adaptive.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
\paragraph{Temporal distance and correlation}
 | 
			
		||||
\subsubsection{Temporal distance and correlation}
 | 
			
		||||
 | 
			
		||||
Figure~\ref{fig:avg-dist} shows a comparison of the average temporal distance of the events from the previous/next {\thething} or the start/end of the time series for various distributions in synthetic data.
 | 
			
		||||
More particularly, we count for every event the total number of events between itself and the nearest {\thething} or the series edge.
 | 
			
		||||
We observe that the uniform and bimodal distributions tend to limit the regular event--{\thething} distance.
 | 
			
		||||
@ -61,33 +62,33 @@ On the contrary, distributing the {\thethings} at one part of the sequence, as i
 | 
			
		||||
 | 
			
		||||
\begin{figure}[htp]
 | 
			
		||||
  \centering
 | 
			
		||||
  \includegraphics[width=.5\linewidth]{avg-dist}%
 | 
			
		||||
  \includegraphics[width=.5\linewidth]{evaluation/avg-dist}%
 | 
			
		||||
  \caption{Average temporal distance of the events from the {\thethings} for different {\thethings} percentages within a time series in various {\thethings} distributions.}
 | 
			
		||||
  \label{fig:avg-dist}
 | 
			
		||||
\end{figure}
 | 
			
		||||
 | 
			
		||||
Figure~\ref{fig:dist-cor} illustrates a comparison among the aforementioned distributions regarding the overall privacy loss under moderate (Figure~\ref{fig:dist-cor-mod}), and strong (Figure~\ref{fig:dist-cor-stg}) correlation degrees.
 | 
			
		||||
Figure~\ref{fig:dist-cor} illustrates a comparison among the aforementioned distributions regarding the overall privacy loss under (a)~weak, (b)~moderate, and (c)~strong temporal correlation degrees.
 | 
			
		||||
The line shows the overall privacy loss---for all cases of {\thethings} distribution---without temporal correlation.
 | 
			
		||||
We skip the presentation of the results under a weak correlation degree, since they converge in this case.
 | 
			
		||||
In combination with Figure~\ref{fig:avg-dist}, we conclude that a greater average event-{\thething} distance  in a distribution can result into greater overall privacy loss under moderate and strong temporal correlation.
 | 
			
		||||
This is due to the fact that the backward/forward privacy loss accumulates more over time in wider spaces without {\thethings} (see Section~\ref{subsec:correlations}).
 | 
			
		||||
In combination with Figure~\ref{fig:avg-dist}, we conclude that a greater average event--{\thething} distance in a distribution can result in greater overall privacy loss under moderate and strong temporal correlation.
 | 
			
		||||
This is due to the fact that the backward/forward privacy loss accumulates more over time in wider spaces without {\thethings} (see Section~\ref{sec:correlation}).
 | 
			
		||||
Furthermore, the behavior of the privacy loss is as expected regarding the temporal correlation degree.
 | 
			
		||||
Predictably, a stronger correlation degree generates higher privacy loss while widening the gap between the different distribution cases.
 | 
			
		||||
On the contrary, a weaker correlation degree makes it harder to differentiate among the {\thethings} distributions.
 | 
			
		||||
The privacy loss under a weak correlation degree converges.
 | 
			
		||||
 | 
			
		||||
\begin{figure}[htp]
 | 
			
		||||
  \centering
 | 
			
		||||
  \subcaptionbox{Weak correlation\label{fig:dist-cor-wk}}{%
 | 
			
		||||
    \includegraphics[width=.5\linewidth]{dist-cor-wk}%
 | 
			
		||||
    \includegraphics[width=.5\linewidth]{evaluation/dist-cor-wk}%
 | 
			
		||||
  }%
 | 
			
		||||
  \hspace{\fill}
 | 
			
		||||
  \subcaptionbox{Moderate correlation\label{fig:dist-cor-mod}}{%
 | 
			
		||||
    \includegraphics[width=.5\linewidth]{dist-cor-mod}%
 | 
			
		||||
    \includegraphics[width=.5\linewidth]{evaluation/dist-cor-mod}%
 | 
			
		||||
  }%
 | 
			
		||||
  \subcaptionbox{Strong correlation\label{fig:dist-cor-stg}}{%
 | 
			
		||||
    \includegraphics[width=.5\linewidth]{dist-cor-stg}%
 | 
			
		||||
    \includegraphics[width=.5\linewidth]{evaluation/dist-cor-stg}%
 | 
			
		||||
  }%
 | 
			
		||||
  \caption{Privacy loss for different {\thethings} percentages and distributions, under weak, moderate, and strong degrees of temporal correlation.
 | 
			
		||||
  \caption{Privacy loss for different {\thethings} percentages and distributions, under (a)~weak, (b)~moderate, and (c)~strong degrees of temporal correlation.
 | 
			
		||||
  The line shows the overall privacy loss without temporal correlation.}
 | 
			
		||||
  \label{fig:dist-cor}
 | 
			
		||||
\end{figure}
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user