lmdk-expt: Reviewed all graphs for synthetic
This commit is contained in:
		
							
								
								
									
										121
									
								
								code/expt/avg_dist.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								code/expt/avg_dist.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,121 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
sys.path.insert(1, '../lib')
 | 
			
		||||
import argparse
 | 
			
		||||
import gdp
 | 
			
		||||
import lmdk_lib
 | 
			
		||||
import math
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
import numpy as np
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _avg_lmdk_dist(seq, lmdks):
  '''
    Average, over every timestamp of the sequence, the summed gaps between
    the timestamp and the two limits that gdp.get_limits reports for it.

    Parameters:
      seq   - The sequence of timestamps.
      lmdks - The landmarks, as returned by lmdk_lib.get_lmdks.
    Returns:
      The average gap (each term is divided by len(seq) before summing).
  '''
  seq_len = len(seq)
  total = 0
  for t in seq:
    # presumably the closest limits before/after t - confirm in gdp.get_limits
    t_prv, t_nxt = gdp.get_limits(t, seq, lmdks)
    total += (abs(t - t_prv) - 1 + abs(t - t_nxt) - 1)/seq_len
  return total


def main(args):
  '''
    Plot, for every landmark distribution type, the min-max normalized
    average distance against the landmark percentage, and save the figure
    to '../../rslt/avg_dist/avg-dist.pdf'.

    Parameters:
      args - The parsed arguments; args.time (sequence length) and
             args.reps (repetitions averaged per point) are read.
  '''
  # Number of timestamps
  seq = lmdk_lib.get_seq(1, args.time)
  # Distribution type
  dist_type = np.array(range(0, 4))
  # Number of landmarks
  # A fifth of the time limit truncates to 0 when time < 5 and range()
  # rejects a zero step, so never step by less than 1.
  lmdk_step = max(1, int(args.time/5))
  lmdk_n = np.array(range(0, args.time + 1, lmdk_step))

  markers = [
    '^', # Symmetric
    'v', # Skewed
    'D', # Bimodal
    's'  # Uniform
  ]

  # Initialize plot
  lmdk_lib.plot_init()
  # The x axis
  x_i = np.arange(len(lmdk_n))
  plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
  plt.xlabel('Landmarks (%)')  # Set x axis label.
  plt.xlim(x_i.min(), x_i.max())
  # The y axis
  plt.ylabel('Normalized average distance')  # Set y axis label.
  plt.yscale('log')
  plt.ylim(.001, 1)
  # Logging
  print('Average distance', end='', flush=True)
  for d_i, d in enumerate(dist_type):
    avg_dist = np.zeros(len(lmdk_n))
    # Logging
    print('.', end='', flush=True)
    for i, n in enumerate(lmdk_n):
      for _ in range(args.reps):
        # Generate landmarks
        lmdks = lmdk_lib.get_lmdks(seq, n, d)
        # Normalized average based on repetitions
        avg_dist[i] += _avg_lmdk_dist(seq, lmdks)/args.reps
    # Rescaling (min-max normalization)
    # https://en.wikipedia.org/wiki/Feature_scaling#Rescaling_(min-max_normalization)
    dist_min = avg_dist.min()
    dist_max = avg_dist.max()
    if dist_max != dist_min:
      avg_dist = (avg_dist - dist_min)/(dist_max - dist_min)
    else:
      # A flat curve has no spread to rescale; dividing would only yield NaN.
      avg_dist = np.zeros(len(lmdk_n))
    # Normalize for log scale
    # Zero cannot be drawn on a log axis, so pin every zero (wherever the
    # minimum ended up) to the lower y limit.
    avg_dist[avg_dist == 0] = .001
    # Set label
    label = lmdk_lib.dist_type_to_str(d_i)
    if d_i == 1:
      label = 'Skewed'
    # Plot line
    plt.plot(
      x_i,
      avg_dist,
      label=label,
      marker=markers[d_i],
      markersize=lmdk_lib.marker_size,
      markeredgewidth=0,
      linewidth=lmdk_lib.line_width
    )
  # Plot legend
  lmdk_lib.plot_legend()
  # Show plot
  # plt.show()
  # Save plot
  lmdk_lib.save_plot('../../rslt/avg_dist/' + 'avg-dist' + '.pdf')
  print(' [OK]', flush=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _positive_int(value):
  '''
    Convert a command-line string to a strictly positive integer.

    Parameters:
      value - The raw argument string.
    Returns:
      The parsed integer.
    Raises:
      argparse.ArgumentTypeError - If the number is smaller than 1.
      ValueError                 - If the string is not an integer
                                   (argparse reports it as an invalid value).
  '''
  number = int(value)
  if number < 1:
    raise argparse.ArgumentTypeError('%r is not a positive integer' % value)
  return number


def parse_args():
  '''
    Parse arguments.

    Optional:
      reps - The number of repetitions (positive integer, default 1).
      time - The time limit of the sequence (positive integer, default 100).

    Returns:
      The argparse namespace with the attributes reps and time.
  '''
  # Create argument parser.
  parser = argparse.ArgumentParser()

  # Mandatory arguments.

  # Optional arguments.
  # Both values are rejected up front when they are not positive, instead
  # of letting the experiment run with a meaningless setting.
  parser.add_argument('-r', '--reps', help='The number of repetitions.', type=_positive_int, default=1)
  parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=_positive_int, default=100)

  # Parse arguments.
  args = parser.parse_args()

  return args
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
  # Script entry point: parse the arguments, run the experiment and report
  # the wall-clock time it took.
  try:
    args = parse_args()
    start_time = time.time()
    main(args)
    end_time = time.time()
    print('##############################')
    # The elapsed seconds are formatted as a time of day (HH:MM:SS).
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # sys.exit() rather than the site-provided exit(), which is intended
    # for interactive sessions and is missing when site is not loaded.
    sys.exit()
 | 
			
		||||
							
								
								
									
										131
									
								
								code/expt/dist_cor.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								code/expt/dist_cor.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,131 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
sys.path.insert(1, '../lib')
 | 
			
		||||
import argparse
 | 
			
		||||
import gdp
 | 
			
		||||
import itertools
 | 
			
		||||
import lmdk_bgt
 | 
			
		||||
import lmdk_lib
 | 
			
		||||
import numpy as np
 | 
			
		||||
import os
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(args):
  '''
    For every correlation degree, plot the privacy loss per landmark
    percentage as grouped bars (one bar per landmark distribution type) and
    save each figure to '../../rslt/dist_cor/<correlation label>.pdf'.

    Parameters:
      args - The parsed arguments; args.time (sequence length) and
             args.reps (repetitions averaged per point) are read.
  '''
  # Privacy goal
  epsilon = 1.0
  # Number of timestamps
  seq = lmdk_lib.get_seq(1, args.time)
  # Correlation degree (higher values means weaker correlations)
  cor_deg = np.array([.01, .1, 1.0])
  cor_lbl = ['Strong correlation', 'Moderate correlation', 'Weak correlation']
  # Distribution type
  dist_type = np.array(range(0, 4))
  # Number of landmarks
  # A fifth of the time limit truncates to 0 when time < 5 and range()
  # rejects a zero step, so never step by less than 1.
  lmdk_step = max(1, int(args.time/5))
  lmdk_n = np.array(range(0, args.time + 1, lmdk_step))
  # Width of bars
  bar_width = 1/(len(dist_type) + 1)
  # For each correlation degree
  for c_i, c in enumerate(cor_deg):
    # Logging
    title = cor_lbl[c_i]
    print('(%d/%d) %s' %(c_i + 1, len(cor_deg), title), end='', flush=True)
    # The transition matrix
    p = gdp.gen_trans_mt(2, c)
    # Bar offset
    # Start left of the tick so that the group of bars is centered on it.
    x_offset = -(bar_width/2)*(len(dist_type) - 1)
    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    x_i = np.arange(len(lmdk_n))
    plt.xticks(x_i, ((lmdk_n/len(seq))*100).astype(int))
    plt.xlabel('Landmarks (%)')  # Set x axis label.
    x_margin = bar_width*(len(dist_type)/2 + 1)
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Privacy loss')  # Set y axis label.
    plt.yscale('log')
    plt.ylim(epsilon/10, 100*len(seq))
    # plt.ylim(0, 10000)
    for d_i, d in enumerate(dist_type):
      print('.', end='', flush=True)
      # Initialization
      # e accumulates the allocated budget, a the loss returned by
      # gdp.tpl_lmdk_mem, both averaged over the repetitions.
      e = np.zeros(len(lmdk_n))
      a = np.zeros(len(lmdk_n))
      for i, n in enumerate(lmdk_n):
        for _ in range(args.reps):
          # Generate landmarks
          lmdks = lmdk_lib.get_lmdks(seq, n, d)
          # Uniform budget allocation
          e_cur = lmdk_bgt.uniform(seq, lmdks, epsilon)
          _, _, a_cur = gdp.tpl_lmdk_mem(e_cur, p, p, seq, lmdks)
          # Save privacy loss
          e[i] += np.sum(e_cur)/args.reps
          a[i] += np.sum(a_cur)/args.reps
      # Set label
      label = lmdk_lib.dist_type_to_str(d_i)
      if d_i == 1:
        label = 'Skewed'
      # Plot bar for current distribution
      plt.bar(
        x_i + x_offset,
        a,
        bar_width,
        label=label,
        linewidth=lmdk_lib.line_width
      )
      # Change offset for next bar
      x_offset += bar_width
    # Plot line for no correlation
    # NOTE(review): e holds the values of the last distribution type
    # iterated above; presumably the uniform allocation total does not
    # depend on the distribution - confirm.
    plt.plot(
      x_i,
      e,
      linewidth=lmdk_lib.line_width,
      color='#e0e0e0',
    )
    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    # plt.show()
    # Save plot
    lmdk_lib.save_plot('../../rslt/dist_cor/' + title + '.pdf')
    print(' [OK]', flush=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _positive_int(value):
  '''
    Convert a command-line string to a strictly positive integer.

    Parameters:
      value - The raw argument string.
    Returns:
      The parsed integer.
    Raises:
      argparse.ArgumentTypeError - If the number is smaller than 1.
      ValueError                 - If the string is not an integer
                                   (argparse reports it as an invalid value).
  '''
  number = int(value)
  if number < 1:
    raise argparse.ArgumentTypeError('%r is not a positive integer' % value)
  return number


def parse_args():
  '''
    Parse arguments.

    Optional:
      reps - The number of repetitions (positive integer, default 1).
      time - The time limit of the sequence (positive integer, default 100).

    Returns:
      The argparse namespace with the attributes reps and time.
  '''
  # Create argument parser.
  parser = argparse.ArgumentParser()

  # Mandatory arguments.

  # Optional arguments.
  # Both values are rejected up front when they are not positive, instead
  # of letting the experiment run with a meaningless setting.
  parser.add_argument('-r', '--reps', help='The number of repetitions.', type=_positive_int, default=1)
  parser.add_argument('-t', '--time', help='The time limit of the sequence.', type=_positive_int, default=100)

  # Parse arguments.
  args = parser.parse_args()

  return args
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
  # Script entry point: parse the arguments, run the experiment and report
  # the wall-clock time it took.
  try:
    args = parse_args()
    start_time = time.time()
    main(args)
    end_time = time.time()
    print('##############################')
    # The elapsed seconds are formatted as a time of day (HH:MM:SS).
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # sys.exit() rather than the site-provided exit(), which is intended
    # for interactive sessions and is missing when site is not loaded.
    sys.exit()
 | 
			
		||||
		Reference in New Issue
	
	Block a user