expt_lmdk_sel: Testing the Pareto principle

2021-10-05 23:40:15 +02:00
parent dbebff7601
commit 63fd33f05a
7 changed files with 148 additions and 0 deletions
--- a/code/expt/expt_lmdk_sel-pareto.py
+++ b/code/expt/expt_lmdk_sel-pareto.py
@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+
+import sys
+sys.path.insert(1, '../lib')
+import argparse
+import lmdk_lib
+import lmdk_sel
+import exp_mech
+import numpy as np
+import os
+from matplotlib import pyplot as plt
+import time
+
+
+'''
+  Run the Pareto experiment and plot its results.
+
+  For every landmark distribution type, draw one grouped bar chart:
+  one group per number of landmarks, one bar per privacy budget.
+  The height of a bar is the average, over the repetitions, of
+  lmdk_lib.get_norm between the histogram and the option sampled by
+  exp_mech.exponential_pareto.
+
+  Parameters:
+    args - The parsed arguments; `time` and `reps` are read.
+'''
+def main(args):
+  # Privacy budgets to compare (one bar each)
+  budgets = [.001, .01, .1, 1.0, 10.0, 100.0]
+  # The sequence of timestamps
+  timeline = lmdk_lib.get_seq(1, args.time)
+  # Identifiers of the landmark distribution types
+  dist_types = np.arange(-1, 4)
+  # Numbers of landmarks: from 20% of the time limit, in steps of 20%
+  lmdk_counts = np.array(range(int(.2*args.time), args.time, int(args.time/5)))
+  # Bar geometry: the bars of a group plus one bar of spacing fill a unit
+  width = 1/(len(budgets) + 1)
+  positions = np.arange(len(lmdk_counts))
+  margin = width*(len(budgets)/2 + 1)
+  for step, dist in enumerate(dist_types, start=1):
+    # Logging
+    title = lmdk_lib.dist_type_to_str(dist) + ' landmark distribution'
+    print('(%d/%d) %s... ' %(step, len(dist_types), title), end='', flush=True)
+    # Initialize plot
+    lmdk_lib.plot_init()
+    # The x axis shows the landmarks as a percentage of the sequence
+    plt.xticks(positions, ((lmdk_counts/len(timeline))*100).astype(int))
+    plt.xlabel('Landmarks (%)')
+    plt.xlim(positions.min() - margin, positions.max() + margin)
+    # The y axis
+    plt.ylabel('Mean absolute error')
+    # Start left of the group center so that the bars end up centered
+    offset = -(width/2)*(len(budgets) - 1)
+    for eps in budgets:
+      errors = np.zeros(len(lmdk_counts))
+      for slot, count in enumerate(lmdk_counts):
+        for _ in range(args.reps):
+          lmdks = lmdk_lib.get_lmdks(timeline, count, dist)
+          hist, _ = lmdk_lib.get_hist(timeline, lmdks)
+          opts = lmdk_sel.get_opts_from_top_h(timeline, lmdks)
+          # The sensitivity of the scoring function is fixed to 1.0
+          res, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps)
+          # Accumulate the average over the repetitions
+          errors[slot] += lmdk_lib.get_norm(hist, res)/args.reps
+      # Plot the bars of the current privacy budget
+      plt.bar(
+        positions + offset,
+        errors,
+        width,
+        label='\u03B5 = ' + '{:.0e}'.format(eps),
+        linewidth=lmdk_lib.line_width
+      )
+      # Move to the slot of the next privacy budget
+      offset += width
+    # Plot legend
+    lmdk_lib.plot_legend()
+    # Save plot
+    lmdk_lib.save_plot('../../rslt/lmdk_sel-pareto/' + title + '.pdf')
+    print('[OK]', flush=True)
+
+
+'''
+  Build the command-line interface and parse the arguments.
+
+  Optional:
+    reps - The number of repetitions (default: 1).
+    time - The time limit of the sequence (default: 100).
+  Returns:
+    args - The namespace holding the parsed values.
+'''
+def parse_args():
+  cli = argparse.ArgumentParser()
+  # There are no mandatory arguments; every option is an integer
+  # that falls back to a default value.
+  options = (
+    ('-r', '--reps', 'The number of repetitions.', 1),
+    ('-t', '--time', 'The time limit of the sequence.', 100),
+  )
+  for short, full, text, fallback in options:
+    cli.add_argument(short, full, help=text, type=int, default=fallback)
+  return cli.parse_args()
+
+
+# Script entry point: run the experiment and report the wall-clock time.
+if __name__ == '__main__':
+  try:
+    began = time.time()
+    main(parse_args())
+    # Duration in seconds, broken down so that it can be shown as HH:MM:SS
+    elapsed = time.gmtime(time.time() - began)
+    frame = '##############################'
+    print(frame)
+    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', elapsed)))
+    print(frame)
+  except KeyboardInterrupt:
+    # Ctrl-C ends the run without a traceback
+    print('Interrupted by user.')
+    exit()
--- a/code/lib/exp_mech.py
+++ b/code/lib/exp_mech.py
@ -62,6 +62,47 @@ def exponential(x, R, u, delta, epsilon):
    return np.array([]), pr


+'''
+  The exponential mechanism, restricted to the top 20% of the
+  possible outputs (Pareto principle).
+
+  Only the 20% of the outputs with the highest raw scores (at least
+  one output) are kept as candidates; the sampling takes place among
+  them.
+
+  Parameters:
+    x - The data.
+    R - The possible outputs.
+    u - The scoring function.
+    delta - The sensitivity of the scoring function.
+    epsilon - The privacy budget.
+  Returns:
+    res - A randomly sampled output among the kept candidates
+          (an empty array when R is empty).
+    pr - The PDF of the kept candidates, in ascending order of
+         raw score.
+'''
+def exponential_pareto(x, R, u, delta, epsilon):
+  # Calculate the score for each element of R
+  scores = np.array([u(x, r) for r in R], dtype=float)
+  # Nothing to choose from
+  if scores.size == 0:
+    return np.array([]), np.array([])
+  # Keep the top 20%, but never fewer than one candidate
+  n = max(1, int(scores.size*.2))
+  # Remember the positions of the kept candidates in R, so that the
+  # sampled element is one of them and not merely one of the first n
+  # NOTE(review): the highest raw scores are kept, although the
+  # normalization below favors the lowest ones - confirm the intent.
+  kept = np.argsort(scores, kind='stable')[-n : ]
+  scores = scores[kept]
+  # Normalize the scores between 0 and 1
+  # (the higher, the better the utility)
+  span = np.max(scores) - np.min(scores)
+  if span > 0:
+    scores = 1 - (scores - np.min(scores))/span
+  else:
+    # All kept candidates are equally good; avoid dividing by zero
+    scores = np.ones(n)
+
+  # Calculate the probability for each element, based on its score
+  # (shifting by the maximum does not change the normalized result,
+  # but prevents overflows for large privacy budgets)
+  weights = epsilon*scores/(2*delta)
+  pr = np.exp(weights - np.max(weights))
+
+  # Normalize the probabilities so that they sum to 1
+  pr = pr/np.sum(pr)
+
+  # Choose one of the kept elements of R based on the probabilities
+  return R[kept[np.random.choice(n, p = pr)]], pr
+
+
 def main():
  start, end = 1.0, 10.0
  scale = 1.0
--- a/rslt/lmdk_sel-pareto/Bimodal
+++ b/rslt/lmdk_sel-pareto/Bimodal
--- a/rslt/lmdk_sel-pareto/Left-skewed
+++ b/rslt/lmdk_sel-pareto/Left-skewed
--- a/rslt/lmdk_sel-pareto/Right-skewed
+++ b/rslt/lmdk_sel-pareto/Right-skewed
--- a/rslt/lmdk_sel-pareto/Symmetric
+++ b/rslt/lmdk_sel-pareto/Symmetric
--- a/rslt/lmdk_sel-pareto/Uniform
+++ b/rslt/lmdk_sel-pareto/Uniform