code: Ready for copenhagen

2021-09-29 03:12:24 +02:00
parent f5a6b317ac
commit 0fa215558a
2 changed files with 118 additions and 83 deletions
--- a/code/expt/bgt_cmp_copenhagen.py
+++ b/code/expt/bgt_cmp_copenhagen.py
@ -3,10 +3,12 @@
 import sys
 sys.path.insert(1, '../lib')
 import argparse
 import ast
 from datetime import datetime
 from geopy.distance import distance
 import lmdk_bgt
 import lmdk_lib
 import math
 import numpy as np
 from matplotlib import pyplot as plt
 import time
@ -28,7 +30,7 @@ def main(args):
  epsilon = 1.0
  # Number of methods
-  n = 6
+  n = 3
  # Width of bars
  bar_width = 1/(n + 1)
  # The x axis
@ -46,111 +48,89 @@ def main(args):
  plt.xlabel('Landmarks percentage')  # Set x axis label.
  plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
  # The y axis
-  plt.ylabel('Mean absolute error (m)')  # Set y axis label.
+  plt.ylabel('Mean absolute error')  # Set y axis label.
-  plt.yscale('log')
+  # plt.yscale('log')
-  plt.ylim(1, 100000000)
+  plt.ylim(0, 1.4)
  # Bar offset
  x_offset = -(bar_width/2)*(n - 1)
  mae_u = np.zeros(len(lmdks_pct))
  mae_s = np.zeros(len(lmdks_pct))
  mae_a = np.zeros(len(lmdks_pct))
-  mae_r = np.zeros(len(lmdks_pct))
+  mae_evt = np.zeros(len(lmdks_pct))
-  mae_d = np.zeros(len(lmdks_pct))
+  mae_usr = np.zeros(len(lmdks_pct))
  mae_i = np.zeros(len(lmdks_pct))
  for i, pct in enumerate(lmdks_pct):
    # Find landmarks
    # lmdks = lmdk_lib.find_lmdks_tim(lmdk_data, seq, uid, pct)
    lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct)
-    print(pct, np.shape(lmdks)[0]/np.shape(seq)[0])
+    for _ in range(args.iter):
      # Skip
      rls_data_s, _ = lmdk_bgt.skip_cont(seq, lmdks, epsilon)
      mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter
-    # for _ in range(args.iter):
+      # Uniform
-    #   # Skip
+      rls_data_u, _ = lmdk_bgt.uniform_cont(seq, lmdks, epsilon)
-    #   rls_data_s, _ = lmdk_bgt.skip(seq, lmdks, epsilon)
+      mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter
    #   mae_s[i] += lmdk_bgt.mae(seq, rls_data_s)/args.iter
-    #   # Uniform
+      # Adaptive
-    #   rls_data_u, _ = lmdk_bgt.uniform_r(seq, lmdks, epsilon)
+      rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
-    #   mae_u[i] += lmdk_bgt.mae(seq, rls_data_u)/args.iter
+      mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
-    #   # Adaptive
+      # Event
-    #   rls_data_a, _, _ = lmdk_bgt.adaptive(seq, lmdks, epsilon, .5, .5)
+      rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
-    #   mae_a[i] += lmdk_bgt.mae(seq, rls_data_a)/args.iter
+      mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
      # User
      rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
      mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
-    #   # Sample
+  plt.plot(
-    #   rls_data_r, _, _ = lmdk_bgt.sample(seq, lmdks, epsilon)
+    x_i,
-    #   mae_r[i] += lmdk_bgt.mae(seq, rls_data_r)/args.iter
+    mae_evt,
    linewidth=lmdk_lib.line_width
  )
-    #   # Discount
+  plt.plot(
-    #   rls_data_d, _, _ = lmdk_bgt.discount(seq, lmdks, epsilon)
+    x_i,
-    #   mae_d[i] += lmdk_bgt.mae(seq, rls_data_d)/args.iter
+    mae_usr,
    linewidth=lmdk_lib.line_width
  )
-    #   # Incremental
+  plt.bar(
-    #   rls_data_i, _, _ = lmdk_bgt.incremental(seq, lmdks, epsilon, .5)
+    x_i + x_offset,
-    #   mae_i[i] += lmdk_bgt.mae(seq, rls_data_i)/args.iter
+    mae_s,
    bar_width,
    label='Skip',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
  plt.bar(
    x_i + x_offset,
    mae_u,
    bar_width,
    label='Uniform',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
  plt.bar(
    x_i + x_offset,
    mae_a,
    bar_width,
    label='Adaptive',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
-  # plt.bar(
+  path = str('rslt/bgt_cmp/' + d)
-  #   x_i + x_offset,
+  # Plot legend
-  #   mae_s,
+  lmdk_lib.plot_legend()
  #   bar_width,
  #   label='Skip',
  #   linewidth=lmdk_lib.line_width
  # )
  # x_offset += bar_width
  # # Plot bars
  # plt.bar(
  #   x_i + x_offset,
  #   mae_u,
  #   bar_width,
  #   label='Uniform',
  #   linewidth=lmdk_lib.line_width
  # )
  # x_offset += bar_width
  # plt.bar(
  #   x_i + x_offset,
  #   mae_a,
  #   bar_width,
  #   label='Adaptive',
  #   linewidth=lmdk_lib.line_width
  # )
  # x_offset += bar_width
  # plt.bar(
  #   x_i + x_offset,
  #   mae_r,
  #   bar_width,
  #   label='Sample',
  #   linewidth=lmdk_lib.line_width
  # )
  # x_offset += bar_width
  # plt.bar(
  #   x_i + x_offset,
  #   mae_d,
  #   bar_width,
  #   label='Discount',
  #   linewidth=lmdk_lib.line_width
  # )
  # x_offset += bar_width
  # plt.bar(
  #   x_i + x_offset,
  #   mae_i,
  #   bar_width,
  #   label='Incremental',
  #   linewidth=lmdk_lib.line_width
  # )
  # x_offset += bar_width
  # path = str('rslt/bgt_cmp/' + d)
  # # Plot legend
  # lmdk_lib.plot_legend()
  # # Show plot
-  # # plt.show()
+  # plt.show()
  # # Save plot
  # lmdk_lib.save_plot(path + '.pdf')
  print('[OK]', flush=True)
 def parse_args():
  '''
    Parse arguments.
--- a/code/lib/lmdk_bgt.py
+++ b/code/lib/lmdk_bgt.py
@ -343,6 +343,61 @@ def adaptive(seq, lmdks, epsilon, inc_rt, dec_rt):
  return rls_data, bgts, skipped
 def adaptive_cont(seq, lmdks, epsilon, inc_rt, dec_rt):
  '''
    Adaptive budget allocation for landmark-membership releases.

    For every point, the value that is released is whether the point
    is a landmark (p[2] in lmdks).  A release is either sampled, i.e.,
    passed through lmdk_lib.randomized_response with the budget of the
    point, or skipped, i.e., approximated with the previously released
    value.  The sampling rate decreases when two consecutive released
    values are equal and increases otherwise.

    Parameters:
      seq - The point sequence.
      lmdks - The landmarks (tested for membership against p[2]).
      epsilon - The available privacy budget.
      inc_rt - Sampling rate increase rate.
      dec_rt - Sampling rate decrease rate.
    Returns:
      rls_data - The released data; one [true value, released value]
                 pair for every point of seq.
      bgts - The privacy budget allocation.
      skipped - The number of skipped releases.
  '''
  # Uniform budget allocation
  bgts = uniform(seq, lmdks, epsilon)
  # Released
  rls_data = [None]*len(seq)
  # The sampling rate
  samp_rt = 1
  # Track landmarks
  lmdk_cur = 0
  # Track skipped releases
  skipped = 0
  for i, p in enumerate(seq):
    # Check if current point is a landmark
    r = p[2] in lmdks
    if r:
      lmdk_cur += 1
    # The first point is always released (there is no previous release
    # to fall back to).  should_sample stays first in the condition so
    # that it is evaluated for every point, exactly as before.
    if lmdk_lib.should_sample(samp_rt) or i == 0:
      # Add noise to original data
      o = lmdk_lib.randomized_response(r, bgts[i])
      rls_data[i] = [r, o]
      # Adjust sampling rate
      if i > 0:
        if rls_data[i - 1][1] == o:
          # Decrease (the released value did not change)
          samp_rt -= samp_rt*dec_rt
        else:
          # Increase (the released value changed)
          samp_rt += (1 - samp_rt)*inc_rt
    else:
      skipped += 1
      # Skip current release and approximate with previous.
      # Only the released value is taken from the previous entry; the
      # true value must remain that of the current point, because the
      # pair is the only place where the ground truth is stored.
      # Building a new pair also avoids sharing one list object between
      # consecutive entries.
      rls_data[i] = [r, rls_data[i - 1][1]]
      if r:
        # Allocate the current budget to the following releases uniformly
        # (the divisor is the number of landmarks not seen yet, plus one)
        for j in range(i + 1, len(seq)):
          bgts[j] += bgts[i]/(len(lmdks) - lmdk_cur + 1)
      # No budget was spent
      bgts[i] = 0
  return rls_data, bgts, skipped
 def skip(seq, lmdks, epsilon):
  '''
    Skip landmarks.