From 0fa215558ac2bd267a5714952c48329226d3ed56 Mon Sep 17 00:00:00 2001
From: Manos <manos@delkappa.com>
Date: Wed, 29 Sep 2021 03:12:24 +0200
Subject: [PATCH] code: Ready for copenhagen

---
 code/expt/bgt_cmp_copenhagen.py | 146 ++++++++++++++------------------
 code/lib/lmdk_bgt.py            |  55 ++++++++++++
 2 files changed, 118 insertions(+), 83 deletions(-)

diff --git a/code/expt/bgt_cmp_copenhagen.py b/code/expt/bgt_cmp_copenhagen.py
index cd7b256..e5b9455 100644
--- a/code/expt/bgt_cmp_copenhagen.py
+++ b/code/expt/bgt_cmp_copenhagen.py
@@ -3,10 +3,12 @@
 import sys
 sys.path.insert(1, '../lib')
 import argparse
+import ast
 from datetime import datetime
 from geopy.distance import distance
 import lmdk_bgt
 import lmdk_lib
+import math
 import numpy as np
 from matplotlib import pyplot as plt
 import time
@@ -28,7 +30,7 @@ def main(args):
   epsilon = 1.0
 
   # Number of methods
-  n = 6
+  n = 3
   # Width of bars
   bar_width = 1/(n + 1)
   # The x axis
@@ -46,111 +48,89 @@ def main(args):
   plt.xlabel('Landmarks percentage')  # Set x axis label.
   plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
   # The y axis
-  plt.ylabel('Mean absolute error (m)')  # Set y axis label.
-  plt.yscale('log')
-  plt.ylim(1, 100000000)
+  plt.ylabel('Mean absolute error')  # Set y axis label.
+  # plt.yscale('log')
+  plt.ylim(0, 1.4)
   # Bar offset
   x_offset = -(bar_width/2)*(n - 1)
 
   mae_u = np.zeros(len(lmdks_pct))
   mae_s = np.zeros(len(lmdks_pct))
   mae_a = np.zeros(len(lmdks_pct))
-  mae_r = np.zeros(len(lmdks_pct))
-  mae_d = np.zeros(len(lmdks_pct))
-  mae_i = np.zeros(len(lmdks_pct))
+  mae_evt = np.zeros(len(lmdks_pct))
+  mae_usr = np.zeros(len(lmdks_pct))
   for i, pct in enumerate(lmdks_pct):
     # Find landmarks
+    # lmdks = lmdk_lib.find_lmdks_tim(lmdk_data, seq, uid, pct)
     lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct)
 
-    print(pct, np.shape(lmdks)[0]/np.shape(seq)[0])
+    for _ in range(args.iter):
+      # Skip
+      rls_data_s, _ = lmdk_bgt.skip_cont(seq, lmdks, epsilon)
+      mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter
 
-    # for _ in range(args.iter):
-    #   # Skip
-    #   rls_data_s, _ = lmdk_bgt.skip(seq, lmdks, epsilon)
-    #   mae_s[i] += lmdk_bgt.mae(seq, rls_data_s)/args.iter
+      # Uniform
+      rls_data_u, _ = lmdk_bgt.uniform_cont(seq, lmdks, epsilon)
+      mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter
 
-    #   # Uniform
-    #   rls_data_u, _ = lmdk_bgt.uniform_r(seq, lmdks, epsilon)
-    #   mae_u[i] += lmdk_bgt.mae(seq, rls_data_u)/args.iter
+      # Adaptive
+      rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
+      mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
 
-    #   # Adaptive
-    #   rls_data_a, _, _ = lmdk_bgt.adaptive(seq, lmdks, epsilon, .5, .5)
-    #   mae_a[i] += lmdk_bgt.mae(seq, rls_data_a)/args.iter
+      # Event
+      rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
+      mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
+      # User
+      rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
+      mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
 
-    #   # Sample
-    #   rls_data_r, _, _ = lmdk_bgt.sample(seq, lmdks, epsilon)
-    #   mae_r[i] += lmdk_bgt.mae(seq, rls_data_r)/args.iter
+  plt.plot(
+    x_i,
+    mae_evt,
+    linewidth=lmdk_lib.line_width
+  )
 
-    #   # Discount
-    #   rls_data_d, _, _ = lmdk_bgt.discount(seq, lmdks, epsilon)
-    #   mae_d[i] += lmdk_bgt.mae(seq, rls_data_d)/args.iter
+  plt.plot(
+    x_i,
+    mae_usr,
+    linewidth=lmdk_lib.line_width
+  )
 
-    #   # Incremental
-    #   rls_data_i, _, _ = lmdk_bgt.incremental(seq, lmdks, epsilon, .5)
-    #   mae_i[i] += lmdk_bgt.mae(seq, rls_data_i)/args.iter
+  plt.bar(
+    x_i + x_offset,
+    mae_s,
+    bar_width,
+    label='Skip',
+    linewidth=lmdk_lib.line_width
+  )
+  x_offset += bar_width
+  plt.bar(
+    x_i + x_offset,
+    mae_u,
+    bar_width,
+    label='Uniform',
+    linewidth=lmdk_lib.line_width
+  )
+  x_offset += bar_width
+  plt.bar(
+    x_i + x_offset,
+    mae_a,
+    bar_width,
+    label='Adaptive',
+    linewidth=lmdk_lib.line_width
+  )
+  x_offset += bar_width
 
-  # plt.bar(
-  #   x_i + x_offset,
-  #   mae_s,
-  #   bar_width,
-  #   label='Skip',
-  #   linewidth=lmdk_lib.line_width
-  # )
-  # x_offset += bar_width
-  # # Plot bars
-  # plt.bar(
-  #   x_i + x_offset,
-  #   mae_u,
-  #   bar_width,
-  #   label='Uniform',
-  #   linewidth=lmdk_lib.line_width
-  # )
-  # x_offset += bar_width
-  # plt.bar(
-  #   x_i + x_offset,
-  #   mae_a,
-  #   bar_width,
-  #   label='Adaptive',
-  #   linewidth=lmdk_lib.line_width
-  # )
-  # x_offset += bar_width
-  # plt.bar(
-  #   x_i + x_offset,
-  #   mae_r,
-  #   bar_width,
-  #   label='Sample',
-  #   linewidth=lmdk_lib.line_width
-  # )
-  # x_offset += bar_width
-  # plt.bar(
-  #   x_i + x_offset,
-  #   mae_d,
-  #   bar_width,
-  #   label='Discount',
-  #   linewidth=lmdk_lib.line_width
-  # )
-  # x_offset += bar_width
-  # plt.bar(
-  #   x_i + x_offset,
-  #   mae_i,
-  #   bar_width,
-  #   label='Incremental',
-  #   linewidth=lmdk_lib.line_width
-  # )
-  # x_offset += bar_width
-
-  # path = str('rslt/bgt_cmp/' + d)
-  # # Plot legend
-  # lmdk_lib.plot_legend()
+  path = str('rslt/bgt_cmp/' + d)
+  # Plot legend
+  lmdk_lib.plot_legend()
   # # Show plot
-  # # plt.show()
+  # plt.show()
   # # Save plot
   # lmdk_lib.save_plot(path + '.pdf')
   print('[OK]', flush=True)
 
 
-
-
 def parse_args():
   '''
     Parse arguments.
diff --git a/code/lib/lmdk_bgt.py b/code/lib/lmdk_bgt.py
index 07474b3..f1cae78 100644
--- a/code/lib/lmdk_bgt.py
+++ b/code/lib/lmdk_bgt.py
@@ -343,6 +343,61 @@ def adaptive(seq, lmdks, epsilon, inc_rt, dec_rt):
   return rls_data, bgts, skipped
 
 
+def adaptive_cont(seq, lmdks, epsilon, inc_rt, dec_rt):
+  '''
+    Adaptive budget allocation that releases perturbed landmark flags.
+
+    Parameters:
+      seq - The point sequence; p[2] of every point is looked up in lmdks.
+      lmdks - The landmarks; a point whose p[2] is in here counts as one.
+      epsilon - The available privacy budget.
+      inc_rt - Sampling rate increase rate (used when the output changes).
+      dec_rt - Sampling rate decrease rate (used when the output repeats).
+    Returns:
+      rls_data - Per point a [r, o] pair: landmark flag and perturbed flag.
+      bgts - The privacy budget allocation (0 for every skipped point).
+      skipped - The number of skipped releases.
+  '''
+  # Start from the allocation returned by uniform()
+  bgts = uniform(seq, lmdks, epsilon)
+  # Released [r, o] pairs, one slot per point
+  rls_data = [None]*len(seq)
+  # The sampling rate (starts at 1)
+  samp_rt = 1
+  # Count the landmarks encountered so far
+  lmdk_cur = 0
+  # Track skipped releases
+  skipped = 0
+  for i, p in enumerate(seq):
+    # Check if current point is a landmark (p[2] is tested against lmdks)
+    r = p[2] in lmdks
+    if r:
+      lmdk_cur += 1
+    if lmdk_lib.should_sample(samp_rt) or i == 0:
+      # Perturb the landmark flag via randomized response using bgts[i]
+      o = lmdk_lib.randomized_response(r, bgts[i])
+      rls_data[i] = [r, o]
+      # Adjust sampling rate (the first point has no previous release)
+      if i > 0:
+        if rls_data[i - 1][1] == o:
+          # Same output as the previous release: decrease
+          samp_rt -= samp_rt*dec_rt
+        else:
+          # Output differs from the previous release: increase towards 1
+          samp_rt += (1 - samp_rt)*inc_rt
+    else:
+      skipped += 1
+      # Skip current release and reuse the previous pair (same list object)
+      rls_data[i] = rls_data[i - 1]
+      if r:
+        # Add an equal share of the current budget to every following point
+        for j in range(i + 1, len(seq)):
+          bgts[j] += bgts[i]/(len(lmdks) - lmdk_cur + 1)
+      # No budget was spent
+      bgts[i] = 0
+  return rls_data, bgts, skipped
+
+
 def skip(seq, lmdks, epsilon):
   '''
     Skip landmarks.