From e1c78fbda925151b8a98609d6b97898eae22ac92 Mon Sep 17 00:00:00 2001
From: Manos Katsomallos <manos@delkappa.com>
Date: Fri, 1 Oct 2021 21:30:32 +0200
Subject: [PATCH] copenhagen: New experiment

---
 code/expt/bgt_cmp_hue.py                      | 169 ------------------
 .../{bgt_cmp_copenhagen.py => copenhagen.py}  |  38 ++--
 code/parse_copenhagen.py                      |  34 ++--
 3 files changed, 30 insertions(+), 211 deletions(-)
 delete mode 100644 code/expt/bgt_cmp_hue.py
 rename code/expt/{bgt_cmp_copenhagen.py => copenhagen.py} (87%)

diff --git a/code/expt/bgt_cmp_hue.py b/code/expt/bgt_cmp_hue.py
deleted file mode 100644
index 56e3751..0000000
--- a/code/expt/bgt_cmp_hue.py
+++ /dev/null
@@ -1,169 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-sys.path.insert(1, '../lib')
-import argparse
-import ast
-from datetime import datetime
-from geopy.distance import distance
-import lmdk_bgt
-import lmdk_lib
-import math
-import numpy as np
-from matplotlib import pyplot as plt
-import time
-
-
-def main(args):
-  res_file = '/home/manos/Cloud/Data/HUE/Results.zip'
-  # User's consumption
-  seq = lmdk_lib.load_data(args, 'cons')
-  # The name of the dataset
-  d = 'HUE'
-  # The landmarks percentages
-  lmdks_pct = [0, 20, 40, 60, 80, 100]
-  # Landmarks' thresholds
-  lmdks_th = [0, .54, .68, .88, 1.12, 10]
-  # The privacy budget
-  epsilon = 10.0
-
-  # Number of methods
-  n = 3
-  # Width of bars
-  bar_width = 1/(n + 1)
-  # The x axis
-  x_i = np.arange(len(lmdks_pct))
-  x_margin = bar_width*(n/2 + 1)
-
-  print('\n##############################', d, '\n')
-
-  # Initialize plot
-  lmdk_lib.plot_init()
-  # The x axis
-  plt.xticks(x_i, np.array(lmdks_pct, int))
-  plt.xlabel('Landmarks (%)')  # Set x axis label.
-  plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
-  # The y axis
-  plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
-  plt.yscale('log')
-  # plt.ylim(.01, 10000)
-  # Bar offset
-  x_offset = -(bar_width/2)*(n - 1)
-
-  mae_u = np.zeros(len(lmdks_pct))
-  mae_s = np.zeros(len(lmdks_pct))
-  mae_a = np.zeros(len(lmdks_pct))
-  mae_evt = 0
-  mae_usr = 0
-
-  for i, pct in enumerate(lmdks_pct):
-    # Find landmarks
-    lmdks = seq[seq[:, 1] < lmdks_th[i]]
-
-    for _ in range(args.iter):
-      # Skip
-      rls_data_s, bgts_s = lmdk_bgt.skip_cons(seq, lmdks, epsilon)
-      # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s)
-      mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter
-
-      # Uniform
-      rls_data_u, bgts_u = lmdk_bgt.uniform_cons(seq, lmdks, epsilon)
-      mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter
-
-      # Adaptive
-      rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon, .5, .5)
-      mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter
-
-      # Calculate once
-      # Event
-      if i == 0:
-        rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon)
-        mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter
-      # User
-      if i == 0:
-        rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[len(lmdks_th)-1]], epsilon)
-        mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter
-
-  plt.axhline(
-    y = mae_evt,
-    color = '#212121',
-    linewidth=lmdk_lib.line_width
-  )
-  plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
-
-  plt.axhline(
-    y = mae_usr,
-    color = '#616161',
-    linewidth=lmdk_lib.line_width
-  )
-  plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
-
-  plt.bar(
-    x_i + x_offset,
-    mae_s,
-    bar_width,
-    label='Skip',
-    linewidth=lmdk_lib.line_width
-  )
-  x_offset += bar_width
-  plt.bar(
-    x_i + x_offset,
-    mae_u,
-    bar_width,
-    label='Uniform',
-    linewidth=lmdk_lib.line_width
-  )
-  x_offset += bar_width
-  plt.bar(
-    x_i + x_offset,
-    mae_a,
-    bar_width,
-    label='Adaptive',
-    linewidth=lmdk_lib.line_width
-  )
-  x_offset += bar_width
-
-  path = str('../../rslt/bgt_cmp/' + d)
-  # Plot legend
-  lmdk_lib.plot_legend()
-  # Show plot
-  # plt.show()
-  # Save plot
-  lmdk_lib.save_plot(path + '.pdf')
-  print('[OK]', flush=True)
-
-
-def parse_args():
-  '''
-    Parse arguments.
-
-    Optional:
-      res  - The results archive file.
-      iter - The total iterations.
-  '''
-  # Create argument parser.
-  parser = argparse.ArgumentParser()
-
-  # Mandatory arguments.
-
-  # Optional arguments.
-  parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip')
-  parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)
-
-  # Parse arguments.
-  args = parser.parse_args()
-
-  return args
-
-
-if __name__ == '__main__':
-  try:
-    start_time = time.time()
-    main(parse_args())
-    end_time = time.time()
-    print('##############################')
-    print('Time   : %.4fs' % (end_time - start_time))
-    print('##############################')
-  except KeyboardInterrupt:
-    print('Interrupted by user.')
-    exit()
diff --git a/code/expt/bgt_cmp_copenhagen.py b/code/expt/copenhagen.py
similarity index 87%
rename from code/expt/bgt_cmp_copenhagen.py
rename to code/expt/copenhagen.py
index cd9c0c4..544591a 100644
--- a/code/expt/bgt_cmp_copenhagen.py
+++ b/code/expt/copenhagen.py
@@ -19,11 +19,11 @@ def main(args):
   # Contacts for all users
   cont_data = lmdk_lib.load_data(args, 'cont')
   # Contacts for landmark's percentages for all users
-  lmdk_data = lmdk_lib.load_data(args, 'usrs_expt')
+  lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
   # The name of the dataset
   d = 'Copenhagen'
   # The user's id
-  uid = '623'
+  uid = '449'
   # The landmarks percentages
   lmdks_pct = [0, 20, 40, 60, 80, 100]
   # The privacy budget
@@ -39,7 +39,7 @@ def main(args):
 
   print('\n##############################', d, '\n')
   # Get user's contacts sequence
-  seq = cont_data[cont_data[:, 1] == float(uid)]
+  seq = cont_data[cont_data[:, 1] == float(uid)][:1000]
 
   # Initialize plot
   lmdk_lib.plot_init()
@@ -50,15 +50,15 @@ def main(args):
   # The y axis
   plt.ylabel('Mean absolute error')  # Set y axis label.
   # plt.yscale('log')
-  plt.ylim(0, 1.4)
+  # plt.ylim(0, 1.4)
   # Bar offset
   x_offset = -(bar_width/2)*(n - 1)
 
   mae_u = np.zeros(len(lmdks_pct))
   mae_s = np.zeros(len(lmdks_pct))
   mae_a = np.zeros(len(lmdks_pct))
-  mae_evt = np.zeros(len(lmdks_pct))
-  mae_usr = np.zeros(len(lmdks_pct))
+  mae_evt = 0
+  mae_usr = 0
 
   for i, pct in enumerate(lmdks_pct):
     # Find landmarks
@@ -79,30 +79,28 @@ def main(args):
       rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
       mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
 
-      # Event
       # Calculate once
       if i == 0:
+        # Event
         rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
-      mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
-      # User
-      # Calculate once
-      if i == 0:
+        mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
+        # User
         rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
-      mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
+        mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
 
-  plt.plot(
-    x_i,
-    mae_evt,
+  plt.axhline(
+    y = mae_evt,
+    color = '#212121',
     linewidth=lmdk_lib.line_width
   )
-  plt.text(x_i[-1], mae_evt[-1], '        event')
+  plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
 
-  plt.plot(
-    x_i,
-    mae_usr,
+  plt.axhline(
+    y = mae_usr,
+    color = '#616161',
     linewidth=lmdk_lib.line_width
   )
-  plt.text(x_i[-1], mae_usr[-1], '        user')
+  plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
 
   plt.bar(
     x_i + x_offset,
diff --git a/code/parse_copenhagen.py b/code/parse_copenhagen.py
index 6ef312e..504a923 100644
--- a/code/parse_copenhagen.py
+++ b/code/parse_copenhagen.py
@@ -17,22 +17,11 @@ import time
 import zipfile
 
 '''
+  Users suitable for experiments, considering only their first 1000 contacts.
   usr n
-  311 8193
-  317 2044
-  323 1642
-  366 3406
-  368 5099
-  478 2616
-  486 3112
-  508 4981
-  585 9443
-  595 4459
-  616 4355
-  623 1586
-  637 4479
-  664 4315
-  688 19728
+  449 12167
+  550 4221
+  689 3228
   705 5
 '''
 
@@ -90,15 +79,15 @@ def main(args):
   # Check each user
   goal = [.2, .4, .6, .8]
   # Get users' data from previous parsing
-  usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
-  if usrs_expt.size == 0:
+  usrs_data = lmdk_lib.load_data(args, 'usrs_data')
+  if usrs_data.size == 0:
     # Users suitable for experiments
     usrs_expt = []
     for usr_i, usr in enumerate(usrs):
       print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
       usrs_expt_cur = []
-      # User's contacts
-      usr_cont = cont_data[cont_data[:, uid_a] == usr]
+      # User's first 1000 contacts
+      usr_cont = cont_data[cont_data[:, uid_a] == usr][:1000]
       # For each goal
       for_expt = True
       for g in goal:
@@ -130,9 +119,10 @@ def main(args):
               for_expt = False
               break
     # Save to results
-    lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt')
+    usrs_data = np.array(usrs_expt, str)
+    lmdk_lib.save_data(args, usrs_data, 'usrs_data')
   # Get all users
-  usrs = np.unique(usrs_expt[:, 0])
+  usrs = np.unique(usrs_data[:, 0])
   for usr in usrs:
     usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
     print(int(usr), len(usr_cont))
@@ -145,7 +135,7 @@ def main(args):
   # # All user contacts
   # usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
   # # All user landmarks for different goals
-  # usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
+  # usr_lmdk = usrs_data[usrs_data[:, 0] == usr]
   # for g in goal:
   #   # Get goal landmarks
   #   cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])