copenhagen: New experiment

Manos Katsomallos 2021-10-01 21:30:32 +02:00
parent 2887ef8376
commit e1c78fbda9
3 changed files with 30 additions and 211 deletions

View File

@@ -1,169 +0,0 @@
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
import ast
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import math
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
res_file = '/home/manos/Cloud/Data/HUE/Results.zip'
# User's consumption
seq = lmdk_lib.load_data(args, 'cons')
# The name of the dataset
d = 'HUE'
# The landmarks percentages
lmdks_pct = [0, 20, 40, 60, 80, 100]
# Landmarks' thresholds
lmdks_th = [0, .54, .68, .88, 1.12, 10]
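# Thresholds are paired index-wise with the percentages above: readings below lmdks_th[i] become landmarks, covering roughly lmdks_pct[i]% of the sequence.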
# The privacy budget
epsilon = 10.0
# Number of methods
n = 3
# Width of bars
bar_width = 1/(n + 1)
# The x axis
x_i = np.arange(len(lmdks_pct))
x_margin = bar_width*(n/2 + 1)
print('\n##############################', d, '\n')
# Initialize plot
lmdk_lib.plot_init()
# The x axis
plt.xticks(x_i, np.array(lmdks_pct, int))
plt.xlabel('Landmarks (%)') # Set x axis label.
plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
# The y axis
plt.ylabel('Mean absolute error (kWh)') # Set y axis label.
plt.yscale('log')
# plt.ylim(.01, 10000)
# Bar offset
x_offset = -(bar_width/2)*(n - 1)
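# Each method's bars are drawn at x_i + x_offset; the offset advances by bar_width after every method so the n bars of a group sit side by side.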
mae_u = np.zeros(len(lmdks_pct))
mae_s = np.zeros(len(lmdks_pct))
mae_a = np.zeros(len(lmdks_pct))
mae_evt = 0
mae_usr = 0
for i, pct in enumerate(lmdks_pct):
# Find landmarks
lmdks = seq[seq[:, 1] < lmdks_th[i]]
for _ in range(args.iter):
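# Average the MAE of each budget-allocation scheme over args.iter repetitions.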
# Skip
rls_data_s, bgts_s = lmdk_bgt.skip_cons(seq, lmdks, epsilon)
# lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s)
mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter
# Uniform
rls_data_u, bgts_u = lmdk_bgt.uniform_cons(seq, lmdks, epsilon)
mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter
# Adaptive
rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon, .5, .5)
mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter
# Calculate once
# Event
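# Event-level baseline: with the lowest threshold (0) no reading qualifies as a landmark.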
if i == 0:
rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon)
mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter
# User
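# User-level baseline: with the highest threshold practically every reading is a landmark.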
if i == 0:
rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[len(lmdks_th)-1]], epsilon)
mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter
plt.axhline(
y = mae_evt,
color = '#212121',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
plt.axhline(
y = mae_usr,
color = '#616161',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
plt.bar(
x_i + x_offset,
mae_s,
bar_width,
label='Skip',
linewidth=lmdk_lib.line_width
)
x_offset += bar_width
plt.bar(
x_i + x_offset,
mae_u,
bar_width,
label='Uniform',
linewidth=lmdk_lib.line_width
)
x_offset += bar_width
plt.bar(
x_i + x_offset,
mae_a,
bar_width,
label='Adaptive',
linewidth=lmdk_lib.line_width
)
x_offset += bar_width
path = str('../../rslt/bgt_cmp/' + d)
# Plot legend
lmdk_lib.plot_legend()
# Show plot
# plt.show()
# Save plot
lmdk_lib.save_plot(path + '.pdf')
print('[OK]', flush=True)
def parse_args():
'''
Parse arguments.
Optional:
res - The results archive file.
iter - The total iterations.
'''
# Create argument parser.
parser = argparse.ArgumentParser()
# Mandatory arguments.
# Optional arguments.
parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip')
parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)
# Parse arguments.
args = parser.parse_args()
return args
if __name__ == '__main__':
try:
start_time = time.time()
main(parse_args())
end_time = time.time()
print('##############################')
print('Time : %.4fs' % (end_time - start_time))
print('##############################')
except KeyboardInterrupt:
print('Interrupted by user.')
exit()

View File

@@ -19,11 +19,11 @@ def main(args):
# Contacts for all users
cont_data = lmdk_lib.load_data(args, 'cont')
# Contacts for landmark's percentages for all users
lmdk_data = lmdk_lib.load_data(args, 'usrs_expt')
lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
# The name of the dataset
d = 'Copenhagen'
# The user's id
uid = '623'
uid = '449'
# The landmarks percentages
lmdks_pct = [0, 20, 40, 60, 80, 100]
# The privacy budget
@@ -39,7 +39,7 @@ def main(args):
print('\n##############################', d, '\n')
# Get user's contacts sequence
seq = cont_data[cont_data[:, 1] == float(uid)]
seq = cont_data[cont_data[:, 1] == float(uid)][:1000]
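# Keep only the user's first 1000 contacts (the range used when selecting suitable users).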
# Initialize plot
lmdk_lib.plot_init()
@@ -50,15 +50,15 @@ def main(args):
# The y axis
plt.ylabel('Mean absolute error') # Set y axis label.
# plt.yscale('log')
plt.ylim(0, 1.4)
# plt.ylim(0, 1.4)
# Bar offset
x_offset = -(bar_width/2)*(n - 1)
mae_u = np.zeros(len(lmdks_pct))
mae_s = np.zeros(len(lmdks_pct))
mae_a = np.zeros(len(lmdks_pct))
mae_evt = np.zeros(len(lmdks_pct))
mae_usr = np.zeros(len(lmdks_pct))
mae_evt = 0
mae_usr = 0
for i, pct in enumerate(lmdks_pct):
# Find landmarks
@@ -79,30 +79,28 @@ def main(args):
rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
# Event
# Calculate once
if i == 0:
# Event
rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
# User
# Calculate once
if i == 0:
mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
# User
rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
plt.plot(
x_i,
mae_evt,
plt.axhline(
y = mae_evt,
color = '#212121',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1], mae_evt[-1], ' event')
plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
plt.plot(
x_i,
mae_usr,
plt.axhline(
y = mae_usr,
color = '#616161',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1], mae_usr[-1], ' user')
plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
plt.bar(
x_i + x_offset,

View File

@@ -17,22 +17,11 @@ import time
import zipfile
'''
Users suitable for the experiment, based on their first 1000 contacts (user id, total contacts).
usr n
311 8193
317 2044
323 1642
366 3406
368 5099
478 2616
486 3112
508 4981
585 9443
595 4459
616 4355
623 1586
637 4479
664 4315
688 19728
449 12167
550 4221
689 3228
705 5
'''
@@ -90,15 +79,15 @@ def main(args):
# Check each user
goal = [.2, .4, .6, .8]
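# A user is kept for the experiment only if each of these landmark-percentage goals can be met within their first 1000 contacts.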
# Get users' data from previous parsing
usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
if usrs_expt.size == 0:
usrs_data = lmdk_lib.load_data(args, 'usrs_data')
if usrs_data.size == 0:
# Users suitable for experiments
usrs_expt = []
for usr_i, usr in enumerate(usrs):
print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
usrs_expt_cur = []
# User's contacts
usr_cont = cont_data[cont_data[:, uid_a] == usr]
# User's first 1000 contacts
usr_cont = cont_data[cont_data[:, uid_a] == usr][:1000]
# For each goal
for_expt = True
for g in goal:
@@ -130,9 +119,10 @@ def main(args):
for_expt = False
break
# Save to results
lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt')
usrs_data = np.array(usrs_expt, str)
lmdk_lib.save_data(args, usrs_data, 'usrs_data')
# Get all users
usrs = np.unique(usrs_expt[:, 0])
usrs = np.unique(usrs_data[:, 0])
for usr in usrs:
usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
print(int(usr), len(usr_cont))
@@ -145,7 +135,7 @@ def main(args):
# # All user contacts
# usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
# # All user landmarks for different goals
# usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
# usr_lmdk = usrs_data[usrs_data[:, 0] == usr]
# for g in goal:
# # Get goal landmarks
# cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])