From e1c78fbda925151b8a98609d6b97898eae22ac92 Mon Sep 17 00:00:00 2001 From: Manos Katsomallos Date: Fri, 1 Oct 2021 21:30:32 +0200 Subject: [PATCH] copenhagen: New experiment --- code/expt/bgt_cmp_hue.py | 169 ------------------ .../{bgt_cmp_copenhagen.py => copenhagen.py} | 38 ++-- code/parse_copenhagen.py | 34 ++-- 3 files changed, 30 insertions(+), 211 deletions(-) delete mode 100644 code/expt/bgt_cmp_hue.py rename code/expt/{bgt_cmp_copenhagen.py => copenhagen.py} (87%) diff --git a/code/expt/bgt_cmp_hue.py b/code/expt/bgt_cmp_hue.py deleted file mode 100644 index 56e3751..0000000 --- a/code/expt/bgt_cmp_hue.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python3 - -import sys -sys.path.insert(1, '../lib') -import argparse -import ast -from datetime import datetime -from geopy.distance import distance -import lmdk_bgt -import lmdk_lib -import math -import numpy as np -from matplotlib import pyplot as plt -import time - - -def main(args): - res_file = '/home/manos/Cloud/Data/HUE/Results.zip' - # User's consumption - seq = lmdk_lib.load_data(args, 'cons') - # The name of the dataset - d = 'HUE' - # The landmarks percentages - lmdks_pct = [0, 20, 40, 60, 80, 100] - # Landmarks' thresholds - lmdks_th = [0, .54, .68, .88, 1.12, 10] - # The privacy budget - epsilon = 10.0 - - # Number of methods - n = 3 - # Width of bars - bar_width = 1/(n + 1) - # The x axis - x_i = np.arange(len(lmdks_pct)) - x_margin = bar_width*(n/2 + 1) - - print('\n##############################', d, '\n') - - # Initialize plot - lmdk_lib.plot_init() - # The x axis - plt.xticks(x_i, np.array(lmdks_pct, int)) - plt.xlabel('Landmarks (%)') # Set x axis label. - plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) - # The y axis - plt.ylabel('Mean absolute error (kWh)') # Set y axis label. - plt.yscale('log') - # plt.ylim(.01, 10000) - # Bar offset - x_offset = -(bar_width/2)*(n - 1) - - mae_u = np.zeros(len(lmdks_pct)) - mae_s = np.zeros(len(lmdks_pct)) - mae_a = np.zeros(len(lmdks_pct)) - mae_evt = 0 - mae_usr = 0 - - for i, pct in enumerate(lmdks_pct): - # Find landmarks - lmdks = seq[seq[:, 1] < lmdks_th[i]] - - for _ in range(args.iter): - # Skip - rls_data_s, bgts_s = lmdk_bgt.skip_cons(seq, lmdks, epsilon) - # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s) - mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter - - # Uniform - rls_data_u, bgts_u = lmdk_bgt.uniform_cons(seq, lmdks, epsilon) - mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter - - # Adaptive - rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon, .5, .5) - mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter - - # Calculate once - # Event - if i == 0: - rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon) - mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter - # User - if i == 0: - rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[len(lmdks_th)-1]], epsilon) - mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter - - plt.axhline( - y = mae_evt, - color = '#212121', - linewidth=lmdk_lib.line_width - ) - plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event') - - plt.axhline( - y = mae_usr, - color = '#616161', - linewidth=lmdk_lib.line_width - ) - plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user') - - plt.bar( - x_i + x_offset, - mae_s, - bar_width, - label='Skip', - linewidth=lmdk_lib.line_width - ) - x_offset += bar_width - plt.bar( - x_i + x_offset, - mae_u, - bar_width, - label='Uniform', - linewidth=lmdk_lib.line_width - ) - x_offset += bar_width - plt.bar( - x_i + x_offset, - mae_a, - bar_width, - label='Adaptive', - linewidth=lmdk_lib.line_width - ) - x_offset += bar_width - - path = str('../../rslt/bgt_cmp/' + d) - # Plot legend - lmdk_lib.plot_legend() - # Show plot - # plt.show() - # Save plot - lmdk_lib.save_plot(path + '.pdf') - print('[OK]', flush=True) - - -def parse_args(): - ''' - Parse arguments. - - Optional: - res - The results archive file. - iter - The total iterations. - ''' - # Create argument parser. - parser = argparse.ArgumentParser() - - # Mandatory arguments. - - # Optional arguments. - parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip') - parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1) - - # Parse arguments. - args = parser.parse_args() - - return args - - -if __name__ == '__main__': - try: - start_time = time.time() - main(parse_args()) - end_time = time.time() - print('##############################') - print('Time : %.4fs' % (end_time - start_time)) - print('##############################') - except KeyboardInterrupt: - print('Interrupted by user.') - exit() diff --git a/code/expt/bgt_cmp_copenhagen.py b/code/expt/copenhagen.py similarity index 87% rename from code/expt/bgt_cmp_copenhagen.py rename to code/expt/copenhagen.py index cd9c0c4..544591a 100644 --- a/code/expt/bgt_cmp_copenhagen.py +++ b/code/expt/copenhagen.py @@ -19,11 +19,11 @@ def main(args): # Contacts for all users cont_data = lmdk_lib.load_data(args, 'cont') # Contacts for landmark's percentages for all users - lmdk_data = lmdk_lib.load_data(args, 'usrs_expt') + lmdk_data = lmdk_lib.load_data(args, 'usrs_data') # The name of the dataset d = 'Copenhagen' # The user's id - uid = '623' + uid = '449' # The landmarks percentages lmdks_pct = [0, 20, 40, 60, 80, 100] # The privacy budget @@ -39,7 +39,7 @@ def main(args): print('\n##############################', d, '\n') # Get user's contacts sequence - seq = cont_data[cont_data[:, 1] == float(uid)] + seq = cont_data[cont_data[:, 1] == float(uid)][:1000] # Initialize plot lmdk_lib.plot_init() @@ -50,15 +50,15 @@ def main(args): # The y axis plt.ylabel('Mean absolute error') # Set y axis label. # plt.yscale('log') - plt.ylim(0, 1.4) + # plt.ylim(0, 1.4) # Bar offset x_offset = -(bar_width/2)*(n - 1) mae_u = np.zeros(len(lmdks_pct)) mae_s = np.zeros(len(lmdks_pct)) mae_a = np.zeros(len(lmdks_pct)) - mae_evt = np.zeros(len(lmdks_pct)) - mae_usr = np.zeros(len(lmdks_pct)) + mae_evt = 0 + mae_usr = 0 for i, pct in enumerate(lmdks_pct): # Find landmarks @@ -79,30 +79,28 @@ def main(args): rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5) mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter - # Event # Calculate once if i == 0: + # Event rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon) - mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter - # User - # Calculate once - if i == 0: + mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter + # User rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon) - mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter + mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter - plt.plot( - x_i, - mae_evt, + plt.axhline( + y = mae_evt, + color = '#212121', linewidth=lmdk_lib.line_width ) - plt.text(x_i[-1], mae_evt[-1], ' event') + plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event') - plt.plot( - x_i, - mae_usr, + plt.axhline( + y = mae_usr, + color = '#616161', linewidth=lmdk_lib.line_width ) - plt.text(x_i[-1], mae_usr[-1], ' user') + plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user') plt.bar( x_i + x_offset, diff --git a/code/parse_copenhagen.py b/code/parse_copenhagen.py index 6ef312e..504a923 100644 --- a/code/parse_copenhagen.py +++ b/code/parse_copenhagen.py @@ -17,22 +17,11 @@ import time import zipfile ''' + Suitable in first 1000 contacts. usr n - 311 8193 - 317 2044 - 323 1642 - 366 3406 - 368 5099 - 478 2616 - 486 3112 - 508 4981 - 585 9443 - 595 4459 - 616 4355 - 623 1586 - 637 4479 - 664 4315 - 688 19728 + 449 12167 + 550 4221 + 689 3228 705 5 ''' @@ -90,15 +79,15 @@ def main(args): # Check each user goal = [.2, .4, .6, .8] # Get users' data from previous parsing - usrs_expt = lmdk_lib.load_data(args, 'usrs_expt') - if usrs_expt.size == 0: + usrs_data = lmdk_lib.load_data(args, 'usrs_data') + if usrs_data.size == 0: # Users suitable for experiments usrs_expt = [] for usr_i, usr in enumerate(usrs): print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True) usrs_expt_cur = [] - # User's contacts - usr_cont = cont_data[cont_data[:, uid_a] == usr] + # User's first 1000 contacts + usr_cont = cont_data[cont_data[:, uid_a] == usr][:1000] # For each goal for_expt = True for g in goal: @@ -130,9 +119,10 @@ def main(args): for_expt = False break # Save to results - lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt') + usrs_data = np.array(usrs_expt, str) + lmdk_lib.save_data(args, usrs_data, 'usrs_data') # Get all users - usrs = np.unique(usrs_expt[:, 0]) + usrs = np.unique(usrs_data[:, 0]) for usr in usrs: usr_cont = cont_data[cont_data[:, uid_a] == float(usr)] print(int(usr), len(usr_cont)) @@ -145,7 +135,7 @@ def main(args): # # All user contacts # usr_cont = cont_data[cont_data[:, uid_a] == float(usr)] # # All user landmarks for different goals - # usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr] + # usr_lmdk = usrs_data[usrs_data[:, 0] == usr] # for g in goal: # # Get goal landmarks # cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])