From 0aca4ec9c158269e063dad18db30f5cea4b73a73 Mon Sep 17 00:00:00 2001 From: Manos Date: Wed, 6 Oct 2021 01:44:50 +0200 Subject: [PATCH] code: WIP --- code/expt/copenhagen-sel.py | 195 +++++++++++++++++++++++++++++++++ code/expt/hue-sel.py | 185 +++++++++++++++++++++++++++++++ code/expt/t-drive-sel.py | 211 ++++++++++++++++++++++++++++++++++++ code/lib/lmdk_lib.py | 8 ++ 4 files changed, 599 insertions(+) create mode 100644 code/expt/copenhagen-sel.py create mode 100644 code/expt/hue-sel.py create mode 100644 code/expt/t-drive-sel.py diff --git a/code/expt/copenhagen-sel.py b/code/expt/copenhagen-sel.py new file mode 100644 index 0000000..26597af --- /dev/null +++ b/code/expt/copenhagen-sel.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 + +import sys +sys.path.insert(1, '../lib') +import argparse +import ast +from datetime import datetime +from geopy.distance import distance +import lmdk_bgt +import lmdk_lib +import math +import numpy as np +from matplotlib import pyplot as plt +import time + + +def main(args): + res_file = '/home/manos/Cloud/Data/Copenhagen/Results.zip' + # Contacts for all users + cont_data = lmdk_lib.load_data(args, 'cont') + # Contacts for landmark's percentages for all users + lmdk_data = lmdk_lib.load_data(args, 'usrs_data') + # The name of the dataset + d = 'Copenhagen' + # The user's id + uid = '449' + # The landmarks percentages + lmdks_pct = [0, 20, 40, 60, 80, 100] + # The privacy budget + epsilon = 1.0 + + # Number of methods + n = 3 + # Width of bars + bar_width = 1/(n + 1) + # The x axis + x_i = np.arange(len(lmdks_pct)) + x_margin = bar_width*(n/2 + 1) + + print('\n##############################', d, '\n') + # Get user's contacts sequence + seq = cont_data[cont_data[:, 1] == float(uid)][:1000] + + # Initialize plot + lmdk_lib.plot_init() + # The x axis + plt.xticks(x_i, np.array(lmdks_pct, int)) + plt.xlabel('Landmarks (%)') # Set x axis label. 
+ plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) + # The y axis + plt.ylabel('Mean absolute error (%)') # Set y axis label. + # plt.yscale('log') + plt.ylim(0, 100) + # Bar offset + x_offset = -(bar_width/2)*(n - 1) + + mae_u = np.zeros(len(lmdks_pct)) + mae_s = np.zeros(len(lmdks_pct)) + mae_a = np.zeros(len(lmdks_pct)) + mae_evt = 0 + mae_usr = 0 + + for i, pct in enumerate(lmdks_pct): + # Find landmarks + lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct) + + for _ in range(args.iter): + + eps_sel = 0 + if pct != 0 and pct != 100: + # Get landmarks timestamps in sequence + lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks) + # Turn landmarks to histogram + hist, h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq) + # Find all possible options + opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq) + # Landmarks selection budget + eps_sel = epsilon/(len(lmdks_seq) + 1) + # Get private landmarks timestamps + lmdks_seq, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel) + # Get actual landmarks values + lmdks = seq[lmdks_seq] + + # Skip + rls_data_s, bgts_s = lmdk_bgt.skip_cont(seq, lmdks, epsilon - eps_sel) + # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s) + mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter + + # Uniform + rls_data_u, bgts_u = lmdk_bgt.uniform_cont(seq, lmdks, epsilon - eps_sel) + # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_u) + mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter + + # Adaptive + rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon - eps_sel, .5, .5) + mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter + + # Calculate once + if i == 0: + # Event + rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon) + mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter + # User + rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon) + mae_usr += 
lmdk_bgt.mae_cont(rls_data_usr)/args.iter + + mae_u *= 100 + mae_s *= 100 + mae_a *= 100 + mae_evt *= 100 + mae_usr *= 100 + + plt.axhline( + y = mae_evt, + color = '#212121', + linewidth=lmdk_lib.line_width + ) + plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.05, 'event') + + plt.axhline( + y = mae_usr, + color = '#616161', + linewidth=lmdk_lib.line_width + ) + plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.05, 'user') + + plt.bar( + x_i + x_offset, + mae_s, + bar_width, + label='Skip', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_u, + bar_width, + label='Uniform', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_a, + bar_width, + label='Adaptive', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + + path = str('../../rslt/bgt_cmp/' + d) + # Plot legend + lmdk_lib.plot_legend() + # # Show plot + # plt.show() + # Save plot + lmdk_lib.save_plot(path + '-sel.pdf') + print('[OK]', flush=True) + + +def parse_args(): + ''' + Parse arguments. + + Optional: + res - The results archive file. + iter - The total iterations. + ''' + # Create argument parser. + parser = argparse.ArgumentParser() + + # Mandatory arguments. + + # Optional arguments. + parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Results.zip') + parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1) + + # Parse arguments. 
+ args = parser.parse_args() + + return args + + +if __name__ == '__main__': + try: + start_time = time.time() + main(parse_args()) + end_time = time.time() + print('##############################') + print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time)))) + print('##############################') + except KeyboardInterrupt: + print('Interrupted by user.') + exit() diff --git a/code/expt/hue-sel.py b/code/expt/hue-sel.py new file mode 100644 index 0000000..240fe37 --- /dev/null +++ b/code/expt/hue-sel.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 + +import sys +sys.path.insert(1, '../lib') +import argparse +import ast +from datetime import datetime +from geopy.distance import distance +import lmdk_bgt +import lmdk_lib +import math +import numpy as np +from matplotlib import pyplot as plt +import time + + +def main(args): + res_file = '/home/manos/Cloud/Data/HUE/Results.zip' + # User's consumption + seq = lmdk_lib.load_data(args, 'cons') + # The name of the dataset + d = 'HUE' + # The landmarks percentages + lmdks_pct = [0, 20, 40, 60, 80, 100] + # Landmarks' thresholds + lmdks_th = [0, .54, .68, .88, 1.12, 10] + # The privacy budget + epsilon = 10.0 + + # Number of methods + n = 3 + # Width of bars + bar_width = 1/(n + 1) + # The x axis + x_i = np.arange(len(lmdks_pct)) + x_margin = bar_width*(n/2 + 1) + + print('\n##############################', d, '\n') + + # Initialize plot + lmdk_lib.plot_init() + # The x axis + plt.xticks(x_i, np.array(lmdks_pct, int)) + plt.xlabel('Landmarks (%)') # Set x axis label. + plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) + # The y axis + plt.ylabel('Mean absolute error (kWh)') # Set y axis label. 
+ plt.yscale('log') + # plt.ylim(.01, 10000) + # Bar offset + x_offset = -(bar_width/2)*(n - 1) + + mae_u = np.zeros(len(lmdks_pct)) + mae_s = np.zeros(len(lmdks_pct)) + mae_a = np.zeros(len(lmdks_pct)) + mae_evt = 0 + mae_usr = 0 + + for i, pct in enumerate(lmdks_pct): + # Find landmarks + lmdks = seq[seq[:, 1] < lmdks_th[i]] + + for _ in range(args.iter): + + eps_sel = 0 + if pct != 0 and pct != 100: + # Get landmarks timestamps in sequence + lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks) + # Turn landmarks to histogram + hist, h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq) + # Find all possible options + opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq) + # Landmarks selection budget + eps_sel = epsilon/(len(lmdks_seq) + 1) + # Get private landmarks timestamps + lmdks_seq, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel) + # Get actual landmarks values + lmdks = seq[lmdks_seq] + + # Skip + rls_data_s, bgts_s = lmdk_bgt.skip_cons(seq, lmdks, epsilon - eps_sel) + # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s) + mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter + + # Uniform + rls_data_u, bgts_u = lmdk_bgt.uniform_cons(seq, lmdks, epsilon - eps_sel) + mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter + + # Adaptive + rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon - eps_sel, .5, .5) + mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter + + # Calculate once + # Event + if i == 0: + rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon) + mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter + # User + if i == 0: + rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[len(lmdks_th)-1]], epsilon) + mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter + + plt.axhline( + y = mae_evt, + color = '#212121', + linewidth=lmdk_lib.line_width + ) + plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event') + + 
plt.axhline( + y = mae_usr, + color = '#616161', + linewidth=lmdk_lib.line_width + ) + plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user') + + plt.bar( + x_i + x_offset, + mae_s, + bar_width, + label='Skip', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_u, + bar_width, + label='Uniform', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_a, + bar_width, + label='Adaptive', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + + path = str('../../rslt/bgt_cmp/' + d) + # Plot legend + lmdk_lib.plot_legend() + # Show plot + # plt.show() + # Save plot + lmdk_lib.save_plot(path + '-sel.pdf') + print('[OK]', flush=True) + + +def parse_args(): + ''' + Parse arguments. + + Optional: + res - The results archive file. + iter - The total iterations. + ''' + # Create argument parser. + parser = argparse.ArgumentParser() + + # Mandatory arguments. + + # Optional arguments. + parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip') + parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1) + + # Parse arguments. 
+ args = parser.parse_args() + + return args + + +if __name__ == '__main__': + try: + start_time = time.time() + main(parse_args()) + end_time = time.time() + print('##############################') + print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time)))) + print('##############################') + except KeyboardInterrupt: + print('Interrupted by user.') + exit() diff --git a/code/expt/t-drive-sel.py b/code/expt/t-drive-sel.py new file mode 100644 index 0000000..77a4a1d --- /dev/null +++ b/code/expt/t-drive-sel.py @@ -0,0 +1,211 @@ +#!/usr/bin/env python3 + +import sys +sys.path.insert(1, '../lib') +import argparse +from datetime import datetime +from geopy.distance import distance +import lmdk_bgt +import lmdk_lib +import numpy as np +from matplotlib import pyplot as plt +import time + + +def main(args): + # The data files + data_files = { + 'T-drive': '/home/manos/Cloud/Data/T-drive/Results.zip', + } + # Data related info + data_info = { + 'T-drive': { + 'uid': 2, + 'lmdks': { + 0: {'dist': 0, 'per': 1000}, # 0.0% + 20: {'dist': 2095, 'per': 30}, # 19.6% + 40: {'dist': 2790, 'per': 30}, # 40.2% + 60: {'dist': 3590, 'per': 30}, # 59.9% + 80: {'dist': 4825, 'per': 30}, # 79.4% + 100: {'dist': 10350, 'per': 30} # 100.0% + } + } + } + # The data sets + data_sets = {} + # Load data sets + for df in data_files: + args.res = data_files[df] + data_sets[df] = lmdk_lib.load_data(args, 'usrs_data') + # Geo-I configuration + # epsilon = level/radius + # Radius is in meters + bgt_conf = [ + {'epsilon': 1}, + ] + + # Number of methods + n = 3 + # Width of bars + bar_width = 1/(n + 1) + # The x axis + x_i = np.arange(len(list(data_info.values())[0]['lmdks'])) + x_margin = bar_width*(n/2 + 1) + + for d in data_sets: + print('\n##############################', d, '\n') + args.res = data_files[d] + data = data_sets[d] + # Truncate trajectory according to arguments + seq = data[data[:,0]==data_info[d]['uid'], :][:args.time] + + # Initialize 
plot + lmdk_lib.plot_init() + # The x axis + plt.xticks(x_i, np.array([key for key in data_info[d]['lmdks']]).astype(int)) + plt.xlabel('Landmarks (%)') # Set x axis label. + plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) + # The y axis + plt.ylabel('Mean absolute error (m)') # Set y axis label. + plt.yscale('log') + # plt.ylim(1, 100000000) + # Bar offset + x_offset = -(bar_width/2)*(n - 1) + + mae_u = np.zeros(len(data_info[d]['lmdks'])) + mae_s = np.zeros(len(data_info[d]['lmdks'])) + mae_a = np.zeros(len(data_info[d]['lmdks'])) + mae_evt = 0 + mae_usr = 0 + for i, lmdk in enumerate(data_info[d]['lmdks']): + # Find landmarks + args.dist = data_info[d]['lmdks'][lmdk]['dist'] + args.per = data_info[d]['lmdks'][lmdk]['per'] + lmdks = lmdk_lib.find_lmdks(seq, args)[:args.time] + for bgt in bgt_conf: + for _ in range(args.iter): + + eps_sel = 0 + if lmdk != 0 and lmdk != 100: + # Get landmarks timestamps in sequence + lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks) + # Turn landmarks to histogram + hist, h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq) + # Find all possible options + opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq) + # Landmarks selection budget + eps_sel = bgt['epsilon']/(len(lmdks_seq) + 1) + # Get private landmarks timestamps + lmdks_seq, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel) + # Get actual landmarks values + lmdks = seq[lmdks_seq] + + # Skip + rls_data_s, _ = lmdk_bgt.skip(seq, lmdks, bgt['epsilon'] - eps_sel) + mae_s[i] += lmdk_bgt.mae(seq, rls_data_s)/args.iter + + # Uniform + rls_data_u, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon'] - eps_sel) + mae_u[i] += lmdk_bgt.mae(seq, rls_data_u)/args.iter + + # Adaptive + rls_data_a, _, _ = lmdk_bgt.adaptive(seq, lmdks, bgt['epsilon'] - eps_sel, .5, .5) + mae_a[i] += lmdk_bgt.mae(seq, rls_data_a)/args.iter + + # Event + if lmdk == 0: + rls_data_evt, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon']) + mae_evt += 
lmdk_bgt.mae(seq, rls_data_evt)/args.iter + # User + if lmdk == 100: + rls_data_usr, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon']) + mae_usr += lmdk_bgt.mae(seq, rls_data_usr)/args.iter + + # Plot lines + plt.axhline( + y = mae_evt, + color = '#212121', + linewidth=lmdk_lib.line_width + ) + plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event') + plt.axhline( + y = mae_usr, + color = '#616161', + linewidth=lmdk_lib.line_width + ) + plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user') + + # Plot bars + plt.bar( + x_i + x_offset, + mae_s, + bar_width, + label='Skip', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_u, + bar_width, + label='Uniform', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_a, + bar_width, + label='Adaptive', + linewidth=lmdk_lib.line_width + ) + + path = str('../../rslt/bgt_cmp/' + d) + # Plot legend + lmdk_lib.plot_legend() + # Show plot + # plt.show() + # Save plot + lmdk_lib.save_plot(path + '-sel.pdf') + print('[OK]', flush=True) + + +def parse_args(): + ''' + Parse arguments. + + Optional: + dist - The coordinates distance threshold in meters. + per - The timestaps period threshold in mimutes. + time - The total timestamps. + iter - The total iterations. + ''' + # Create argument parser. + parser = argparse.ArgumentParser() + + # Mandatory arguments. + + # Optional arguments. 
+  parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
+  parser.add_argument('-p', '--per', help='The timestaps period threshold in mimutes.', type=int, default=30)
+  parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')
+  parser.add_argument('-t', '--time', help='The total timestamps.', type=int, default=1000)
+  parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)
+
+  # Parse arguments.
+  args = parser.parse_args()
+
+  return args
+
+
+if __name__ == '__main__':
+  try:
+    start_time = time.time()
+    main(parse_args())
+    end_time = time.time()
+    print('##############################')
+    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
+    print('##############################')
+  except KeyboardInterrupt:
+    print('Interrupted by user.')
+    exit()
diff --git a/code/lib/lmdk_lib.py b/code/lib/lmdk_lib.py
index 2d1255d..d040af2 100644
--- a/code/lib/lmdk_lib.py
+++ b/code/lib/lmdk_lib.py
@@ -907,6 +907,32 @@ def find_lmdks(usrs_data, args):
   return usrs_lmdks
 
 
+def find_lmdks_seq(seq, lmdks):
+  '''
+    Find the timestamps of the landmarks within a sequence.
+
+    Parameters:
+      seq   - The data sequence (2-D array, one data point per row).
+      lmdks - The landmark data points (rows to look for in seq).
+    Returns:
+      lmdks_seq - The positions (starting from 1) of the rows of seq
+                  that are equal to a landmark, as an integer array.
+  '''
+  # Accept a single landmark row as well as a 2-D array of rows.
+  lmdks = np.atleast_2d(lmdks)
+  # Without landmarks there is nothing to look for (this also avoids
+  # broadcasting an empty array against a data point).
+  if lmdks.size == 0:
+    return np.array([], dtype = int)
+  lmdks_seq = []
+  for i, p in enumerate(seq):
+    # A data point is a landmark when it matches a landmark row in every column.
+    if np.equal(lmdks, p).all(1).any():
+      # Timestamps start from 1.
+      lmdks_seq.append(i + 1)
+  return np.array(lmdks_seq, dtype = int)
+
+
 def find_lmdks_tim(lmdk_data, seq, uid, pct):
   '''
     Find user's landmarks timestamps.