copenhagen: New experiment

This commit is contained in:
Manos Katsomallos 2021-10-01 21:30:32 +02:00
parent 2887ef8376
commit e1c78fbda9
3 changed files with 30 additions and 211 deletions

View File

@ -1,169 +0,0 @@
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
import ast
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import math
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
    '''
      Compare the budget allocation methods on the HUE consumption data.

      For every landmark threshold, release the sequence with the skip,
      uniform, and adaptive methods, average the mean absolute error over
      `args.iter` runs, and plot the results as grouped bars.  Two
      horizontal reference lines ('event' and 'user') are drawn from
      uniform releases that use the first and the last threshold.
      The figure is saved as '../../rslt/bgt_cmp/HUE.pdf'.

      Parameters:
        args - The parsed arguments; `iter` is read here and the whole
               namespace is forwarded to `lmdk_lib.load_data`.
      Returns:
        Nothing.
    '''
    # User's consumption
    # NOTE(review): column 1 is compared with the thresholds below, so it
    # presumably holds the consumption values -- confirm against load_data.
    seq = lmdk_lib.load_data(args, 'cons')
    # The name of the dataset
    d = 'HUE'
    # The landmarks percentages
    lmdks_pct = [0, 20, 40, 60, 80, 100]
    # Landmarks' thresholds (parallel to lmdks_pct, one threshold per percentage)
    lmdks_th = [0, .54, .68, .88, 1.12, 10]
    # The privacy budget
    epsilon = 10.0
    # Number of methods
    n = 3
    # Width of bars
    bar_width = 1/(n + 1)
    # The x axis
    x_i = np.arange(len(lmdks_pct))
    x_margin = bar_width*(n/2 + 1)
    print('\n##############################', d, '\n')
    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    plt.xticks(x_i, np.array(lmdks_pct, int))
    plt.xlabel('Landmarks (%)')  # Set x axis label.
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
    plt.yscale('log')
    # plt.ylim(.01, 10000)
    # Bar offset
    x_offset = -(bar_width/2)*(n - 1)
    mae_u = np.zeros(len(lmdks_pct))
    mae_s = np.zeros(len(lmdks_pct))
    mae_a = np.zeros(len(lmdks_pct))
    mae_evt = 0
    mae_usr = 0
    for i, th in enumerate(lmdks_th):
        # Find landmarks
        lmdks = seq[seq[:, 1] < th]
        for _ in range(args.iter):
            # Skip
            rls_data_s, _ = lmdk_bgt.skip_cons(seq, lmdks, epsilon)
            mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter
            # Uniform
            rls_data_u, _ = lmdk_bgt.uniform_cons(seq, lmdks, epsilon)
            mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter
            # Adaptive
            rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon, .5, .5)
            mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter
            # The reference levels do not depend on the current threshold,
            # so they are calculated only during the first pass.
            if i == 0:
                # Event
                rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon)
                mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter
                # User
                rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[-1]], epsilon)
                mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter
    # Reference lines, labeled slightly right of the last tick and below the line
    for mae_ref, color, label in (
        (mae_evt, '#212121', 'event'),
        (mae_usr, '#616161', 'user'),
    ):
        plt.axhline(
            y=mae_ref,
            color=color,
            linewidth=lmdk_lib.line_width
        )
        plt.text(x_i[-1] + x_i[-1]*.14, mae_ref - mae_ref*.14, label)
    # One group of bars per method, each shifted by one bar width
    for label, mae in (
        ('Skip', mae_s),
        ('Uniform', mae_u),
        ('Adaptive', mae_a),
    ):
        plt.bar(
            x_i + x_offset,
            mae,
            bar_width,
            label=label,
            linewidth=lmdk_lib.line_width
        )
        x_offset += bar_width
    path = '../../rslt/bgt_cmp/' + d
    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    # plt.show()
    # Save plot
    lmdk_lib.save_plot(path + '.pdf')
    print('[OK]', flush=True)
def parse_args(argv=None):
    '''
      Parse arguments.

      Parameters:
        argv - The list of argument strings to parse.  When None (the
               default), argparse reads them from sys.argv[1:], which is
               the behavior of the original, parameterless call.
      Optional:
        res   - The results archive file.
        iter  - The total iterations.
      Returns:
        The argparse namespace with the attributes `res` and `iter`.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip')
    parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)

    # Parse arguments.
    return parser.parse_args(argv)
# Script entry point: run the experiment and report the elapsed wall-clock time.
if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time   : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit() instead of the builtin exit(): the latter is injected by
        # the site module for interactive use and is not always available.
        sys.exit()

View File

@ -19,11 +19,11 @@ def main(args):
# Contacts for all users # Contacts for all users
cont_data = lmdk_lib.load_data(args, 'cont') cont_data = lmdk_lib.load_data(args, 'cont')
# Contacts for landmark's percentages for all users # Contacts for landmark's percentages for all users
lmdk_data = lmdk_lib.load_data(args, 'usrs_expt') lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
# The name of the dataset # The name of the dataset
d = 'Copenhagen' d = 'Copenhagen'
# The user's id # The user's id
uid = '623' uid = '449'
# The landmarks percentages # The landmarks percentages
lmdks_pct = [0, 20, 40, 60, 80, 100] lmdks_pct = [0, 20, 40, 60, 80, 100]
# The privacy budget # The privacy budget
@ -39,7 +39,7 @@ def main(args):
print('\n##############################', d, '\n') print('\n##############################', d, '\n')
# Get user's contacts sequence # Get user's contacts sequence
seq = cont_data[cont_data[:, 1] == float(uid)] seq = cont_data[cont_data[:, 1] == float(uid)][:1000]
# Initialize plot # Initialize plot
lmdk_lib.plot_init() lmdk_lib.plot_init()
@ -50,15 +50,15 @@ def main(args):
# The y axis # The y axis
plt.ylabel('Mean absolute error') # Set y axis label. plt.ylabel('Mean absolute error') # Set y axis label.
# plt.yscale('log') # plt.yscale('log')
plt.ylim(0, 1.4) # plt.ylim(0, 1.4)
# Bar offset # Bar offset
x_offset = -(bar_width/2)*(n - 1) x_offset = -(bar_width/2)*(n - 1)
mae_u = np.zeros(len(lmdks_pct)) mae_u = np.zeros(len(lmdks_pct))
mae_s = np.zeros(len(lmdks_pct)) mae_s = np.zeros(len(lmdks_pct))
mae_a = np.zeros(len(lmdks_pct)) mae_a = np.zeros(len(lmdks_pct))
mae_evt = np.zeros(len(lmdks_pct)) mae_evt = 0
mae_usr = np.zeros(len(lmdks_pct)) mae_usr = 0
for i, pct in enumerate(lmdks_pct): for i, pct in enumerate(lmdks_pct):
# Find landmarks # Find landmarks
@ -79,30 +79,28 @@ def main(args):
rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5) rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
# Calculate once
if i == 0:
# Event # Event
# Calculate once
if i == 0:
rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon) rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
# User # User
# Calculate once
if i == 0:
rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon) rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
plt.plot( plt.axhline(
x_i, y = mae_evt,
mae_evt, color = '#212121',
linewidth=lmdk_lib.line_width linewidth=lmdk_lib.line_width
) )
plt.text(x_i[-1], mae_evt[-1], ' event') plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
plt.plot( plt.axhline(
x_i, y = mae_usr,
mae_usr, color = '#616161',
linewidth=lmdk_lib.line_width linewidth=lmdk_lib.line_width
) )
plt.text(x_i[-1], mae_usr[-1], ' user') plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
plt.bar( plt.bar(
x_i + x_offset, x_i + x_offset,

View File

@ -17,22 +17,11 @@ import time
import zipfile import zipfile
''' '''
Suitable in first 1000 contacts.
usr n usr n
311 8193 449 12167
317 2044 550 4221
323 1642 689 3228
366 3406
368 5099
478 2616
486 3112
508 4981
585 9443
595 4459
616 4355
623 1586
637 4479
664 4315
688 19728
705 5 705 5
''' '''
@ -90,15 +79,15 @@ def main(args):
# Check each user # Check each user
goal = [.2, .4, .6, .8] goal = [.2, .4, .6, .8]
# Get users' data from previous parsing # Get users' data from previous parsing
usrs_expt = lmdk_lib.load_data(args, 'usrs_expt') usrs_data = lmdk_lib.load_data(args, 'usrs_data')
if usrs_expt.size == 0: if usrs_data.size == 0:
# Users suitable for experiments # Users suitable for experiments
usrs_expt = [] usrs_expt = []
for usr_i, usr in enumerate(usrs): for usr_i, usr in enumerate(usrs):
print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True) print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
usrs_expt_cur = [] usrs_expt_cur = []
# User's contacts # User's first 1000 contacts
usr_cont = cont_data[cont_data[:, uid_a] == usr] usr_cont = cont_data[cont_data[:, uid_a] == usr][:1000]
# For each goal # For each goal
for_expt = True for_expt = True
for g in goal: for g in goal:
@ -130,9 +119,10 @@ def main(args):
for_expt = False for_expt = False
break break
# Save to results # Save to results
lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt') usrs_data = np.array(usrs_expt, str)
lmdk_lib.save_data(args, usrs_data, 'usrs_data')
# Get all users # Get all users
usrs = np.unique(usrs_expt[:, 0]) usrs = np.unique(usrs_data[:, 0])
for usr in usrs: for usr in usrs:
usr_cont = cont_data[cont_data[:, uid_a] == float(usr)] usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
print(int(usr), len(usr_cont)) print(int(usr), len(usr_cont))
@ -145,7 +135,7 @@ def main(args):
# # All user contacts # # All user contacts
# usr_cont = cont_data[cont_data[:, uid_a] == float(usr)] # usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
# # All user landmarks for different goals # # All user landmarks for different goals
# usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr] # usr_lmdk = usrs_data[usrs_data[:, 0] == usr]
# for g in goal: # for g in goal:
# # Get goal landmarks # # Get goal landmarks
# cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2]) # cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])