copenhagen: New experiment

Manos Katsomallos 2021-10-01 21:30:32 +02:00
parent 2887ef8376
commit e1c78fbda9
3 changed files with 30 additions and 211 deletions

View File

@@ -1,169 +0,0 @@
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
import ast
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import math
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
res_file = '/home/manos/Cloud/Data/HUE/Results.zip'
# User's consumption
seq = lmdk_lib.load_data(args, 'cons')
# The name of the dataset
d = 'HUE'
# The landmarks percentages
lmdks_pct = [0, 20, 40, 60, 80, 100]
# Landmarks' thresholds
lmdks_th = [0, .54, .68, .88, 1.12, 10]
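# Thresholds are paired index-wise with the percentages above: readings below lmdks_th[i] become landmarks, covering roughly lmdks_pct[i]% of the sequence.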
# The privacy budget
epsilon = 10.0
# Number of methods
n = 3
# Width of bars
bar_width = 1/(n + 1)
# The x axis
x_i = np.arange(len(lmdks_pct))
x_margin = bar_width*(n/2 + 1)
print('\n##############################', d, '\n')
# Initialize plot
lmdk_lib.plot_init()
# The x axis
plt.xticks(x_i, np.array(lmdks_pct, int))
plt.xlabel('Landmarks (%)') # Set x axis label.
plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
# The y axis
plt.ylabel('Mean absolute error (kWh)') # Set y axis label.
plt.yscale('log')
# plt.ylim(.01, 10000)
# Bar offset
x_offset = -(bar_width/2)*(n - 1)
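# Each method's bars are drawn at x_i + x_offset; the offset advances by bar_width after every method so the n bars of a group sit side by side.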
mae_u = np.zeros(len(lmdks_pct))
mae_s = np.zeros(len(lmdks_pct))
mae_a = np.zeros(len(lmdks_pct))
mae_evt = 0
mae_usr = 0
for i, pct in enumerate(lmdks_pct):
# Find landmarks
lmdks = seq[seq[:, 1] < lmdks_th[i]]
for _ in range(args.iter):
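# Average the MAE of each budget-allocation scheme over args.iter repetitions.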
# Skip
rls_data_s, bgts_s = lmdk_bgt.skip_cons(seq, lmdks, epsilon)
# lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s)
mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter
# Uniform
rls_data_u, bgts_u = lmdk_bgt.uniform_cons(seq, lmdks, epsilon)
mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter
# Adaptive
rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon, .5, .5)
mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter
# Calculate once
# Event
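# Event-level baseline: with the lowest threshold (0) no reading qualifies as a landmark.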
if i == 0:
rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon)
mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter
# User
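# User-level baseline: with the highest threshold practically every reading is a landmark.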
if i == 0:
rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[len(lmdks_th)-1]], epsilon)
mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter
plt.axhline(
y = mae_evt,
color = '#212121',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
plt.axhline(
y = mae_usr,
color = '#616161',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
plt.bar(
x_i + x_offset,
mae_s,
bar_width,
label='Skip',
linewidth=lmdk_lib.line_width
)
x_offset += bar_width
plt.bar(
x_i + x_offset,
mae_u,
bar_width,
label='Uniform',
linewidth=lmdk_lib.line_width
)
x_offset += bar_width
plt.bar(
x_i + x_offset,
mae_a,
bar_width,
label='Adaptive',
linewidth=lmdk_lib.line_width
)
x_offset += bar_width
path = str('../../rslt/bgt_cmp/' + d)
# Plot legend
lmdk_lib.plot_legend()
# Show plot
# plt.show()
# Save plot
lmdk_lib.save_plot(path + '.pdf')
print('[OK]', flush=True)
def parse_args():
'''
Parse arguments.
Optional:
res - The results archive file.
iter - The total iterations.
'''
# Create argument parser.
parser = argparse.ArgumentParser()
# Mandatory arguments.
# Optional arguments.
parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip')
parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)
# Parse arguments.
args = parser.parse_args()
return args
if __name__ == '__main__':
try:
start_time = time.time()
main(parse_args())
end_time = time.time()
print('##############################')
print('Time : %.4fs' % (end_time - start_time))
print('##############################')
except KeyboardInterrupt:
print('Interrupted by user.')
exit()

View File

@@ -19,11 +19,11 @@ def main(args):
# Contacts for all users
cont_data = lmdk_lib.load_data(args, 'cont')
# Contacts for landmark's percentages for all users
lmdk_data = lmdk_lib.load_data(args, 'usrs_expt')
lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
# The name of the dataset
d = 'Copenhagen'
# The user's id
uid = '623'
uid = '449'
# The landmarks percentages
lmdks_pct = [0, 20, 40, 60, 80, 100]
# The privacy budget
@@ -39,7 +39,7 @@ def main(args):
print('\n##############################', d, '\n')
# Get user's contacts sequence
seq = cont_data[cont_data[:, 1] == float(uid)]
seq = cont_data[cont_data[:, 1] == float(uid)][:1000]
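# Keep only the user's first 1000 contacts (the range used when selecting suitable users).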
# Initialize plot
lmdk_lib.plot_init()
@@ -50,15 +50,15 @@ def main(args):
# The y axis
plt.ylabel('Mean absolute error') # Set y axis label.
# plt.yscale('log')
plt.ylim(0, 1.4)
# plt.ylim(0, 1.4)
# Bar offset
x_offset = -(bar_width/2)*(n - 1)
mae_u = np.zeros(len(lmdks_pct))
mae_s = np.zeros(len(lmdks_pct))
mae_a = np.zeros(len(lmdks_pct))
mae_evt = np.zeros(len(lmdks_pct))
mae_usr = np.zeros(len(lmdks_pct))
mae_evt = 0
mae_usr = 0
for i, pct in enumerate(lmdks_pct):
# Find landmarks
@@ -79,30 +79,28 @@ def main(args):
rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
# Event
# Calculate once
if i == 0:
# Event
rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
# User
# Calculate once
if i == 0:
mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
# User
rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
plt.plot(
x_i,
mae_evt,
plt.axhline(
y = mae_evt,
color = '#212121',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1], mae_evt[-1], ' event')
plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
plt.plot(
x_i,
mae_usr,
plt.axhline(
y = mae_usr,
color = '#616161',
linewidth=lmdk_lib.line_width
)
plt.text(x_i[-1], mae_usr[-1], ' user')
plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
plt.bar(
x_i + x_offset,

View File

@@ -17,22 +17,11 @@ import time
import zipfile
'''
Users suitable for the experiment, based on their first 1000 contacts (user id, total contacts).
usr n
311 8193
317 2044
323 1642
366 3406
368 5099
478 2616
486 3112
508 4981
585 9443
595 4459
616 4355
623 1586
637 4479
664 4315
688 19728
449 12167
550 4221
689 3228
705 5
'''
@@ -90,15 +79,15 @@ def main(args):
# Check each user
goal = [.2, .4, .6, .8]
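# A user is kept for the experiment only if each of these landmark-percentage goals can be met within their first 1000 contacts.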
# Get users' data from previous parsing
usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
if usrs_expt.size == 0:
usrs_data = lmdk_lib.load_data(args, 'usrs_data')
if usrs_data.size == 0:
# Users suitable for experiments
usrs_expt = []
for usr_i, usr in enumerate(usrs):
print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
usrs_expt_cur = []
# User's contacts
usr_cont = cont_data[cont_data[:, uid_a] == usr]
# User's first 1000 contacts
usr_cont = cont_data[cont_data[:, uid_a] == usr][:1000]
# For each goal
for_expt = True
for g in goal:
@@ -130,9 +119,10 @@ def main(args):
for_expt = False
break
# Save to results
lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt')
usrs_data = np.array(usrs_expt, str)
lmdk_lib.save_data(args, usrs_data, 'usrs_data')
# Get all users
usrs = np.unique(usrs_expt[:, 0])
usrs = np.unique(usrs_data[:, 0])
for usr in usrs:
usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
print(int(usr), len(usr_cont))
@@ -145,7 +135,7 @@ def main(args):
# # All user contacts
# usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
# # All user landmarks for different goals
# usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
# usr_lmdk = usrs_data[usrs_data[:, 0] == usr]
# for g in goal:
# # Get goal landmarks
# cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])