the-last-thing/code/expt/copenhagen-sel.py

196 lines
5.2 KiB
Python
Raw Normal View History

2021-10-06 01:44:50 +02:00
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
import ast
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import math
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
    '''
      Compare the skip, uniform, and adaptive budget allocation methods on
      the contacts of one Copenhagen user, for several landmark percentages
      and with private landmark selection, and save the result as a bar
      chart of the mean absolute error.

      Parameters:
        args - The parsed command line arguments.  `args.iter` is the number
               of repetitions that every measurement is averaged over; the
               whole object is also forwarded to `lmdk_lib.load_data`.
      Returns:
        Nothing; the plot is written to '../../rslt/bgt_cmp/Copenhagen-sel.pdf'.
    '''
    # `exp_mech` and `lmdk_sel` are referenced by the selection step but were
    # never imported, which made that step fail with a NameError.  They are
    # imported here, i.e., after the file header has put '../lib' on sys.path.
    # NOTE(review): assumes both modules sit in '../lib' next to lmdk_lib —
    # confirm.
    import exp_mech
    import lmdk_sel

    # Contacts for all users
    cont_data = lmdk_lib.load_data(args, 'cont')
    # Contacts for landmark's percentages for all users
    lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
    # The name of the dataset
    d = 'Copenhagen'
    # The user's id
    uid = '449'
    # The landmarks percentages
    lmdks_pct = [0, 20, 40, 60, 80, 100]
    # The privacy budget
    epsilon = 1.0
    # Number of methods
    n = 3
    # Width of bars
    bar_width = 1/(n + 1)
    # The x axis
    x_i = np.arange(len(lmdks_pct))
    x_margin = bar_width*(n/2 + 1)
    print('\n##############################', d, '\n')
    # Get user's contacts sequence: the rows whose second column equals the
    # user's id, truncated to the first 1000.
    seq = cont_data[cont_data[:, 1] == float(uid)][:1000]

    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    plt.xticks(x_i, np.array(lmdks_pct, int))
    plt.xlabel('Landmarks (%)')
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Mean absolute error (%)')
    plt.ylim(0, 100)
    # Offset of the first bar of every group, so that the n bars end up
    # centered on their tick.
    x_offset = -(bar_width/2)*(n - 1)

    mae_u = np.zeros(len(lmdks_pct))
    mae_s = np.zeros(len(lmdks_pct))
    mae_a = np.zeros(len(lmdks_pct))
    mae_evt = 0
    mae_usr = 0
    for i, pct in enumerate(lmdks_pct):
        # Find landmarks
        lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct)
        for _ in range(args.iter):
            # Every repetition starts from the actual landmarks.  Reusing
            # `lmdks` for the private selection made each repetition build on
            # the previous repetition's already privatized landmarks.
            lmdks_rls = lmdks
            eps_sel = 0
            # With 0% or 100% landmarks there is nothing to select, so the
            # whole budget goes to the release.
            if pct != 0 and pct != 100:
                # Get landmarks timestamps in sequence
                lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
                # Turn landmarks to histogram
                hist, _h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                # Find all possible options
                opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                # Landmarks selection budget
                eps_sel = epsilon/(len(lmdks_seq) + 1)
                # Get private landmarks timestamps
                lmdks_seq_prv, _unused = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
                # Get actual landmarks values
                # NOTE(review): the timestamps come from a sequence that
                # starts at 1 but are used as 0-based row indices — confirm.
                lmdks_rls = seq[lmdks_seq_prv]
            # The budget that is left after the selection
            eps_rls = epsilon - eps_sel
            # Skip
            rls_data_s, _bgts_s = lmdk_bgt.skip_cont(seq, lmdks_rls, eps_rls)
            mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter
            # Uniform
            rls_data_u, _bgts_u = lmdk_bgt.uniform_cont(seq, lmdks_rls, eps_rls)
            mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter
            # Adaptive
            rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks_rls, eps_rls, .5, .5)
            mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
            # The reference lines do not depend on the percentage, so they
            # are calculated only while processing the first one.
            if i == 0:
                # Event: uniform allocation with 0% landmarks
                rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
                mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
                # User: uniform allocation with 100% landmarks
                rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
                mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter

    # Scale to match the percentage axis
    mae_u *= 100
    mae_s *= 100
    mae_a *= 100
    mae_evt *= 100
    mae_usr *= 100

    # Reference lines, labeled to the right of the last group of bars
    plt.axhline(
        y = mae_evt,
        color = '#212121',
        linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.05, 'event')
    plt.axhline(
        y = mae_usr,
        color = '#616161',
        linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.05, 'user')

    # One bar per method in every group, drawn from left to right
    for mae, label in ((mae_s, 'Skip'), (mae_u, 'Uniform'), (mae_a, 'Adaptive')):
        plt.bar(
            x_i + x_offset,
            mae,
            bar_width,
            label=label,
            linewidth=lmdk_lib.line_width
        )
        x_offset += bar_width

    path = '../../rslt/bgt_cmp/' + d
    # Plot legend
    lmdk_lib.plot_legend()
    # Save plot
    lmdk_lib.save_plot(path + '-sel.pdf')
    print('[OK]', flush=True)
def _positive_int(value):
    '''
      Convert a command line value to a strictly positive integer.

      Parameters:
        value - The raw string given on the command line.
      Returns:
        The value as an integer that is greater than or equal to 1.
      Raises:
        ValueError - If the value is not an integer (argparse reports it
                     as an invalid value).
        argparse.ArgumentTypeError - If the integer is lower than 1.
    '''
    number = int(value)
    if number < 1:
        raise argparse.ArgumentTypeError('%r is not a positive integer' % value)
    return number


def parse_args():
    '''
      Parse arguments.

      Optional:
        res  - The results archive file.
        iter - The total iterations (a positive integer, 1 by default).

      Returns:
        The namespace with the parsed arguments.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Results.zip')
    # Every measurement is averaged over `iter` repetitions, so 0 or a
    # negative value would silently yield all-zero results; reject them.
    parser.add_argument('-i', '--iter', help='The total iterations.', type=_positive_int, default=1)

    # Parse arguments.
    return parser.parse_args()
if __name__ == '__main__':
    # Run the experiment and report how long it took; Ctrl-C ends the run
    # with a short message instead of a traceback.
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # `exit()` is only injected by the `site` module for interactive use
        # and is missing under `python -S`; `sys.exit()` is always available
        # and leaves with the same status.
        sys.exit()