code: WIP

This commit is contained in:
Manos Katsomallos 2021-10-06 01:44:50 +02:00
parent 0a45c9d1bd
commit 0aca4ec9c1
4 changed files with 599 additions and 0 deletions

195
code/expt/copenhagen-sel.py Normal file
View File

@ -0,0 +1,195 @@
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
import ast
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import math
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
    '''
    Compare the Skip, Uniform, and Adaptive budget allocations on the
    contacts of one Copenhagen user when the landmarks are first selected
    through the exponential mechanism, and save the resulting bar chart.

    For every landmarks percentage the mean absolute error (in %) of each
    method is averaged over `args.iter` iterations.  The 'event' and 'user'
    reference lines are the uniform allocation with 0% and 100% landmarks.

    Parameters
    ----------
    args : argparse.Namespace
        The parsed arguments.  `iter` is read here; the whole object is
        forwarded to `lmdk_lib.load_data`.
    '''
    # `exp_mech` and `lmdk_sel` are used below but are not imported at the top
    # of this script, so the selection branch used to raise NameError.
    # NOTE(review): presumably both live in '../lib' next to `lmdk_lib` --
    # confirm, and consider moving these imports to the top of the file.
    import exp_mech
    import lmdk_sel

    # Contacts for all users
    cont_data = lmdk_lib.load_data(args, 'cont')
    # Contacts for landmark's percentages for all users
    lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
    # The name of the dataset
    d = 'Copenhagen'
    # The user's id
    uid = '449'
    # The landmarks percentages
    lmdks_pct = [0, 20, 40, 60, 80, 100]
    # The privacy budget
    epsilon = 1.0
    # Number of methods
    n = 3
    # Width of bars
    bar_width = 1/(n + 1)
    # The x axis
    x_i = np.arange(len(lmdks_pct))
    x_margin = bar_width*(n/2 + 1)

    print('\n##############################', d, '\n')

    # Get user's contacts sequence (first 1000 rows)
    seq = cont_data[cont_data[:, 1] == float(uid)][:1000]

    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    plt.xticks(x_i, np.array(lmdks_pct, int))
    plt.xlabel('Landmarks (%)')
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Mean absolute error (%)')
    plt.ylim(0, 100)
    # Bar offset: centers the group of `n` bars on each tick
    x_offset = -(bar_width/2)*(n - 1)

    mae_u = np.zeros(len(lmdks_pct))
    mae_s = np.zeros(len(lmdks_pct))
    mae_a = np.zeros(len(lmdks_pct))
    mae_evt = 0
    mae_usr = 0
    for i, pct in enumerate(lmdks_pct):
        # Find the actual landmarks
        lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct)
        for _ in range(args.iter):
            # Every iteration starts from the actual landmarks.  Previously
            # `lmdks` itself was overwritten with the private selection, so
            # iterations after the first selected from already privatized
            # landmarks.
            lmdks_sel = lmdks
            eps_sel = 0
            # Nothing to select when there are no landmarks or when
            # everything is a landmark.
            if pct != 0 and pct != 100:
                # Get landmarks timestamps in sequence
                lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
                # Turn landmarks to histogram
                hist, _h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                # Find all possible options
                opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                # Landmarks selection budget
                eps_sel = epsilon/(len(lmdks_seq) + 1)
                # Get private landmarks timestamps
                lmdks_seq, _score = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
                # Get actual landmarks values
                # NOTE(review): `find_lmdks_seq` yields 1-based timestamps; if
                # the mechanism returns 1-based timestamps too, this 0-based
                # indexing is shifted by one -- confirm.
                lmdks_sel = seq[lmdks_seq]
            # The budget left for publishing after the selection
            eps_pub = epsilon - eps_sel
            # Skip
            rls_data_s, _bgts_s = lmdk_bgt.skip_cont(seq, lmdks_sel, eps_pub)
            mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter
            # Uniform
            rls_data_u, _bgts_u = lmdk_bgt.uniform_cont(seq, lmdks_sel, eps_pub)
            mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter
            # Adaptive
            rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks_sel, eps_pub, .5, .5)
            mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
            # Calculate the reference lines once (first percentage only)
            if i == 0:
                # Event
                rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
                mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
                # User
                rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
                mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter

    # Scale by 100 to match the '%' unit of the y axis
    mae_u *= 100
    mae_s *= 100
    mae_a *= 100
    mae_evt *= 100
    mae_usr *= 100

    # Plot reference lines
    plt.axhline(
        y=mae_evt,
        color='#212121',
        linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.05, 'event')
    plt.axhline(
        y=mae_usr,
        color='#616161',
        linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.05, 'user')

    # Plot bars, one per method, side by side
    for label, mae in (('Skip', mae_s), ('Uniform', mae_u), ('Adaptive', mae_a)):
        plt.bar(
            x_i + x_offset,
            mae,
            bar_width,
            label=label,
            linewidth=lmdk_lib.line_width
        )
        x_offset += bar_width

    path = str('../../rslt/bgt_cmp/' + d)
    # Plot legend
    lmdk_lib.plot_legend()
    # Save plot
    lmdk_lib.save_plot(path + '-sel.pdf')
    print('[OK]', flush=True)
def parse_args():
    '''
    Read the command-line options of the script.

    Optional:
        res  - The results archive file.
        iter - The total iterations.

    Returns the namespace produced by `argparse`.
    '''
    # Both options are optional; there are no mandatory arguments.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-r', '--res',
        type=str,
        default='/home/manos/Cloud/Data/Copenhagen/Results.zip',
        help='The results archive file.'
    )
    cli.add_argument(
        '-i', '--iter',
        type=int,
        default=1,
        help='The total iterations.'
    )
    return cli.parse_args()
if __name__ == '__main__':
    # Run the experiment and report how long it took; Ctrl-C ends the script
    # with a short message instead of a traceback.
    try:
        t_begin = time.time()
        main(parse_args())
        t_finish = time.time()
        elapsed = time.gmtime(t_finish - t_begin)
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', elapsed)))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        exit()

185
code/expt/hue-sel.py Normal file
View File

@ -0,0 +1,185 @@
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
import ast
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import math
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
    '''
    Compare the Skip, Uniform, and Adaptive budget allocations on the HUE
    consumption sequence when the landmarks are first selected through the
    exponential mechanism, and save the resulting bar chart.

    For every landmarks percentage the mean absolute error of each method is
    averaged over `args.iter` iterations.  The 'event' and 'user' reference
    lines are the uniform allocation with the first and the last landmarks
    threshold respectively.

    Parameters
    ----------
    args : argparse.Namespace
        The parsed arguments.  `iter` is read here; the whole object is
        forwarded to `lmdk_lib.load_data`.
    '''
    # `exp_mech` and `lmdk_sel` are used below but are not imported at the top
    # of this script, so the selection branch used to raise NameError.
    # NOTE(review): presumably both live in '../lib' next to `lmdk_lib` --
    # confirm, and consider moving these imports to the top of the file.
    import exp_mech
    import lmdk_sel

    # User's consumption
    seq = lmdk_lib.load_data(args, 'cons')
    # The name of the dataset
    d = 'HUE'
    # The landmarks percentages
    lmdks_pct = [0, 20, 40, 60, 80, 100]
    # Landmarks' thresholds, one per percentage: rows whose second column is
    # below the threshold are the landmarks
    lmdks_th = [0, .54, .68, .88, 1.12, 10]
    # The privacy budget
    epsilon = 10.0
    # Number of methods
    n = 3
    # Width of bars
    bar_width = 1/(n + 1)
    # The x axis
    x_i = np.arange(len(lmdks_pct))
    x_margin = bar_width*(n/2 + 1)

    print('\n##############################', d, '\n')

    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    plt.xticks(x_i, np.array(lmdks_pct, int))
    plt.xlabel('Landmarks (%)')
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Mean absolute error (kWh)')
    plt.yscale('log')
    # Bar offset: centers the group of `n` bars on each tick
    x_offset = -(bar_width/2)*(n - 1)

    mae_u = np.zeros(len(lmdks_pct))
    mae_s = np.zeros(len(lmdks_pct))
    mae_a = np.zeros(len(lmdks_pct))
    mae_evt = 0
    mae_usr = 0
    for i, pct in enumerate(lmdks_pct):
        # Find the actual landmarks
        lmdks = seq[seq[:, 1] < lmdks_th[i]]
        for _ in range(args.iter):
            # Every iteration starts from the actual landmarks.  Previously
            # `lmdks` itself was overwritten with the private selection, so
            # iterations after the first selected from already privatized
            # landmarks.
            lmdks_sel = lmdks
            eps_sel = 0
            # Nothing to select when there are no landmarks or when
            # everything is a landmark.
            if pct != 0 and pct != 100:
                # Get landmarks timestamps in sequence
                lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
                # Turn landmarks to histogram
                hist, _h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                # Find all possible options
                opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                # Landmarks selection budget
                eps_sel = epsilon/(len(lmdks_seq) + 1)
                # Get private landmarks timestamps
                lmdks_seq, _score = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
                # Get actual landmarks values
                # NOTE(review): `find_lmdks_seq` yields 1-based timestamps; if
                # the mechanism returns 1-based timestamps too, this 0-based
                # indexing is shifted by one -- confirm.
                lmdks_sel = seq[lmdks_seq]
            # The budget left for publishing after the selection
            eps_pub = epsilon - eps_sel
            # Skip
            rls_data_s, _bgts_s = lmdk_bgt.skip_cons(seq, lmdks_sel, eps_pub)
            mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter
            # Uniform
            rls_data_u, _bgts_u = lmdk_bgt.uniform_cons(seq, lmdks_sel, eps_pub)
            mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter
            # Adaptive
            rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks_sel, eps_pub, .5, .5)
            mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter
            # Calculate the reference lines once (first percentage only)
            if i == 0:
                # Event
                rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon)
                mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter
                # User
                rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[-1]], epsilon)
                mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter

    # Plot reference lines
    plt.axhline(
        y=mae_evt,
        color='#212121',
        linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
    plt.axhline(
        y=mae_usr,
        color='#616161',
        linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')

    # Plot bars, one per method, side by side
    for label, mae in (('Skip', mae_s), ('Uniform', mae_u), ('Adaptive', mae_a)):
        plt.bar(
            x_i + x_offset,
            mae,
            bar_width,
            label=label,
            linewidth=lmdk_lib.line_width
        )
        x_offset += bar_width

    path = str('../../rslt/bgt_cmp/' + d)
    # Plot legend
    lmdk_lib.plot_legend()
    # Save plot
    lmdk_lib.save_plot(path + '-sel.pdf')
    print('[OK]', flush=True)
def parse_args():
    '''
    Read the command-line options of the script.

    Optional:
        res  - The results archive file.
        iter - The total iterations.

    Returns the namespace produced by `argparse`.
    '''
    # Both options are optional; there are no mandatory arguments.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-r', '--res',
        type=str,
        default='/home/manos/Cloud/Data/HUE/Results.zip',
        help='The results archive file.'
    )
    cli.add_argument(
        '-i', '--iter',
        type=int,
        default=1,
        help='The total iterations.'
    )
    return cli.parse_args()
if __name__ == '__main__':
    # Run the experiment and report how long it took; Ctrl-C ends the script
    # with a short message instead of a traceback.
    try:
        t_begin = time.time()
        main(parse_args())
        t_finish = time.time()
        elapsed = time.gmtime(t_finish - t_begin)
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', elapsed)))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        exit()

211
code/expt/t-drive-sel.py Normal file
View File

@ -0,0 +1,211 @@
#!/usr/bin/env python3
import sys
sys.path.insert(1, '../lib')
import argparse
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import numpy as np
from matplotlib import pyplot as plt
import time
def main(args):
    '''
    Compare the Skip, Uniform, and Adaptive budget allocations on the
    trajectory of one T-drive user when the landmarks are first selected
    through the exponential mechanism, and save one bar chart per data set.

    For every landmarks percentage the mean absolute error of each method is
    averaged over `args.iter` iterations.  The 'event' and 'user' reference
    lines are the uniform allocation at 0% and 100% landmarks respectively.

    Parameters
    ----------
    args : argparse.Namespace
        The parsed arguments.  `time` and `iter` are read; `res`, `dist`, and
        `per` are overwritten here before the object is forwarded to
        `lmdk_lib.load_data` and `lmdk_lib.find_lmdks`.
    '''
    # `exp_mech` and `lmdk_sel` are used below but are not imported at the top
    # of this script, so the selection branch used to raise NameError.
    # NOTE(review): presumably both live in '../lib' next to `lmdk_lib` --
    # confirm, and consider moving these imports to the top of the file.
    import exp_mech
    import lmdk_sel

    # The data files
    data_files = {
        'T-drive': '/home/manos/Cloud/Data/T-drive/Results.zip',
    }
    # Data related info: the user and, per landmarks percentage, the
    # `dist`/`per` thresholds handed to `lmdk_lib.find_lmdks`
    data_info = {
        'T-drive': {
            'uid': 2,
            'lmdks': {
                0: {'dist': 0, 'per': 1000},     # 0.0%
                20: {'dist': 2095, 'per': 30},   # 19.6%
                40: {'dist': 2790, 'per': 30},   # 40.2%
                60: {'dist': 3590, 'per': 30},   # 59.9%
                80: {'dist': 4825, 'per': 30},   # 79.4%
                100: {'dist': 10350, 'per': 30}  # 100.0%
            }
        }
    }
    # Load data sets
    data_sets = {}
    for name, res in data_files.items():
        args.res = res
        data_sets[name] = lmdk_lib.load_data(args, 'usrs_data')
    # Geo-I configuration
    # epsilon = level/radius
    # Radius is in meters
    bgt_conf = [
        {'epsilon': 1},
    ]
    # Number of methods
    n = 3
    # Width of bars
    bar_width = 1/(n + 1)
    # The x axis
    x_i = np.arange(len(list(data_info.values())[0]['lmdks']))
    x_margin = bar_width*(n/2 + 1)

    for d in data_sets:
        print('\n##############################', d, '\n')
        args.res = data_files[d]
        data = data_sets[d]
        lmdks_info = data_info[d]['lmdks']
        # Truncate trajectory according to arguments
        seq = data[data[:, 0] == data_info[d]['uid'], :][:args.time]

        # Initialize plot
        lmdk_lib.plot_init()
        # The x axis
        plt.xticks(x_i, np.array(list(lmdks_info)).astype(int))
        plt.xlabel('Landmarks (%)')
        plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
        # The y axis
        plt.ylabel('Mean absolute error (m)')
        plt.yscale('log')
        # Bar offset: centers the group of `n` bars on each tick
        x_offset = -(bar_width/2)*(n - 1)

        mae_u = np.zeros(len(lmdks_info))
        mae_s = np.zeros(len(lmdks_info))
        mae_a = np.zeros(len(lmdks_info))
        mae_evt = 0
        mae_usr = 0
        for i, lmdk in enumerate(lmdks_info):
            # Find the actual landmarks
            args.dist = lmdks_info[lmdk]['dist']
            args.per = lmdks_info[lmdk]['per']
            lmdks = lmdk_lib.find_lmdks(seq, args)[:args.time]
            for bgt in bgt_conf:
                for _ in range(args.iter):
                    # Every iteration starts from the actual landmarks.
                    # Previously `lmdks` itself was overwritten with the
                    # private selection, so later iterations selected from
                    # already privatized landmarks.
                    lmdks_sel = lmdks
                    eps_sel = 0
                    # Nothing to select when there are no landmarks or when
                    # everything is a landmark.
                    if lmdk != 0 and lmdk != 100:
                        # Get landmarks timestamps in sequence
                        lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
                        # Turn landmarks to histogram
                        hist, _h = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                        # Find all possible options
                        opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
                        # Landmarks selection budget
                        eps_sel = bgt['epsilon']/(len(lmdks_seq) + 1)
                        # Get private landmarks timestamps
                        lmdks_seq, _score = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
                        # Get actual landmarks values
                        # NOTE(review): `find_lmdks_seq` yields 1-based
                        # timestamps; if the mechanism returns 1-based
                        # timestamps too, this 0-based indexing is shifted by
                        # one -- confirm.
                        lmdks_sel = seq[lmdks_seq]
                    # The budget left for publishing after the selection
                    eps_pub = bgt['epsilon'] - eps_sel
                    # Skip
                    rls_data_s, _ = lmdk_bgt.skip(seq, lmdks_sel, eps_pub)
                    mae_s[i] += lmdk_bgt.mae(seq, rls_data_s)/args.iter
                    # Uniform
                    rls_data_u, _ = lmdk_bgt.uniform_r(seq, lmdks_sel, eps_pub)
                    mae_u[i] += lmdk_bgt.mae(seq, rls_data_u)/args.iter
                    # Adaptive
                    rls_data_a, _, _ = lmdk_bgt.adaptive(seq, lmdks_sel, eps_pub, .5, .5)
                    mae_a[i] += lmdk_bgt.mae(seq, rls_data_a)/args.iter
                    # Event
                    if lmdk == 0:
                        rls_data_evt, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon'])
                        mae_evt += lmdk_bgt.mae(seq, rls_data_evt)/args.iter
                    # User
                    if lmdk == 100:
                        rls_data_usr, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon'])
                        mae_usr += lmdk_bgt.mae(seq, rls_data_usr)/args.iter

        # Plot lines
        plt.axhline(
            y=mae_evt,
            color='#212121',
            linewidth=lmdk_lib.line_width
        )
        plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
        plt.axhline(
            y=mae_usr,
            color='#616161',
            linewidth=lmdk_lib.line_width
        )
        plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')

        # Plot bars, one per method, side by side
        for label, mae in (('Skip', mae_s), ('Uniform', mae_u), ('Adaptive', mae_a)):
            plt.bar(
                x_i + x_offset,
                mae,
                bar_width,
                label=label,
                linewidth=lmdk_lib.line_width
            )
            x_offset += bar_width

        path = str('../../rslt/bgt_cmp/' + d)
        # Plot legend
        lmdk_lib.plot_legend()
        # Save plot
        lmdk_lib.save_plot(path + '-sel.pdf')
        print('[OK]', flush=True)
def parse_args():
    '''
    Parse arguments.

    Optional:
        dist - The coordinates distance threshold in meters.
        per  - The timestamps period threshold in minutes.
        res  - The results archive file.
        time - The total timestamps.
        iter - The total iterations.

    Returns the namespace produced by `argparse`.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Optional arguments (there are no mandatory ones).
    parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
    # Help text spelling fixed ('timestaps' / 'mimutes').
    parser.add_argument('-p', '--per', help='The timestamps period threshold in minutes.', type=int, default=30)
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')
    parser.add_argument('-t', '--time', help='The total timestamps.', type=int, default=1000)
    parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)

    # Parse arguments.
    return parser.parse_args()
if __name__ == '__main__':
    # Run the experiment and report how long it took; Ctrl-C ends the script
    # with a short message instead of a traceback.
    try:
        t_begin = time.time()
        main(parse_args())
        t_finish = time.time()
        elapsed = time.gmtime(t_finish - t_begin)
        print('##############################')
        print('Time elapsed: %s' % (time.strftime('%H:%M:%S', elapsed)))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        exit()

View File

@ -907,6 +907,14 @@ def find_lmdks(usrs_data, args):
return usrs_lmdks return usrs_lmdks
def find_lmdks_seq(seq, lmdks):
    '''
    Find the timestamps of the landmarks in a sequence.

    A row of `seq` counts as a landmark when all of its values equal those
    of some row of `lmdks`.

    Parameters
    ----------
    seq : ndarray
        The sequence, one data point per row.
    lmdks : ndarray
        The landmarks, one per row (a single 1-D landmark is accepted too).

    Returns
    -------
    lmdks_seq : ndarray of int
        The 1-based positions in `seq` of the rows that are landmarks, in
        increasing order; empty when there are no landmarks.
    '''
    lmdks = np.asarray(lmdks)
    # No landmarks at all: nothing can match (also avoids reducing over a
    # missing axis for an empty 1-D input).
    if lmdks.size == 0:
        return np.array([], dtype=int)
    lmdks = np.atleast_2d(lmdks)
    lmdks_seq = [
        i + 1  # timestamps start from 1
        for i, p in enumerate(seq)
        if np.equal(lmdks, p).all(1).any()
    ]
    # `np.numpy` does not exist (AttributeError); build the array with `np.array`.
    return np.array(lmdks_seq, dtype=int)
def find_lmdks_tim(lmdk_data, seq, uid, pct): def find_lmdks_tim(lmdk_data, seq, uid, pct):
''' '''
Find user's landmarks timestamps. Find user's landmarks timestamps.