diff --git a/code/expt/hue-sel.py b/code/expt/hue-sel.py index 38bbb1c..22083f4 100644 --- a/code/expt/hue-sel.py +++ b/code/expt/hue-sel.py @@ -27,7 +27,7 @@ def main(args): # Landmarks' thresholds lmdks_th = [0, .54, .68, .88, 1.12, 10] # The privacy budget - epsilon = 10.0 + epsilon = 1.0 # Number of methods n = 3 @@ -48,7 +48,7 @@ def main(args): # The y axis plt.ylabel('Mean absolute error (kWh)') # Set y axis label. plt.yscale('log') - plt.ylim(.01, 1000) + plt.ylim(.1, 10000) # Bar offset x_offset = -(bar_width/2)*(n - 1) diff --git a/code/expt/hue.py b/code/expt/hue.py index 3c2e053..cb4cb2e 100644 --- a/code/expt/hue.py +++ b/code/expt/hue.py @@ -25,7 +25,7 @@ def main(args): # Landmarks' thresholds lmdks_th = [0, .54, .68, .88, 1.12, 10] # The privacy budget - epsilon = 10.0 + epsilon = 1.0 # Number of methods n = 3 @@ -46,7 +46,7 @@ def main(args): # The y axis plt.ylabel('Mean absolute error (kWh)') # Set y axis label. plt.yscale('log') - plt.ylim(.01, 1000) + plt.ylim(.1, 10000) # Bar offset x_offset = -(bar_width/2)*(n - 1) diff --git a/code/lib/lmdk_bgt.py b/code/lib/lmdk_bgt.py index b4b8518..f3e668f 100644 --- a/code/lib/lmdk_bgt.py +++ b/code/lib/lmdk_bgt.py @@ -477,7 +477,7 @@ def adaptive_cons(seq, lmdks, epsilon, inc_rt, dec_rt): skipped = 0 for i, p in enumerate(seq): # Check if current point is a landmark - is_landmark = any((lmdks[:]==p).all(1)) + is_landmark = any(np.equal(lmdks, p).all(1)) if is_landmark: lmdk_cur += 1 if lmdk_lib.should_sample(samp_rt) or i == 0: @@ -584,7 +584,7 @@ def skip_cons(seq, lmdks, epsilon): rls_data = [None]*len(seq) for i, p in enumerate(seq): # Check if current point is a landmark - is_landmark = any((lmdks[:]==p).all(1)) + is_landmark = any(np.equal(lmdks, p).all(1)) # Add noise o = [p[0], lmdk_lib.add_laplace_noise(p[1], 1, bgts[i])] if is_landmark: diff --git a/code/parse_t-drive.py b/code/parse_t-drive.py new file mode 100644 index 0000000..2172067 --- /dev/null +++ b/code/parse_t-drive.py @@ -0,0 +1,132 
@@
+#!/usr/bin/env python3
+
+import argparse
+import csv
+from datetime import datetime
+from geopy.distance import distance
+import io
+import lmdk_lib
+import numpy as np
+import os
+import matplotlib.pyplot as plt
+import sys
+import time
+import zipfile
+
+
+'''
+############ Stats ###########
+
+Trajectories: 10357
+  Length
+    Total : 17662984
+    Minimum :
+    Maximum :
+
+Landmarks : 212816
+  Length
+    Total : 6851102 (27.54%)
+    Minimum : 1
+    Maximum : 6896
+
+##############################
+'''
+
+
+# https://cloud.delkappa.com/s/2Rs2wjS8zmt5bAE
+# T-drive data format
+# User ID
+uid = 0
+# Coordinates
+lat, lng = 3, 2
+# Timestamp
+tim = 1
+# Timestamp format
+tim_fmt = "%Y-%m-%d %H:%M:%S"
+
+
+'''
+  Get the users' landmarks, parsing the T-drive archive if needed, and
+  print the landmarks' stats.
+
+  The landmarks ('usrs_lmdks') and the users' data ('usrs_data') of a
+  previous parsing are loaded first; the archive is parsed only when both
+  of them are empty.
+
+  Parameters:
+    args - The parsed arguments (see parse_args).
+'''
+def main(args):
+    # Get users' landmarks from previous parsing
+    usrs_lmdks = lmdk_lib.load_data(args, 'usrs_lmdks')
+    if usrs_lmdks.size == 0:
+        # Get users' data from previous parsing
+        usrs_data = lmdk_lib.load_data(args, 'usrs_data')
+        if usrs_data.size == 0:
+            # NOTE(review): float32 keeps a 24-bit significand, so epoch
+            # timestamps of 2008 (~1.2e9 s) are rounded to multiples of
+            # 128 s; confirm that this resolution is acceptable.
+            usrs_data = np.empty((0,4), np.float32)
+            # One array per user, merged once after parsing; appending to
+            # usrs_data per user would copy all the accumulated rows every
+            # time.
+            usrs_chunks = []
+            # Parse users' data
+            try:
+                try:
+                    print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
+                    with zipfile.ZipFile(args.arc, 'r') as arc:
+                        print('[OK]')
+                        # Get the list of users
+                        print('Getting users... ', end='', flush=True)
+                        # List of users (the file names without the extension)
+                        usrs = [info.filename.split('/')[2].split('.')[0] for info in arc.infolist() if '.txt' in info.filename]
+                        # Sort users numerically
+                        usrs.sort(key=int)
+                        print('[OK]')
+                        for usr in usrs:
+                            points = []
+                            traj_file = 'release/taxi_log_2008_by_id/' + usr + '.txt'
+                            print('[%d%% (%s/%d)] Loading data from %s... ' %((int(usr)/len(usrs))*100, usr, len(usrs), traj_file), end='', flush=True)
+                            with io.TextIOWrapper(arc.open(traj_file), newline='\n') as dat:
+                                try:
+                                    traj = csv.reader(dat, delimiter=',')
+                                    for p in traj:
+                                        # tim_fmt has no time zone, so timestamp()
+                                        # interprets the value as local time.
+                                        points.append([p[uid], p[lat], p[lng], datetime.strptime(p[tim], tim_fmt).timestamp()])
+                                    print('[OK]')
+                                except Exception as e:
+                                    # Keep the points read so far and go on with the next user
+                                    print('[Error: %s]' %(e))
+                            if points:
+                                usrs_chunks.append(np.asarray(points, dtype=np.float32))
+                finally:
+                    # Keep whatever was parsed, even if the parsing stopped early
+                    if usrs_chunks:
+                        usrs_data = np.concatenate([usrs_data] + usrs_chunks, axis=0)
+                # Save to results
+                lmdk_lib.save_data(args, usrs_data, 'usrs_data')
+            except Exception as e:
+                print('[Error: %s]' %(e))
+        # Find users' landmarks
+        usrs_lmdks = lmdk_lib.find_lmdks(usrs_data, args)
+        # Save to results
+        lmdk_lib.save_data(args, usrs_lmdks, 'usrs_lmdks')
+    # Landmarks' stats
+    lmdk_lib.lmdks_stats(args, usrs_lmdks)
+
+
+'''
+  Parse arguments.
+
+  Optional:
+    arc  - The archive file.
+    dist - The coordinates distance threshold in meters.
+    per  - The timestamps period threshold in minutes.
+    res  - The results zip file.
+'''
+def parse_args():
+    # Create argument parser.
+    parser = argparse.ArgumentParser()
+
+    # Mandatory arguments.
+
+    # Optional arguments.
+    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Data.zip')
+    parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
+    parser.add_argument('-p', '--per', help='The timestamps period threshold in minutes.', type=int, default=30)
+    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')
+
+    # Parse arguments.
+    args = parser.parse_args()
+
+    return args
+
+
+if __name__ == '__main__':
+    try:
+        start_time = time.time()
+        main(parse_args())
+        end_time = time.time()
+        print('##############################')
+        print('Time : %.4fs' % (end_time - start_time))
+        print('##############################')
+    except KeyboardInterrupt:
+        print('Interrupted by user.')
+        # sys.exit() rather than exit(): the latter is a helper that the
+        # site module adds for the interactive shell.
+        sys.exit()
diff --git a/rslt/bgt_cmp/Copenhagen-sel.pdf b/rslt/bgt_cmp/Copenhagen-sel.pdf
index dd6cb5d..5db4b5b 100644
Binary files a/rslt/bgt_cmp/Copenhagen-sel.pdf and b/rslt/bgt_cmp/Copenhagen-sel.pdf differ
diff --git a/rslt/bgt_cmp/Copenhagen.pdf b/rslt/bgt_cmp/Copenhagen.pdf
index f83f3ad..b6788c5 100644
Binary files a/rslt/bgt_cmp/Copenhagen.pdf and b/rslt/bgt_cmp/Copenhagen.pdf differ
diff --git a/rslt/bgt_cmp/HUE-sel.pdf b/rslt/bgt_cmp/HUE-sel.pdf
index 8f36b43..50e6241 100644
Binary files a/rslt/bgt_cmp/HUE-sel.pdf and b/rslt/bgt_cmp/HUE-sel.pdf differ
diff --git a/rslt/bgt_cmp/HUE.pdf b/rslt/bgt_cmp/HUE.pdf
index 361abf9..cc1dc1b 100644
Binary files a/rslt/bgt_cmp/HUE.pdf and b/rslt/bgt_cmp/HUE.pdf differ