#!/usr/bin/env python3
'''
Compare the mean absolute error of the mechanisms provided by `lmdk_bgt`
for an increasing percentage of landmarks, and plot one bar chart per data
set.
'''

import sys
sys.path.insert(1, 'code/lib')
import argparse
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import numpy as np
from matplotlib import pyplot as plt
import time


# The methods to compare: (plot label, mechanism, extra positional arguments).
# The order defines both the evaluation order and the order of the bars.
# Every mechanism is called as `mechanism(seq, lmdks, epsilon, *extra)` and
# only the first element of its result (the released data) is used.
METHODS = (
    ('Skip', lmdk_bgt.skip, ()),
    ('Uniform', lmdk_bgt.uniform_r, ()),
    ('Adaptive', lmdk_bgt.adaptive, (.5, .5)),
    ('Sample', lmdk_bgt.sample, ()),
    ('Discount', lmdk_bgt.discount, ()),
    ('Incremental', lmdk_bgt.incremental, ()),
)


def _mean_abs_errors(seq, lmdks, bgt_conf, iterations):
    '''
    Run every method in METHODS and accumulate its mean absolute error.

    Parameters:
      seq        - The original data sequence.
      lmdks      - The landmarks of the sequence.
      bgt_conf   - The configurations, dicts with an 'epsilon' entry.
      iterations - The number of repetitions to average over.
    Returns:
      A list with one error value per entry of METHODS, in the same order.
    '''
    errors = [0.0]*len(METHODS)
    # NOTE(review): the errors of all the configurations are summed into a
    # single value per method while the division is only by `iterations`.
    # This is an average as long as bgt_conf has exactly one entry; decide
    # how to aggregate (or plot separately) before enabling more.
    for bgt in bgt_conf:
        for _ in range(iterations):
            for m, (_label, mechanism, extra) in enumerate(METHODS):
                rls_data = mechanism(seq, lmdks, bgt['epsilon'], *extra)[0]
                errors[m] += lmdk_bgt.mae(seq, rls_data)/iterations
    return errors


def _plot_bars(x_i, mae, bar_width):
    '''
    Draw one group of bars per x position, one bar per method.

    Parameters:
      x_i       - The x positions of the groups.
      mae       - The errors, one row per entry of METHODS.
      bar_width - The width of a single bar.
    '''
    # Start half a group to the left so that each group is centered on its tick.
    x_offset = -(bar_width/2)*(len(METHODS) - 1)
    for (label, _mechanism, _extra), errors in zip(METHODS, mae):
        plt.bar(
            x_i + x_offset,
            errors,
            bar_width,
            label=label,
            linewidth=lmdk_lib.line_width
        )
        x_offset += bar_width


def main(args):
    '''
    Load the data sets, evaluate every method for every landmarks
    percentage, and show one bar chart per data set.

    Parameters:
      args - The parsed arguments (see parse_args). Its `res`, `dist`, and
             `per` attributes are overwritten while iterating the data sets
             and the landmark levels; `time` and `iter` are only read.
    '''
    # The data files
    data_files = {
        'T-drive': '/home/manos/Cloud/Data/T-drive/Results.zip',
        'Geolife': '/home/manos/Cloud/Data/Geolife/Results.zip'
    }
    # Data related info
    #   uid   - The user whose rows are selected from the data set.
    #   lmdks - Per landmarks percentage (the x tick label), the distance
    #           ('dist') and period ('per') thresholds passed to
    #           lmdk_lib.find_lmdks.  The trailing comments presumably record
    #           the percentage actually obtained -- TODO confirm.
    data_info = {
        'T-drive': {
            'uid': 2,
            'lmdks': {
                0: {'dist': 0, 'per': 1000},     # 0.0%
                20: {'dist': 2095, 'per': 30},   # 19.6%
                40: {'dist': 2790, 'per': 30},   # 40.2%
                60: {'dist': 3590, 'per': 30},   # 59.9%
                80: {'dist': 4825, 'per': 30},   # 79.4%
                100: {'dist': 10350, 'per': 30}  # 100.0%
            }
        },
        'Geolife': {
            'uid': 97,
            'lmdks': {
                0: {'dist': 0, 'per': 100000},   # 0.0%
                20: {'dist': 205, 'per': 30},    # 19.8%
                40: {'dist': 450, 'per': 30},    # 41.7%
                60: {'dist': 725, 'per': 30},    # 59.2%
                80: {'dist': 855, 'per': 30},    # 82.1%
                100: {'dist': 50000, 'per': 30}  # 100.0%
            }
        }
    }
    # Load data sets
    data_sets = {}
    for d, res in data_files.items():
        # load_data receives the archive through the arguments.
        args.res = res
        data_sets[d] = lmdk_lib.load_data(args, 'usrs_data')
    # Geo-I configuration
    # epsilon = level/radius
    # Radius is in meters
    bgt_conf = [
        {'epsilon': 1},
        # {'label': 'ln(2)/200', 'epsilon': 0.0035, 'level': 0.69314718056, 'radius': 200},
        # {'label': 'ln(4)/200', 'epsilon': 0.0069, 'level': 1.38629436112, 'radius': 200},
        # {'label': 'ln(6)/200', 'epsilon': 0.0090, 'level': 1.79175946923, 'radius': 200}
    ]

    # Number of methods
    n = len(METHODS)
    # Width of bars (leaves one bar width of space between the groups)
    bar_width = 1/(n + 1)
    x_margin = bar_width*(n/2 + 1)

    for d, data in data_sets.items():
        print('\n##############################', d, '\n')
        args.res = data_files[d]
        lmdk_conf = data_info[d]['lmdks']
        # Truncate trajectory according to arguments
        seq = data[data[:, 0] == data_info[d]['uid'], :][:args.time]
        # The x axis, sized from this data set's own landmark levels so that
        # the ticks and their labels always match.
        x_i = np.arange(len(lmdk_conf))

        # Initialize plot
        lmdk_lib.plot_init()
        # The x axis
        plt.xticks(x_i, np.array(list(lmdk_conf)).astype(int))
        plt.xlabel('Landmarks percentage')  # Set x axis label.
        plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
        # The y axis
        plt.ylabel('Mean absolute error (m)')  # Set y axis label.
        plt.yscale('log')
        plt.ylim(1, 100000)

        # One row per method, one column per landmarks percentage.
        mae = np.zeros((n, len(lmdk_conf)))
        for i, lmdk in enumerate(lmdk_conf):
            # Find landmarks
            args.dist = lmdk_conf[lmdk]['dist']
            args.per = lmdk_conf[lmdk]['per']
            lmdks = lmdk_lib.find_lmdks(seq, args)[:args.time]
            # Print stats
            lmdk_lib.lmdks_stats(args, lmdks)
            # # Find long enough sequences
            # usrs = np.unique(data[:,0])
            # for usr_i, usr in enumerate(usrs):
            #     traj = data[data[:,0]==usr, :]
            #     if(len(traj)) >= 1000 and len(traj) < 2000:
            #         print(usr, len(traj))
            mae[:, i] = _mean_abs_errors(seq, lmdks, bgt_conf, args.iter)

        # Plot bars
        _plot_bars(x_i, mae, bar_width)
        # Plot legend
        lmdk_lib.plot_legend()
        # Show plot
        plt.show()
        # Save plot
        # lmdk_lib.save_plot('rslt/bgt_cmp/' + d + '.pdf')
        print('[OK]', flush=True)


def parse_args():
    '''
    Parse arguments.

    Optional:
      dist - The coordinates distance threshold in meters.
      per  - The timestamps period threshold in minutes.
      res  - The results archive file.
      time - The total timestamps.
      iter - The total iterations.
    Returns:
      The parsed arguments.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
    parser.add_argument('-p', '--per', help='The timestamps period threshold in minutes.', type=int, default=30)
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')
    parser.add_argument('-t', '--time', help='The total timestamps.', type=int, default=1000)
    parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)

    # Parse arguments.
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time   : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # `exit` is injected by the `site` module for interactive use and is
        # not guaranteed to exist; `sys.exit` always is.
        sys.exit()