#!/usr/bin/env python3
"""Compare the utility of the landmark budget-allocation methods.

For every configured data set, one user's trajectory is released with each
``lmdk_bgt`` method (skip, uniform, adaptive, sample, discount, incremental)
for several landmark percentages, and the mean absolute error of every
method is shown as a grouped bar chart.
"""

import sys
# The project libraries live outside the package path.
sys.path.insert(1, 'code/lib')

import argparse
from datetime import datetime
from geopy.distance import distance
import lmdk_bgt
import lmdk_lib
import numpy as np
from matplotlib import pyplot as plt
import time


def main(args):
    """Run the comparison and show one bar chart per data set.

    Args:
        args: The parsed command-line arguments (see ``parse_args``).  Its
            ``res``, ``dist`` and ``per`` attributes are overwritten while
            running; ``args`` is then handed to the ``lmdk_lib`` helpers,
            which presumably read those attributes.
    """
    # The data files
    data_files = {
        'T-drive': '/home/manos/Cloud/Data/T-drive/Results.zip',
        'Geolife': '/home/manos/Cloud/Data/Geolife/Results.zip',
    }
    # Data related info: the user whose trajectory is used and, for every
    # target landmarks percentage, the distance/period thresholds passed to
    # the landmark discovery.  The trailing comments were recorded next to
    # each entry in the original file.
    data_info = {
        'T-drive': {
            'uid': 2,
            'lmdks': {
                0: {'dist': 0, 'per': 1000},  # 0.0%
                20: {'dist': 2095, 'per': 30},  # 19.6%
                40: {'dist': 2790, 'per': 30},  # 40.2%
                60: {'dist': 3590, 'per': 30},  # 59.9%
                80: {'dist': 4825, 'per': 30},  # 79.4%
                100: {'dist': 10350, 'per': 30},  # 100.0%
            },
        },
        'Geolife': {
            'uid': 97,
            'lmdks': {
                0: {'dist': 0, 'per': 100000},  # 0.0%
                20: {'dist': 205, 'per': 30},  # 19.8%
                40: {'dist': 450, 'per': 30},  # 41.7%
                60: {'dist': 725, 'per': 30},  # 59.2%
                80: {'dist': 855, 'per': 30},  # 82.1%
                100: {'dist': 50000, 'per': 30},  # 100.0%
            },
        },
    }

    # Load the data sets.  ``args.res`` is pointed at the archive before
    # every call because ``load_data`` only receives ``args``.
    data_sets = {}
    for name, archive in data_files.items():
        args.res = archive
        data_sets[name] = lmdk_lib.load_data(args, 'usrs_data')

    # Geo-I configuration
    # epsilon = level/radius
    # Radius is in meters
    # NOTE(review): the errors below are accumulated over every entry of this
    # list without normalization, so enabling more than one configuration
    # would sum their errors in the same bar -- confirm that this is intended.
    bgt_conf = [
        {'epsilon': 1},
        # {'label': 'ln(2)/200', 'epsilon': 0.0035, 'level': 0.69314718056, 'radius': 200},
        # {'label': 'ln(4)/200', 'epsilon': 0.0069, 'level': 1.38629436112, 'radius': 200},
        # {'label': 'ln(6)/200', 'epsilon': 0.0090, 'level': 1.79175946923, 'radius': 200}
    ]

    # The methods in evaluation and plotting order:
    # (legend label, release function, extra positional arguments).
    methods = [
        ('Skip', lmdk_bgt.skip, ()),
        ('Uniform', lmdk_bgt.uniform_r, ()),
        ('Adaptive', lmdk_bgt.adaptive, (.5, .5)),
        ('Sample', lmdk_bgt.sample, ()),
        ('Discount', lmdk_bgt.discount, ()),
        ('Incremental', lmdk_bgt.incremental, ()),
    ]
    # Number of methods
    n = len(methods)
    # Width of bars (one bar-width gap is left between the groups)
    bar_width = 1/(n + 1)
    x_margin = bar_width*(n/2 + 1)

    for d, data in data_sets.items():
        print('\n##############################', d, '\n')
        args.res = data_files[d]
        landmarks = data_info[d]['lmdks']
        # Keep the selected user's rows and truncate the trajectory
        # according to arguments.
        seq = data[data[:, 0] == data_info[d]['uid'], :][:args.time]

        # Initialize plot
        lmdk_lib.plot_init()
        # The x axis: one group of bars per landmarks percentage.
        x_i = np.arange(len(landmarks))
        plt.xticks(x_i, np.array(list(landmarks)).astype(int))
        plt.xlabel('Landmarks percentage')
        plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
        # The y axis
        plt.ylabel('Mean absolute error (m)')
        plt.yscale('log')
        plt.ylim(1, 100000)

        # Mean absolute error of every method per landmarks percentage.
        mae = {label: np.zeros(len(landmarks)) for label, _, _ in methods}
        for i, lmdk_conf in enumerate(landmarks.values()):
            # Find landmarks
            args.dist = lmdk_conf['dist']
            args.per = lmdk_conf['per']
            lmdks = lmdk_lib.find_lmdks(seq, args)[:args.time]
            # Print stats
            lmdk_lib.lmdks_stats(args, lmdks)
            for bgt in bgt_conf:
                for _ in range(args.iter):
                    for label, release, extra in methods:
                        # Every method returns a tuple whose first element is
                        # the released data; the remaining elements are not
                        # needed here.
                        rls_data = release(seq, lmdks, bgt['epsilon'], *extra)[0]
                        # Average over the iterations.
                        mae[label][i] += lmdk_bgt.mae(seq, rls_data)/args.iter

        # Plot bars: center the group of n bars on every tick.
        x_offset = -(bar_width/2)*(n - 1)
        for label, _, _ in methods:
            plt.bar(
                x_i + x_offset,
                mae[label],
                bar_width,
                label=label,
                linewidth=lmdk_lib.line_width
            )
            x_offset += bar_width

        # Plot legend
        lmdk_lib.plot_legend()
        # Show plot
        plt.show()
        # Save plot (disabled alternative to showing it)
        # lmdk_lib.save_plot('rslt/bgt_cmp/' + d + '.pdf')
        print('[OK]', flush=True)


def parse_args():
    '''
      Parse arguments.

      Optional:
        dist  - The coordinates distance threshold in meters.
        per   - The timestamps period threshold in minutes.
        res   - The results archive file.
        time  - The total timestamps.
        iter  - The total iterations.

      Returns:
        The parsed arguments namespace.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
    parser.add_argument('-p', '--per', help='The timestaps period threshold in mimutes.', type=int, default=30)
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')
    parser.add_argument('-t', '--time', help='The total timestamps.', type=int, default=1000)
    parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)

    # Parse arguments.
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # ``sys.exit`` instead of the ``site``-provided ``exit`` helper, which
        # is meant for the interactive shell and may be unavailable.
        sys.exit()