bgt_cmp: Ready to test
This commit is contained in:
		
							
								
								
									
										250
									
								
								code/expt/bgt_cmp.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										250
									
								
								code/expt/bgt_cmp.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,250 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
sys.path.insert(1, 'code/lib')
 | 
			
		||||
import argparse
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from geopy.distance import distance
 | 
			
		||||
import lmdk_bgt
 | 
			
		||||
import lmdk_lib
 | 
			
		||||
import numpy as np
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def main(args):
  '''
    Compare the release methods of lmdk_bgt on two data sets and plot,
    for every landmarks percentage, the error of each method as a
    grouped bar chart (one figure per data set).

    Parameters:
      args - The parsed arguments (see parse_args).  The function reads
             `time` and `iter`, and overwrites `res`, `dist`, and `per`
             while iterating over the data sets and landmark settings.
    Returns:
      Nothing; the figures are shown with plt.show().
  '''
  # The data files
  data_files = {
    'T-drive': '/home/manos/Cloud/Data/T-drive/Results.zip',
    'Geolife': '/home/manos/Cloud/Data/Geolife/Results.zip'
  }
  # Data related info:
  #   uid   - the user whose trajectory is evaluated
  #   lmdks - for every target landmarks percentage, the `dist`/`per`
  #           thresholds handed to lmdk_lib.find_lmdks (the trailing
  #           comment is the percentage noted by the original author)
  data_info = {
    'T-drive': {
      'uid': 2,
      'lmdks': {
          0: {'dist': 0, 'per': 1000},   #   0.0%
         20: {'dist': 2095, 'per': 30},  #  19.6%
         40: {'dist': 2790, 'per': 30},  #  40.2%
         60: {'dist': 3590, 'per': 30},  #  59.9%
         80: {'dist': 4825, 'per': 30},  #  79.4%
        100: {'dist': 10350, 'per': 30}  # 100.0%
      }
    },
    'Geolife': {
      'uid': 97,
      'lmdks': {
          0: {'dist': 0, 'per': 100000},  #   0.0%
         20: {'dist': 205, 'per': 30},    #  19.8%
         40: {'dist': 450, 'per': 30},    #  41.7%
         60: {'dist': 725, 'per': 30},    #  59.2%
         80: {'dist': 855, 'per': 30},    #  82.1%
        100: {'dist': 50000, 'per': 30}   # 100.0%
      }
    }
  }
  # Load data sets (load_data receives the archive through args.res)
  data_sets = {}
  for name, archive in data_files.items():
    args.res = archive
    data_sets[name] = lmdk_lib.load_data(args, 'usrs_data')
  # Geo-I configuration
  # epsilon = level/radius
  # Radius is in meters
  # NOTE(review): the error arrays below are shared by every entry of
  # bgt_conf, so enabling more than one configuration sums their errors
  # into the same bars.
  bgt_conf = [
    {'epsilon': 1},
    # {'label': 'ln(2)/200', 'epsilon': 0.0035, 'level': 0.69314718056, 'radius': 200},
    # {'label': 'ln(4)/200', 'epsilon': 0.0069, 'level': 1.38629436112, 'radius': 200},
    # {'label': 'ln(6)/200', 'epsilon': 0.0090, 'level': 1.79175946923, 'radius': 200}
  ]

  # The methods to compare: (legend label, release function, extra
  # positional arguments).  The order is both the evaluation order
  # within an iteration and the left-to-right order of the bars.
  methods = [
    ('Skip', lmdk_bgt.skip, ()),
    ('Uniform', lmdk_bgt.uniform_r, ()),
    ('Adaptive', lmdk_bgt.adaptive, (.5, .5)),
    ('Sample', lmdk_bgt.sample, ()),
    ('Discount', lmdk_bgt.discount, ()),
    ('Incremental', lmdk_bgt.incremental, ())
  ]
  # Number of methods
  n = len(methods)
  # Width of bars
  bar_width = 1/(n + 1)
  # The x axis (sized after the first data set; every data set is
  # expected to define the same number of landmark settings)
  x_i = np.arange(len(next(iter(data_info.values()))['lmdks']))
  x_margin = bar_width*(n/2 + 1)

  for d in data_sets:
    print('\n##############################', d, '\n')
    args.res = data_files[d]
    data = data_sets[d]
    lmdks_info = data_info[d]['lmdks']
    # Keep the rows of the selected user (column 0 holds the user id)
    # and truncate the trajectory according to arguments
    seq = data[data[:,0]==data_info[d]['uid'], :][:args.time]

    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    plt.xticks(x_i, np.array(list(lmdks_info)).astype(int))
    plt.xlabel('Landmarks percentage')  # Set x axis label.
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Mean absolute error (m)')  # Set y axis label.
    plt.yscale('log')
    plt.ylim(1, 100000)

    # One row per method, one column per landmarks percentage
    mae = np.zeros((n, len(lmdks_info)))
    for i, lmdk in enumerate(lmdks_info):
      # Find landmarks
      args.dist = lmdks_info[lmdk]['dist']
      args.per = lmdks_info[lmdk]['per']
      lmdks = lmdk_lib.find_lmdks(seq, args)[:args.time]
      # Print stats
      lmdk_lib.lmdks_stats(args, lmdks)
      for bgt in bgt_conf:
        for _ in range(args.iter):
          for m, (_label, release, extra) in enumerate(methods):
            # Every release function returns the released data first;
            # the remaining return values are not needed here.
            rls_data = release(seq, lmdks, bgt['epsilon'], *extra)[0]
            # Average the error over the iterations
            mae[m, i] += lmdk_bgt.mae(seq, rls_data)/args.iter

    # Plot bars, centering the group of n bars on every x tick
    x_offset = -(bar_width/2)*(n - 1)
    for m, (label, _release, _extra) in enumerate(methods):
      plt.bar(
        x_i + x_offset,
        mae[m],
        bar_width,
        label=label,
        linewidth=lmdk_lib.line_width
      )
      x_offset += bar_width

    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    plt.show()
    # Save plot (alternative to showing it)
    # lmdk_lib.save_plot('rslt/bgt_cmp/' + d + '.pdf')
    print('[OK]', flush=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_args():
  '''
    Parse the command-line arguments.

    Optional:
      dist - The coordinates distance threshold in meters.
      per  - The timestamps period threshold in minutes.
      res  - The results archive file.
      time - The total timestamps.
      iter - The total iterations.

    Returns:
      args - The parsed arguments (argparse.Namespace).
  '''
  # Create argument parser.
  parser = argparse.ArgumentParser()

  # Optional arguments (there are no mandatory ones).
  parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
  parser.add_argument('-p', '--per', help='The timestamps period threshold in minutes.', type=int, default=30)
  parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')
  parser.add_argument('-t', '--time', help='The total timestamps.', type=int, default=1000)
  parser.add_argument('-i', '--iter', help='The total iterations.', type=int, default=1)

  # Parse arguments (read from sys.argv).
  args = parser.parse_args()

  return args
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Script entry point: run the experiment and report the wall-clock time.
if __name__ == '__main__':
  try:
    start_time = time.time()
    main(parse_args())
    end_time = time.time()
    print('##############################')
    print('Time   : %.4fs' % (end_time - start_time))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # Use sys.exit() rather than the exit() helper: the latter is
    # injected by the site module for interactive use and is not
    # guaranteed to exist in every run mode.  The exit status is
    # unchanged (no argument, i.e., success).
    sys.exit()
 | 
			
		||||
		Reference in New Issue
	
	Block a user