code: Update
This commit is contained in:
		@ -27,7 +27,7 @@ def main(args):
 | 
				
			|||||||
  # Landmarks' thresholds
 | 
					  # Landmarks' thresholds
 | 
				
			||||||
  lmdks_th = [0, .54, .68, .88, 1.12, 10]
 | 
					  lmdks_th = [0, .54, .68, .88, 1.12, 10]
 | 
				
			||||||
  # The privacy budget
 | 
					  # The privacy budget
 | 
				
			||||||
  epsilon = 10.0
 | 
					  epsilon = 1.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  # Number of methods
 | 
					  # Number of methods
 | 
				
			||||||
  n = 3
 | 
					  n = 3
 | 
				
			||||||
@ -48,7 +48,7 @@ def main(args):
 | 
				
			|||||||
  # The y axis
 | 
					  # The y axis
 | 
				
			||||||
  plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
 | 
					  plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
 | 
				
			||||||
  plt.yscale('log')
 | 
					  plt.yscale('log')
 | 
				
			||||||
  plt.ylim(.01, 1000)
 | 
					  plt.ylim(.1, 10000)
 | 
				
			||||||
  # Bar offset
 | 
					  # Bar offset
 | 
				
			||||||
  x_offset = -(bar_width/2)*(n - 1)
 | 
					  x_offset = -(bar_width/2)*(n - 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -25,7 +25,7 @@ def main(args):
 | 
				
			|||||||
  # Landmarks' thresholds
 | 
					  # Landmarks' thresholds
 | 
				
			||||||
  lmdks_th = [0, .54, .68, .88, 1.12, 10]
 | 
					  lmdks_th = [0, .54, .68, .88, 1.12, 10]
 | 
				
			||||||
  # The privacy budget
 | 
					  # The privacy budget
 | 
				
			||||||
  epsilon = 10.0
 | 
					  epsilon = 1.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  # Number of methods
 | 
					  # Number of methods
 | 
				
			||||||
  n = 3
 | 
					  n = 3
 | 
				
			||||||
@ -46,7 +46,7 @@ def main(args):
 | 
				
			|||||||
  # The y axis
 | 
					  # The y axis
 | 
				
			||||||
  plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
 | 
					  plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
 | 
				
			||||||
  plt.yscale('log')
 | 
					  plt.yscale('log')
 | 
				
			||||||
  plt.ylim(.01, 1000)
 | 
					  plt.ylim(.1, 10000)
 | 
				
			||||||
  # Bar offset
 | 
					  # Bar offset
 | 
				
			||||||
  x_offset = -(bar_width/2)*(n - 1)
 | 
					  x_offset = -(bar_width/2)*(n - 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -477,7 +477,7 @@ def adaptive_cons(seq, lmdks, epsilon, inc_rt, dec_rt):
 | 
				
			|||||||
  skipped = 0
 | 
					  skipped = 0
 | 
				
			||||||
  for i, p in enumerate(seq):
 | 
					  for i, p in enumerate(seq):
 | 
				
			||||||
    # Check if current point is a landmark
 | 
					    # Check if current point is a landmark
 | 
				
			||||||
    is_landmark = any((lmdks[:]==p).all(1))
 | 
					    is_landmark = any(np.equal(lmdks, p).all(1))
 | 
				
			||||||
    if is_landmark:
 | 
					    if is_landmark:
 | 
				
			||||||
      lmdk_cur += 1
 | 
					      lmdk_cur += 1
 | 
				
			||||||
    if lmdk_lib.should_sample(samp_rt) or i == 0:
 | 
					    if lmdk_lib.should_sample(samp_rt) or i == 0:
 | 
				
			||||||
@ -584,7 +584,7 @@ def skip_cons(seq, lmdks, epsilon):
 | 
				
			|||||||
  rls_data = [None]*len(seq)
 | 
					  rls_data = [None]*len(seq)
 | 
				
			||||||
  for i, p in enumerate(seq):
 | 
					  for i, p in enumerate(seq):
 | 
				
			||||||
    # Check if current point is a landmark
 | 
					    # Check if current point is a landmark
 | 
				
			||||||
    is_landmark = any((lmdks[:]==p).all(1))
 | 
					    is_landmark = any(np.equal(lmdks, p).all(1))
 | 
				
			||||||
    # Add noise
 | 
					    # Add noise
 | 
				
			||||||
    o = [p[0], lmdk_lib.add_laplace_noise(p[1], 1, bgts[i])]
 | 
					    o = [p[0], lmdk_lib.add_laplace_noise(p[1], 1, bgts[i])]
 | 
				
			||||||
    if is_landmark:
 | 
					    if is_landmark:
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										132
									
								
								code/parse_t-drive.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								code/parse_t-drive.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,132 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import argparse
 | 
				
			||||||
 | 
					import csv
 | 
				
			||||||
 | 
					from datetime import datetime
 | 
				
			||||||
 | 
					from geopy.distance import distance
 | 
				
			||||||
 | 
					import io
 | 
				
			||||||
 | 
					import lmdk_lib
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					import matplotlib.pyplot as plt
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					import zipfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					############ Stats ###########
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Trajectories: 10357
 | 
				
			||||||
 | 
					  Length
 | 
				
			||||||
 | 
					    Total   : 17662984
 | 
				
			||||||
 | 
					    Minimum : 
 | 
				
			||||||
 | 
					    Maximum : 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Landmarks   : 212816
 | 
				
			||||||
 | 
					  Length
 | 
				
			||||||
 | 
					    Total   : 6851102 (27.54%)
 | 
				
			||||||
 | 
					    Minimum : 1
 | 
				
			||||||
 | 
					    Maximum : 6896
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					##############################
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# https://cloud.delkappa.com/s/2Rs2wjS8zmt5bAE
 | 
				
			||||||
 | 
					# T-drive data format: column indices within each comma-separated record
 | 
				
			||||||
 | 
					# User ID
 | 
				
			||||||
 | 
					uid = 0
 | 
				
			||||||
 | 
					# Coordinates
 | 
				
			||||||
 | 
					lat, lng = 3, 2
 | 
				
			||||||
 | 
					# Timestamp
 | 
				
			||||||
 | 
					tim = 1
 | 
				
			||||||
 | 
					# Timestamp format
 | 
				
			||||||
 | 
					tim_fmt = "%Y-%m-%d %H:%M:%S"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main(args):
					  '''
					    Load (or build) the users' landmarks and print their stats.

					    Results of previous runs are reused: the landmarks ('usrs_lmdks')
					    are loaded first and, if that result is empty, the parsed users'
					    data ('usrs_data'). Only when both are empty are the trajectories
					    parsed from the data archive. Whatever had to be computed is saved
					    back through lmdk_lib.save_data.

					    Parameters:
					      args - The parsed command-line arguments. Only args.arc is read
					             here; the whole object is forwarded to lmdk_lib.
					  '''
					  # Get users' landmarks from previous parsing
					  usrs_lmdks = lmdk_lib.load_data(args, 'usrs_lmdks')
					  if usrs_lmdks.size == 0:
					    # Get users' data from previous parsing
					    usrs_data = lmdk_lib.load_data(args, 'usrs_data')
					    if usrs_data.size == 0:
					      # NOTE(review): float32 has a 24-bit significand, so Unix
					      # timestamps of the magnitude produced here (~1.2e9 for 2008)
					      # are rounded to multiples of 128 s. Confirm that this is
					      # acceptable before changing the dtype; previously saved
					      # results presumably use it as well.
					      usrs_data = np.empty((0,4), np.float32)
					      # One array per user, merged once after the loop. Appending to
					      # a single array per user copies every row loaded so far.
					      chunks = []
					      # Location of the trajectory files inside the archive
					      traj_dir = 'release/taxi_log_2008_by_id/'
					      extracted = False
					      # Parse users' data
					      try:
					        print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
					        with zipfile.ZipFile(args.arc, 'r') as arc:
					          print('[OK]')
					          # Get the list of users
					          print('Getting users... ', end='', flush=True)
					          # List of users: the numeric file names under traj_dir.
					          # Any other archive member is ignored instead of aborting
					          # the whole load.
					          usrs = []
					          for name in arc.namelist():
					            if name.startswith(traj_dir) and name.endswith('.txt'):
					              usr = name[len(traj_dir):-len('.txt')]
					              if usr.isdecimal():
					                usrs.append(usr)
					          # Sort users numerically
					          usrs.sort(key=int)
					          print('[OK]')
					          for cnt, usr in enumerate(usrs, start=1):
					            points = []
					            traj_file = traj_dir + usr + '.txt'
					            # Progress follows the position in the list, so it is
					            # correct even when the user IDs are not consecutive.
					            print('[%d%% (%d/%d)] Loading data from %s... ' %((cnt/len(usrs))*100, cnt, len(usrs), traj_file), end='', flush=True)
					            with io.TextIOWrapper(arc.open(traj_file), newline='\n') as dat:
					              try:
					                for p in csv.reader(dat, delimiter=','):
					                  points.append([p[uid], p[lat], p[lng], datetime.strptime(p[tim], tim_fmt).timestamp()])
					                print('[OK]')
					              except Exception as e:
					                # Best effort: keep the points read so far and
					                # continue with the next user.
					                print('[Error: %s]' %(e))
					            if points:
					              chunks.append(np.asarray(points, dtype=np.float32))
					      except Exception as e:
					        print('[Error: %s]' %(e))
					      else:
					        extracted = True
					      # Merge what was parsed, including the partial result of a
					      # failed extraction.
					      if chunks:
					        usrs_data = np.concatenate(chunks, axis=0)
					      if extracted:
					        try:
					          # Save to results
					          lmdk_lib.save_data(args, usrs_data, 'usrs_data')
					        except Exception as e:
					          print('[Error: %s]' %(e))
					    # Find users' landmarks
					    usrs_lmdks = lmdk_lib.find_lmdks(usrs_data, args)
					    # Save to results
					    lmdk_lib.save_data(args, usrs_lmdks, 'usrs_lmdks')
					  # Landmarks' stats
					  lmdk_lib.lmdks_stats(args, usrs_lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def parse_args():
					  '''
					    Parse arguments.

					    Optional:
					      arc  - The archive file.
					      dist - The coordinates distance threshold in meters.
					      per  - The timestamp period threshold in minutes.
					      res  - The results zip file.

					    Returns:
					      The argparse.Namespace holding the parsed values.
					  '''
					  # Create argument parser.
					  parser = argparse.ArgumentParser()

					  # Mandatory arguments.

					  # Optional arguments.
					  parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Data.zip')
					  parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
					  parser.add_argument('-p', '--per', help='The timestamp period threshold in minutes.', type=int, default=30)
					  parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')

					  # Parse arguments.
					  return parser.parse_args()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
					  # Run the parser and report the elapsed time.
					  try:
					    # A monotonic clock is not affected by system clock adjustments
					    # during a long run.
					    start_time = time.perf_counter()
					    main(parse_args())
					    end_time = time.perf_counter()
					    print('##############################')
					    print('Time   : %.4fs' % (end_time - start_time))
					    print('##############################')
					  except KeyboardInterrupt:
					    print('Interrupted by user.')
					    # sys.exit() instead of exit(): the latter is injected by the
					    # site module for interactive use and is not always available.
					    sys.exit()
 | 
				
			||||||
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user