#!/usr/bin/env python3

import argparse
import csv
from datetime import datetime
from geopy.distance import distance
import io
import lmdk_lib
import numpy as np
import os
import matplotlib.pyplot as plt
import sys
import time
import zipfile

'''
############ Stats ###########
Trajectories: 10357
  Length
    Total   : 17662984
    Minimum :
    Maximum :
Landmarks   : 212816
  Length
    Total   : 6851102 (27.54%)
    Minimum : 1
    Maximum : 6896
##############################
'''

# https://cloud.delkappa.com/s/2Rs2wjS8zmt5bAE

# T-drive data format (column indices of each CSV record)
# User ID
uid = 0
# Coordinates
lat, lng = 3, 2
# Timestamp
tim = 1
# Timestamp format
tim_fmt = "%Y-%m-%d %H:%M:%S"


def _read_points(arc, traj_file):
    """Read one trajectory file from the open archive.

    Each CSV record becomes a row [uid, lat, lng, timestamp]. A failure while
    reading/parsing records is reported on stdout and the rows parsed so far
    are still returned (best effort). A failure to open `traj_file` itself is
    not handled here and propagates to the caller.
    """
    points = []
    with io.TextIOWrapper(arc.open(traj_file), newline='\n') as dat:
        try:
            for p in csv.reader(dat, delimiter=','):
                # NOTE(review): strptime() yields a naive datetime, so
                # timestamp() is interpreted in the local timezone of the
                # machine running the script -- confirm this is intended.
                points.append([
                    p[uid],
                    p[lat],
                    p[lng],
                    datetime.strptime(p[tim], tim_fmt).timestamp(),
                ])
            print('[OK]')
        except Exception as e:
            print('[Error: %s]' % (e))
    return points


def _parse_archive(args):
    """Parse every user's trajectory from the data archive `args.arc`.

    Returns an (n, 4) float32 array with columns uid, lat, lng, timestamp.
    When the whole archive was parsed without error, the array is also saved
    to the results via `lmdk_lib.save_data`. On error, the message is printed
    and whatever was parsed up to that point is returned without being saved.
    """
    # Per-user arrays are collected here and concatenated once at the end;
    # growing a single array with np.append per user re-copies all previous
    # rows every time.
    chunks = []
    parsed_ok = False
    try:
        print('Extracting %s... ' % (os.path.abspath(args.arc)), end='', flush=True)
        with zipfile.ZipFile(args.arc, 'r') as arc:
            print('[OK]')
            # Get the list of users
            print('Getting users... ', end='', flush=True)
            # The user ID is the file's base name, taken from the third path
            # component of each '.txt' entry.
            usrs = [
                info.filename.split('/')[2].split('.')[0]
                for info in arc.infolist()
                if '.txt' in info.filename
            ]
            # Sort users numerically
            usrs.sort(key=int)
            print('[OK]')
            total = len(usrs)
            for idx, usr in enumerate(usrs, start=1):
                traj_file = 'release/taxi_log_2008_by_id/' + usr + '.txt'
                # Progress is based on the position in the list, so it stays
                # within 0-100% even if user IDs are not exactly 1..N.
                print('[%d%% (%d/%d)] Loading data from %s... '
                      % ((idx / total) * 100, idx, total, traj_file),
                      end='', flush=True)
                points = _read_points(arc, traj_file)
                if points:
                    # NOTE(review): float32 has a 24-bit significand, so epoch
                    # timestamps of this magnitude (~1.2e9) are rounded to a
                    # multiple of 128 s. Kept as float32 to stay compatible
                    # with previously saved results -- confirm acceptable.
                    chunks.append(np.asarray(points, dtype=np.float32))
        parsed_ok = True
    except Exception as e:
        print('[Error: %s]' % (e))
    if chunks:
        usrs_data = np.concatenate(chunks, axis=0)
    else:
        usrs_data = np.empty((0, 4), np.float32)
    if parsed_ok:
        try:
            # Save to results
            lmdk_lib.save_data(args, usrs_data, 'usrs_data')
        except Exception as e:
            print('[Error: %s]' % (e))
    return usrs_data


def main(args):
    """Load or compute the users' landmarks and print their statistics.

    Results of previous runs are reused when available: landmarks are loaded
    first; if none are found, the users' data are loaded (or parsed from the
    archive when also missing), landmarks are computed with
    `lmdk_lib.find_lmdks`, and saved.
    """
    # Get users' landmarks from previous parsing
    usrs_lmdks = lmdk_lib.load_data(args, 'usrs_lmdks')
    if usrs_lmdks.size == 0:
        # Get users' data from previous parsing
        usrs_data = lmdk_lib.load_data(args, 'usrs_data')
        if usrs_data.size == 0:
            # Parse users' data
            usrs_data = _parse_archive(args)
        # Find users' landmarks
        usrs_lmdks = lmdk_lib.find_lmdks(usrs_data, args)
        # Save to results
        lmdk_lib.save_data(args, usrs_lmdks, 'usrs_lmdks')
    # Landmarks' stats
    lmdk_lib.lmdks_stats(args, usrs_lmdks)


def parse_args():
    """Parse the command-line arguments.

    Optional:
      arc  - The data archive file.
      dist - The coordinates distance threshold in meters.
      per  - The timestamps period threshold in minutes.
      res  - The results archive file.
    """
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Data.zip')
    parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
    parser.add_argument('-p', '--per', help='The timestaps period threshold in mimutes.', type=int, default=30)
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')

    # Parse arguments.
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit() instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. `python -S`); exit status is unchanged.
        sys.exit()