code: Update
This commit is contained in:
parent
8f3aea5dfb
commit
0c2f582e75
@ -27,7 +27,7 @@ def main(args):
|
|||||||
# Landmarks' thresholds
|
# Landmarks' thresholds
|
||||||
lmdks_th = [0, .54, .68, .88, 1.12, 10]
|
lmdks_th = [0, .54, .68, .88, 1.12, 10]
|
||||||
# The privacy budget
|
# The privacy budget
|
||||||
epsilon = 10.0
|
epsilon = 1.0
|
||||||
|
|
||||||
# Number of methods
|
# Number of methods
|
||||||
n = 3
|
n = 3
|
||||||
@ -48,7 +48,7 @@ def main(args):
|
|||||||
# The y axis
|
# The y axis
|
||||||
plt.ylabel('Mean absolute error (kWh)') # Set y axis label.
|
plt.ylabel('Mean absolute error (kWh)') # Set y axis label.
|
||||||
plt.yscale('log')
|
plt.yscale('log')
|
||||||
plt.ylim(.01, 1000)
|
plt.ylim(.1, 10000)
|
||||||
# Bar offset
|
# Bar offset
|
||||||
x_offset = -(bar_width/2)*(n - 1)
|
x_offset = -(bar_width/2)*(n - 1)
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ def main(args):
|
|||||||
# Landmarks' thresholds
|
# Landmarks' thresholds
|
||||||
lmdks_th = [0, .54, .68, .88, 1.12, 10]
|
lmdks_th = [0, .54, .68, .88, 1.12, 10]
|
||||||
# The privacy budget
|
# The privacy budget
|
||||||
epsilon = 10.0
|
epsilon = 1.0
|
||||||
|
|
||||||
# Number of methods
|
# Number of methods
|
||||||
n = 3
|
n = 3
|
||||||
@ -46,7 +46,7 @@ def main(args):
|
|||||||
# The y axis
|
# The y axis
|
||||||
plt.ylabel('Mean absolute error (kWh)') # Set y axis label.
|
plt.ylabel('Mean absolute error (kWh)') # Set y axis label.
|
||||||
plt.yscale('log')
|
plt.yscale('log')
|
||||||
plt.ylim(.01, 1000)
|
plt.ylim(.1, 10000)
|
||||||
# Bar offset
|
# Bar offset
|
||||||
x_offset = -(bar_width/2)*(n - 1)
|
x_offset = -(bar_width/2)*(n - 1)
|
||||||
|
|
||||||
|
@ -477,7 +477,7 @@ def adaptive_cons(seq, lmdks, epsilon, inc_rt, dec_rt):
|
|||||||
skipped = 0
|
skipped = 0
|
||||||
for i, p in enumerate(seq):
|
for i, p in enumerate(seq):
|
||||||
# Check if current point is a landmark
|
# Check if current point is a landmark
|
||||||
is_landmark = any((lmdks[:]==p).all(1))
|
is_landmark = any(np.equal(lmdks, p).all(1))
|
||||||
if is_landmark:
|
if is_landmark:
|
||||||
lmdk_cur += 1
|
lmdk_cur += 1
|
||||||
if lmdk_lib.should_sample(samp_rt) or i == 0:
|
if lmdk_lib.should_sample(samp_rt) or i == 0:
|
||||||
@ -584,7 +584,7 @@ def skip_cons(seq, lmdks, epsilon):
|
|||||||
rls_data = [None]*len(seq)
|
rls_data = [None]*len(seq)
|
||||||
for i, p in enumerate(seq):
|
for i, p in enumerate(seq):
|
||||||
# Check if current point is a landmark
|
# Check if current point is a landmark
|
||||||
is_landmark = any((lmdks[:]==p).all(1))
|
is_landmark = any(np.equal(lmdks, p).all(1))
|
||||||
# Add noise
|
# Add noise
|
||||||
o = [p[0], lmdk_lib.add_laplace_noise(p[1], 1, bgts[i])]
|
o = [p[0], lmdk_lib.add_laplace_noise(p[1], 1, bgts[i])]
|
||||||
if is_landmark:
|
if is_landmark:
|
||||||
|
132
code/parse_t-drive.py
Normal file
132
code/parse_t-drive.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
from datetime import datetime
|
||||||
|
from geopy.distance import distance
|
||||||
|
import io
|
||||||
|
import lmdk_lib
|
||||||
|
import numpy as np
|
||||||
|
import os
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import zipfile
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
|
############ Stats ###########
|
||||||
|
|
||||||
|
Trajectories: 10357
|
||||||
|
Length
|
||||||
|
Total : 17662984
|
||||||
|
Minimum :
|
||||||
|
Maximum :
|
||||||
|
|
||||||
|
Landmarks : 212816
|
||||||
|
Length
|
||||||
|
Total : 6851102 (27.54%)
|
||||||
|
Minimum : 1
|
||||||
|
Maximum : 6896
|
||||||
|
|
||||||
|
##############################
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
# https://cloud.delkappa.com/s/2Rs2wjS8zmt5bAE
|
||||||
|
# T-drive data format
|
||||||
|
# User ID
|
||||||
|
uid = 0
|
||||||
|
# Coordinates
|
||||||
|
lat, lng = 3, 2
|
||||||
|
# Timestamp
|
||||||
|
tim = 1
|
||||||
|
# Timestamp format
|
||||||
|
tim_fmt = "%Y-%m-%d %H:%M:%S"
|
||||||
|
|
||||||
|
|
||||||
|
'''
  Parse the T-drive trajectories and find the users' landmarks.

  Loads the cached 'usrs_lmdks' through lmdk_lib.load_data.  When that is
  empty, loads the cached 'usrs_data' and, when that is empty too, parses
  every '.txt' trajectory of the args.arc zip archive into rows of
  [user id, latitude, longitude, timestamp].  The landmarks are then
  found and saved, and finally their stats are reported.

  Parameters:
    args - The parsed arguments; arc is read here and the whole object
           is forwarded to the lmdk_lib calls.
'''
def main(args):
    # NOTE(review): the block nesting below was reconstructed from the
    # statements' syntax and data flow -- confirm against the repository.
    # Get users' landmarks from previous parsing
    usrs_lmdks = lmdk_lib.load_data(args, 'usrs_lmdks')
    if usrs_lmdks.size == 0:
        # Get users' data from previous parsing
        usrs_data = lmdk_lib.load_data(args, 'usrs_data')
        if usrs_data.size == 0:
            # NOTE(review): float32 has a 24-bit significand, so epoch
            # timestamps lose their low-order seconds -- confirm that this
            # precision is acceptable for the period threshold.
            usrs_data = np.empty((0,4), np.float32)
            # Per-user arrays are gathered here and joined once; appending to
            # usrs_data inside the loop re-copies every row parsed so far.
            chunks = [usrs_data]
            # Parse users' data
            try:
                print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
                with zipfile.ZipFile(args.arc, 'r') as arc:
                    print('[OK]')
                    # Get the list of users
                    print('Getting users... ', end='', flush=True)
                    # List of users
                    usrs = list(info.filename.split('/')[2].split('.')[0] for info in arc.infolist() if '.txt' in info.filename)
                    # Sort users numerically
                    usrs.sort(key=int)
                    print('[OK]')
                    for usr in usrs:
                        points = []
                        traj_file = 'release/taxi_log_2008_by_id/' + usr + '.txt'
                        print('[%d%% (%s/%d)] Loading data from %s... ' %((int(usr)/len(usrs))*100, usr, len(usrs), traj_file), end='', flush=True)
                        with io.TextIOWrapper(arc.open(traj_file), newline='\n') as dat:
                            try:
                                traj = csv.reader(dat, delimiter=',')
                                for p in traj:
                                    # NOTE(review): strptime() yields a naive datetime, so
                                    # timestamp() interprets it in the local time zone.
                                    points.append([p[uid], p[lat], p[lng], datetime.strptime(p[tim], tim_fmt).timestamp()])
                                print('[OK]')
                            except Exception as e:
                                # A malformed record stops this user's file only; the
                                # points read before it are kept.
                                print('[Error: %s]' %(e))
                        if points:
                            chunks.append(np.asarray(points, dtype=np.float32))
                usrs_data = np.concatenate(chunks, axis=0)
                # Save to results
                lmdk_lib.save_data(args, usrs_data, 'usrs_data')
            except Exception as e:
                print('[Error: %s]' %(e))
                # Keep whatever was parsed before the failure.
                usrs_data = np.concatenate(chunks, axis=0)
        # Find users' landmarks
        usrs_lmdks = lmdk_lib.find_lmdks(usrs_data, args)
        # Save to results
        lmdk_lib.save_data(args, usrs_lmdks, 'usrs_lmdks')
    # Landmarks' stats
    lmdk_lib.lmdks_stats(args, usrs_lmdks)
|
||||||
|
|
||||||
|
|
||||||
|
'''
  Parse arguments.

  Parameters:
    argv - The list of arguments to parse; None (the default) lets
           argparse read the command line.

  Optional:
    arc  - The data archive file.
    dist - The coordinates distance threshold in meters.
    per  - The timestamps period threshold in minutes.
    res  - The results archive file.

  Returns:
    The namespace holding arc, dist, per, and res.
'''
def parse_args(argv=None):
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Data.zip')
    parser.add_argument('-l', '--dist', help='The coordinates distance threshold in meters.', type=int, default=200)
    parser.add_argument('-p', '--per', help='The timestamps period threshold in minutes.', type=int, default=30)
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')

    # Parse arguments.
    args = parser.parse_args(argv)

    return args
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: run main() on the parsed arguments and report
    # the elapsed wall-clock time.
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit() rather than exit(): the latter is injected by the site
        # module for interactive use and is not meant for programs.
        sys.exit()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user