237 lines
5.8 KiB
Python
237 lines
5.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import sys
|
|
sys.path.insert(1, 'lib')
|
|
import argparse
|
|
import ast
|
|
import csv
|
|
from datetime import datetime
|
|
from geopy.distance import distance
|
|
import io
|
|
import itertools
|
|
import lmdk_lib
|
|
import numpy as np
|
|
import os
|
|
import matplotlib.pyplot as plt
|
|
import time
|
|
import zipfile
|
|
|
|
'''
|
|
usr n  (usr: user ID, n: number of contacts of that user)
|
|
109 6549
|
|
112 3277
|
|
113 4202
|
|
139 8681
|
|
14 9378
|
|
145 4192
|
|
147 4428
|
|
157 11503
|
|
164 8045
|
|
17 4238
|
|
176 19732
|
|
178 7738
|
|
186 3289
|
|
190 13843
|
|
216 6126
|
|
236 3810
|
|
244 10563
|
|
262 17567
|
|
264 9792
|
|
274 11572
|
|
275 4316
|
|
287 13394
|
|
299 7373
|
|
304 6502
|
|
317 3892
|
|
324 11349
|
|
344 14479
|
|
374 5512
|
|
383 18751
|
|
389 3438
|
|
396 7004
|
|
453 3957
|
|
474 7692
|
|
480 4009
|
|
504 2622
|
|
505 11200
|
|
561 4304
|
|
570 2136
|
|
576 6553
|
|
58 12755
|
|
593 3429
|
|
595 4982
|
|
619 6189
|
|
633 5229
|
|
657 10941
|
|
658 1658
|
|
663 11461
|
|
688 19794
|
|
691 6755
|
|
705 5
|
|
76 4875
|
|
77 25
|
|
79 8892
|
|
81 8944
|
|
88 17254
|
|
'''
|
|
|
|
# Copenhagen data format
# (data set: https://cloud.delkappa.com/s/ACMsDr2jnW3b6Np)

# Number of header rows to skip at the top of the contacts file.
hdr = 1

# Column indexes of a contact record, in order:
#   tim   - Timestamp
#   uid_a - User ID A
#   uid_b - User ID B
#   rssi  - Received Signal Strength Indicator (RSSI)
tim, uid_a, uid_b, rssi = range(4)
|
|
|
|
|
|
def _parse_contacts(args):
    '''
    Read the valid contacts from the contacts file inside the data archive.

    A record is kept when user B is neither '-1' nor '-2', the RSSI is not
    '0', and the two user IDs differ.

    Parameters:
        args - The parsed arguments; `args.arc` is the archive path and
               `args.cont` is the name of the contacts file inside it.
    Returns:
        A list of [tim, uid_a, uid_b, rssi] records (as strings).  Errors are
        reported on stdout and whatever was collected so far is returned.
    '''
    cont = []
    try:
        print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
        with zipfile.ZipFile(args.arc, 'r') as arc:
            print('[OK]')
            with io.TextIOWrapper(arc.open(args.cont), newline='\n') as dat:
                try:
                    print('Finding contacts... ', end='', flush=True)
                    # Stream the records, skipping the header rows.
                    for c in itertools.islice(csv.reader(dat, delimiter=','), hdr, None):
                        if c[uid_b] not in ('-1', '-2') and c[rssi] != '0' and c[uid_a] != c[uid_b]:
                            # Add valid contact
                            cont.append([c[tim], c[uid_a], c[uid_b], c[rssi]])
                    print('[OK]')
                except Exception as e:
                    print('[Error: %s]' %(e))
    except Exception as e:
        print('[Error: %s]' %(e))
    return cont


def _find_landmarks(usr, usr_cont, usrs, goal):
    '''
    Find, for every goal, a set of peers covering that fraction of a user's contacts.

    For each goal the other users are added one by one (in the order of
    `usrs`) until the fraction of the user's contacts that involve the added
    peers is within 1% (relative) of the goal.

    Parameters:
        usr      - The user ID.
        usr_cont - The user's contact records (rows with uid_a == usr).
        usrs     - All the user IDs.
        goal     - The target fractions.
    Returns:
        One [usr, goal, str(peers)] row per goal, or an empty list when any
        goal is overshot or cannot be reached.
    '''
    peers = [u for u in usrs if u != usr]
    total = len(usr_cont)
    found = []
    for g in goal:
        # Possible contacts, added gradually
        pos_cont = []
        # Running count of the user's contacts that involve the added peers.
        # The peer is in column uid_b; uid_a is always the user itself here.
        removed = 0
        matched = False
        for u in peers:
            pos_cont.append(int(u))
            removed += np.count_nonzero(usr_cont[:, uid_b] == u)
            diff = removed/total
            # Check if it is close enough to what we need
            if abs(diff - g)/g < .01:
                found.append([int(usr), g, str(pos_cont)])
                matched = True
                break
            if diff > g:
                # Overshot the goal, adding more peers cannot help.
                break
        if not matched:
            # This user is no bueno
            return []
    return found


def main(args):
    '''
    Extract the contacts, find the users suitable for experiments, and
    verify the landmarks of user 14.

    Contacts and suitable users are requested from `lmdk_lib.load_data`
    first; when the returned array is empty they are computed and handed to
    `lmdk_lib.save_data`.

    Parameters:
        args - The parsed arguments (see `parse_args`).
    Returns:
        Nothing; progress and results are printed on stdout.
    '''
    # Load data

    # Get contacts from previous parsing
    cont_data = lmdk_lib.load_data(args, 'cont')
    if cont_data.size == 0:
        # Keep the freshly parsed contacts for the rest of the run too.
        cont_data = np.array(_parse_contacts(args), np.float32)
        # Save to results
        lmdk_lib.save_data(args, cont_data, 'cont')
    if cont_data.size == 0:
        print('[Error: no contacts available]')
        return

    # Get users' landmarks

    # Get all users
    usrs = np.unique(cont_data[:, uid_a])
    # Fractions of a user's contacts that the landmarks should cover
    goal = [.2, .4, .6, .8]
    # Get users' data from previous parsing
    usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
    if usrs_expt.size == 0:
        # Users suitable for experiments
        rows = []
        for usr_i, usr in enumerate(usrs):
            print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
            # User's contacts
            usr_cont = cont_data[cont_data[:, uid_a] == usr]
            usr_rows = _find_landmarks(usr, usr_cont, usrs, goal)
            if usr_rows:
                # That's a keeper
                print('[OK]')
                rows += usr_rows
            else:
                print('[NOK]')
        # Keep the result as an array, the indexing below needs it.
        usrs_expt = np.array(rows, str)
        # Save to results
        lmdk_lib.save_data(args, usrs_expt, 'usrs_expt')
    if usrs_expt.size == 0:
        print('[Error: no users suitable for experiments]')
        return

    # Get contacts for user 14 (9378 contacts)
    usr = '14'
    # All user contacts
    usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
    if len(usr_cont) == 0:
        print('[Error: no contacts for user %s]' %(usr))
        return
    # All user landmarks for different goals
    usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
    for g in goal:
        # Get goal landmarks
        lmdk = usr_lmdk[usr_lmdk[:, 1] == str(g)]
        if len(lmdk) == 0:
            print(g, 'n/a')
            continue
        # The peers were stored as the str() of a list of ints.
        cont = ast.literal_eval(lmdk[0][2])
        # Count the contacts that involve the goal landmarks
        removed = np.count_nonzero(np.isin(usr_cont[:, uid_b], cont))
        # Check
        print(g, removed/len(usr_cont))
|
|
|
|
|
|
def parse_args(argv=None):
    '''
    Parse arguments.

    Parameters:
        argv - The argument strings to parse.  When None (the default),
               argparse reads them from the command line (sys.argv[1:]).

    Optional command-line arguments:
        arc  - The data archive file.
        cont - The contacts data file.
        res  - The results archive file.

    Returns:
        The argparse namespace with the attributes `arc`, `cont`, and `res`.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Data.zip')
    parser.add_argument('-c', '--cont', help='The contacts data file.', type=str, default='bt_symmetric.csv')
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Results.zip')

    # Parse arguments.
    return parser.parse_args(argv)
|
|
|
|
|
|
# Script entry point: run main() on the command-line arguments and report
# the elapsed wall-clock time.
if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit() instead of exit(): the latter is injected by the site
        # module for interactive use and is not guaranteed to exist.
        sys.exit()
|