the-last-thing/code/parse_copenhagen.py

197 lines
5.4 KiB
Python
Raw Normal View History

2021-09-22 21:44:38 +02:00
#!/usr/bin/env python3
import sys
sys.path.insert(1, 'lib')
import argparse
import ast
2021-09-22 21:44:38 +02:00
import csv
from datetime import datetime
from geopy.distance import distance
import io
import itertools
2021-09-22 21:44:38 +02:00
import lmdk_lib
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import zipfile
2021-09-23 19:52:53 +02:00
'''
Users kept for the experiments and their number of contacts
(presumably the output printed by a previous run of main()).
usr n
311 8193
317 2044
323 1642
366 3406
368 5099
478 2616
486 3112
508 4981
585 9443
595 4459
616 4355
623 1586
637 4479
664 4315
688 19728
705 5
'''
2021-09-22 21:44:38 +02:00
# Copenhagen data format
# Source: https://cloud.delkappa.com/s/ACMsDr2jnW3b6Np
# Header size (number of leading rows skipped when reading the contacts)
hdr = 1
# Column indexes of a contact record, in order:
#   tim   - Timestamp
#   uid_a - User ID A
#   uid_b - User ID B
#   rssi  - Received Signal Strength Indicator (RSSI)
tim, uid_a, uid_b, rssi = range(4)
def _read_contacts(args):
    '''
    Read the valid contacts from the contacts file inside the data archive.

    Parameters:
      args - The parsed arguments; args.arc is the archive path and
             args.cont the name of the contacts file inside it.
    Returns:
      A list of [tim, uid_a, uid_b, rssi] rows holding the CSV strings.
      Errors are reported on stdout; the rows collected up to the error
      (possibly none) are returned.
    '''
    # Contacts [tim, uid_a, uid_b, rssi]
    cont = []
    try:
        print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
        with zipfile.ZipFile(args.arc, 'r') as arc:
            print('[OK]')
            with io.TextIOWrapper(arc.open(args.cont), newline='\n') as dat:
                try:
                    print('Finding contacts... ', end='', flush=True)
                    # Stream the rows, skipping the header, instead of
                    # loading the whole file into memory first
                    rows = csv.reader(dat, delimiter=',')
                    for c in itertools.islice(rows, hdr, None):
                        # Skip the '-1'/'-2' placeholder IDs, zero RSSI
                        # and self-contacts
                        if c[uid_b] != '-1' and c[rssi] != '0' and c[uid_b] != '-2' and c[uid_a] != c[uid_b]:
                            # Add valid contact
                            cont.append([c[tim], c[uid_a], c[uid_b], c[rssi]])
                    print('[OK]')
                except Exception as e:
                    print('[Error: %s]' %(e))
    except Exception as e:
        print('[Error: %s]' %(e))
    return cont


def _find_landmarks(usr, usr_cont, goal):
    '''
    Find, for every goal, a set of contacts covering that fraction of
    the user's records.

    Contacts are added one by one (in ascending ID order) until the
    fraction of the user's records that they account for is within 1%
    (relative) of the goal.

    Parameters:
      usr      - The user ID.
      usr_cont - The user's contact records (rows of cont_data).
      goal     - The target fractions.
    Returns:
      A list of [usr, goal, str(contacts)] entries when the last goal
      was reached ('[OK]' is printed), otherwise an empty list.  When a
      goal is overshot the reached fraction is printed and the remaining
      goals are skipped.
    '''
    total = len(usr_cont)
    # User's distinct contacts
    conts = np.unique(usr_cont[:, uid_b])
    found = []
    for g in goal:
        # Possible contacts
        pos_cont = []
        # Records left after removing the possible contacts; filtered
        # incrementally rather than recomputed from scratch every time
        remaining = usr_cont
        for c in conts:
            # Add possible contacts gradually
            pos_cont.append(int(c))
            remaining = remaining[remaining[:, uid_b] != pos_cont[-1]]
            # Fraction of the records covered by the possible contacts
            diff = (total - len(remaining))/total
            # Check if it is close enough to what we need
            if abs(diff - g)/g < .01:
                found.append([int(usr), g, str(pos_cont)])
                break
            if diff > g:
                # Overshot the goal; the user is not suitable
                print('[%.2f]' %(diff))
                return []
    if found and found[-1][1] == goal[-1]:
        # That's a keeper
        print('[OK]')
        return found
    return []


def main(args):
    '''
    Load the contacts, find the users suitable for the experiments, and
    print every suitable user along with the number of its contacts.

    Results of previous runs are fetched with lmdk_lib.load_data and new
    ones are stored with lmdk_lib.save_data (both are expected to work
    on the archive given by args; an array of size 0 is treated as "not
    available yet").

    Parameters:
      args - The parsed arguments (arc, cont, res).
    '''
    # Get contacts from previous parsing
    cont_data = lmdk_lib.load_data(args, 'cont')
    if cont_data.size == 0:
        cont = _read_contacts(args)
        if not cont:
            # Without contacts the 2-D indexing below would fail
            print('No valid contacts found.')
            return
        # Remove duplicates
        cont_data = np.unique(np.array(cont, np.float32), axis=0)
        # Save to results
        lmdk_lib.save_data(args, cont_data, 'cont')
    # Get all users
    usrs = np.unique(cont_data[:, uid_a])
    goal = [.2, .4, .6, .8]
    # Get users' data from previous parsing
    usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
    if usrs_expt.size == 0:
        # Users suitable for experiments
        found = []
        # Check each user
        for usr_i, usr in enumerate(usrs):
            print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
            # User's contacts
            usr_cont = cont_data[cont_data[:, uid_a] == usr]
            found += _find_landmarks(usr, usr_cont, goal)
        # Keep an array (also when empty) so that it can be indexed by
        # column below, exactly like the data loaded from the results
        usrs_expt = np.array(found, str).reshape(-1, 3)
        # Save to results
        lmdk_lib.save_data(args, usrs_expt, 'usrs_expt')
    # Report the number of contacts of every suitable user
    for usr in np.unique(usrs_expt[:, 0]):
        usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
        print(int(usr), len(usr_cont))
    # Return normally so that the caller can report the elapsed time
    return
    # '''
    # Get contacts for user 623
    # 9378 contacts
    # '''
    # usr = '623'
    # # All user contacts
    # usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
    # # All user landmarks for different goals
    # usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
    # for g in goal:
    #   # Get goal landmarks
    #   cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])
    #   usr_cont_cur = np.copy(usr_cont)
    #   # Remove goal landmarks from contacts
    #   for c in cont:
    #     usr_cont_cur = usr_cont_cur[usr_cont_cur[:, uid_b] != c]
    #   # Check
    #   print(g, (len(usr_cont) - len(usr_cont_cur))/len(usr_cont))
2021-09-22 21:44:38 +02:00
def parse_args(argv=None):
    '''
    Parse arguments.

    Parameters:
      argv - The list of arguments to parse.  When None (the default),
             the command line (sys.argv[1:]) is parsed.
    Optional:
      arc - The data archive file.
      cont - The contacts data file.
      res - The results archive file.
    Returns:
      The argparse.Namespace with the attributes arc, cont, and res.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Data.zip')
    parser.add_argument('-c', '--cont', help='The contacts data file.', type=str, default='bt_symmetric.csv')
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Results.zip')

    # Parse arguments.
    return parser.parse_args(argv)
# Script entry point: run main() on the parsed arguments and report the
# elapsed time, or stop quietly when interrupted by the user.
if __name__ == '__main__':
    try:
        # Monotonic clock, so that system clock adjustments during the
        # run cannot distort the measured duration
        start_time = time.perf_counter()
        main(parse_args())
        end_time = time.perf_counter()
        print('##############################')
        print('Time : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit is always available, unlike the exit() helper that
        # the site module adds for interactive use
        sys.exit()