#!/usr/bin/env python3

import sys
sys.path.insert(1, 'lib')
import argparse
import ast
import csv
from datetime import datetime
from geopy.distance import distance
import io
import itertools
import lmdk_lib
import numpy as np
import os
import matplotlib.pyplot as plt
import time
import zipfile


'''
  Number of contacts (n) per user (usr) that is suitable for experiments.

  usr  n
  109  6549
  112  3277
  113  4202
  139  8681
  14   9378
  145  4192
  147  4428
  157  11503
  164  8045
  17   4238
  176  19732
  178  7738
  186  3289
  190  13843
  216  6126
  236  3810
  244  10563
  262  17567
  264  9792
  274  11572
  275  4316
  287  13394
  299  7373
  304  6502
  317  3892
  324  11349
  344  14479
  374  5512
  383  18751
  389  3438
  396  7004
  453  3957
  474  7692
  480  4009
  504  2622
  505  11200
  561  4304
  570  2136
  576  6553
  58   12755
  593  3429
  595  4982
  619  6189
  633  5229
  657  10941
  658  1658
  663  11461
  688  19794
  691  6755
  705  5
  76   4875
  77   25
  79   8892
  81   8944
  88   17254
'''


# https://cloud.delkappa.com/s/ACMsDr2jnW3b6Np
# Copenhagen data format
# Header size
hdr = 1
# Timestamp
tim = 0
# User ID A
uid_a = 1
# User ID B
uid_b = 2
# Received Signal Strength Indicator (RSSI)
rssi = 3


def main(args):
    '''
      Extract the valid contacts, find the users that are suitable for
      experiments, and check the landmarks of user 14.

      A user is suitable when, for every goal in [.2, .4, .6, .8], there is
      a prefix of the other users whose removal drops a fraction of the
      user's contacts that is within 1% (relative) of the goal.

      Both the contacts and the suitable users are first requested from
      lmdk_lib.load_data and are recomputed (and stored with
      lmdk_lib.save_data) only when the returned array is empty.
      NOTE(review): presumably load_data returns an empty NumPy array when
      nothing is cached in args.res; confirm against lmdk_lib.

      Parameters:
        args - The parsed arguments (arc, cont, res).
    '''

    '''
      Load data
    '''
    # Get contacts from previous parsing
    cont_data = lmdk_lib.load_data(args, 'cont')
    if cont_data.size == 0:
        # Contacts [tim, uid_a, uid_b, rssi]
        cont = []
        try:
            print('Extracting %s... ' %(os.path.abspath(args.arc)), end='', flush=True)
            with zipfile.ZipFile(args.arc, 'r') as arc:
                print('[OK]')
                with io.TextIOWrapper(arc.open(args.cont), newline='\n') as dat:
                    try:
                        print('Finding contacts... ', end='', flush=True)
                        # Get the contacts by skipping the header
                        cont_l = list(csv.reader(dat, delimiter=','))[hdr:]
                        # Check each contact
                        for c in cont_l:
                            # Skip the rows with a '-1' or '-2' in place of
                            # the second user, with zero RSSI, or with the
                            # same user on both sides.
                            if c[uid_b] != '-1' and c[rssi] != '0' and c[uid_b] != '-2' and c[uid_a] != c[uid_b]:
                                # Add valid contact
                                cont.append([c[tim], c[uid_a], c[uid_b], c[rssi]])
                        print('[OK]')
                    except Exception as e:
                        print('[Error: %s]' %(e))
        except Exception as e:
            print('[Error: %s]' %(e))
        # Use the freshly parsed contacts for the rest of the run; otherwise
        # the empty array from the cache would be indexed below.
        cont_data = np.array(cont, np.float32)
        # Save to results
        lmdk_lib.save_data(args, cont_data, 'cont')
    if cont_data.size == 0:
        # Nothing was cached and nothing could be parsed.
        print('No contacts found.')
        return

    '''
      Get users' landmarks
    '''
    # Get all users
    usrs = np.unique(np.concatenate((cont_data[:, uid_a], cont_data[:, uid_b]), 0))
    # Check each user
    goal = [.2, .4, .6, .8]
    # Get users' data from previous parsing
    usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
    if usrs_expt.size == 0:
        # Users suitable for experiments
        usrs_expt = []
        for usr_i, usr in enumerate(usrs):
            print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
            usrs_expt_cur = []
            # User's contacts
            usr_cont = cont_data[(cont_data[:, uid_a] == usr) | (cont_data[:, uid_b] == usr)]
            # Every other user is a possible contact (usrs is unique).
            usrs_cur = [int(u) for u in usrs if u != usr]
            # For each goal
            for_expt = True
            for g in goal:
                if not for_expt:
                    break
                # Possible contacts
                pos_cont = []
                # Contacts that remain after removing the possible contacts.
                # Filtering incrementally gives the same result as filtering
                # a fresh copy by the whole list on every step.
                usr_cont_cur = usr_cont
                # Check for every possible contact
                for u in usrs_cur:
                    # Add possible contacts gradually
                    pos_cont.append(u)
                    # Remove from user contacts
                    usr_cont_cur = usr_cont_cur[(usr_cont_cur[:, uid_a] != u) & (usr_cont_cur[:, uid_b] != u)]
                    # Compare the difference
                    diff = (len(usr_cont) - len(usr_cont_cur))/len(usr_cont)
                    # Check if it is close enough to what we need
                    if abs(diff - g)/g < .01:
                        usrs_expt_cur.append([int(usr), g, str(pos_cont)])
                        if g == goal[-1]:
                            # That's a keeper
                            print('[OK]')
                            usrs_expt += usrs_expt_cur
                        break
                    elif diff > g:
                        # Overshot the goal without getting close enough
                        print('[NOK]')
                        for_expt = False
                        break
                else:
                    # Ran out of possible contacts before reaching the goal;
                    # terminate the status line and drop the user.
                    print('[NOK]')
                    for_expt = False
        # Convert to an array of strings; the list cannot be sliced by
        # column below.
        usrs_expt = np.array(usrs_expt, str)
        # Save to results
        lmdk_lib.save_data(args, usrs_expt, 'usrs_expt')
    if usrs_expt.size == 0:
        print('No users suitable for experiments.')
        return

    # Debugging aid that lists the number of contacts per suitable user.
    # # Get all users
    # usrs = np.unique(usrs_expt[:, 0])
    # for usr in usrs:
    #   usr_cont = cont_data[(cont_data[:, uid_a] == float(usr)) | (cont_data[:, uid_b] == float(usr))]
    #   print(int(usr), len(usr_cont))
    # exit()

    '''
      Get contacts for user 14
      9378 contacts
    '''
    usr = '14'
    # All user contacts
    usr_cont = cont_data[(cont_data[:, uid_a] == float(usr)) | (cont_data[:, uid_b] == float(usr))]
    # All user landmarks for different goals
    # NOTE(review): assumes the first column holds the user IDs as strings,
    # as written by np.array(usrs_expt, str) above; confirm for cached data.
    usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
    if len(usr_cont) == 0 or len(usr_lmdk) == 0:
        print('User %s is not suitable for experiments.' %(usr))
        return
    for g in goal:
        # Get goal landmarks
        lmdks = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])
        usr_cont_cur = np.copy(usr_cont)
        # Remove goal landmarks from contacts
        for c in lmdks:
            usr_cont_cur = usr_cont_cur[(usr_cont_cur[:, uid_a] != c) & (usr_cont_cur[:, uid_b] != c)]
        # Check
        print(g, (len(usr_cont) - len(usr_cont_cur))/len(usr_cont))


'''
  Parse arguments.

  Optional:
    arc  - The data archive file.
    cont - The contacts data file.
    res  - The results archive file.
'''
def parse_args():
    # Create argument parser.
    parser = argparse.ArgumentParser()

    # Mandatory arguments.

    # Optional arguments.
    parser.add_argument('-a', '--arc', help='The data archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Data.zip')
    parser.add_argument('-c', '--cont', help='The contacts data file.', type=str, default='bt_symmetric.csv')
    parser.add_argument('-r', '--res', help='The results archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Results.zip')

    # Parse arguments.
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    try:
        start_time = time.time()
        main(parse_args())
        end_time = time.time()
        print('##############################')
        print('Time   : %.4fs' % (end_time - start_time))
        print('##############################')
    except KeyboardInterrupt:
        print('Interrupted by user.')
        # sys.exit does not depend on the site module being loaded.
        sys.exit()