diff --git a/code/parse_copenhagen.py b/code/parse_copenhagen.py index d5aaaaf..00fa8f7 100644 --- a/code/parse_copenhagen.py +++ b/code/parse_copenhagen.py @@ -7,6 +7,7 @@ import csv from datetime import datetime from geopy.distance import distance import io +import itertools import lmdk_lib import numpy as np import os @@ -56,6 +57,48 @@ def main(args): print('[Error: %s]' %(e)) # Save to results lmdk_lib.save_data(args, np.array(cont, np.float32), 'cont') + # Get all users + usrs = np.unique(np.concatenate((cont_data[:, uid_a], cont_data[:, uid_b]), 0)) + # Check each user + goal = [.2, .4, .6, .8, 1] + # Users suitable for experiments + usrs_expt = [] + for usr_i, usr in enumerate(usrs): + print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True) + # User's contacts + usr_cont = cont_data[(cont_data[:, uid_a] == usr) | (cont_data[:, uid_b] == usr)] + # For each goal + for_expt = True + for g in goal: + if for_expt: + # Possible contacts + pos_cont = [] + usrs_cur = list(usrs) + # Remove user + usrs_cur.remove(usr) + # Check for every possible contact + for u in usrs_cur: + # Add possible contacts gradually + pos_cont.append(u) + # Remove from user contacts + usr_cont_cur = np.copy(usr_cont) + for pos_c in pos_cont: + usr_cont_cur = usr_cont_cur[(usr_cont_cur[:, uid_a] != pos_c) & (usr_cont_cur[:, uid_b] != pos_c)] + # Compare the difference + diff = (len(usr_cont) - len(usr_cont_cur))/len(usr_cont) + # Check if it's close enough to what we need + if abs(diff - g)/g < .025: + usrs_expt.append([usr, g, pos_cont]) + if g == 1: + # That's a keeper + print('[OK]') + break + elif diff > g: + print('[NOK]') + for_expt = False + break + # Save to results + lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt') '''