code: Validated parsing of the Copenhagen data set

This commit is contained in:
Manos Katsomallos 2021-09-23 18:15:24 +02:00
parent 04fdbb7c83
commit fae40756d0

View File

@ -3,6 +3,7 @@
import sys
sys.path.insert(1, 'lib')
import argparse
import ast
import csv
from datetime import datetime
from geopy.distance import distance
@ -31,6 +32,9 @@ rssi = 3
def main(args):
'''
Load data
'''
# Get contacts from previous parsing
cont_data = lmdk_lib.load_data(args, 'cont')
if cont_data.size == 0:
@ -57,50 +61,75 @@ def main(args):
print('[Error: %s]' %(e))
# Save to results
lmdk_lib.save_data(args, np.array(cont, np.float32), 'cont')
'''
Get users' landmarks
'''
# Get all users
usrs = np.unique(np.concatenate((cont_data[:, uid_a], cont_data[:, uid_b]), 0))
# Check each user
goal = [.2, .4, .6, .8]
# Users suitable for experiments
usrs_expt = []
for usr_i, usr in enumerate(usrs):
print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
usrs_expt_cur = []
# User's contacts
usr_cont = cont_data[(cont_data[:, uid_a] == usr) | (cont_data[:, uid_b] == usr)]
# For each goal
for_expt = True
for g in goal:
if for_expt:
# Possible contacts
pos_cont = []
usrs_cur = list(usrs)
# Remove user
usrs_cur.remove(usr)
# Check for every possible contact
for u in usrs_cur:
# Add possible contacts gradually
pos_cont.append(int(u))
# Remove from user contacts
usr_cont_cur = np.copy(usr_cont)
for pos_c in pos_cont:
usr_cont_cur = usr_cont_cur[(usr_cont_cur[:, uid_a] != pos_c) & (usr_cont_cur[:, uid_b] != pos_c)]
# Compare the difference
diff = (len(usr_cont) - len(usr_cont_cur))/len(usr_cont)
# Check if it is close enough to what we need
if abs(diff - g)/g < .01:
usrs_expt_cur.append([int(usr), g, str(pos_cont)])
if g == goal[len(goal) - 1]:
# That's a keeper
print('[OK]')
usrs_expt += usrs_expt_cur
break
elif diff > g:
print('[NOK]')
for_expt = False
break
# Save to results
lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt')
# Get users' data from previous parsing
usrs_expt = lmdk_lib.load_data(args, 'usrs_expt')
if usrs_expt.size == 0:
# Users suitable for experiments
usrs_expt = []
for usr_i, usr in enumerate(usrs):
print('Checking %d (%d%%: %d/%d)... ' %(usr, (usr_i + 1)*100/len(usrs), usr_i + 1, len(usrs)), end='', flush=True)
usrs_expt_cur = []
# User's contacts
usr_cont = cont_data[(cont_data[:, uid_a] == usr) | (cont_data[:, uid_b] == usr)]
# For each goal
for_expt = True
for g in goal:
if for_expt:
# Possible contacts
pos_cont = []
usrs_cur = list(usrs)
# Remove user
usrs_cur.remove(usr)
# Check for every possible contact
for u in usrs_cur:
# Add possible contacts gradually
pos_cont.append(int(u))
# Remove from user contacts
usr_cont_cur = np.copy(usr_cont)
for pos_c in pos_cont:
usr_cont_cur = usr_cont_cur[(usr_cont_cur[:, uid_a] != pos_c) & (usr_cont_cur[:, uid_b] != pos_c)]
# Compare the difference
diff = (len(usr_cont) - len(usr_cont_cur))/len(usr_cont)
# Check if it is close enough to what we need
if abs(diff - g)/g < .01:
usrs_expt_cur.append([int(usr), g, str(pos_cont)])
if g == goal[len(goal) - 1]:
# That's a keeper
print('[OK]')
usrs_expt += usrs_expt_cur
break
elif diff > g:
print('[NOK]')
for_expt = False
break
# Save to results
lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt')
'''
Get contacts for uid 14
'''
usr = '14'
# All user contacts
usr_cont = cont_data[(cont_data[:, uid_a] == float(usr)) | (cont_data[:, uid_b] == float(usr))]
# All user landmarks for different goals
usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
for g in goal:
# Get goal landmarks
cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])
usr_cont_cur = np.copy(usr_cont)
# Remove goal landmarks from contacts
for c in cont:
# print(usr_cont)
# exit()
usr_cont_cur = usr_cont_cur[(usr_cont_cur[:, uid_a] != c) & (usr_cont_cur[:, uid_b] != c)]
# Check
print(g, (len(usr_cont) - len(usr_cont_cur))/len(usr_cont))
'''