code: Checking for duplicates

This commit is contained in:
Manos Katsomallos 2021-09-29 12:49:12 +02:00
parent fabd69bcf0
commit 4ea7fc2054

View File

@@ -78,8 +78,10 @@ def main(args):
print('[Error: %s]' %(e)) print('[Error: %s]' %(e))
except Exception as e: except Exception as e:
print('[Error: %s]' %(e)) print('[Error: %s]' %(e))
# Remove duplicates
cont_data = np.unique(np.array(cont, np.float32), axis=0)
# Save to results # Save to results
lmdk_lib.save_data(args, np.array(cont, np.float32), 'cont') lmdk_lib.save_data(args, cont_data, 'cont')
''' '''
Get users' landmarks Get users' landmarks
''' '''
@@ -129,30 +131,30 @@ def main(args):
break break
# Save to results # Save to results
lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt') lmdk_lib.save_data(args, np.array(usrs_expt, str), 'usrs_expt')
# # Get all users # Get all users
# usrs = np.unique(usrs_expt[:, 0]) usrs = np.unique(usrs_expt[:, 0])
# for usr in usrs: for usr in usrs:
# usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
# print(int(usr), len(usr_cont))
# exit()
'''
Get contacts for user 623
9378 contacts
'''
usr = '623'
# All user contacts
usr_cont = cont_data[cont_data[:, uid_a] == float(usr)] usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
# All user landmarks for different goals print(int(usr), len(usr_cont))
usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr] exit()
for g in goal: # '''
# Get goal landmarks # Get contacts for user 623
cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2]) # 9378 contacts
usr_cont_cur = np.copy(usr_cont) # '''
# Remove goal landmarks from contacts # usr = '623'
for c in cont: # # All user contacts
usr_cont_cur = usr_cont_cur[usr_cont_cur[:, uid_b] != c] # usr_cont = cont_data[cont_data[:, uid_a] == float(usr)]
# Check # # All user landmarks for different goals
print(g, (len(usr_cont) - len(usr_cont_cur))/len(usr_cont)) # usr_lmdk = usrs_expt[usrs_expt[:, 0] == usr]
# for g in goal:
# # Get goal landmarks
# cont = ast.literal_eval(usr_lmdk[usr_lmdk[:, 1] == str(g)][0][2])
# usr_cont_cur = np.copy(usr_cont)
# # Remove goal landmarks from contacts
# for c in cont:
# usr_cont_cur = usr_cont_cur[usr_cont_cur[:, uid_b] != c]
# # Check
# print(g, (len(usr_cont) - len(usr_cont_cur))/len(usr_cont))
''' '''