code: Corrections and cleaning up

This commit is contained in:
2021-10-07 12:32:46 +02:00
parent 192c6b8caf
commit 9707ef3e98
27 changed files with 49 additions and 36 deletions

View File

@ -492,7 +492,8 @@ def skip_cont(seq, lmdks, epsilon):
lmdks - The landmarks.
epsilon - The available privacy budget.
Returns:
rls_data - The perturbed data.
rls_data - The new data.
[0: The true answer, 1: The perturbed answer]
bgts - The privacy budget allocation.
'''
# Event-level budget allocation
@ -501,15 +502,15 @@ def skip_cont(seq, lmdks, epsilon):
rls_data = [None]*len(seq)
for i, p in enumerate(seq):
# Check if current point is a landmark
r = any((lmdks[:]==p).all(1))
is_landmark = any(np.equal(lmdks, p).all(1))
# Add noise
o = lmdk_lib.randomized_response(r, bgts[i])
if r:
o = lmdk_lib.randomized_response(is_landmark, bgts[i])
if is_landmark:
if i > 0:
# Approximate with previous
o = rls_data[i - 1][1]
bgts[i] = 0
rls_data[i] = [r, o]
bgts[i] = 0
rls_data[i] = [is_landmark, o]
return rls_data, bgts
@ -720,7 +721,7 @@ def uniform_cont(seq, lmdks, epsilon):
# Budgets
bgts = uniform(seq, lmdks, epsilon)
for i, p in enumerate(seq):
r = any((lmdks[:]==p).all(1))
r = any(np.equal(lmdks, p).all(1))
# [original, perturbed]
rls_data[i] = [r, lmdk_lib.randomized_response(r, bgts[i])]
return rls_data, bgts

View File

@ -959,13 +959,13 @@ def find_lmdks_cont(lmdk_data, seq, uid, pct):
landmarks percentage.
0: uid, 1: lmdk_pct, 2: contacts
seq - The users' data.
0: tim, 1: uid, 2: cont, 3: rssi
0: tim, 1: uid, 2: cont, 3: rssi
uid - The user's id that we are interested in.
pct - The landmarks percentage.
Returns:
lmdks - The user's landmarks contacts for the given
landmarks percentage.
0: uid_b
0: tim, 1: uid, 2: cont, 3: rssi
'''
# Initialize user's landmarks
lmdks = np.empty(0).reshape(0,4)

View File

@ -5,8 +5,8 @@ from lmdk_lib import *
import exp_mech
import numpy as np
import random
import scipy.stats as stats
import time
from scipy.spatial.distance import cdist
'''
@ -163,7 +163,7 @@ def get_opts_from_top_h(seq, lmdks):
# The options to be returned
hist_opts = []
# Keep adding points until the maximum is reached
while np.sum(hist_cur) < max(seq):
while np.sum(hist_cur) < len(seq):
# Track the minimum (best) evaluation
diff_min = float('inf')
# The candidate option
@ -175,8 +175,8 @@ def get_opts_from_top_h(seq, lmdks):
hist_tmp = np.copy(hist_cur)
hist_tmp[i] += 1
# Find difference from original
# diff_cur = get_norm(hist, hist_tmp) # Euclidean
diff_cur = get_emd(hist, hist_tmp) # Wasserstein
diff_cur = get_norm(hist, hist_tmp) # Euclidean
# diff_cur = get_emd(hist, hist_tmp) # Wasserstein
# Remember if it is the best that you've seen
if diff_cur < diff_min:
diff_min = diff_cur
@ -326,19 +326,37 @@ def find_lmdks(seq, lmdks, epsilon):
opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq)
# Landmarks selection budget
eps_sel = epsilon/(len(lmdks_seq) + 1)
# Get private landmarks timestamps
lmdks_seq, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel)
# Get landmarks histogram with dummy landmarks
hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel)
# Split sequence in parts of size h
pt_idx = []
for idx in range(1, len(seq), h):
pt_idx.append([idx, idx + h - 1])
pt_idx[-1][1] = len(seq)
# Get new landmarks indexes
lmdks_seq_new = np.array([], dtype=int)
for i, pt in enumerate(pt_idx):
# Existing landmarks that fall within the current part
lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])]
# Sample randomly from the rest of the sequence
size = hist_new[i] - len(lmdks_seq_pt)
rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt)
# Keep the existing landmarks of the current part
lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt])
# Add new landmarks
if size > 0 and len(rglr) > size:
lmdks_seq_new = np.concatenate([lmdks_seq_new,
np.random.choice(
rglr,
size = size,
replace = False
)
])
# Get actual landmarks values
lmdks_new = seq[lmdks_seq]
lmdks_new = seq[lmdks_seq_new - 1]
return lmdks_new, epsilon - eps_sel
def test():
A = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
B = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
print(get_norm(A, B))
exit()
# Start and end points of the sequence
# # Nonrandom
# start = 1