code: Corrections and cleaning up

Manos Katsomallos 2021-10-07 12:32:46 +02:00
parent 192c6b8caf
commit 9707ef3e98
27 changed files with 49 additions and 36 deletions

View File

@@ -27,7 +27,7 @@ def main(args):
 # The landmarks percentages
 lmdks_pct = [0, 20, 40, 60, 80, 100]
 # The privacy budget
-epsilon = 1.0
+epsilon = .1
 # Number of methods
 n = 3
@@ -68,32 +68,26 @@ def main(args):
 # Skip
 rls_data_s, bgts_s = lmdk_bgt.skip_cont(seq, lmdks, epsilon)
 # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s)
-mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter
+mae_s[i] += (lmdk_bgt.mae_cont(rls_data_s)/args.iter)*100
 # Uniform
 rls_data_u, bgts_u = lmdk_bgt.uniform_cont(seq, lmdks, epsilon)
 # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_u)
-mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter
+mae_u[i] += (lmdk_bgt.mae_cont(rls_data_u)/args.iter)*100
 # Adaptive
 rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5)
-mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
+mae_a[i] += (lmdk_bgt.mae_cont(rls_data_a)/args.iter)*100
 # Calculate once
 if i == 0:
 # Event
 rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
-mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
+mae_evt += (lmdk_bgt.mae_cont(rls_data_evt)/args.iter)*100
 # User
 rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
-mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
-exit()
-mae_u *= 100
-mae_s *= 100
-mae_a *= 100
-mae_evt *= 100
-mae_usr *= 100
+mae_usr += (lmdk_bgt.mae_cont(rls_data_usr)/args.iter)*100
 plt.axhline(
 y = mae_evt,
 color = '#212121',
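The stray exit() and the trailing mae_* *= 100 lines are dropped because the scaling to a percentage now happens inside each update; since the accumulation is linear, the two forms are equivalent. A minimal sketch with made-up per-iteration errors:

# Scaling each contribution by 100 inside the loop gives the same result as
# scaling the accumulated sum once at the end.
iters = 4
errors = [0.12, 0.08, 0.10, 0.09]       # hypothetical per-iteration MAE values

mae_pct = 0.0
for e in errors:
    mae_pct += (e / iters) * 100        # as the updated accumulators do

assert abs(mae_pct - sum(errors) / iters * 100) < 1e-9
print(mae_pct)                          # ~9.75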
@@ -172,7 +166,7 @@ if __name__ == '__main__':
 main(parse_args())
 end_time = time.time()
 print('##############################')
-print('Time : %.4fs' % (end_time - start_time))
+print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
 print('##############################')
 except KeyboardInterrupt:
 print('Interrupted by user.')
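The new timing line reports the elapsed time as HH:MM:SS instead of raw seconds. A small sketch of the same pattern; note that time.gmtime interprets the duration as an offset from the epoch, so the format is fine for runs under 24 hours and wraps after that:

import time

start_time = time.time()
time.sleep(1.2)                          # stand-in for the actual experiment
end_time = time.time()
print('Time elapsed: %s' % time.strftime('%H:%M:%S', time.gmtime(end_time - start_time)))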

View File

@@ -492,7 +492,8 @@ def skip_cont(seq, lmdks, epsilon):
 lmdks - The landmarks.
 epsilon - The available privacy budget.
 Returns:
-rls_data - The perturbed data.
+rls_data - The new data.
+[0: The true answer, 1: The perturbed answer]
 bgts - The privacy budget allocation.
 '''
 # Event-level budget allocation
@@ -501,15 +502,15 @@ def skip_cont(seq, lmdks, epsilon):
 rls_data = [None]*len(seq)
 for i, p in enumerate(seq):
 # Check if current point is a landmark
-r = any((lmdks[:]==p).all(1))
+is_landmark = any(np.equal(lmdks, p).all(1))
 # Add noise
-o = lmdk_lib.randomized_response(r, bgts[i])
-if r:
+o = lmdk_lib.randomized_response(is_landmark, bgts[i])
+if is_landmark:
 if i > 0:
 # Approximate with previous
 o = rls_data[i - 1][1]
 bgts[i] = 0
-rls_data[i] = [r, o]
+rls_data[i] = [is_landmark, o]
 return rls_data, bgts
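lmdk_lib.randomized_response is not part of this diff. A common way such a helper works, assumed here rather than taken from the repository, is to keep the true bit with probability e^epsilon / (e^epsilon + 1); with a zero budget the report is a fair coin, which is why skip_cont instead reuses the previous release for landmarks and records bgts[i] = 0.

import math
import random

def randomized_response(truth, epsilon):
    # Hypothetical stand-in for lmdk_lib.randomized_response: report the true
    # boolean with probability e^epsilon / (e^epsilon + 1), flip it otherwise.
    p_keep = math.exp(epsilon) / (math.exp(epsilon) + 1)
    return truth if random.random() < p_keep else not truth

print(randomized_response(True, 0.1))    # with epsilon = 0 this would be a fair coin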
@@ -720,7 +721,7 @@ def uniform_cont(seq, lmdks, epsilon):
 # Budgets
 bgts = uniform(seq, lmdks, epsilon)
 for i, p in enumerate(seq):
-r = any((lmdks[:]==p).all(1))
+r = any(np.equal(lmdks, p).all(1))
 # [original, perturbed]
 rls_data[i] = [r, lmdk_lib.randomized_response(r, bgts[i])]
 return rls_data, bgts
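The rewritten membership test asks whether the point p matches some row of the lmdks array: np.equal broadcasts p against every row and .all(1) requires every column to agree. A toy check with hypothetical two-column landmarks:

import numpy as np

lmdks = np.array([[1, 10], [4, 25], [7, 30]])   # hypothetical landmark rows
p = np.array([4, 25])

r = any(np.equal(lmdks, p).all(1))              # row-wise equality, then any match
print(r)                                        # True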

View File

@@ -965,7 +965,7 @@ def find_lmdks_cont(lmdk_data, seq, uid, pct):
 Returns:
 lmdks - The user's landmarks contacts for the given
 landmarks percentage.
-0: uid_b
+0: tim, 1: uid, 2: cont, 3: rssi
 '''
 # Initialize user's landmarks
 lmdks = np.empty(0).reshape(0,4)
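The corrected docstring spells out the column layout of the contact records: 0: tim, 1: uid, 2: cont, 3: rssi. A toy example with made-up rows, showing the same (0, 4) empty-array convention and a per-user filter of the kind such a helper might apply:

import numpy as np

# Hypothetical contact records in the documented layout:
# column 0: tim (timestamp), 1: uid (user), 2: cont (contacted user), 3: rssi.
lmdk_data = np.array([
    [10, 1, 2, -60],
    [15, 1, 3, -70],
    [20, 2, 1, -55],
])
uid = 1

lmdks = np.empty(0).reshape(0, 4)        # the function's initial empty shape
mask = lmdk_data[:, 1] == uid
if mask.any():
    lmdks = lmdk_data[mask]
print(lmdks)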

View File

@@ -5,8 +5,8 @@ from lmdk_lib import *
 import exp_mech
 import numpy as np
 import random
-import scipy.stats as stats
 import time
+from scipy.spatial.distance import cdist
 '''
@@ -163,7 +163,7 @@ def get_opts_from_top_h(seq, lmdks):
 # The options to be returned
 hist_opts = []
 # Keep adding points until the maximum is reached
-while np.sum(hist_cur) < max(seq):
+while np.sum(hist_cur) < len(seq):
 # Track the minimum (best) evaluation
 diff_min = float('inf')
 # The candidate option
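The loop bound changes from max(seq) to len(seq): the histogram being filled can only ever hold as many items as the sequence has timestamps, and the two bounds differ whenever the timestamps are not exactly 1..n. A tiny illustration with a hypothetical sequence:

import numpy as np

# Timestamps 5..14: the largest value is 14 but only 10 items exist, so a
# greedy fill stopping at np.sum(hist_cur) < max(seq) would overshoot.
seq = np.arange(5, 15)
print(max(seq), len(seq))   # 14 10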
@@ -175,8 +175,8 @@ def get_opts_from_top_h(seq, lmdks):
 hist_tmp = np.copy(hist_cur)
 hist_tmp[i] += 1
 # Find difference from original
-# diff_cur = get_norm(hist, hist_tmp) # Euclidean
-diff_cur = get_emd(hist, hist_tmp) # Wasserstein
+diff_cur = get_norm(hist, hist_tmp) # Euclidean
+# diff_cur = get_emd(hist, hist_tmp) # Wasserstein
 # Remember if it is the best that you've seen
 if diff_cur < diff_min:
 diff_min = diff_cur
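get_norm and get_emd are defined elsewhere in this file and are not shown in the diff; plausible stand-ins, under that assumption, are the Euclidean norm of the histogram difference and SciPy's 1-D Wasserstein distance over the histogram bins:

import numpy as np
from scipy.stats import wasserstein_distance

def get_norm(h1, h2):
    # Euclidean distance between two histograms (stand-in, may differ from the repo's).
    return np.linalg.norm(np.asarray(h1) - np.asarray(h2))

def get_emd(h1, h2):
    # Earth mover's / Wasserstein distance between two histograms over the same bins.
    bins = np.arange(len(h1))
    return wasserstein_distance(bins, bins, u_weights=h1, v_weights=h2)

hist, hist_tmp = [3, 1, 1, 2], [3, 1, 2, 2]
print(get_norm(hist, hist_tmp), get_emd(hist, hist_tmp))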
@@ -326,19 +326,37 @@ def find_lmdks(seq, lmdks, epsilon):
 opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq)
 # Landmarks selection budget
 eps_sel = epsilon/(len(lmdks_seq) + 1)
-# Get private landmarks timestamps
-lmdks_seq, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel)
+# Get landmarks histogram with dummy landmarks
+hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel)
+# Split sequence in parts of size h
+pt_idx = []
+for idx in range(1, len(seq), h):
+pt_idx.append([idx, idx + h - 1])
+pt_idx[-1][1] = len(seq)
+# Get new landmarks indexes
+lmdks_seq_new = np.array([], dtype=int)
+for i, pt in enumerate(pt_idx):
+# Already landmarks
+lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])]
+# Sample randomly from the rest of the sequence
+size = hist_new[i] - len(lmdks_seq_pt)
+rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt)
+# Add already landmarks
+lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt])
+# Add new landmarks
+if size > 0 and len(rglr) > size:
+lmdks_seq_new = np.concatenate([lmdks_seq_new,
+np.random.choice(
+rglr,
+size = size,
+replace = False
+)
+])
 # Get actual landmarks values
-lmdks_new = seq[lmdks_seq]
+lmdks_new = seq[lmdks_seq_new - 1]
 return lmdks_new, epsilon - eps_sel
 def test():
-A = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
-B = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
-print(get_norm(A, B))
-exit()
 # Start and end points of the sequence
 # # Nonrandom
 # start = 1
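To make the block added to find_lmdks easier to follow, here is a standalone toy with made-up values for h, the landmark indexes, and the per-part targets (hist_new); in the real function these come from earlier steps not shown in this diff.

import numpy as np

# Toy version of the added logic: partition the index range 1..n into parts of
# size h, keep the true landmarks of each part, and pad each part with randomly
# drawn dummy indexes up to its per-part target.
n, h = 12, 4                                  # hypothetical sequence length and part size
lmdks_seq = np.array([2, 3, 9])               # hypothetical landmark indexes (1-based)
hist_new = np.array([3, 1, 2])                # hypothetical per-part targets

pt_idx = [[idx, min(idx + h - 1, n)] for idx in range(1, n, h)]

lmdks_seq_new = np.array([], dtype=int)
for i, (lo, hi) in enumerate(pt_idx):
    part = lmdks_seq[(lmdks_seq >= lo) & (lmdks_seq <= hi)]    # landmarks already in this part
    pool = np.setdiff1d(np.arange(lo, hi + 1), part)           # remaining candidate indexes
    need = hist_new[i] - len(part)
    lmdks_seq_new = np.concatenate([lmdks_seq_new, part])
    if need > 0 and len(pool) > need:
        lmdks_seq_new = np.concatenate(
            [lmdks_seq_new, np.random.choice(pool, size=need, replace=False)]
        )

print(np.sort(lmdks_seq_new))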

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.