diff --git a/code/expt/copenhagen.py b/code/expt/copenhagen.py index adb7610..0da5a07 100644 --- a/code/expt/copenhagen.py +++ b/code/expt/copenhagen.py @@ -27,7 +27,7 @@ def main(args): # The landmarks percentages lmdks_pct = [0, 20, 40, 60, 80, 100] # The privacy budget - epsilon = 1.0 + epsilon = .1 # Number of methods n = 3 @@ -68,32 +68,26 @@ def main(args): # Skip rls_data_s, bgts_s = lmdk_bgt.skip_cont(seq, lmdks, epsilon) # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s) - mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter + mae_s[i] += (lmdk_bgt.mae_cont(rls_data_s)/args.iter)*100 # Uniform rls_data_u, bgts_u = lmdk_bgt.uniform_cont(seq, lmdks, epsilon) # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_u) - mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter + mae_u[i] += (lmdk_bgt.mae_cont(rls_data_u)/args.iter)*100 # Adaptive rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5) - mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter + mae_a[i] += (lmdk_bgt.mae_cont(rls_data_a)/args.iter)*100 # Calculate once if i == 0: # Event rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon) - mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter + mae_evt += (lmdk_bgt.mae_cont(rls_data_evt)/args.iter)*100 # User rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon) - mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter - - mae_u *= 100 - mae_s *= 100 - mae_a *= 100 - mae_evt *= 100 - mae_usr *= 100 - + mae_usr += (lmdk_bgt.mae_cont(rls_data_usr)/args.iter)*100 + exit() plt.axhline( y = mae_evt, color = '#212121', @@ -172,7 +166,7 @@ if __name__ == '__main__': main(parse_args()) end_time = time.time() print('##############################') - print('Time : %.4fs' % (end_time - start_time)) + print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time)))) print('##############################') except KeyboardInterrupt: print('Interrupted by user.') diff --git a/code/lib/lmdk_bgt.py b/code/lib/lmdk_bgt.py index 0e6c764..8ff6a7f 100644 --- a/code/lib/lmdk_bgt.py +++ b/code/lib/lmdk_bgt.py @@ -492,7 +492,8 @@ def skip_cont(seq, lmdks, epsilon): lmdks - The landmarks. epsilon - The available privacy budget. Returns: - rls_data - The perturbed data. + rls_data - The new data. + [0: The true answer, 1: The perturbed answer] bgts - The privacy budget allocation. ''' # Event-level budget allocation @@ -501,15 +502,15 @@ def skip_cont(seq, lmdks, epsilon): rls_data = [None]*len(seq) for i, p in enumerate(seq): # Check if current point is a landmark - r = any((lmdks[:]==p).all(1)) + is_landmark = any(np.equal(lmdks, p).all(1)) # Add noise - o = lmdk_lib.randomized_response(r, bgts[i]) - if r: + o = lmdk_lib.randomized_response(is_landmark, bgts[i]) + if is_landmark: if i > 0: # Approximate with previous o = rls_data[i - 1][1] - bgts[i] = 0 - rls_data[i] = [r, o] + bgts[i] = 0 + rls_data[i] = [is_landmark, o] return rls_data, bgts @@ -720,7 +721,7 @@ def uniform_cont(seq, lmdks, epsilon): # Budgets bgts = uniform(seq, lmdks, epsilon) for i, p in enumerate(seq): - r = any((lmdks[:]==p).all(1)) + r = any(np.equal(lmdks, p).all(1)) # [original, perturbed] rls_data[i] = [r, lmdk_lib.randomized_response(r, bgts[i])] return rls_data, bgts diff --git a/code/lib/lmdk_lib.py b/code/lib/lmdk_lib.py index 0d1bf80..3820b6e 100644 --- a/code/lib/lmdk_lib.py +++ b/code/lib/lmdk_lib.py @@ -959,13 +959,13 @@ def find_lmdks_cont(lmdk_data, seq, uid, pct): landmarks percentage. 0: uid, 1: lmdk_pct, 2: contacts seq - The users' data. - 0: tim, 1: uid, 2: cont, 3: rssi + 0: tim, 1: uid, 2: cont, 3: rssi uid - The user's id that we are interested in. pct - The landmarks percentage. Returns: lmdks - The user's landmarks contacts for the given landmarks percentage. - 0: uid_b + 0: tim, 1: uid, 2: cont, 3: rssi ''' # Initialize user's landmarks lmdks = np.empty(0).reshape(0,4) diff --git a/code/lib/lmdk_sel.py b/code/lib/lmdk_sel.py index cdbef0f..eb2071a 100644 --- a/code/lib/lmdk_sel.py +++ b/code/lib/lmdk_sel.py @@ -5,8 +5,8 @@ from lmdk_lib import * import exp_mech import numpy as np import random +import scipy.stats as stats import time -from scipy.spatial.distance import cdist ''' @@ -163,7 +163,7 @@ def get_opts_from_top_h(seq, lmdks): # The options to be returned hist_opts = [] # Keep adding points until the maximum is reached - while np.sum(hist_cur) < max(seq): + while np.sum(hist_cur) < len(seq): # Track the minimum (best) evaluation diff_min = float('inf') # The candidate option @@ -175,8 +175,8 @@ def get_opts_from_top_h(seq, lmdks): hist_tmp = np.copy(hist_cur) hist_tmp[i] += 1 # Find difference from original - # diff_cur = get_norm(hist, hist_tmp) # Euclidean - diff_cur = get_emd(hist, hist_tmp) # Wasserstein + diff_cur = get_norm(hist, hist_tmp) # Euclidean + # diff_cur = get_emd(hist, hist_tmp) # Wasserstein # Remember if it is the best that you've seen if diff_cur < diff_min: diff_min = diff_cur @@ -326,19 +326,37 @@ def find_lmdks(seq, lmdks, epsilon): opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq) # Landmarks selection budget eps_sel = epsilon/(len(lmdks_seq) + 1) - # Get private landmarks timestamps - lmdks_seq, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel) + # Get landmarks histogram with dummy landmarks + hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel) + # Split sequence in parts of size h + pt_idx = [] + for idx in range(1, len(seq), h): + pt_idx.append([idx, idx + h - 1]) + pt_idx[-1][1] = len(seq) + # Get new landmarks indexes + lmdks_seq_new = np.array([], dtype=int) + for i, pt in enumerate(pt_idx): + # Already landmarks + lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])] + # Sample randomly from the rest of the sequence + size = hist_new[i] - len(lmdks_seq_pt) + rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt) + # Add already landmarks + lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt]) + # Add new landmarks + if size > 0 and len(rglr) > size: + lmdks_seq_new = np.concatenate([lmdks_seq_new, + np.random.choice( + rglr, + size = size, + replace = False + ) + ]) # Get actual landmarks values - lmdks_new = seq[lmdks_seq] + lmdks_new = seq[lmdks_seq_new - 1] return lmdks_new, epsilon - eps_sel def test(): - - A = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]) - B = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - print(get_norm(A, B)) - exit() - # Start and end points of the sequence # # Nonrandom # start = 1 diff --git a/rslt/bgt_cmp/Copenhagen-sel-pareto.pdf b/rslt/bgt_cmp/Copenhagen-sel-pareto.pdf deleted file mode 100644 index a0d05cc..0000000 Binary files a/rslt/bgt_cmp/Copenhagen-sel-pareto.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/Copenhagen-sel.pdf b/rslt/bgt_cmp/Copenhagen-sel.pdf deleted file mode 100644 index c53fd40..0000000 Binary files a/rslt/bgt_cmp/Copenhagen-sel.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/Copenhagen.pdf b/rslt/bgt_cmp/Copenhagen.pdf index 259ff42..a4bdeb6 100644 Binary files a/rslt/bgt_cmp/Copenhagen.pdf and b/rslt/bgt_cmp/Copenhagen.pdf differ diff --git a/rslt/bgt_cmp/HUE-001.pdf b/rslt/bgt_cmp/HUE-001.pdf deleted file mode 100644 index 4d15c6f..0000000 Binary files a/rslt/bgt_cmp/HUE-001.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/HUE-log.pdf b/rslt/bgt_cmp/HUE-log.pdf deleted file mode 100644 index 2b16030..0000000 Binary files a/rslt/bgt_cmp/HUE-log.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/HUE-new.pdf b/rslt/bgt_cmp/HUE-new.pdf deleted file mode 100644 index 1d204b8..0000000 Binary files a/rslt/bgt_cmp/HUE-new.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/HUE-sel-pareto.pdf b/rslt/bgt_cmp/HUE-sel-pareto.pdf deleted file mode 100644 index 165e305..0000000 Binary files a/rslt/bgt_cmp/HUE-sel-pareto.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/HUE-sel.pdf b/rslt/bgt_cmp/HUE-sel.pdf deleted file mode 100644 index eb1da32..0000000 Binary files a/rslt/bgt_cmp/HUE-sel.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/T-drive-sel-pareto.pdf b/rslt/bgt_cmp/T-drive-sel-pareto.pdf deleted file mode 100644 index 3347f90..0000000 Binary files a/rslt/bgt_cmp/T-drive-sel-pareto.pdf and /dev/null differ diff --git a/rslt/bgt_cmp/T-drive-sel.pdf b/rslt/bgt_cmp/T-drive-sel.pdf deleted file mode 100644 index 8986162..0000000 Binary files a/rslt/bgt_cmp/T-drive-sel.pdf and /dev/null differ diff --git a/rslt/lmdk_sel-pareto/Bimodal landmark distribution.pdf b/rslt/lmdk_sel-pareto/Bimodal landmark distribution.pdf deleted file mode 100644 index b166c23..0000000 Binary files a/rslt/lmdk_sel-pareto/Bimodal landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel-pareto/Left-skewed landmark distribution.pdf b/rslt/lmdk_sel-pareto/Left-skewed landmark distribution.pdf deleted file mode 100644 index aed6768..0000000 Binary files a/rslt/lmdk_sel-pareto/Left-skewed landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel-pareto/Right-skewed landmark distribution.pdf b/rslt/lmdk_sel-pareto/Right-skewed landmark distribution.pdf deleted file mode 100644 index e5a7b60..0000000 Binary files a/rslt/lmdk_sel-pareto/Right-skewed landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel-pareto/Symmetric landmark distribution.pdf b/rslt/lmdk_sel-pareto/Symmetric landmark distribution.pdf deleted file mode 100644 index 12063e3..0000000 Binary files a/rslt/lmdk_sel-pareto/Symmetric landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel-pareto/Uniform landmark distribution.pdf b/rslt/lmdk_sel-pareto/Uniform landmark distribution.pdf deleted file mode 100644 index 43f2dc0..0000000 Binary files a/rslt/lmdk_sel-pareto/Uniform landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel/Bimodal landmark distribution.pdf b/rslt/lmdk_sel/Bimodal landmark distribution.pdf deleted file mode 100644 index 9f15ae1..0000000 Binary files a/rslt/lmdk_sel/Bimodal landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel/Left-skewed landmark distribution.pdf b/rslt/lmdk_sel/Left-skewed landmark distribution.pdf deleted file mode 100644 index 36fbde2..0000000 Binary files a/rslt/lmdk_sel/Left-skewed landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel/Right-skewed landmark distribution.pdf b/rslt/lmdk_sel/Right-skewed landmark distribution.pdf deleted file mode 100644 index e2cd236..0000000 Binary files a/rslt/lmdk_sel/Right-skewed landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel/Symmetric landmark distribution.pdf b/rslt/lmdk_sel/Symmetric landmark distribution.pdf deleted file mode 100644 index 06705c7..0000000 Binary files a/rslt/lmdk_sel/Symmetric landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel/Uniform landmark distribution.pdf b/rslt/lmdk_sel/Uniform landmark distribution.pdf deleted file mode 100644 index 36e2bcc..0000000 Binary files a/rslt/lmdk_sel/Uniform landmark distribution.pdf and /dev/null differ diff --git a/rslt/lmdk_sel_cmp-pareto/lmdk_sel_cmp.pdf b/rslt/lmdk_sel_cmp-pareto/lmdk_sel_cmp.pdf deleted file mode 100644 index ef787d4..0000000 Binary files a/rslt/lmdk_sel_cmp-pareto/lmdk_sel_cmp.pdf and /dev/null differ diff --git a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd.pdf b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd.pdf deleted file mode 100644 index 27efa2b..0000000 Binary files a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-emd.pdf and /dev/null differ diff --git a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm.pdf b/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm.pdf deleted file mode 100644 index c32cb9e..0000000 Binary files a/rslt/lmdk_sel_cmp/lmdk_sel_cmp-norm.pdf and /dev/null differ