diff --git a/code/expt/bgt_cmp_copenhagen.py b/code/expt/bgt_cmp_copenhagen.py index cd7b256..e5b9455 100644 --- a/code/expt/bgt_cmp_copenhagen.py +++ b/code/expt/bgt_cmp_copenhagen.py @@ -3,10 +3,12 @@ import sys sys.path.insert(1, '../lib') import argparse +import ast from datetime import datetime from geopy.distance import distance import lmdk_bgt import lmdk_lib +import math import numpy as np from matplotlib import pyplot as plt import time @@ -28,7 +30,7 @@ def main(args): epsilon = 1.0 # Number of methods - n = 6 + n = 3 # Width of bars bar_width = 1/(n + 1) # The x axis @@ -46,111 +48,89 @@ def main(args): plt.xlabel('Landmarks percentage') # Set x axis label. plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin) # The y axis - plt.ylabel('Mean absolute error (m)') # Set y axis label. - plt.yscale('log') - plt.ylim(1, 100000000) + plt.ylabel('Mean absolute error') # Set y axis label. + # plt.yscale('log') + plt.ylim(0, 1.4) # Bar offset x_offset = -(bar_width/2)*(n - 1) mae_u = np.zeros(len(lmdks_pct)) mae_s = np.zeros(len(lmdks_pct)) mae_a = np.zeros(len(lmdks_pct)) - mae_r = np.zeros(len(lmdks_pct)) - mae_d = np.zeros(len(lmdks_pct)) - mae_i = np.zeros(len(lmdks_pct)) + mae_evt = np.zeros(len(lmdks_pct)) + mae_usr = np.zeros(len(lmdks_pct)) for i, pct in enumerate(lmdks_pct): # Find landmarks + # lmdks = lmdk_lib.find_lmdks_tim(lmdk_data, seq, uid, pct) lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct) - print(pct, np.shape(lmdks)[0]/np.shape(seq)[0]) + for _ in range(args.iter): + # Skip + rls_data_s, _ = lmdk_bgt.skip_cont(seq, lmdks, epsilon) + mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter - # for _ in range(args.iter): - # # Skip - # rls_data_s, _ = lmdk_bgt.skip(seq, lmdks, epsilon) - # mae_s[i] += lmdk_bgt.mae(seq, rls_data_s)/args.iter + # Uniform + rls_data_u, _ = lmdk_bgt.uniform_cont(seq, lmdks, epsilon) + mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter - # # Uniform - # rls_data_u, _ = lmdk_bgt.uniform_r(seq, lmdks, epsilon) - # mae_u[i] += lmdk_bgt.mae(seq, rls_data_u)/args.iter + # Adaptive + rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon, .5, .5) + mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter - # # Adaptive - # rls_data_a, _, _ = lmdk_bgt.adaptive(seq, lmdks, epsilon, .5, .5) - # mae_a[i] += lmdk_bgt.mae(seq, rls_data_a)/args.iter + # Event + rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon) + mae_evt[i] += lmdk_bgt.mae_cont(rls_data_evt)/args.iter + # User + rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon) + mae_usr[i] += lmdk_bgt.mae_cont(rls_data_usr)/args.iter - # # Sample - # rls_data_r, _, _ = lmdk_bgt.sample(seq, lmdks, epsilon) - # mae_r[i] += lmdk_bgt.mae(seq, rls_data_r)/args.iter + plt.plot( + x_i, + mae_evt, + linewidth=lmdk_lib.line_width + ) - # # Discount - # rls_data_d, _, _ = lmdk_bgt.discount(seq, lmdks, epsilon) - # mae_d[i] += lmdk_bgt.mae(seq, rls_data_d)/args.iter + plt.plot( + x_i, + mae_usr, + linewidth=lmdk_lib.line_width + ) - # # Incremental - # rls_data_i, _, _ = lmdk_bgt.incremental(seq, lmdks, epsilon, .5) - # mae_i[i] += lmdk_bgt.mae(seq, rls_data_i)/args.iter + plt.bar( + x_i + x_offset, + mae_s, + bar_width, + label='Skip', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_u, + bar_width, + label='Uniform', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width + plt.bar( + x_i + x_offset, + mae_a, + bar_width, + label='Adaptive', + linewidth=lmdk_lib.line_width + ) + x_offset += bar_width - # plt.bar( - # x_i + x_offset, - # mae_s, - # bar_width, - # label='Skip', - # linewidth=lmdk_lib.line_width - # ) - # x_offset += bar_width - # # Plot bars - # plt.bar( - # x_i + x_offset, - # mae_u, - # bar_width, - # label='Uniform', - # linewidth=lmdk_lib.line_width - # ) - # x_offset += bar_width - # plt.bar( - # x_i + x_offset, - # mae_a, - # bar_width, - # label='Adaptive', - # linewidth=lmdk_lib.line_width - # ) - # x_offset += bar_width - # plt.bar( - # x_i + x_offset, - # mae_r, - # bar_width, - # label='Sample', - # linewidth=lmdk_lib.line_width - # ) - # x_offset += bar_width - # plt.bar( - # x_i + x_offset, - # mae_d, - # bar_width, - # label='Discount', - # linewidth=lmdk_lib.line_width - # ) - # x_offset += bar_width - # plt.bar( - # x_i + x_offset, - # mae_i, - # bar_width, - # label='Incremental', - # linewidth=lmdk_lib.line_width - # ) - # x_offset += bar_width - - # path = str('rslt/bgt_cmp/' + d) - # # Plot legend - # lmdk_lib.plot_legend() + path = str('rslt/bgt_cmp/' + d) + # Plot legend + lmdk_lib.plot_legend() # # Show plot - # # plt.show() + # plt.show() # # Save plot # lmdk_lib.save_plot(path + '.pdf') print('[OK]', flush=True) - - def parse_args(): ''' Parse arguments. diff --git a/code/lib/lmdk_bgt.py b/code/lib/lmdk_bgt.py index 07474b3..f1cae78 100644 --- a/code/lib/lmdk_bgt.py +++ b/code/lib/lmdk_bgt.py @@ -343,6 +343,61 @@ def adaptive(seq, lmdks, epsilon, inc_rt, dec_rt): return rls_data, bgts, skipped +def adaptive_cont(seq, lmdks, epsilon, inc_rt, dec_rt): + ''' + Adaptive budget allocation. + + Parameters: + seq - The point sequence. + lmdks - The landmarks. + epsilon - The available privacy budget. + inc_rt - Sampling rate increase rate. + dec_rt - Sampling rate decrease rate. + Returns: + rls_data - The perturbed data. + bgts - The privacy budget allocation. + skipped - The number of skipped releases. + ''' + # Uniform budget allocation + bgts = uniform(seq, lmdks, epsilon) + # Released + rls_data = [None]*len(seq) + # The sampling rate + samp_rt = 1 + # Track landmarks + lmdk_cur = 0 + # Track skipped releases + skipped = 0 + for i, p in enumerate(seq): + # Check if current point is a landmark + r = p[2] in lmdks + if r: + lmdk_cur += 1 + if lmdk_lib.should_sample(samp_rt) or i == 0: + # Add noise to original data + o = lmdk_lib.randomized_response(r, bgts[i]) + rls_data[i] = [r, o] + # Adjust sampling rate + if i > 0: + if rls_data[i - 1][1] == o: + # Decrease + samp_rt -= samp_rt*dec_rt + else: + # Increase + samp_rt += (1 - samp_rt)*inc_rt + else: + skipped += 1 + # Skip current release and approximate with previous + rls_data[i] = rls_data[i - 1] + if r: + # Allocate the current budget to the following releases uniformly + for j in range(i + 1, len(seq)): + bgts[j] += bgts[i]/(len(lmdks) - lmdk_cur + 1) + # No budget was spent + bgts[i] = 0 + return rls_data, bgts, skipped + + def skip(seq, lmdks, epsilon): ''' Skip landmarks.