def main(args):
    '''
    Plot the error of the Pareto exponential mechanism per landmark
    distribution.

    For every distribution type, privacy budget and landmark count, the
    selection is repeated args.reps times and the average of
    lmdk_lib.get_norm(hist, res) becomes the height of one bar. Each
    distribution type yields one grouped bar chart that is handed to
    lmdk_lib.save_plot with a path under ../../rslt/lmdk_sel-pareto/.

    Parameters:
      args - The parsed arguments (uses args.reps and args.time).
    '''
    # Privacy budgets; each one contributes one bar to every group
    budgets = [.001, .01, .1, 1.0, 10.0, 100.0]
    # Sensitivity passed to the mechanism (constant for the whole run)
    sensitivity = 1.0
    # The sequence as built by lmdk_lib.get_seq
    seq = lmdk_lib.get_seq(1, args.time)
    # Distribution type identifiers, -1 to 3
    distributions = np.arange(-1, 4)
    # Landmark counts: start at 20% of args.time, step by args.time/5
    lmdk_n = np.array(range(int(.2*args.time), args.time, int(args.time/5)))

    # Geometry of the grouped bars (identical for every figure)
    bar_width = 1/(len(budgets) + 1)
    group_pos = np.arange(len(lmdk_n))
    margin = bar_width*(len(budgets)/2 + 1)
    x_low = group_pos.min() - margin
    x_high = group_pos.max() + margin
    first_offset = -(bar_width/2)*(len(budgets) - 1)
    # Landmark counts expressed as a percentage of the sequence length
    tick_labels = ((lmdk_n/len(seq))*100).astype(int)

    total = len(distributions)
    for d_i, d in enumerate(distributions, start=1):
        # Logging
        title = lmdk_lib.dist_type_to_str(d) + ' landmark distribution'
        print('(%d/%d) %s... ' %(d_i, total, title), end='', flush=True)

        # Fresh figure with labelled axes
        lmdk_lib.plot_init()
        plt.xticks(group_pos, tick_labels)
        plt.xlabel('Landmarks (%)')
        plt.xlim(x_low, x_high)
        plt.ylabel('Mean absolute error')

        offset = first_offset
        for e in budgets:
            mae = np.zeros(len(lmdk_n))
            for n_i, n in enumerate(lmdk_n):
                for _rep in range(args.reps):
                    lmdks = lmdk_lib.get_lmdks(seq, n, d)
                    hist, _ = lmdk_lib.get_hist(seq, lmdks)
                    opts = lmdk_sel.get_opts_from_top_h(seq, lmdks)
                    res, _ = exp_mech.exponential_pareto(
                        hist, opts, exp_mech.score, sensitivity, e
                    )
                    # Accumulate the running mean over the repetitions
                    mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps
            # One bar per landmark count for the current budget
            plt.bar(
                group_pos + offset,
                mae,
                bar_width,
                label=u'\u03B5 = ' + '{:.0e}'.format(e),
                linewidth=lmdk_lib.line_width
            )
            # Shift right for the next budget's bars
            offset += bar_width

        # Legend, then write the figure out
        lmdk_lib.plot_legend()
        lmdk_lib.save_plot('../../rslt/lmdk_sel-pareto/' + title + '.pdf')
        print('[OK]', flush=True)


def parse_args():
    '''
    Parse the command line arguments.

    Optional:
      reps - The number of repetitions (integer, defaults to 1).
      time - The time limit of the sequence (integer, defaults to 100).
    Returns:
      The namespace produced by argparse.
    '''
    parser = argparse.ArgumentParser()
    # All options are optional integers: (short flag, long flag, help, default)
    options = (
        ('-r', '--reps', 'The number of repetitions.', 1),
        ('-t', '--time', 'The time limit of the sequence.', 100),
    )
    for short_flag, long_flag, description, fallback in options:
        parser.add_argument(short_flag, long_flag, help=description, type=int, default=fallback)
    return parser.parse_args()
def exponential_pareto(x, R, u, delta, epsilon, keep=.2):
    '''
    The exponential mechanism restricted to the top-scoring outputs.

    Every element of R is scored with u, only the fraction `keep` with the
    largest raw scores stays eligible (always at least one element), and
    one eligible element is sampled with probability proportional to
    exp(epsilon*s/(2*delta)), where s is the element's score after min-max
    normalization and inversion (the smallest retained raw score maps to 1).

    Parameters:
      x       - The data.
      R       - The possible outputs.
      u       - The scoring function, called as u(x, r).
      delta   - The sensitivity of the scoring function.
      epsilon - The privacy budget.
      keep    - The fraction of R that stays eligible (default 20%).
    Returns:
      res - A randomly sampled output (an empty array when R is empty).
      pr  - The sampling probabilities of the eligible outputs, ordered by
            ascending raw score.
    '''
    # Calculate the score for each element of R
    raw = np.asarray([u(x, r) for r in R], dtype=float)
    if raw.size == 0:
        # Nothing to sample from
        return np.array([]), np.array([])

    # Keep the top fraction, but never fewer than one candidate; a plain
    # int(len*.2) is 0 for fewer than 5 options and [-0:] would keep all.
    n = min(raw.size, max(1, int(raw.size*keep)))
    # Track the *indices* of the kept scores so that the sampled position
    # can be mapped back to the matching element of R.
    # NOTE(review): this keeps the largest raw scores while the inversion
    # below favors the smallest of them - confirm that this is intended.
    kept = np.argsort(raw, kind='stable')[-n:]
    top = raw[kept]

    # Normalize the scores between 0 and 1
    # (the higher, the better the utility)
    spread = top.max() - top.min()
    if spread > 0:
        utility = 1 - (top - top.min())/spread
    else:
        # Identical scores carry no preference: sample uniformly instead
        # of dividing by zero.
        utility = np.ones(n)

    # Calculate the probability for each element, based on its score.
    # Subtracting the largest exponent leaves the normalized result
    # unchanged but prevents overflow for large budgets.
    # NOTE(review): the truncation and the min-max normalization both
    # depend on the data, so delta may no longer bound the sensitivity of
    # the normalized score - confirm the privacy accounting.
    exponent = epsilon*utility/(2*delta)
    pr = np.exp(exponent - exponent.max())

    # Normalize the probabilities so that they sum to 1
    pr = pr/pr.sum()

    # Choose an eligible element of R based on the probabilities
    pick = np.random.choice(n, 1, p=pr)[0]
    return R[kept[pick]], pr
a/rslt/lmdk_sel-pareto/Bimodal landmark distribution.pdf b/rslt/lmdk_sel-pareto/Bimodal landmark distribution.pdf new file mode 100644 index 0000000..b166c23 Binary files /dev/null and b/rslt/lmdk_sel-pareto/Bimodal landmark distribution.pdf differ diff --git a/rslt/lmdk_sel-pareto/Left-skewed landmark distribution.pdf b/rslt/lmdk_sel-pareto/Left-skewed landmark distribution.pdf new file mode 100644 index 0000000..aed6768 Binary files /dev/null and b/rslt/lmdk_sel-pareto/Left-skewed landmark distribution.pdf differ diff --git a/rslt/lmdk_sel-pareto/Right-skewed landmark distribution.pdf b/rslt/lmdk_sel-pareto/Right-skewed landmark distribution.pdf new file mode 100644 index 0000000..e5a7b60 Binary files /dev/null and b/rslt/lmdk_sel-pareto/Right-skewed landmark distribution.pdf differ diff --git a/rslt/lmdk_sel-pareto/Symmetric landmark distribution.pdf b/rslt/lmdk_sel-pareto/Symmetric landmark distribution.pdf new file mode 100644 index 0000000..12063e3 Binary files /dev/null and b/rslt/lmdk_sel-pareto/Symmetric landmark distribution.pdf differ diff --git a/rslt/lmdk_sel-pareto/Uniform landmark distribution.pdf b/rslt/lmdk_sel-pareto/Uniform landmark distribution.pdf new file mode 100644 index 0000000..43f2dc0 Binary files /dev/null and b/rslt/lmdk_sel-pareto/Uniform landmark distribution.pdf differ