the-last-thing/code/lib/exp_mech.py

184 lines
5.0 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import numpy as np
import math
import heapq
import itertools
import random
import scipy.stats as stats
import lmdk_lib
from matplotlib import pyplot as plt
import time
def score(data, option):
'''
The scoring function.
Parameters:
data - The data.
option - The option to evaluate.
Returns:
The score for the option.
'''
return (option.sum() - data.sum())
# return lmdk_lib.get_norm(data, option)
def exponential(x, R, u, delta, epsilon):
'''
The exponential mechanism.
Parameters:
x - The data.
R - The possible outputs.
u - The scoring function.
delta - The sensitivity of the scoring function.
epsilon - The privacy budget.
Returns:
res - A randomly sampled output.
pr - The PDF of all possible outputs.
'''
# Calculate the score for each element of R
scores = [u(x, r) for r in R]
# Normalize the scores between 0 and 1
# (the higher, the better the utility)
scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))
# Calculate the probability for each element, based on its score
pr = [np.exp(epsilon*score/(2*delta)) for score in scores]
# Normalize the probabilities so that they sum to 1
pr = pr/np.linalg.norm(pr, ord = 1)
# Debugging
# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))
# Choose an element from R based on the probabilities
if len(pr) > 0:
return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr
else:
return np.array([]), pr
'''
The exponential mechanism.
Parameters:
x - The data.
R - The possible outputs.
u - The scoring function.
delta - The sensitivity of the scoring function.
epsilon - The privacy budget.
Returns:
res - A randomly sampled output.
pr - The PDF of all possible outputs.
'''
def exponential_pareto(x, R, u, delta, epsilon):
# Calculate the score for each element of R
scores = [u(x, r) for r in R]
# Keep the top 20%
n = int(len(scores)*.2)
scores = np.sort(scores)[-n : ]
# Normalize the scores between 0 and 1
# (the higher, the better the utility)
scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))
# Calculate the probability for each element, based on its score
pr = [np.exp(epsilon*score/(2*delta)) for score in scores]
# Normalize the probabilities so that they sum to 1
pr = pr/np.linalg.norm(pr, ord = 1)
# Debugging
# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))
# Choose an element from R based on the probabilities
if len(pr) > 0:
return R[np.random.choice(range(n), 1, p = pr)[0]], pr
else:
return np.array([]), pr
def main():
start, end = 1.0, 10.0
scale = 1.0
locs = [2.0, 4.0, 8.0]
k = len(locs)
# dists = [truncnorm(start, end, loc, scale) for loc in locs]
dists = [stats.laplace(loc, scale) for loc in locs]
mix = lmdk_lib.MixtureModel(dists)
delta = 1.0
epsilon = 10.0
combos = list(itertools.combinations(range(int(start), int(end) + 1), k))
res, pr = exponential(mix, combos, score, delta, epsilon)
plt.rc('font', family='serif')
plt.rc('font', size=10)
plt.rc('text', usetex=True)
# Plot the options' probabilities.
# pr.sort()
# n = np.arange(1.0, len(pr) + 1.0, 1.0)
# plt.plot(n,\
# pr,\
# label=r'$\textrm{Pr}$',\
# color='blue')
# # Configure the plot.
# plt.axis([1.0, len(pr), 0.0, max(pr)]) # Set plot box.
# plt.legend(loc='best', frameon=False) # Set plot legend.
# plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
# plt.xlabel('Options') # Set x axis label.
# plt.ylabel('Likelihood') # Set y axis label.
# plt.show() # Show the plot in a new window.
x = np.arange(start, end, 0.01)
plt.plot(x,\
mix.pdf(x),\
label=r'$\textrm{Mix}$',\
color='red')
# print(mix.sample(start, end, 10))
# Test MixtureModel's sample function
# t = 1000
# c = np.array(int(end)*[0])
# for i in range(t):
# c[mix.sample(start, end, 1)[0] - 1] += 1
# plt.plot(range(int(start), int(end) + 1),\
# c/t,\
# label=r'$\textrm{Test}$',\
# color='blue')
# Configure the plot.
plt.axis([start, end, 0.0, 1.0]) # Set plot box.
plt.legend(loc='best', frameon=False) # Set plot legend.
plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
plt.xlabel('Timestamp') # Set x axis label.
plt.ylabel('Likelihood') # Set y axis label.
plt.show() # Show the plot in a new window.
if __name__ == '__main__':
try:
start_time = time.time()
main()
end_time = time.time()
print('##############################')
print('Time : %.4fs' % (end_time - start_time))
print('##############################')
except KeyboardInterrupt:
print('Interrupted by user.')
exit()