183 lines
5.0 KiB
Python
183 lines
5.0 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import numpy as np
|
|
import math
|
|
import heapq
|
|
import itertools
|
|
import random
|
|
import scipy.stats as stats
|
|
import lmdk_lib
|
|
from matplotlib import pyplot as plt
|
|
import time
|
|
|
|
|
|
'''
|
|
The scoring function.
|
|
|
|
Parameters:
|
|
data - The data.
|
|
option - The option to evaluate.
|
|
Returns:
|
|
The score for the option.
|
|
'''
|
|
def score(data, option):
|
|
return (option.sum() - data.sum())
|
|
|
|
|
|
'''
|
|
The exponential mechanism.
|
|
|
|
Parameters:
|
|
x - The data.
|
|
R - The possible outputs.
|
|
u - The scoring function.
|
|
delta - The sensitivity of the scoring function.
|
|
epsilon - The privacy budget.
|
|
Returns:
|
|
res - A randomly sampled output.
|
|
pr - The PDF of all possible outputs.
|
|
'''
|
|
def exponential(x, R, u, delta, epsilon):
|
|
# Calculate the score for each element of R
|
|
scores = [u(x, r) for r in R]
|
|
# Normalize the scores between 0 and 1
|
|
# (the higher, the better the utility)
|
|
scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))
|
|
|
|
# Calculate the probability for each element, based on its score
|
|
pr = [np.exp(epsilon*score/(2*delta)) for score in scores]
|
|
|
|
# Normalize the probabilities so that they sum to 1
|
|
pr = pr/np.linalg.norm(pr, ord = 1)
|
|
|
|
# Debugging
|
|
# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
|
|
# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
|
|
# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))
|
|
|
|
# Choose an element from R based on the probabilities
|
|
if len(pr) > 0:
|
|
return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr
|
|
else:
|
|
return np.array([]), pr
|
|
|
|
|
|
'''
|
|
The exponential mechanism.
|
|
|
|
Parameters:
|
|
x - The data.
|
|
R - The possible outputs.
|
|
u - The scoring function.
|
|
delta - The sensitivity of the scoring function.
|
|
epsilon - The privacy budget.
|
|
Returns:
|
|
res - A randomly sampled output.
|
|
pr - The PDF of all possible outputs.
|
|
'''
|
|
def exponential_pareto(x, R, u, delta, epsilon):
|
|
# Calculate the score for each element of R
|
|
scores = [u(x, r) for r in R]
|
|
# Keep the top 20%
|
|
n = int(len(scores)*.2)
|
|
scores = np.sort(scores)[-n : ]
|
|
# Normalize the scores between 0 and 1
|
|
# (the higher, the better the utility)
|
|
scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))
|
|
|
|
# Calculate the probability for each element, based on its score
|
|
pr = [np.exp(epsilon*score/(2*delta)) for score in scores]
|
|
|
|
# Normalize the probabilities so that they sum to 1
|
|
pr = pr/np.linalg.norm(pr, ord = 1)
|
|
|
|
# Debugging
|
|
# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
|
|
# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
|
|
# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))
|
|
|
|
# Choose an element from R based on the probabilities
|
|
if len(pr) > 0:
|
|
return R[np.random.choice(range(n), 1, p = pr)[0]], pr
|
|
else:
|
|
return np.array([]), pr
|
|
|
|
|
|
def main():
|
|
start, end = 1.0, 10.0
|
|
scale = 1.0
|
|
locs = [2.0, 4.0, 8.0]
|
|
k = len(locs)
|
|
|
|
# dists = [truncnorm(start, end, loc, scale) for loc in locs]
|
|
dists = [stats.laplace(loc, scale) for loc in locs]
|
|
|
|
mix = lmdk_lib.MixtureModel(dists)
|
|
|
|
delta = 1.0
|
|
epsilon = 10.0
|
|
|
|
combos = list(itertools.combinations(range(int(start), int(end) + 1), k))
|
|
|
|
res, pr = exponential(mix, combos, score, delta, epsilon)
|
|
|
|
plt.rc('font', family='serif')
|
|
plt.rc('font', size=10)
|
|
plt.rc('text', usetex=True)
|
|
|
|
# Plot the options' probabilities.
|
|
# pr.sort()
|
|
# n = np.arange(1.0, len(pr) + 1.0, 1.0)
|
|
# plt.plot(n,\
|
|
# pr,\
|
|
# label=r'$\textrm{Pr}$',\
|
|
# color='blue')
|
|
# # Configure the plot.
|
|
# plt.axis([1.0, len(pr), 0.0, max(pr)]) # Set plot box.
|
|
# plt.legend(loc='best', frameon=False) # Set plot legend.
|
|
# plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
|
|
# plt.xlabel('Options') # Set x axis label.
|
|
# plt.ylabel('Likelihood') # Set y axis label.
|
|
# plt.show() # Show the plot in a new window.
|
|
|
|
x = np.arange(start, end, 0.01)
|
|
plt.plot(x,\
|
|
mix.pdf(x),\
|
|
label=r'$\textrm{Mix}$',\
|
|
color='red')
|
|
|
|
# print(mix.sample(start, end, 10))
|
|
|
|
# Test MixtureModel's sample function
|
|
# t = 1000
|
|
# c = np.array(int(end)*[0])
|
|
# for i in range(t):
|
|
# c[mix.sample(start, end, 1)[0] - 1] += 1
|
|
|
|
# plt.plot(range(int(start), int(end) + 1),\
|
|
# c/t,\
|
|
# label=r'$\textrm{Test}$',\
|
|
# color='blue')
|
|
|
|
# Configure the plot.
|
|
plt.axis([start, end, 0.0, 1.0]) # Set plot box.
|
|
plt.legend(loc='best', frameon=False) # Set plot legend.
|
|
plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
|
|
plt.xlabel('Timestamp') # Set x axis label.
|
|
plt.ylabel('Likelihood') # Set y axis label.
|
|
|
|
plt.show() # Show the plot in a new window.
|
|
|
|
|
|
if __name__ == '__main__':
|
|
try:
|
|
start_time = time.time()
|
|
main()
|
|
end_time = time.time()
|
|
print('##############################')
|
|
print('Time : %.4fs' % (end_time - start_time))
|
|
print('##############################')
|
|
except KeyboardInterrupt:
|
|
print('Interrupted by user.')
|
|
exit()
|