code: Ready to experiment with lmdk_sel
This commit is contained in:
parent
da7301c908
commit
840bd95c63
@ -1,5 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
sys.path.insert(1, '../lib')
|
||||
import argparse
|
||||
import lmdk_lib
|
||||
import lmdk_sel
|
||||
@ -36,7 +38,7 @@ def main(args):
|
||||
plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
|
||||
# The y axis
|
||||
plt.ylabel('Mean absolute error') # Set y axis label.
|
||||
plt.ylim(0, len(seq)/3)
|
||||
# plt.ylim(0, len(seq)/3)
|
||||
# Bar offset
|
||||
x_offset = -(bar_width/2)*(len(epsilon) - 1)
|
||||
for e_i, e in enumerate(epsilon):
|
||||
@ -45,9 +47,26 @@ def main(args):
|
||||
for r in range(args.reps):
|
||||
lmdks = lmdk_lib.get_lmdks(seq, n, d)
|
||||
hist, h = lmdk_lib.get_hist(seq, lmdks)
|
||||
opts = lmdk_sel.get_opts_from_top_h(seq, lmdks)
|
||||
delta = 1.0
|
||||
res, _ = exp_mech.exponential(hist, opts, exp_mech.score, delta, e)
|
||||
res = np.zeros([len(hist)])
|
||||
# Split sequence in parts of size h
|
||||
pt_idx = []
|
||||
for idx in range(h, len(seq), h):
|
||||
pt_idx.append(idx)
|
||||
seq_pt = np.split(seq, pt_idx)
|
||||
for pt_i, pt in enumerate(seq_pt):
|
||||
# Find this part's landmarks
|
||||
lmdks_pt = np.intersect1d(pt, lmdks)
|
||||
# Find possible options for this part
|
||||
opts = lmdk_sel.get_opts_from_top_h(pt, lmdks_pt)
|
||||
# Turn part to histogram
|
||||
hist_pt, _ = lmdk_lib.get_hist(pt, lmdks_pt)
|
||||
# Get an option for this part
|
||||
res_pt = np.array([])
|
||||
if len(opts) > 0:
|
||||
res_pt, _ = exp_mech.exponential(hist_pt, opts, exp_mech.score, 1.0, e)
|
||||
# Merge options of all parts
|
||||
res[pt_i] = np.sum(res_pt)
|
||||
# Calculate MAE
|
||||
mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps
|
||||
# Plot bar for current epsilon
|
||||
plt.bar(
|
||||
@ -59,7 +78,7 @@ def main(args):
|
||||
)
|
||||
# Change offset for next bar
|
||||
x_offset += bar_width
|
||||
path = str('/home/manos/Git/the-thing/code/expt_lmdk_sel/' + title)
|
||||
path = str('../../rslt/lmdk_sel/' + title)
|
||||
# Plot legend
|
||||
lmdk_lib.plot_legend()
|
||||
# Show plot
|
||||
|
141
code/lib/exp_mech.py
Normal file
141
code/lib/exp_mech.py
Normal file
@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import numpy as np
|
||||
import math
|
||||
import heapq
|
||||
import itertools
|
||||
import random
|
||||
import scipy.stats as stats
|
||||
import lmdk_lib
|
||||
from matplotlib import pyplot as plt
|
||||
import time
|
||||
|
||||
|
||||
'''
|
||||
The scoring function.
|
||||
|
||||
Parameters:
|
||||
data - The data.
|
||||
option - The option to evaluate.
|
||||
Returns:
|
||||
The score for the option.
|
||||
'''
|
||||
def score(data, option):
|
||||
return (option.sum() - data.sum())
|
||||
|
||||
|
||||
'''
|
||||
The exponential mechanism.
|
||||
|
||||
Parameters:
|
||||
x - The data.
|
||||
R - The possible outputs.
|
||||
u - The scoring function.
|
||||
delta - The sensitivity of the scoring function.
|
||||
epsilon - The privacy budget.
|
||||
Returns:
|
||||
res - A randomly sampled output.
|
||||
pr - The PDF of all possible outputs.
|
||||
'''
|
||||
def exponential(x, R, u, delta, epsilon):
|
||||
# Calculate the score for each element of R
|
||||
scores = [u(x, r) for r in R]
|
||||
# Normalize the scores between 0 and 1
|
||||
# (the higher, the better the utility)
|
||||
scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))
|
||||
|
||||
# Calculate the probability for each element, based on its score
|
||||
pr = [np.exp(epsilon*score/(2*delta)) for score in scores]
|
||||
|
||||
# Normalize the probabilities so that they sum to 1
|
||||
pr = pr/np.linalg.norm(pr, ord = 1)
|
||||
|
||||
# Debugging
|
||||
# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
|
||||
# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
|
||||
# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))
|
||||
|
||||
# Choose an element from R based on the probabilities
|
||||
if len(pr) > 0:
|
||||
return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr
|
||||
else:
|
||||
return np.array([]), pr
|
||||
|
||||
|
||||
def main():
|
||||
start, end = 1.0, 10.0
|
||||
scale = 1.0
|
||||
locs = [2.0, 4.0, 8.0]
|
||||
k = len(locs)
|
||||
|
||||
# dists = [truncnorm(start, end, loc, scale) for loc in locs]
|
||||
dists = [stats.laplace(loc, scale) for loc in locs]
|
||||
|
||||
mix = lmdk_lib.MixtureModel(dists)
|
||||
|
||||
delta = 1.0
|
||||
epsilon = 10.0
|
||||
|
||||
combos = list(itertools.combinations(range(int(start), int(end) + 1), k))
|
||||
|
||||
res, pr = exponential(mix, combos, score, delta, epsilon)
|
||||
|
||||
plt.rc('font', family='serif')
|
||||
plt.rc('font', size=10)
|
||||
plt.rc('text', usetex=True)
|
||||
|
||||
# Plot the options' probabilities.
|
||||
# pr.sort()
|
||||
# n = np.arange(1.0, len(pr) + 1.0, 1.0)
|
||||
# plt.plot(n,\
|
||||
# pr,\
|
||||
# label=r'$\textrm{Pr}$',\
|
||||
# color='blue')
|
||||
# # Configure the plot.
|
||||
# plt.axis([1.0, len(pr), 0.0, max(pr)]) # Set plot box.
|
||||
# plt.legend(loc='best', frameon=False) # Set plot legend.
|
||||
# plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
|
||||
# plt.xlabel('Options') # Set x axis label.
|
||||
# plt.ylabel('Likelihood') # Set y axis label.
|
||||
# plt.show() # Show the plot in a new window.
|
||||
|
||||
x = np.arange(start, end, 0.01)
|
||||
plt.plot(x,\
|
||||
mix.pdf(x),\
|
||||
label=r'$\textrm{Mix}$',\
|
||||
color='red')
|
||||
|
||||
# print(mix.sample(start, end, 10))
|
||||
|
||||
# Test MixtureModel's sample function
|
||||
# t = 1000
|
||||
# c = np.array(int(end)*[0])
|
||||
# for i in range(t):
|
||||
# c[mix.sample(start, end, 1)[0] - 1] += 1
|
||||
|
||||
# plt.plot(range(int(start), int(end) + 1),\
|
||||
# c/t,\
|
||||
# label=r'$\textrm{Test}$',\
|
||||
# color='blue')
|
||||
|
||||
# Configure the plot.
|
||||
plt.axis([start, end, 0.0, 1.0]) # Set plot box.
|
||||
plt.legend(loc='best', frameon=False) # Set plot legend.
|
||||
plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
|
||||
plt.xlabel('Timestamp') # Set x axis label.
|
||||
plt.ylabel('Likelihood') # Set y axis label.
|
||||
|
||||
plt.show() # Show the plot in a new window.
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
start_time = time.time()
|
||||
main()
|
||||
end_time = time.time()
|
||||
print('##############################')
|
||||
print('Time : %.4fs' % (end_time - start_time))
|
||||
print('##############################')
|
||||
except KeyboardInterrupt:
|
||||
print('Interrupted by user.')
|
||||
exit()
|
388
code/lib/lmdk_sel.py
Normal file
388
code/lib/lmdk_sel.py
Normal file
@ -0,0 +1,388 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import itertools
|
||||
from lmdk_lib import *
|
||||
import numpy as np
|
||||
import random
|
||||
import time
|
||||
|
||||
|
||||
'''
|
||||
Print all the points.
|
||||
|
||||
Parameters:
|
||||
seq - The point sequence.
|
||||
combs - All the possible point combinations for a specified size.
|
||||
lmdks - The landmarks.
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def print_rslt(seq, combs, lmdks):
|
||||
eval_sum = .0
|
||||
for idx, c in enumerate(combs):
|
||||
rslt = str(idx + 1) + ':\t'
|
||||
for i in seq:
|
||||
# Selected
|
||||
if i in c:
|
||||
rslt += '(' + str(i) + ')\t'
|
||||
# Landmark
|
||||
elif i in lmdks:
|
||||
rslt += '*' + str(i) + '*\t'
|
||||
# Not selected
|
||||
else:
|
||||
rslt += ' ' + str(i) + ' \t'
|
||||
dists = get_rel_dists(seq, list(c), lmdks)
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
eval_cur = eval_seq(dists)
|
||||
eval_sum += eval_cur
|
||||
# print(rslt, '\t', dists, '\t', sum(dists), '\t', eval_cur)
|
||||
print(rslt, eval_cur)
|
||||
eval_avg = eval_sum/len(combs)
|
||||
print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, 100*(eval_avg - eval_orig)/eval_orig))
|
||||
|
||||
|
||||
'''
|
||||
Print the difference with the original.
|
||||
|
||||
Parameters:
|
||||
seq - The point sequence.
|
||||
combs - All the possible point combinations for a specified size.
|
||||
lmdks - The landmarks.
|
||||
Returns:
|
||||
The difference with the original.
|
||||
'''
|
||||
def print_diff(seq, combs, lmdks):
|
||||
eval_sum = .0
|
||||
for c in combs:
|
||||
dists = get_rel_dists(seq, list(c), lmdks)
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
eval_cur = eval_seq(dists)
|
||||
eval_sum += eval_cur
|
||||
eval_avg = eval_sum/len(combs)
|
||||
diff = 100*(eval_avg - eval_orig)/eval_orig
|
||||
print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, diff))
|
||||
return diff
|
||||
|
||||
|
||||
'''
|
||||
Finds the optimal set of regular points.
|
||||
|
||||
Parameters:
|
||||
seq - The point sequence.
|
||||
lmdks - The landmarks.
|
||||
Returns:
|
||||
The optimal option.
|
||||
|
||||
Requirements:
|
||||
n = Regular points
|
||||
r = The size of a combination
|
||||
Time - O(C(n, r) + 2^C(n, r))
|
||||
Space - O(r*C(n, r))
|
||||
'''
|
||||
def get_opts_optim(seq, lmdks):
|
||||
# Evaluate the original
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
|
||||
# Get all possible options
|
||||
opts = get_opts(seq, lmdks)
|
||||
|
||||
# Evaluate options
|
||||
# Track the minimum (best) evaluation
|
||||
diff_min = float('inf')
|
||||
|
||||
# Track the optimal sequence (the one with the best evaluation)
|
||||
optim = []
|
||||
for opt in opts:
|
||||
eval_sum = 0
|
||||
for o in opt:
|
||||
eval_sum += eval_seq(get_rel_dists(seq, o, lmdks))
|
||||
# Compare with current optimal
|
||||
diff_cur = abs(eval_sum/len(opt) - eval_orig)
|
||||
if diff_cur < diff_min:
|
||||
diff_min = diff_cur
|
||||
optim = list(opt)
|
||||
|
||||
return optim
|
||||
|
||||
|
||||
'''
|
||||
Finds a set of regular points from top (less) to bottom (many)
|
||||
(seems to perform better than the bottom-to-top approach).
|
||||
|
||||
Parameters:
|
||||
seq - The point sequence.
|
||||
lmdks - The landmarks.
|
||||
Returns:
|
||||
The resulting set of options.
|
||||
|
||||
Requirements:
|
||||
For all possible sets of regular points n such that n = len(seq) - len(lmdks).
|
||||
The result is a set of options for every possible value of n and for all possible combinations for each n.
|
||||
Time - O(n^2)
|
||||
Space - O(n^2)
|
||||
'''
|
||||
def get_opts_from_top(seq, lmdks):
|
||||
# Evaluate the original
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
|
||||
opts = []
|
||||
lmdks_cur = np.array(lmdks)
|
||||
while not np.array_equal(lmdks_cur, seq):
|
||||
# Find the combinations for one more point
|
||||
reg = get_reg(seq, lmdks_cur)
|
||||
|
||||
# Track the minimum (best) evaluation
|
||||
diff_min = float('inf')
|
||||
|
||||
point = ()
|
||||
for r in reg:
|
||||
# Evaluate current
|
||||
eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))
|
||||
|
||||
# Compare evaluations
|
||||
if abs(eval_cur - eval_orig) <= diff_min:
|
||||
diff_min = abs(eval_cur - eval_orig)
|
||||
point = r
|
||||
|
||||
# Save new point to landmarks
|
||||
lmdks_cur = np.append(lmdks_cur, point)
|
||||
lmdks_cur.sort()
|
||||
|
||||
# Add new option
|
||||
opts.append(np.setdiff1d(lmdks_cur, lmdks))
|
||||
|
||||
return opts
|
||||
|
||||
def get_opts_from_top_h(seq, lmdks):
|
||||
# Create histogram
|
||||
hist, h = get_hist(seq, lmdks)
|
||||
# Keep track of points
|
||||
hist_cur = np.copy(hist)
|
||||
# The options to be returned
|
||||
hist_opts = []
|
||||
# Keep adding points until the maximum is reached
|
||||
while np.sum(hist_cur) < max(seq):
|
||||
# Track the minimum (best) evaluation
|
||||
diff_min = float('inf')
|
||||
# The candidate option
|
||||
hist_cand = np.copy(hist_cur)
|
||||
# Check every possibility
|
||||
for i, h_i in enumerate(hist_cur):
|
||||
# Can we add one more point?
|
||||
if h_i + 1 <= h:
|
||||
hist_tmp = np.copy(hist_cur)
|
||||
hist_tmp[i] += 1
|
||||
# Find difference from original
|
||||
diff_cur = get_norm(hist, hist_tmp)
|
||||
# Remember if it is the best that you've seen
|
||||
if diff_cur < diff_min:
|
||||
diff_min = diff_cur
|
||||
hist_cand = np.copy(hist_tmp)
|
||||
# Update current histogram
|
||||
hist_cur = np.copy(hist_cand)
|
||||
# Add current best to options
|
||||
hist_opts.append(hist_cand)
|
||||
# Return options
|
||||
return hist_opts
|
||||
|
||||
|
||||
def get_non_opts_from_top(seq, lmdks):
|
||||
# Evaluate the original
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
|
||||
non_opts = []
|
||||
lmdks_cur = np.array(lmdks)
|
||||
while not np.array_equal(lmdks_cur, seq):
|
||||
# Find the combinations for one more point
|
||||
reg = get_reg(seq, lmdks_cur)
|
||||
|
||||
# Track the maximum (worst) evaluation
|
||||
diff_max = .0
|
||||
|
||||
point = ()
|
||||
for r in reg:
|
||||
# Evaluate current
|
||||
eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))
|
||||
|
||||
# Compare evaluations
|
||||
if abs(eval_cur - eval_orig) >= diff_max:
|
||||
diff_max = abs(eval_cur - eval_orig)
|
||||
point = r
|
||||
|
||||
# Save new point to landmarks
|
||||
lmdks_cur = np.append(lmdks_cur, point)
|
||||
lmdks_cur.sort()
|
||||
|
||||
# Add new option
|
||||
non_opts.append(np.setdiff1d(lmdks_cur, lmdks))
|
||||
|
||||
return non_opts
|
||||
|
||||
|
||||
'''
|
||||
Finds a set of regular points from bottom (many) to top (less)
|
||||
(seems to perform worse than the top-to-bottom approach).
|
||||
|
||||
Parameters:
|
||||
seq - The point sequence.
|
||||
lmdks - The landmarks.
|
||||
Returns:
|
||||
The resulting set of options.
|
||||
|
||||
Requirements:
|
||||
For all possible sets of regular points n such that n = len(seq) - len(lmdks).
|
||||
The result is a set of options for every possible value of n and for all possible combinations for each n.
|
||||
Time - O(n^2)
|
||||
Space - O(n^2)
|
||||
'''
|
||||
def get_opts_from_bottom(seq, lmdks):
|
||||
# Evaluate the original
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
|
||||
# Start with all regular points as landmarks
|
||||
lmdks_cur = np.array(get_reg(seq, lmdks))
|
||||
|
||||
opts = [lmdks_cur]
|
||||
while lmdks_cur.size != 1:
|
||||
# Track the minimum (best) evaluation
|
||||
diff_min = float('inf')
|
||||
|
||||
point = ()
|
||||
for lmdk in lmdks_cur:
|
||||
# Evaluate current by removing one point
|
||||
eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))
|
||||
|
||||
# Compare evaluations
|
||||
if abs(eval_cur - eval_orig) <= diff_min:
|
||||
diff_min = abs(eval_cur - eval_orig)
|
||||
point = lmdk
|
||||
|
||||
# Remove point from landmarks
|
||||
lmdks_cur = np.setdiff1d(lmdks_cur, point)
|
||||
|
||||
# Add new option
|
||||
opts.append(np.setdiff1d(lmdks_cur, lmdks))
|
||||
|
||||
return opts
|
||||
|
||||
|
||||
def get_non_opts_from_bottom(seq, lmdks):
|
||||
# Evaluate the original
|
||||
eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
|
||||
|
||||
# Start with all regular points as landmarks
|
||||
lmdks_cur = np.array(get_reg(seq, lmdks))
|
||||
|
||||
non_opts = [lmdks_cur]
|
||||
while lmdks_cur.size != 1:
|
||||
# Track the maximum (worst) evaluation
|
||||
diff_max = .0
|
||||
|
||||
point = ()
|
||||
for lmdk in lmdks_cur:
|
||||
# Evaluate current by removing one point
|
||||
eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))
|
||||
|
||||
# Compare evaluations
|
||||
if abs(eval_cur - eval_orig) >= diff_max:
|
||||
diff_max = abs(eval_cur - eval_orig)
|
||||
point = lmdk
|
||||
|
||||
# Remove point from landmarks
|
||||
lmdks_cur = np.setdiff1d(lmdks_cur, point)
|
||||
|
||||
# Add new option
|
||||
non_opts.append(np.setdiff1d(lmdks_cur, lmdks))
|
||||
|
||||
return non_opts
|
||||
|
||||
|
||||
def test():
|
||||
# Start and end points of the sequence
|
||||
# # Nonrandom
|
||||
# start = 1
|
||||
# end = 10
|
||||
# Random
|
||||
start = 1
|
||||
end = random.randint(start + 1, 100)
|
||||
|
||||
# Landmarks
|
||||
# # Nonrandom
|
||||
# lmdks = np.array([1, 3, 5, 8])
|
||||
# Random
|
||||
size = random.randint(start, end - 1)
|
||||
lmdks = np.array(random.sample(range(start, end), size))
|
||||
lmdks.sort()
|
||||
|
||||
# Print the parameters
|
||||
print('Start : %d\n'
|
||||
'End : %d\n'
|
||||
'Size : %d\n'
|
||||
'Landmarks: %s'
|
||||
%(start, end, len(lmdks), str(lmdks)))
|
||||
|
||||
# Get the point sequence
|
||||
seq = get_seq(start, end)
|
||||
|
||||
# Almost optimal solution
|
||||
# print('\nOptimal...')
|
||||
# t = time.time()
|
||||
|
||||
# opts_optim = get_opts_optim(seq, lmdks)
|
||||
|
||||
# print('Time:', time.time() - t, 'secs\n')
|
||||
|
||||
# print_rslt(seq, opts_optim, lmdks)
|
||||
|
||||
|
||||
# Top to bottom approach
|
||||
print('\nTop to bottom heuristic...')
|
||||
t = time.time()
|
||||
|
||||
opts = get_opts_from_top(seq, lmdks)
|
||||
|
||||
print('Time:', time.time() - t, 'secs')
|
||||
|
||||
# print_rslt(seq, opts, lmdks)
|
||||
diff_opt = print_diff(seq, opts, lmdks)
|
||||
|
||||
print('Non optimal version...')
|
||||
non_opts = get_non_opts_from_top(seq, lmdks)
|
||||
diff_non_opt = print_diff(seq, non_opts, lmdks)
|
||||
print('Non optimal is %.2f%% different ([+]: worse | [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))
|
||||
|
||||
|
||||
# Bottom to top approach
|
||||
# Seems to perform worse
|
||||
print('\nBottom to top heuristic...')
|
||||
t = time.time()
|
||||
|
||||
opts = get_opts_from_bottom(seq, lmdks)
|
||||
|
||||
print('Time:', time.time() - t, 'secs')
|
||||
|
||||
# print_rslt(seq, opts, lmdks)
|
||||
diff_opt = print_diff(seq, opts, lmdks)
|
||||
|
||||
print('Non optimal version...')
|
||||
non_opts = get_non_opts_from_bottom(seq, lmdks)
|
||||
diff_non_opt = print_diff(seq, non_opts, lmdks)
|
||||
print('Non optimal is %.2f%% different ([+]: worse | [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))
|
||||
|
||||
|
||||
# # Debugging
|
||||
# # Number of desired actual and dummy landmarks
|
||||
# k = len(lmdks) + 5
|
||||
# # Number of dummy landmarks
|
||||
# n = k - len(lmdks)
|
||||
# combs = get_combs(reg, n)
|
||||
# print_rslt(seq, combs, lmdks)
|
||||
# exit()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
try:
|
||||
test()
|
||||
except KeyboardInterrupt:
|
||||
print('Interrupted by user.')
|
||||
exit()
|
Loading…
Reference in New Issue
Block a user