the-last-thing/code/lib/exp_mech.py

#!/usr/bin/env python3

import numpy as np
import math
import heapq
import itertools
import random
import scipy.stats as stats
import lmdk_lib
from matplotlib import pyplot as plt
import time


def score(data, option):
  '''
    The scoring function.

    Parameters:
      data - The data.
      option - The option to evaluate.
    Returns:
      The score for the option.
  '''
  return (option.sum() - data.sum())
  # return lmdk_lib.get_norm(data, option)


def exponential(x, R, u, delta, epsilon):
  '''
    The exponential mechanism.

    Parameters:
      x - The data.
      R - The possible outputs.
      u - The scoring function.
      delta - The sensitivity of the scoring function.
      epsilon - The privacy budget.
    Returns:
      res - A randomly sampled output.
      pr - The PDF of all possible outputs.
  '''
  # Calculate the score for each element of R
  scores = [u(x, r) for r in R]
  # Normalize the scores between 0 and 1
  # (the higher, the better the utility)
  scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))

  # Calculate the probability for each element, based on its score
  pr = [np.exp(epsilon*score/(2*delta)) for score in scores]

  # Normalize the probabilities so that they sum to 1
  pr = pr/np.linalg.norm(pr, ord = 1)

  # Debugging
  # print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
  # print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
  # print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))

  # Choose an element from R based on the probabilities
  if len(pr) > 0:
    return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr
  else:
    return np.array([]), pr


'''
  The exponential mechanism.

  Parameters:
    x - The data.
    R - The possible outputs.
    u - The scoring function.
    delta - The sensitivity of the scoring function.
    epsilon - The privacy budget.
  Returns:
    res - A randomly sampled output.
    pr - The PDF of all possible outputs.
'''
def exponential_pareto(x, R, u, delta, epsilon):
  # Calculate the score for each element of R
  scores = [u(x, r) for r in R]
  # Keep the top 20%
  n = int(len(scores)*.2)
  scores = np.sort(scores)[-n : ]
  # Normalize the scores between 0 and 1
  # (the higher, the better the utility)
  scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))

  # Calculate the probability for each element, based on its score
  pr = [np.exp(epsilon*score/(2*delta)) for score in scores]

  # Normalize the probabilities so that they sum to 1
  pr = pr/np.linalg.norm(pr, ord = 1)

  # Debugging
  # print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
  # print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
  # print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))

  # Choose an element from R based on the probabilities
  if len(pr) > 0:
    return R[np.random.choice(range(n), 1, p = pr)[0]], pr
  else:
    return np.array([]), pr


def main():
  start, end = 1.0, 10.0
  scale = 1.0
  locs = [2.0, 4.0, 8.0]
  k = len(locs)

  # dists = [truncnorm(start, end, loc, scale) for loc in locs]
  dists = [stats.laplace(loc, scale) for loc in locs]

  mix = lmdk_lib.MixtureModel(dists)

  delta = 1.0
  epsilon = 10.0

  combos = list(itertools.combinations(range(int(start), int(end) + 1), k))

  res, pr = exponential(mix, combos, score, delta, epsilon)

  plt.rc('font', family='serif')
  plt.rc('font', size=10)
  plt.rc('text', usetex=True)

  # Plot the options' probabilities.
  # pr.sort()
  # n = np.arange(1.0, len(pr) + 1.0, 1.0)
  # plt.plot(n,\
  #          pr,\
  #          label=r'$\textrm{Pr}$',\
  #          color='blue')
  # # Configure the plot.
  # plt.axis([1.0, len(pr), 0.0, max(pr)])  # Set plot box.
  # plt.legend(loc='best', frameon=False)  # Set plot legend.
  # plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
  # plt.xlabel('Options')  # Set x axis label.
  # plt.ylabel('Likelihood')  # Set y axis label.
  # plt.show()  # Show the plot in a new window.

  x = np.arange(start, end, 0.01)
  plt.plot(x,\
          mix.pdf(x),\
          label=r'$\textrm{Mix}$',\
          color='red')

  # print(mix.sample(start, end, 10))

  # Test MixtureModel's sample function
  # t = 1000
  # c = np.array(int(end)*[0])
  # for i in range(t):
  #   c[mix.sample(start, end, 1)[0] - 1] += 1

  # plt.plot(range(int(start), int(end) + 1),\
  #          c/t,\
  #          label=r'$\textrm{Test}$',\
  #          color='blue')

  # Configure the plot.
  plt.axis([start, end, 0.0, 1.0])  # Set plot box.
  plt.legend(loc='best', frameon=False)  # Set plot legend.
  plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
  plt.xlabel('Timestamp')  # Set x axis label.
  plt.ylabel('Likelihood')  # Set y axis label.

  plt.show()  # Show the plot in a new window.


if __name__ == '__main__':
  try:
    start_time = time.time()
    main()
    end_time = time.time()
    print('##############################')
    print('Time   : %.4fs' % (end_time - start_time))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    exit()
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`#!/usr/bin/env python3`

			`import numpy as np`
			`import math`
			`import heapq`
			`import itertools`
			`import random`
			`import scipy.stats as stats`
			`import lmdk_lib`
			`from matplotlib import pyplot as plt`
			`import time`


			`def score(data, option):`
lmdk_sel: Tested the Eucl distance in exp_mech 2021-10-12 12:04:00 +02:00			`'''`
			`The scoring function.`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00
lmdk_sel: Tested the Eucl distance in exp_mech 2021-10-12 12:04:00 +02:00			`Parameters:`
			`data - The data.`
			`option - The option to evaluate.`
			`Returns:`
			`The score for the option.`
			`'''`
			`return (option.sum() - data.sum())`
			`# return lmdk_lib.get_norm(data, option)`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00

			`def exponential(x, R, u, delta, epsilon):`
lmdk_sel: Tested the Eucl distance in exp_mech 2021-10-12 12:04:00 +02:00			`'''`
			`The exponential mechanism.`

			`Parameters:`
			`x - The data.`
			`R - The possible outputs.`
			`u - The scoring function.`
			`delta - The sensitivity of the scoring function.`
			`epsilon - The privacy budget.`
			`Returns:`
			`res - A randomly sampled output.`
			`pr - The PDF of all possible outputs.`
			`'''`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`# Calculate the score for each element of R`
			`scores = [u(x, r) for r in R]`
			`# Normalize the scores between 0 and 1`
			`# (the higher, the better the utility)`
			`scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))`

			`# Calculate the probability for each element, based on its score`
			`pr = [np.exp(epsilonscore/(2delta)) for score in scores]`

			`# Normalize the probabilities so that they sum to 1`
			`pr = pr/np.linalg.norm(pr, ord = 1)`

			`# Debugging`
			`# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])`
			`# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])`
			`# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))`

			`# Choose an element from R based on the probabilities`
			`if len(pr) > 0:`
			`return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr`
			`else:`
			`return np.array([]), pr`


expt_lmdk_sel: Testing the Pareto principle 2021-10-05 23:40:15 +02:00			`'''`
			`The exponential mechanism.`

			`Parameters:`
			`x - The data.`
			`R - The possible outputs.`
			`u - The scoring function.`
			`delta - The sensitivity of the scoring function.`
			`epsilon - The privacy budget.`
			`Returns:`
			`res - A randomly sampled output.`
			`pr - The PDF of all possible outputs.`
			`'''`
			`def exponential_pareto(x, R, u, delta, epsilon):`
			`# Calculate the score for each element of R`
			`scores = [u(x, r) for r in R]`
			`# Keep the top 20%`
			`n = int(len(scores)*.2)`
			`scores = np.sort(scores)[-n : ]`
			`# Normalize the scores between 0 and 1`
			`# (the higher, the better the utility)`
			`scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))`

			`# Calculate the probability for each element, based on its score`
			`pr = [np.exp(epsilonscore/(2delta)) for score in scores]`

			`# Normalize the probabilities so that they sum to 1`
			`pr = pr/np.linalg.norm(pr, ord = 1)`

			`# Debugging`
			`# print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])`
			`# print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])`
			`# print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))`

			`# Choose an element from R based on the probabilities`
			`if len(pr) > 0:`
			`return R[np.random.choice(range(n), 1, p = pr)[0]], pr`
			`else:`
			`return np.array([]), pr`


code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`def main():`
			`start, end = 1.0, 10.0`
			`scale = 1.0`
			`locs = [2.0, 4.0, 8.0]`
			`k = len(locs)`

			`# dists = [truncnorm(start, end, loc, scale) for loc in locs]`
			`dists = [stats.laplace(loc, scale) for loc in locs]`

			`mix = lmdk_lib.MixtureModel(dists)`

			`delta = 1.0`
			`epsilon = 10.0`

			`combos = list(itertools.combinations(range(int(start), int(end) + 1), k))`

			`res, pr = exponential(mix, combos, score, delta, epsilon)`

			`plt.rc('font', family='serif')`
			`plt.rc('font', size=10)`
			`plt.rc('text', usetex=True)`

			`# Plot the options' probabilities.`
			`# pr.sort()`
			`# n = np.arange(1.0, len(pr) + 1.0, 1.0)`
			`# plt.plot(n,\`
			`# pr,\`
			`# label=r'$\textrm{Pr}$',\`
			`# color='blue')`
			`# # Configure the plot.`
			`# plt.axis([1.0, len(pr), 0.0, max(pr)]) # Set plot box.`
			`# plt.legend(loc='best', frameon=False) # Set plot legend.`
			`# plt.grid(axis='y', alpha=1.0) # Add grid on y axis.`
			`# plt.xlabel('Options') # Set x axis label.`
			`# plt.ylabel('Likelihood') # Set y axis label.`
			`# plt.show() # Show the plot in a new window.`

			`x = np.arange(start, end, 0.01)`
			`plt.plot(x,\`
			`mix.pdf(x),\`
			`label=r'$\textrm{Mix}$',\`
			`color='red')`

			`# print(mix.sample(start, end, 10))`

			`# Test MixtureModel's sample function`
			`# t = 1000`
			`# c = np.array(int(end)*[0])`
			`# for i in range(t):`
			`# c[mix.sample(start, end, 1)[0] - 1] += 1`

			`# plt.plot(range(int(start), int(end) + 1),\`
			`# c/t,\`
			`# label=r'$\textrm{Test}$',\`
			`# color='blue')`

			`# Configure the plot.`
			`plt.axis([start, end, 0.0, 1.0]) # Set plot box.`
			`plt.legend(loc='best', frameon=False) # Set plot legend.`
			`plt.grid(axis='y', alpha=1.0) # Add grid on y axis.`
			`plt.xlabel('Timestamp') # Set x axis label.`
			`plt.ylabel('Likelihood') # Set y axis label.`

			`plt.show() # Show the plot in a new window.`


			`if __name__ == '__main__':`
			`try:`
			`start_time = time.time()`
			`main()`
			`end_time = time.time()`
			`print('##############################')`
			`print('Time : %.4fs' % (end_time - start_time))`
			`print('##############################')`
			`except KeyboardInterrupt:`
			`print('Interrupted by user.')`
			`exit()`