the-last-thing/code/lib/exp_mech.py

#!/usr/bin/env python3

import numpy as np
import math
import heapq
import itertools
import random
import scipy.stats as stats
import lmdk_lib
from matplotlib import pyplot as plt
import time


'''
  The scoring function.

  Parameters:
    data - The data.
    option - The option to evaluate.
  Returns:
    The score for the option.
'''
def score(data, option):
  return (option.sum() - data.sum())


'''
  The exponential mechanism.

  Parameters:
    x - The data.
    R - The possible outputs.
    u - The scoring function.
    delta - The sensitivity of the scoring function.
    epsilon - The privacy budget.
  Returns:
    res - A randomly sampled output.
    pr - The PDF of all possible outputs.
'''
def exponential(x, R, u, delta, epsilon):
  # Calculate the score for each element of R
  scores = [u(x, r) for r in R]
  # Normalize the scores between 0 and 1
  # (the higher, the better the utility)
  scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))

  # Calculate the probability for each element, based on its score
  pr = [np.exp(epsilon*score/(2*delta)) for score in scores]

  # Normalize the probabilities so that they sum to 1
  pr = pr/np.linalg.norm(pr, ord = 1)

  # Debugging
  # print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
  # print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
  # print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))

  # Choose an element from R based on the probabilities
  if len(pr) > 0:
    return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr
  else:
    return np.array([]), pr


'''
  The exponential mechanism.

  Parameters:
    x - The data.
    R - The possible outputs.
    u - The scoring function.
    delta - The sensitivity of the scoring function.
    epsilon - The privacy budget.
  Returns:
    res - A randomly sampled output.
    pr - The PDF of all possible outputs.
'''
def exponential_pareto(x, R, u, delta, epsilon):
  # Calculate the score for each element of R
  scores = [u(x, r) for r in R]
  # Keep the top 20%
  n = int(len(scores)*.2)
  scores = np.sort(scores)[-n : ]
  # Normalize the scores between 0 and 1
  # (the higher, the better the utility)
  scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))

  # Calculate the probability for each element, based on its score
  pr = [np.exp(epsilon*score/(2*delta)) for score in scores]

  # Normalize the probabilities so that they sum to 1
  pr = pr/np.linalg.norm(pr, ord = 1)

  # Debugging
  # print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
  # print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
  # print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))

  # Choose an element from R based on the probabilities
  if len(pr) > 0:
    return R[np.random.choice(range(n), 1, p = pr)[0]], pr
  else:
    return np.array([]), pr


def main():
  start, end = 1.0, 10.0
  scale = 1.0
  locs = [2.0, 4.0, 8.0]
  k = len(locs)

  # dists = [truncnorm(start, end, loc, scale) for loc in locs]
  dists = [stats.laplace(loc, scale) for loc in locs]

  mix = lmdk_lib.MixtureModel(dists)

  delta = 1.0
  epsilon = 10.0

  combos = list(itertools.combinations(range(int(start), int(end) + 1), k))

  res, pr = exponential(mix, combos, score, delta, epsilon)

  plt.rc('font', family='serif')
  plt.rc('font', size=10)
  plt.rc('text', usetex=True)

  # Plot the options' probabilities.
  # pr.sort()
  # n = np.arange(1.0, len(pr) + 1.0, 1.0)
  # plt.plot(n,\
  #          pr,\
  #          label=r'$\textrm{Pr}$',\
  #          color='blue')
  # # Configure the plot.
  # plt.axis([1.0, len(pr), 0.0, max(pr)])  # Set plot box.
  # plt.legend(loc='best', frameon=False)  # Set plot legend.
  # plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
  # plt.xlabel('Options')  # Set x axis label.
  # plt.ylabel('Likelihood')  # Set y axis label.
  # plt.show()  # Show the plot in a new window.

  x = np.arange(start, end, 0.01)
  plt.plot(x,\
          mix.pdf(x),\
          label=r'$\textrm{Mix}$',\
          color='red')

  # print(mix.sample(start, end, 10))

  # Test MixtureModel's sample function
  # t = 1000
  # c = np.array(int(end)*[0])
  # for i in range(t):
  #   c[mix.sample(start, end, 1)[0] - 1] += 1

  # plt.plot(range(int(start), int(end) + 1),\
  #          c/t,\
  #          label=r'$\textrm{Test}$',\
  #          color='blue')

  # Configure the plot.
  plt.axis([start, end, 0.0, 1.0])  # Set plot box.
  plt.legend(loc='best', frameon=False)  # Set plot legend.
  plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
  plt.xlabel('Timestamp')  # Set x axis label.
  plt.ylabel('Likelihood')  # Set y axis label.

  plt.show()  # Show the plot in a new window.


if __name__ == '__main__':
  try:
    start_time = time.time()
    main()
    end_time = time.time()
    print('##############################')
    print('Time   : %.4fs' % (end_time - start_time))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    exit()