the-last-thing/code/lib/lmdk_sel.py

#!/usr/bin/env python3

import itertools
from lmdk_lib import *
import exp_mech
import numpy as np
import random
import scipy.stats as stats
import time


'''
  Print all the points.

  Parameters:
    seq - The point sequence.
    combs - All the possible point combinations for a specified size.
    lmdks - The landmarks.
  Returns:
    Nothing.
'''
def print_rslt(seq, combs, lmdks):
  eval_sum = .0
  for idx, c in enumerate(combs):
    rslt = str(idx + 1) + ':\t'
    for i in seq:
      # Selected
      if i in c:
        rslt += '(' + str(i) + ')\t'
      # Landmark
      elif i in lmdks:
        rslt += '*' + str(i) + '*\t'
      # Not selected
      else:
        rslt += ' ' + str(i) + ' \t'
    dists = get_rel_dists(seq, list(c), lmdks)
    eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
    eval_cur = eval_seq(dists)
    eval_sum += eval_cur
    # print(rslt, '\t', dists, '\t', sum(dists), '\t', eval_cur)
    print(rslt, eval_cur)
  eval_avg = eval_sum/len(combs)
  print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, 100*(eval_avg - eval_orig)/eval_orig))


'''
  Print the difference with the original.

  Parameters:
    seq - The point sequence.
    combs - All the possible point combinations for a specified size.
    lmdks - The landmarks.
  Returns:
    The difference with the original.
'''
def print_diff(seq, combs, lmdks):
  eval_sum = .0
  for c in combs:
    dists = get_rel_dists(seq, list(c), lmdks)
    eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
    eval_cur = eval_seq(dists)
    eval_sum += eval_cur
  eval_avg = eval_sum/len(combs)
  diff = 100*(eval_avg - eval_orig)/eval_orig
  print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, diff))
  return diff


'''
  Finds the optimal set of regular points.

  Parameters:
    seq - The point sequence.
    lmdks - The landmarks.
  Returns:
    The optimal option.

  Requirements:
    n = Regular points
    r = The size of a combination
    Time  - O(C(n, r) + 2^C(n, r))
    Space - O(r*C(n, r))
'''
def get_opts_optim(seq, lmdks):
  # Evaluate the original
  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))

  # Get all possible options
  opts = get_opts(seq, lmdks)

  # Evaluate options
  # Track the minimum (best) evaluation
  diff_min = float('inf')

  # Track the optimal sequence (the one with the best evaluation)
  optim = []
  for opt in opts:
    eval_sum = 0
    for o in opt:
      eval_sum += eval_seq(get_rel_dists(seq, o, lmdks))
    # Compare with current optimal
    diff_cur = abs(eval_sum/len(opt) - eval_orig)
    if diff_cur < diff_min:
      diff_min = diff_cur
      optim = list(opt)

  return optim


'''
  Finds a set of regular points from top (less) to bottom (many)
  (seems to perform better than the bottom-to-top approach).

  Parameters:
    seq - The point sequence.
    lmdks - The landmarks.
  Returns:
    The resulting set of options.

  Requirements:
    For all possible sets of regular points n such that n = len(seq) - len(lmdks).
    The result is a set of options for every possible value of n and for all possible combinations for each n.
    Time  - O(n^2)
    Space - O(n^2)
'''
def get_opts_from_top(seq, lmdks):
  # Evaluate the original
  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))

  opts = []
  lmdks_cur = np.array(lmdks)
  while not np.array_equal(lmdks_cur, seq):
    # Find the combinations for one more point
    reg = get_reg(seq, lmdks_cur)

    # Track the minimum (best) evaluation
    diff_min = float('inf')

    point = ()
    for r in reg:
      # Evaluate current
      eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))

      # Compare evaluations
      if abs(eval_cur - eval_orig) <= diff_min:
        diff_min = abs(eval_cur - eval_orig)
        point = r

    # Save new point to landmarks
    lmdks_cur = np.append(lmdks_cur, point)
    lmdks_cur.sort()

    # Add new option
    opts.append(np.setdiff1d(lmdks_cur, lmdks))

  return opts

def get_opts_from_top_h(seq, lmdks):
  # Create histogram
  hist, h = get_hist(seq, lmdks)
  # Keep track of points
  hist_cur = np.copy(hist)
  # The options to be returned
  hist_opts = []
  # Keep adding points until the maximum is reached
  while np.sum(hist_cur) < len(seq):
    # Track the minimum (best) evaluation
    diff_min = float('inf')
    # The candidate option
    hist_cand = np.copy(hist_cur)
    # Check every possibility
    for i, h_i in enumerate(hist_cur):
      # Can we add one more point?
      if h_i + 1 <= h:
        hist_tmp = np.copy(hist_cur)
        hist_tmp[i] += 1
        # Find difference from original
        diff_cur = get_norm(hist, hist_tmp)  # Euclidean
        # diff_cur = get_emd(hist, hist_tmp)  # Wasserstein
        # Remember if it is the best that you've seen
        if diff_cur < diff_min:
          diff_min = diff_cur
          hist_cand = np.copy(hist_tmp)
    # Update current histogram
    hist_cur = np.copy(hist_cand)
    # Add current best to options
    hist_opts.append(hist_cand)
  # Return options
  return hist_opts


def get_non_opts_from_top(seq, lmdks):
  # Evaluate the original
  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))

  non_opts = []
  lmdks_cur = np.array(lmdks)
  while not np.array_equal(lmdks_cur, seq):
    # Find the combinations for one more point
    reg = get_reg(seq, lmdks_cur)

    # Track the maximum (worst) evaluation
    diff_max = .0

    point = ()
    for r in reg:
      # Evaluate current
      eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))

      # Compare evaluations
      if abs(eval_cur - eval_orig) >= diff_max:
        diff_max = abs(eval_cur - eval_orig)
        point = r

    # Save new point to landmarks
    lmdks_cur = np.append(lmdks_cur, point)
    lmdks_cur.sort()

    # Add new option
    non_opts.append(np.setdiff1d(lmdks_cur, lmdks))

  return non_opts


'''
  Finds a set of regular points from bottom (many) to top (less)
  (seems to perform worse than the top-to-bottom approach).

  Parameters:
    seq - The point sequence.
    lmdks - The landmarks.
  Returns:
    The resulting set of options.

  Requirements:
    For all possible sets of regular points n such that n = len(seq) - len(lmdks).
    The result is a set of options for every possible value of n and for all possible combinations for each n.
    Time  - O(n^2)
    Space - O(n^2)
'''
def get_opts_from_bottom(seq, lmdks):
  # Evaluate the original
  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))

  # Start with all regular points as landmarks
  lmdks_cur = np.array(get_reg(seq, lmdks))

  opts = [lmdks_cur]
  while lmdks_cur.size != 1:
    # Track the minimum (best) evaluation
    diff_min = float('inf')

    point = ()
    for lmdk in lmdks_cur:
      # Evaluate current by removing one point
      eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))

      # Compare evaluations
      if abs(eval_cur - eval_orig) <= diff_min:
        diff_min = abs(eval_cur - eval_orig)
        point = lmdk

    # Remove point from landmarks
    lmdks_cur = np.setdiff1d(lmdks_cur, point)

    # Add new option
    opts.append(np.setdiff1d(lmdks_cur, lmdks))

  return opts


def get_opts_from_bottom_h(seq, lmdks):
  # Create histogram
  hist, h = get_hist(seq, lmdks)
  # Keep track of points
  hist_cur = np.array([h]*len(hist))
  while np.sum(hist_cur) > len(seq):
    hist_cur[-1] -= 1
  # The options to be returned
  hist_opts = []
  # Keep removing points until the minimum is reached
  while np.sum(hist_cur) > np.sum(hist):
    # Track the minimum (best) evaluation
    diff_min = float('inf')
    # The candidate option
    hist_cand = np.copy(hist_cur)
    # Check every possibility
    for i, h_i in enumerate(hist_cur):
      # Can we remove one more point?
      if h_i - 1 >= hist[i]:
        hist_tmp = np.copy(hist_cur)
        hist_tmp[i] -= 1
        # Find difference from original
        # diff_cur = get_norm(hist, hist_tmp)  # Euclidean
        diff_cur = get_emd(hist, hist_tmp)  # Wasserstein
        # Remember if it is the best that you've seen
        if diff_cur < diff_min:
          diff_min = diff_cur
          hist_cand = np.copy(hist_tmp)
    # Update current histogram
    hist_cur = np.copy(hist_cand)
    # Add current best to options
    hist_opts.append(hist_cand)
  # Return options
  return hist_opts

def get_non_opts_from_bottom(seq, lmdks):
  # Evaluate the original
  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))

  # Start with all regular points as landmarks
  lmdks_cur = np.array(get_reg(seq, lmdks))

  non_opts = [lmdks_cur]
  while lmdks_cur.size != 1:
    # Track the maximum (worst) evaluation
    diff_max = .0

    point = ()
    for lmdk in lmdks_cur:
      # Evaluate current by removing one point
      eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))

      # Compare evaluations
      if abs(eval_cur - eval_orig) >= diff_max:
        diff_max = abs(eval_cur - eval_orig)
        point = lmdk

    # Remove point from landmarks
    lmdks_cur = np.setdiff1d(lmdks_cur, point)

    # Add new option
    non_opts.append(np.setdiff1d(lmdks_cur, lmdks))

  return non_opts


def find_lmdks(seq, lmdks, epsilon):
  '''
    Add dummy landmarks to original landmarks.

    Parameters:
      seq     - All of the data points.
      lmdks   - The original landmarks.
      epsilon - The available privacy budget.

    Returns:
      lmdks_new - The new landmarks.
      The remaining privacy budget. 
  '''
  # The new landmarks
  lmdks_new = lmdks
  # The privacy budget to use
  eps_sel = 0
  if len(lmdks) > 0 and len(seq) != len(lmdks):
    # Get landmarks timestamps in sequence
    lmdks_seq = find_lmdks_seq(seq, lmdks)
    # Turn landmarks to histogram
    hist, h = get_hist(get_seq(1, len(seq)), lmdks_seq)
    # Find all possible options
    opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq)
    # Landmarks selection budget
    eps_sel = epsilon/(len(lmdks_seq) + 1)
    # Get landmarks histogram with dummy landmarks
    hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel)
    # Split sequence in parts of size h 
    pt_idx = []
    for idx in range(1, len(seq), h):
      pt_idx.append([idx, idx + h - 1])
    pt_idx[-1][1] = len(seq)
    # Get new landmarks indexes
    lmdks_seq_new = np.array([], dtype=int)
    for i, pt in enumerate(pt_idx):
      # Already landmarks
      lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])]
      # Sample randomly from the rest of the sequence
      size = hist_new[i] - len(lmdks_seq_pt)
      rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt)
      # Add already landmarks
      lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt])
      # Add new landmarks
      if size > 0 and len(rglr) > size:
        lmdks_seq_new = np.concatenate([lmdks_seq_new,
          np.random.choice(
            rglr, 
            size = size, 
            replace = False
          )
        ])
    # Get actual landmarks values
    lmdks_new = seq[lmdks_seq_new - 1]
  return lmdks_new, epsilon - eps_sel


def find_lmdks_eps(seq, lmdks, epsilon):
  '''
    Add dummy landmarks to original landmarks.

    Parameters:
      seq     - All of the data points.
      lmdks   - The original landmarks.
      epsilon - The available privacy budget.

    Returns:
      lmdks_new - The new landmarks.
  '''
  # The new landmarks
  lmdks_new = lmdks
  if len(seq) != len(lmdks):
    # Get landmarks timestamps in sequence
    lmdks_seq = find_lmdks_seq(seq, lmdks)
    # Turn landmarks to histogram
    hist, h = get_hist(get_seq(1, len(seq)), lmdks_seq)
    # Find all possible options
    opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq)
    # Get landmarks histogram with dummy landmarks
    hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, epsilon)
    # Split sequence in parts of size h 
    pt_idx = []
    for idx in range(1, len(seq), h):
      pt_idx.append([idx, idx + h - 1])
    pt_idx[-1][1] = len(seq)
    # Get new landmarks indexes
    lmdks_seq_new = np.array([], dtype=int)
    for i, pt in enumerate(pt_idx):
      # Already landmarks
      lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])]
      # Sample randomly from the rest of the sequence
      size = int(hist_new[i] - len(lmdks_seq_pt))
      rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt)
      # Add already landmarks
      lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt])
      # Add new landmarks
      if size > 0 and len(rglr) > size:
        lmdks_seq_new = np.concatenate([lmdks_seq_new,
          np.random.choice(
            rglr, 
            size = size, 
            replace = False
          )
        ])
    # Get actual landmarks values
    lmdks_new = seq[lmdks_seq_new - 1]
  return lmdks_new


def test():
  # Start and end points of the sequence
  # # Nonrandom
  # start = 1
  # end = 10
  # Random
  start = 1
  end = random.randint(start + 1, 100)

  # Landmarks
  # # Nonrandom
  # lmdks = np.array([1, 3, 5, 8])
  # Random
  size = random.randint(start, end - 1)
  lmdks = np.array(random.sample(range(start, end), size))
  lmdks.sort()

  # Print the parameters
  print('Start    : %d\n'
        'End      : %d\n'
        'Size     : %d\n'
        'Landmarks: %s'
        %(start, end, len(lmdks), str(lmdks)))

  # Get the point sequence
  seq = get_seq(start, end)

  # Almost optimal solution
  # print('\nOptimal...')
  # t = time.time()

  # opts_optim = get_opts_optim(seq, lmdks)

  # print('Time:', time.time() - t, 'secs\n')

  # print_rslt(seq, opts_optim, lmdks)


  # Top to bottom approach
  print('\nTop to bottom heuristic...')
  t = time.time()

  opts = get_opts_from_top(seq, lmdks)

  print('Time:', time.time() - t, 'secs')

  # print_rslt(seq, opts, lmdks)
  diff_opt = print_diff(seq, opts, lmdks)

  print('Non optimal version...')
  non_opts = get_non_opts_from_top(seq, lmdks)
  diff_non_opt = print_diff(seq, non_opts, lmdks)
  print('Non optimal is %.2f%% different ([+]: worse | [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))


  # Bottom to top approach
  # Seems to perform worse
  print('\nBottom to top heuristic...')
  t = time.time()

  opts = get_opts_from_bottom(seq, lmdks)

  print('Time:', time.time() - t, 'secs')

  # print_rslt(seq, opts, lmdks)
  diff_opt = print_diff(seq, opts, lmdks)

  print('Non optimal version...')
  non_opts = get_non_opts_from_bottom(seq, lmdks)
  diff_non_opt = print_diff(seq, non_opts, lmdks)
  print('Non optimal is %.2f%% different ([+]: worse | [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))


  # # Debugging
  # # Number of desired actual and dummy landmarks
  # k = len(lmdks) + 5
  # # Number of dummy landmarks
  # n = k - len(lmdks)
  # combs = get_combs(reg, n)
  # print_rslt(seq, combs, lmdks)
  # exit()


if __name__ == '__main__':
  try:
    test()
  except KeyboardInterrupt:
    print('Interrupted by user.')
    exit()
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`#!/usr/bin/env python3`

			`import itertools`
			`from lmdk_lib import *`
code: WIP 2021-10-06 13:14:28 +02:00			`import exp_mech`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`import numpy as np`
			`import random`
code: Corrections and cleaning up 2021-10-07 12:32:46 +02:00			`import scipy.stats as stats`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`import time`


			`'''`
			`Print all the points.`

			`Parameters:`
			`seq - The point sequence.`
			`combs - All the possible point combinations for a specified size.`
			`lmdks - The landmarks.`
			`Returns:`
			`Nothing.`
			`'''`
			`def print_rslt(seq, combs, lmdks):`
			`eval_sum = .0`
			`for idx, c in enumerate(combs):`
			`rslt = str(idx + 1) + ':\t'`
			`for i in seq:`
			`# Selected`
			`if i in c:`
			`rslt += '(' + str(i) + ')\t'`
			`# Landmark`
			`elif i in lmdks:`
			`rslt += '' + str(i) + '\t'`
			`# Not selected`
			`else:`
			`rslt += ' ' + str(i) + ' \t'`
			`dists = get_rel_dists(seq, list(c), lmdks)`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`
			`eval_cur = eval_seq(dists)`
			`eval_sum += eval_cur`
			`# print(rslt, '\t', dists, '\t', sum(dists), '\t', eval_cur)`
			`print(rslt, eval_cur)`
			`eval_avg = eval_sum/len(combs)`
			`print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, 100*(eval_avg - eval_orig)/eval_orig))`


			`'''`
			`Print the difference with the original.`

			`Parameters:`
			`seq - The point sequence.`
			`combs - All the possible point combinations for a specified size.`
			`lmdks - The landmarks.`
			`Returns:`
			`The difference with the original.`
			`'''`
			`def print_diff(seq, combs, lmdks):`
			`eval_sum = .0`
			`for c in combs:`
			`dists = get_rel_dists(seq, list(c), lmdks)`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`
			`eval_cur = eval_seq(dists)`
			`eval_sum += eval_cur`
			`eval_avg = eval_sum/len(combs)`
			`diff = 100*(eval_avg - eval_orig)/eval_orig`
			`print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, diff))`
			`return diff`


			`'''`
			`Finds the optimal set of regular points.`

			`Parameters:`
			`seq - The point sequence.`
			`lmdks - The landmarks.`
			`Returns:`
			`The optimal option.`

			`Requirements:`
			`n = Regular points`
			`r = The size of a combination`
			`Time - O(C(n, r) + 2^C(n, r))`
			`Space - O(r*C(n, r))`
			`'''`
			`def get_opts_optim(seq, lmdks):`
			`# Evaluate the original`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`

			`# Get all possible options`
			`opts = get_opts(seq, lmdks)`

			`# Evaluate options`
			`# Track the minimum (best) evaluation`
			`diff_min = float('inf')`

			`# Track the optimal sequence (the one with the best evaluation)`
			`optim = []`
			`for opt in opts:`
			`eval_sum = 0`
			`for o in opt:`
			`eval_sum += eval_seq(get_rel_dists(seq, o, lmdks))`
			`# Compare with current optimal`
			`diff_cur = abs(eval_sum/len(opt) - eval_orig)`
			`if diff_cur < diff_min:`
			`diff_min = diff_cur`
			`optim = list(opt)`

			`return optim`


			`'''`
			`Finds a set of regular points from top (less) to bottom (many)`
			`(seems to perform better than the bottom-to-top approach).`

			`Parameters:`
			`seq - The point sequence.`
			`lmdks - The landmarks.`
			`Returns:`
			`The resulting set of options.`

			`Requirements:`
			`For all possible sets of regular points n such that n = len(seq) - len(lmdks).`
			`The result is a set of options for every possible value of n and for all possible combinations for each n.`
			`Time - O(n^2)`
			`Space - O(n^2)`
			`'''`
			`def get_opts_from_top(seq, lmdks):`
			`# Evaluate the original`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`

			`opts = []`
			`lmdks_cur = np.array(lmdks)`
			`while not np.array_equal(lmdks_cur, seq):`
			`# Find the combinations for one more point`
			`reg = get_reg(seq, lmdks_cur)`

			`# Track the minimum (best) evaluation`
			`diff_min = float('inf')`

			`point = ()`
			`for r in reg:`
			`# Evaluate current`
			`eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))`

			`# Compare evaluations`
			`if abs(eval_cur - eval_orig) <= diff_min:`
			`diff_min = abs(eval_cur - eval_orig)`
			`point = r`

			`# Save new point to landmarks`
			`lmdks_cur = np.append(lmdks_cur, point)`
			`lmdks_cur.sort()`

			`# Add new option`
			`opts.append(np.setdiff1d(lmdks_cur, lmdks))`

			`return opts`

			`def get_opts_from_top_h(seq, lmdks):`
			`# Create histogram`
			`hist, h = get_hist(seq, lmdks)`
			`# Keep track of points`
			`hist_cur = np.copy(hist)`
			`# The options to be returned`
			`hist_opts = []`
			`# Keep adding points until the maximum is reached`
code: Corrections and cleaning up 2021-10-07 12:32:46 +02:00			`while np.sum(hist_cur) < len(seq):`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`# Track the minimum (best) evaluation`
			`diff_min = float('inf')`
			`# The candidate option`
			`hist_cand = np.copy(hist_cur)`
			`# Check every possibility`
			`for i, h_i in enumerate(hist_cur):`
			`# Can we add one more point?`
			`if h_i + 1 <= h:`
			`hist_tmp = np.copy(hist_cur)`
			`hist_tmp[i] += 1`
			`# Find difference from original`
code: Corrections and cleaning up 2021-10-07 12:32:46 +02:00			`diff_cur = get_norm(hist, hist_tmp) # Euclidean`
			`# diff_cur = get_emd(hist, hist_tmp) # Wasserstein`
code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`# Remember if it is the best that you've seen`
			`if diff_cur < diff_min:`
			`diff_min = diff_cur`
			`hist_cand = np.copy(hist_tmp)`
			`# Update current histogram`
			`hist_cur = np.copy(hist_cand)`
			`# Add current best to options`
			`hist_opts.append(hist_cand)`
			`# Return options`
			`return hist_opts`


			`def get_non_opts_from_top(seq, lmdks):`
			`# Evaluate the original`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`

			`non_opts = []`
			`lmdks_cur = np.array(lmdks)`
			`while not np.array_equal(lmdks_cur, seq):`
			`# Find the combinations for one more point`
			`reg = get_reg(seq, lmdks_cur)`

			`# Track the maximum (worst) evaluation`
			`diff_max = .0`

			`point = ()`
			`for r in reg:`
			`# Evaluate current`
			`eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))`

			`# Compare evaluations`
			`if abs(eval_cur - eval_orig) >= diff_max:`
			`diff_max = abs(eval_cur - eval_orig)`
			`point = r`

			`# Save new point to landmarks`
			`lmdks_cur = np.append(lmdks_cur, point)`
			`lmdks_cur.sort()`

			`# Add new option`
			`non_opts.append(np.setdiff1d(lmdks_cur, lmdks))`

			`return non_opts`


			`'''`
			`Finds a set of regular points from bottom (many) to top (less)`
			`(seems to perform worse than the top-to-bottom approach).`

			`Parameters:`
			`seq - The point sequence.`
			`lmdks - The landmarks.`
			`Returns:`
			`The resulting set of options.`

			`Requirements:`
			`For all possible sets of regular points n such that n = len(seq) - len(lmdks).`
			`The result is a set of options for every possible value of n and for all possible combinations for each n.`
			`Time - O(n^2)`
			`Space - O(n^2)`
			`'''`
			`def get_opts_from_bottom(seq, lmdks):`
			`# Evaluate the original`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`

			`# Start with all regular points as landmarks`
			`lmdks_cur = np.array(get_reg(seq, lmdks))`

			`opts = [lmdks_cur]`
			`while lmdks_cur.size != 1:`
			`# Track the minimum (best) evaluation`
			`diff_min = float('inf')`

			`point = ()`
			`for lmdk in lmdks_cur:`
			`# Evaluate current by removing one point`
			`eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))`

			`# Compare evaluations`
			`if abs(eval_cur - eval_orig) <= diff_min:`
			`diff_min = abs(eval_cur - eval_orig)`
			`point = lmdk`

			`# Remove point from landmarks`
			`lmdks_cur = np.setdiff1d(lmdks_cur, point)`

			`# Add new option`
			`opts.append(np.setdiff1d(lmdks_cur, lmdks))`

			`return opts`


expt: Testing bottom to top approach 2021-10-07 18:34:48 +02:00			`def get_opts_from_bottom_h(seq, lmdks):`
			`# Create histogram`
			`hist, h = get_hist(seq, lmdks)`
			`# Keep track of points`
			`hist_cur = np.array([h]*len(hist))`
			`while np.sum(hist_cur) > len(seq):`
			`hist_cur[-1] -= 1`
			`# The options to be returned`
			`hist_opts = []`
			`# Keep removing points until the minimum is reached`
			`while np.sum(hist_cur) > np.sum(hist):`
			`# Track the minimum (best) evaluation`
			`diff_min = float('inf')`
			`# The candidate option`
			`hist_cand = np.copy(hist_cur)`
			`# Check every possibility`
			`for i, h_i in enumerate(hist_cur):`
			`# Can we remove one more point?`
			`if h_i - 1 >= hist[i]:`
			`hist_tmp = np.copy(hist_cur)`
			`hist_tmp[i] -= 1`
			`# Find difference from original`
			`# diff_cur = get_norm(hist, hist_tmp) # Euclidean`
			`diff_cur = get_emd(hist, hist_tmp) # Wasserstein`
			`# Remember if it is the best that you've seen`
			`if diff_cur < diff_min:`
			`diff_min = diff_cur`
			`hist_cand = np.copy(hist_tmp)`
			`# Update current histogram`
			`hist_cur = np.copy(hist_cand)`
			`# Add current best to options`
			`hist_opts.append(hist_cand)`
			`# Return options`
			`return hist_opts`

code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`def get_non_opts_from_bottom(seq, lmdks):`
			`# Evaluate the original`
			`eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))`

			`# Start with all regular points as landmarks`
			`lmdks_cur = np.array(get_reg(seq, lmdks))`

			`non_opts = [lmdks_cur]`
			`while lmdks_cur.size != 1:`
			`# Track the maximum (worst) evaluation`
			`diff_max = .0`

			`point = ()`
			`for lmdk in lmdks_cur:`
			`# Evaluate current by removing one point`
			`eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))`

			`# Compare evaluations`
			`if abs(eval_cur - eval_orig) >= diff_max:`
			`diff_max = abs(eval_cur - eval_orig)`
			`point = lmdk`

			`# Remove point from landmarks`
			`lmdks_cur = np.setdiff1d(lmdks_cur, point)`

			`# Add new option`
			`non_opts.append(np.setdiff1d(lmdks_cur, lmdks))`

			`return non_opts`


code: WIP 2021-10-06 13:14:28 +02:00			`def find_lmdks(seq, lmdks, epsilon):`
lmdk_sel: Trying without Pareto 2021-10-06 13:30:15 +02:00			`'''`
			`Add dummy landmarks to original landmarks.`

			`Parameters:`
			`seq - All of the data points.`
			`lmdks - The original landmarks.`
			`epsilon - The available privacy budget.`

			`Returns:`
			`lmdks_new - The new landmarks.`
			`The remaining privacy budget.`
			`'''`
lmdk_sel: Minor corrections 2021-10-06 15:59:28 +02:00			`# The new landmarks`
			`lmdks_new = lmdks`
lmdk_sel: Trying without Pareto 2021-10-06 13:30:15 +02:00			`# The privacy budget to use`
code: WIP 2021-10-06 13:14:28 +02:00			`eps_sel = 0`
			`if len(lmdks) > 0 and len(seq) != len(lmdks):`
			`# Get landmarks timestamps in sequence`
			`lmdks_seq = find_lmdks_seq(seq, lmdks)`
			`# Turn landmarks to histogram`
			`hist, h = get_hist(get_seq(1, len(seq)), lmdks_seq)`
			`# Find all possible options`
			`opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq)`
			`# Landmarks selection budget`
			`eps_sel = epsilon/(len(lmdks_seq) + 1)`
code: Corrections and cleaning up 2021-10-07 12:32:46 +02:00			`# Get landmarks histogram with dummy landmarks`
			`hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, eps_sel)`
			`# Split sequence in parts of size h`
			`pt_idx = []`
			`for idx in range(1, len(seq), h):`
			`pt_idx.append([idx, idx + h - 1])`
			`pt_idx[-1][1] = len(seq)`
			`# Get new landmarks indexes`
			`lmdks_seq_new = np.array([], dtype=int)`
			`for i, pt in enumerate(pt_idx):`
			`# Already landmarks`
			`lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])]`
			`# Sample randomly from the rest of the sequence`
			`size = hist_new[i] - len(lmdks_seq_pt)`
			`rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt)`
			`# Add already landmarks`
			`lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt])`
			`# Add new landmarks`
			`if size > 0 and len(rglr) > size:`
			`lmdks_seq_new = np.concatenate([lmdks_seq_new,`
			`np.random.choice(`
			`rglr,`
			`size = size,`
			`replace = False`
			`)`
			`])`
code: WIP 2021-10-06 13:14:28 +02:00			`# Get actual landmarks values`
code: Corrections and cleaning up 2021-10-07 12:32:46 +02:00			`lmdks_new = seq[lmdks_seq_new - 1]`
lmdk_sel: Trying without Pareto 2021-10-06 13:30:15 +02:00			`return lmdks_new, epsilon - eps_sel`
code: WIP 2021-10-06 13:14:28 +02:00
expt: Testing epsilon percentages 2021-10-12 23:26:38 +02:00
			`def find_lmdks_eps(seq, lmdks, epsilon):`
			`'''`
			`Add dummy landmarks to original landmarks.`

			`Parameters:`
			`seq - All of the data points.`
			`lmdks - The original landmarks.`
			`epsilon - The available privacy budget.`

			`Returns:`
			`lmdks_new - The new landmarks.`
			`'''`
			`# The new landmarks`
			`lmdks_new = lmdks`
code: Several fixes 2021-10-13 09:03:36 +02:00			`if len(seq) != len(lmdks):`
expt: Testing epsilon percentages 2021-10-12 23:26:38 +02:00			`# Get landmarks timestamps in sequence`
			`lmdks_seq = find_lmdks_seq(seq, lmdks)`
			`# Turn landmarks to histogram`
			`hist, h = get_hist(get_seq(1, len(seq)), lmdks_seq)`
			`# Find all possible options`
			`opts = get_opts_from_top_h(get_seq(1, len(seq)), lmdks_seq)`
			`# Get landmarks histogram with dummy landmarks`
			`hist_new, _ = exp_mech.exponential(hist, opts, exp_mech.score, 1.0, epsilon)`
			`# Split sequence in parts of size h`
			`pt_idx = []`
			`for idx in range(1, len(seq), h):`
			`pt_idx.append([idx, idx + h - 1])`
			`pt_idx[-1][1] = len(seq)`
			`# Get new landmarks indexes`
			`lmdks_seq_new = np.array([], dtype=int)`
			`for i, pt in enumerate(pt_idx):`
			`# Already landmarks`
			`lmdks_seq_pt = lmdks_seq[(lmdks_seq >= pt[0]) & (lmdks_seq <= pt[1])]`
			`# Sample randomly from the rest of the sequence`
code: Several fixes 2021-10-13 09:03:36 +02:00			`size = int(hist_new[i] - len(lmdks_seq_pt))`
expt: Testing epsilon percentages 2021-10-12 23:26:38 +02:00			`rglr = np.setdiff1d(np.arange(pt[0], pt[1] + 1), lmdks_seq_pt)`
			`# Add already landmarks`
			`lmdks_seq_new = np.concatenate([lmdks_seq_new, lmdks_seq_pt])`
			`# Add new landmarks`
			`if size > 0 and len(rglr) > size:`
			`lmdks_seq_new = np.concatenate([lmdks_seq_new,`
			`np.random.choice(`
			`rglr,`
			`size = size,`
			`replace = False`
			`)`
			`])`
			`# Get actual landmarks values`
			`lmdks_new = seq[lmdks_seq_new - 1]`
			`return lmdks_new`



code: Ready to experiment with lmdk_sel 2021-10-05 04:37:13 +02:00			`def test():`
			`# Start and end points of the sequence`
			`# # Nonrandom`
			`# start = 1`
			`# end = 10`
			`# Random`
			`start = 1`
			`end = random.randint(start + 1, 100)`

			`# Landmarks`
			`# # Nonrandom`
			`# lmdks = np.array([1, 3, 5, 8])`
			`# Random`
			`size = random.randint(start, end - 1)`
			`lmdks = np.array(random.sample(range(start, end), size))`
			`lmdks.sort()`

			`# Print the parameters`
			`print('Start : %d\n'`
			`'End : %d\n'`
			`'Size : %d\n'`
			`'Landmarks: %s'`
			`%(start, end, len(lmdks), str(lmdks)))`

			`# Get the point sequence`
			`seq = get_seq(start, end)`

			`# Almost optimal solution`
			`# print('\nOptimal...')`
			`# t = time.time()`

			`# opts_optim = get_opts_optim(seq, lmdks)`

			`# print('Time:', time.time() - t, 'secs\n')`

			`# print_rslt(seq, opts_optim, lmdks)`


			`# Top to bottom approach`
			`print('\nTop to bottom heuristic...')`
			`t = time.time()`

			`opts = get_opts_from_top(seq, lmdks)`

			`print('Time:', time.time() - t, 'secs')`

			`# print_rslt(seq, opts, lmdks)`
			`diff_opt = print_diff(seq, opts, lmdks)`

			`print('Non optimal version...')`
			`non_opts = get_non_opts_from_top(seq, lmdks)`
			`diff_non_opt = print_diff(seq, non_opts, lmdks)`
			`print('Non optimal is %.2f%% different ([+]: worse \| [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))`


			`# Bottom to top approach`
			`# Seems to perform worse`
			`print('\nBottom to top heuristic...')`
			`t = time.time()`

			`opts = get_opts_from_bottom(seq, lmdks)`

			`print('Time:', time.time() - t, 'secs')`

			`# print_rslt(seq, opts, lmdks)`
			`diff_opt = print_diff(seq, opts, lmdks)`

			`print('Non optimal version...')`
			`non_opts = get_non_opts_from_bottom(seq, lmdks)`
			`diff_non_opt = print_diff(seq, non_opts, lmdks)`
			`print('Non optimal is %.2f%% different ([+]: worse \| [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))`


			`# # Debugging`
			`# # Number of desired actual and dummy landmarks`
			`# k = len(lmdks) + 5`
			`# # Number of dummy landmarks`
			`# n = k - len(lmdks)`
			`# combs = get_combs(reg, n)`
			`# print_rslt(seq, combs, lmdks)`
			`# exit()`


			`if __name__ == '__main__':`
			`try:`
			`test()`
			`except KeyboardInterrupt:`
			`print('Interrupted by user.')`
			`exit()`