code: Ready to experiment with lmdk_sel
This commit is contained in:
		@ -1,5 +1,7 @@
 | 
				
			|||||||
#!/usr/bin/env python3
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					sys.path.insert(1, '../lib')
 | 
				
			||||||
import argparse
 | 
					import argparse
 | 
				
			||||||
import lmdk_lib
 | 
					import lmdk_lib
 | 
				
			||||||
import lmdk_sel
 | 
					import lmdk_sel
 | 
				
			||||||
@ -36,7 +38,7 @@ def main(args):
 | 
				
			|||||||
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
 | 
					    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
 | 
				
			||||||
    # The y axis
 | 
					    # The y axis
 | 
				
			||||||
    plt.ylabel('Mean absolute error')  # Set y axis label.
 | 
					    plt.ylabel('Mean absolute error')  # Set y axis label.
 | 
				
			||||||
    plt.ylim(0, len(seq)/3)
 | 
					    # plt.ylim(0, len(seq)/3)
 | 
				
			||||||
    # Bar offset
 | 
					    # Bar offset
 | 
				
			||||||
    x_offset = -(bar_width/2)*(len(epsilon) - 1)
 | 
					    x_offset = -(bar_width/2)*(len(epsilon) - 1)
 | 
				
			||||||
    for e_i, e in enumerate(epsilon):
 | 
					    for e_i, e in enumerate(epsilon):
 | 
				
			||||||
@ -45,9 +47,26 @@ def main(args):
 | 
				
			|||||||
        for r in range(args.reps):
 | 
					        for r in range(args.reps):
 | 
				
			||||||
          lmdks = lmdk_lib.get_lmdks(seq, n, d)
 | 
					          lmdks = lmdk_lib.get_lmdks(seq, n, d)
 | 
				
			||||||
          hist, h = lmdk_lib.get_hist(seq, lmdks)
 | 
					          hist, h = lmdk_lib.get_hist(seq, lmdks)
 | 
				
			||||||
          opts = lmdk_sel.get_opts_from_top_h(seq, lmdks)
 | 
					          res = np.zeros([len(hist)])
 | 
				
			||||||
          delta = 1.0
 | 
					          # Split sequence in parts of size h 
 | 
				
			||||||
          res, _ = exp_mech.exponential(hist, opts, exp_mech.score, delta, e)
 | 
					          pt_idx = []
 | 
				
			||||||
 | 
					          for idx in range(h, len(seq), h):
 | 
				
			||||||
 | 
					            pt_idx.append(idx)
 | 
				
			||||||
 | 
					          seq_pt = np.split(seq, pt_idx)
 | 
				
			||||||
 | 
					          for pt_i, pt in enumerate(seq_pt):
 | 
				
			||||||
 | 
					            # Find this part's landmarks
 | 
				
			||||||
 | 
					            lmdks_pt = np.intersect1d(pt, lmdks)
 | 
				
			||||||
 | 
					            # Find possible options for this part
 | 
				
			||||||
 | 
					            opts = lmdk_sel.get_opts_from_top_h(pt, lmdks_pt)
 | 
				
			||||||
 | 
					            # Turn part to histogram
 | 
				
			||||||
 | 
					            hist_pt, _ = lmdk_lib.get_hist(pt, lmdks_pt)
 | 
				
			||||||
 | 
					            # Get an option for this part
 | 
				
			||||||
 | 
					            res_pt = np.array([])
 | 
				
			||||||
 | 
					            if len(opts) > 0:
 | 
				
			||||||
 | 
					              res_pt, _ = exp_mech.exponential(hist_pt, opts, exp_mech.score, 1.0, e)
 | 
				
			||||||
 | 
					            # Merge options of all parts
 | 
				
			||||||
 | 
					            res[pt_i] = np.sum(res_pt)
 | 
				
			||||||
 | 
					          # Calculate MAE
 | 
				
			||||||
          mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps
 | 
					          mae[n_i] += lmdk_lib.get_norm(hist, res)/args.reps
 | 
				
			||||||
      # Plot bar for current epsilon
 | 
					      # Plot bar for current epsilon
 | 
				
			||||||
      plt.bar(
 | 
					      plt.bar(
 | 
				
			||||||
@ -59,7 +78,7 @@ def main(args):
 | 
				
			|||||||
      )
 | 
					      )
 | 
				
			||||||
      # Change offset for next bar
 | 
					      # Change offset for next bar
 | 
				
			||||||
      x_offset += bar_width
 | 
					      x_offset += bar_width
 | 
				
			||||||
    path = str('/home/manos/Git/the-thing/code/expt_lmdk_sel/' + title)
 | 
					    path = str('../../rslt/lmdk_sel/' + title)
 | 
				
			||||||
    # Plot legend
 | 
					    # Plot legend
 | 
				
			||||||
    lmdk_lib.plot_legend()
 | 
					    lmdk_lib.plot_legend()
 | 
				
			||||||
    # Show plot
 | 
					    # Show plot
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										141
									
								
								code/lib/exp_mech.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										141
									
								
								code/lib/exp_mech.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,141 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					import math
 | 
				
			||||||
 | 
					import heapq
 | 
				
			||||||
 | 
					import itertools
 | 
				
			||||||
 | 
					import random
 | 
				
			||||||
 | 
					import scipy.stats as stats
 | 
				
			||||||
 | 
					import lmdk_lib
 | 
				
			||||||
 | 
					from matplotlib import pyplot as plt
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  The scoring function.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    data - The data.
 | 
				
			||||||
 | 
					    option - The option to evaluate.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    The score for the option.
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def score(data, option):
 | 
				
			||||||
 | 
					  return (option.sum() - data.sum())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  The exponential mechanism.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    x - The data.
 | 
				
			||||||
 | 
					    R - The possible outputs.
 | 
				
			||||||
 | 
					    u - The scoring function.
 | 
				
			||||||
 | 
					    delta - The sensitivity of the scoring function.
 | 
				
			||||||
 | 
					    epsilon - The privacy budget.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    res - A randomly sampled output.
 | 
				
			||||||
 | 
					    pr - The PDF of all possible outputs.
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def exponential(x, R, u, delta, epsilon):
 | 
				
			||||||
 | 
					  # Calculate the score for each element of R
 | 
				
			||||||
 | 
					  scores = [u(x, r) for r in R]
 | 
				
			||||||
 | 
					  # Normalize the scores between 0 and 1
 | 
				
			||||||
 | 
					  # (the higher, the better the utility)
 | 
				
			||||||
 | 
					  scores = 1 - (scores - np.min(scores))/(np.max(scores) - np.min(scores))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Calculate the probability for each element, based on its score
 | 
				
			||||||
 | 
					  pr = [np.exp(epsilon*score/(2*delta)) for score in scores]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Normalize the probabilities so that they sum to 1
 | 
				
			||||||
 | 
					  pr = pr/np.linalg.norm(pr, ord = 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Debugging
 | 
				
			||||||
 | 
					  # print(R[np.argmax(pr)], pr.max(), scores[np.argmax(pr)])
 | 
				
			||||||
 | 
					  # print(R[np.argmin(pr)], pr.min(), scores[np.argmin(pr)])
 | 
				
			||||||
 | 
					  # print(abs(pr.max() - pr.min()), abs(scores[np.argmax(pr)] - scores[np.argmin(pr)]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Choose an element from R based on the probabilities
 | 
				
			||||||
 | 
					  if len(pr) > 0:
 | 
				
			||||||
 | 
					    return R[np.random.choice(range(len(R)), 1, p = pr)[0]], pr
 | 
				
			||||||
 | 
					  else:
 | 
				
			||||||
 | 
					    return np.array([]), pr
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main():
 | 
				
			||||||
 | 
					  start, end = 1.0, 10.0
 | 
				
			||||||
 | 
					  scale = 1.0
 | 
				
			||||||
 | 
					  locs = [2.0, 4.0, 8.0]
 | 
				
			||||||
 | 
					  k = len(locs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # dists = [truncnorm(start, end, loc, scale) for loc in locs]
 | 
				
			||||||
 | 
					  dists = [stats.laplace(loc, scale) for loc in locs]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  mix = lmdk_lib.MixtureModel(dists)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  delta = 1.0
 | 
				
			||||||
 | 
					  epsilon = 10.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  combos = list(itertools.combinations(range(int(start), int(end) + 1), k))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  res, pr = exponential(mix, combos, score, delta, epsilon)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plt.rc('font', family='serif')
 | 
				
			||||||
 | 
					  plt.rc('font', size=10)
 | 
				
			||||||
 | 
					  plt.rc('text', usetex=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Plot the options' probabilities.
 | 
				
			||||||
 | 
					  # pr.sort()
 | 
				
			||||||
 | 
					  # n = np.arange(1.0, len(pr) + 1.0, 1.0)
 | 
				
			||||||
 | 
					  # plt.plot(n,\
 | 
				
			||||||
 | 
					  #          pr,\
 | 
				
			||||||
 | 
					  #          label=r'$\textrm{Pr}$',\
 | 
				
			||||||
 | 
					  #          color='blue')
 | 
				
			||||||
 | 
					  # # Configure the plot.
 | 
				
			||||||
 | 
					  # plt.axis([1.0, len(pr), 0.0, max(pr)])  # Set plot box.
 | 
				
			||||||
 | 
					  # plt.legend(loc='best', frameon=False)  # Set plot legend.
 | 
				
			||||||
 | 
					  # plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
 | 
				
			||||||
 | 
					  # plt.xlabel('Options')  # Set x axis label.
 | 
				
			||||||
 | 
					  # plt.ylabel('Likelihood')  # Set y axis label.
 | 
				
			||||||
 | 
					  # plt.show()  # Show the plot in a new window.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  x = np.arange(start, end, 0.01)
 | 
				
			||||||
 | 
					  plt.plot(x,\
 | 
				
			||||||
 | 
					          mix.pdf(x),\
 | 
				
			||||||
 | 
					          label=r'$\textrm{Mix}$',\
 | 
				
			||||||
 | 
					          color='red')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # print(mix.sample(start, end, 10))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Test MixtureModel's sample function
 | 
				
			||||||
 | 
					  # t = 1000
 | 
				
			||||||
 | 
					  # c = np.array(int(end)*[0])
 | 
				
			||||||
 | 
					  # for i in range(t):
 | 
				
			||||||
 | 
					  #   c[mix.sample(start, end, 1)[0] - 1] += 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # plt.plot(range(int(start), int(end) + 1),\
 | 
				
			||||||
 | 
					  #          c/t,\
 | 
				
			||||||
 | 
					  #          label=r'$\textrm{Test}$',\
 | 
				
			||||||
 | 
					  #          color='blue')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Configure the plot.
 | 
				
			||||||
 | 
					  plt.axis([start, end, 0.0, 1.0])  # Set plot box.
 | 
				
			||||||
 | 
					  plt.legend(loc='best', frameon=False)  # Set plot legend.
 | 
				
			||||||
 | 
					  plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
 | 
				
			||||||
 | 
					  plt.xlabel('Timestamp')  # Set x axis label.
 | 
				
			||||||
 | 
					  plt.ylabel('Likelihood')  # Set y axis label.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  plt.show()  # Show the plot in a new window.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					  try:
 | 
				
			||||||
 | 
					    start_time = time.time()
 | 
				
			||||||
 | 
					    main()
 | 
				
			||||||
 | 
					    end_time = time.time()
 | 
				
			||||||
 | 
					    print('##############################')
 | 
				
			||||||
 | 
					    print('Time   : %.4fs' % (end_time - start_time))
 | 
				
			||||||
 | 
					    print('##############################')
 | 
				
			||||||
 | 
					  except KeyboardInterrupt:
 | 
				
			||||||
 | 
					    print('Interrupted by user.')
 | 
				
			||||||
 | 
					    exit()
 | 
				
			||||||
							
								
								
									
										388
									
								
								code/lib/lmdk_sel.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										388
									
								
								code/lib/lmdk_sel.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,388 @@
 | 
				
			|||||||
 | 
					#!/usr/bin/env python3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import itertools
 | 
				
			||||||
 | 
					from lmdk_lib import *
 | 
				
			||||||
 | 
					import numpy as np
 | 
				
			||||||
 | 
					import random
 | 
				
			||||||
 | 
					import time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  Print all the points.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    seq - The point sequence.
 | 
				
			||||||
 | 
					    combs - All the possible point combinations for a specified size.
 | 
				
			||||||
 | 
					    lmdks - The landmarks.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    Nothing.
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def print_rslt(seq, combs, lmdks):
 | 
				
			||||||
 | 
					  eval_sum = .0
 | 
				
			||||||
 | 
					  for idx, c in enumerate(combs):
 | 
				
			||||||
 | 
					    rslt = str(idx + 1) + ':\t'
 | 
				
			||||||
 | 
					    for i in seq:
 | 
				
			||||||
 | 
					      # Selected
 | 
				
			||||||
 | 
					      if i in c:
 | 
				
			||||||
 | 
					        rslt += '(' + str(i) + ')\t'
 | 
				
			||||||
 | 
					      # Landmark
 | 
				
			||||||
 | 
					      elif i in lmdks:
 | 
				
			||||||
 | 
					        rslt += '*' + str(i) + '*\t'
 | 
				
			||||||
 | 
					      # Not selected
 | 
				
			||||||
 | 
					      else:
 | 
				
			||||||
 | 
					        rslt += ' ' + str(i) + ' \t'
 | 
				
			||||||
 | 
					    dists = get_rel_dists(seq, list(c), lmdks)
 | 
				
			||||||
 | 
					    eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					    eval_cur = eval_seq(dists)
 | 
				
			||||||
 | 
					    eval_sum += eval_cur
 | 
				
			||||||
 | 
					    # print(rslt, '\t', dists, '\t', sum(dists), '\t', eval_cur)
 | 
				
			||||||
 | 
					    print(rslt, eval_cur)
 | 
				
			||||||
 | 
					  eval_avg = eval_sum/len(combs)
 | 
				
			||||||
 | 
					  print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, 100*(eval_avg - eval_orig)/eval_orig))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  Print the difference with the original.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    seq - The point sequence.
 | 
				
			||||||
 | 
					    combs - All the possible point combinations for a specified size.
 | 
				
			||||||
 | 
					    lmdks - The landmarks.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    The difference with the original.
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def print_diff(seq, combs, lmdks):
 | 
				
			||||||
 | 
					  eval_sum = .0
 | 
				
			||||||
 | 
					  for c in combs:
 | 
				
			||||||
 | 
					    dists = get_rel_dists(seq, list(c), lmdks)
 | 
				
			||||||
 | 
					    eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					    eval_cur = eval_seq(dists)
 | 
				
			||||||
 | 
					    eval_sum += eval_cur
 | 
				
			||||||
 | 
					  eval_avg = eval_sum/len(combs)
 | 
				
			||||||
 | 
					  diff = 100*(eval_avg - eval_orig)/eval_orig
 | 
				
			||||||
 | 
					  print('Average STD (difference with original): %.4f (%.2f%%)' %(eval_avg, diff))
 | 
				
			||||||
 | 
					  return diff
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  Finds the optimal set of regular points.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    seq - The point sequence.
 | 
				
			||||||
 | 
					    lmdks - The landmarks.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    The optimal option.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Requirements:
 | 
				
			||||||
 | 
					    n = Regular points
 | 
				
			||||||
 | 
					    r = The size of a combination
 | 
				
			||||||
 | 
					    Time  - O(C(n, r) + 2^C(n, r))
 | 
				
			||||||
 | 
					    Space - O(r*C(n, r))
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def get_opts_optim(seq, lmdks):
 | 
				
			||||||
 | 
					  # Evaluate the original
 | 
				
			||||||
 | 
					  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Get all possible options
 | 
				
			||||||
 | 
					  opts = get_opts(seq, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Evaluate options
 | 
				
			||||||
 | 
					  # Track the minimum (best) evaluation
 | 
				
			||||||
 | 
					  diff_min = float('inf')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Track the optimal sequence (the one with the best evaluation)
 | 
				
			||||||
 | 
					  optim = []
 | 
				
			||||||
 | 
					  for opt in opts:
 | 
				
			||||||
 | 
					    eval_sum = 0
 | 
				
			||||||
 | 
					    for o in opt:
 | 
				
			||||||
 | 
					      eval_sum += eval_seq(get_rel_dists(seq, o, lmdks))
 | 
				
			||||||
 | 
					    # Compare with current optimal
 | 
				
			||||||
 | 
					    diff_cur = abs(eval_sum/len(opt) - eval_orig)
 | 
				
			||||||
 | 
					    if diff_cur < diff_min:
 | 
				
			||||||
 | 
					      diff_min = diff_cur
 | 
				
			||||||
 | 
					      optim = list(opt)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return optim
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  Finds a set of regular points from top (less) to bottom (many)
 | 
				
			||||||
 | 
					  (seems to perform better than the bottom-to-top approach).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    seq - The point sequence.
 | 
				
			||||||
 | 
					    lmdks - The landmarks.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    The resulting set of options.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Requirements:
 | 
				
			||||||
 | 
					    For all possible sets of regular points n such that n = len(seq) - len(lmdks).
 | 
				
			||||||
 | 
					    The result is a set of options for every possible value of n and for all possible combinations for each n.
 | 
				
			||||||
 | 
					    Time  - O(n^2)
 | 
				
			||||||
 | 
					    Space - O(n^2)
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def get_opts_from_top(seq, lmdks):
 | 
				
			||||||
 | 
					  # Evaluate the original
 | 
				
			||||||
 | 
					  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  opts = []
 | 
				
			||||||
 | 
					  lmdks_cur = np.array(lmdks)
 | 
				
			||||||
 | 
					  while not np.array_equal(lmdks_cur, seq):
 | 
				
			||||||
 | 
					    # Find the combinations for one more point
 | 
				
			||||||
 | 
					    reg = get_reg(seq, lmdks_cur)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Track the minimum (best) evaluation
 | 
				
			||||||
 | 
					    diff_min = float('inf')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    point = ()
 | 
				
			||||||
 | 
					    for r in reg:
 | 
				
			||||||
 | 
					      # Evaluate current
 | 
				
			||||||
 | 
					      eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Compare evaluations
 | 
				
			||||||
 | 
					      if abs(eval_cur - eval_orig) <= diff_min:
 | 
				
			||||||
 | 
					        diff_min = abs(eval_cur - eval_orig)
 | 
				
			||||||
 | 
					        point = r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Save new point to landmarks
 | 
				
			||||||
 | 
					    lmdks_cur = np.append(lmdks_cur, point)
 | 
				
			||||||
 | 
					    lmdks_cur.sort()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Add new option
 | 
				
			||||||
 | 
					    opts.append(np.setdiff1d(lmdks_cur, lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return opts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_opts_from_top_h(seq, lmdks):
 | 
				
			||||||
 | 
					  # Create histogram
 | 
				
			||||||
 | 
					  hist, h = get_hist(seq, lmdks)
 | 
				
			||||||
 | 
					  # Keep track of points
 | 
				
			||||||
 | 
					  hist_cur = np.copy(hist)
 | 
				
			||||||
 | 
					  # The options to be returned
 | 
				
			||||||
 | 
					  hist_opts = []
 | 
				
			||||||
 | 
					  # Keep adding points until the maximum is reached
 | 
				
			||||||
 | 
					  while np.sum(hist_cur) < max(seq):
 | 
				
			||||||
 | 
					    # Track the minimum (best) evaluation
 | 
				
			||||||
 | 
					    diff_min = float('inf')
 | 
				
			||||||
 | 
					    # The candidate option
 | 
				
			||||||
 | 
					    hist_cand = np.copy(hist_cur)
 | 
				
			||||||
 | 
					    # Check every possibility
 | 
				
			||||||
 | 
					    for i, h_i in enumerate(hist_cur):
 | 
				
			||||||
 | 
					      # Can we add one more point?
 | 
				
			||||||
 | 
					      if h_i + 1 <= h:
 | 
				
			||||||
 | 
					        hist_tmp = np.copy(hist_cur)
 | 
				
			||||||
 | 
					        hist_tmp[i] += 1
 | 
				
			||||||
 | 
					        # Find difference from original
 | 
				
			||||||
 | 
					        diff_cur = get_norm(hist, hist_tmp)
 | 
				
			||||||
 | 
					        # Remember if it is the best that you've seen
 | 
				
			||||||
 | 
					        if diff_cur < diff_min:
 | 
				
			||||||
 | 
					          diff_min = diff_cur
 | 
				
			||||||
 | 
					          hist_cand = np.copy(hist_tmp)
 | 
				
			||||||
 | 
					    # Update current histogram
 | 
				
			||||||
 | 
					    hist_cur = np.copy(hist_cand)
 | 
				
			||||||
 | 
					    # Add current best to options
 | 
				
			||||||
 | 
					    hist_opts.append(hist_cand)
 | 
				
			||||||
 | 
					  # Return options
 | 
				
			||||||
 | 
					  return hist_opts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_non_opts_from_top(seq, lmdks):
 | 
				
			||||||
 | 
					  # Evaluate the original
 | 
				
			||||||
 | 
					  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  non_opts = []
 | 
				
			||||||
 | 
					  lmdks_cur = np.array(lmdks)
 | 
				
			||||||
 | 
					  while not np.array_equal(lmdks_cur, seq):
 | 
				
			||||||
 | 
					    # Find the combinations for one more point
 | 
				
			||||||
 | 
					    reg = get_reg(seq, lmdks_cur)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Track the maximum (worst) evaluation
 | 
				
			||||||
 | 
					    diff_max = .0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    point = ()
 | 
				
			||||||
 | 
					    for r in reg:
 | 
				
			||||||
 | 
					      # Evaluate current
 | 
				
			||||||
 | 
					      eval_cur = eval_seq(get_rel_dists(seq, r, lmdks_cur))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Compare evaluations
 | 
				
			||||||
 | 
					      if abs(eval_cur - eval_orig) >= diff_max:
 | 
				
			||||||
 | 
					        diff_max = abs(eval_cur - eval_orig)
 | 
				
			||||||
 | 
					        point = r
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Save new point to landmarks
 | 
				
			||||||
 | 
					    lmdks_cur = np.append(lmdks_cur, point)
 | 
				
			||||||
 | 
					    lmdks_cur.sort()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Add new option
 | 
				
			||||||
 | 
					    non_opts.append(np.setdiff1d(lmdks_cur, lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return non_opts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					  Finds a set of regular points from bottom (many) to top (less)
 | 
				
			||||||
 | 
					  (seems to perform worse than the top-to-bottom approach).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Parameters:
 | 
				
			||||||
 | 
					    seq - The point sequence.
 | 
				
			||||||
 | 
					    lmdks - The landmarks.
 | 
				
			||||||
 | 
					  Returns:
 | 
				
			||||||
 | 
					    The resulting set of options.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Requirements:
 | 
				
			||||||
 | 
					    For all possible sets of regular points n such that n = len(seq) - len(lmdks).
 | 
				
			||||||
 | 
					    The result is a set of options for every possible value of n and for all possible combinations for each n.
 | 
				
			||||||
 | 
					    Time  - O(n^2)
 | 
				
			||||||
 | 
					    Space - O(n^2)
 | 
				
			||||||
 | 
					'''
 | 
				
			||||||
 | 
					def get_opts_from_bottom(seq, lmdks):
 | 
				
			||||||
 | 
					  # Evaluate the original
 | 
				
			||||||
 | 
					  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Start with all regular points as landmarks
 | 
				
			||||||
 | 
					  lmdks_cur = np.array(get_reg(seq, lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  opts = [lmdks_cur]
 | 
				
			||||||
 | 
					  while lmdks_cur.size != 1:
 | 
				
			||||||
 | 
					    # Track the minimum (best) evaluation
 | 
				
			||||||
 | 
					    diff_min = float('inf')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    point = ()
 | 
				
			||||||
 | 
					    for lmdk in lmdks_cur:
 | 
				
			||||||
 | 
					      # Evaluate current by removing one point
 | 
				
			||||||
 | 
					      eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Compare evaluations
 | 
				
			||||||
 | 
					      if abs(eval_cur - eval_orig) <= diff_min:
 | 
				
			||||||
 | 
					        diff_min = abs(eval_cur - eval_orig)
 | 
				
			||||||
 | 
					        point = lmdk
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Remove point from landmarks
 | 
				
			||||||
 | 
					    lmdks_cur = np.setdiff1d(lmdks_cur, point)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Add new option
 | 
				
			||||||
 | 
					    opts.append(np.setdiff1d(lmdks_cur, lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return opts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_non_opts_from_bottom(seq, lmdks):
 | 
				
			||||||
 | 
					  # Evaluate the original
 | 
				
			||||||
 | 
					  eval_orig = eval_seq(get_rel_dists(seq, [], lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Start with all regular points as landmarks
 | 
				
			||||||
 | 
					  lmdks_cur = np.array(get_reg(seq, lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  non_opts = [lmdks_cur]
 | 
				
			||||||
 | 
					  while lmdks_cur.size != 1:
 | 
				
			||||||
 | 
					    # Track the maximum (worst) evaluation
 | 
				
			||||||
 | 
					    diff_max = .0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    point = ()
 | 
				
			||||||
 | 
					    for lmdk in lmdks_cur:
 | 
				
			||||||
 | 
					      # Evaluate current by removing one point
 | 
				
			||||||
 | 
					      eval_cur = eval_seq(get_rel_dists(seq, [], np.setdiff1d(lmdks_cur, lmdk)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      # Compare evaluations
 | 
				
			||||||
 | 
					      if abs(eval_cur - eval_orig) >= diff_max:
 | 
				
			||||||
 | 
					        diff_max = abs(eval_cur - eval_orig)
 | 
				
			||||||
 | 
					        point = lmdk
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Remove point from landmarks
 | 
				
			||||||
 | 
					    lmdks_cur = np.setdiff1d(lmdks_cur, point)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Add new option
 | 
				
			||||||
 | 
					    non_opts.append(np.setdiff1d(lmdks_cur, lmdks))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return non_opts
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def test():
 | 
				
			||||||
 | 
					  # Start and end points of the sequence
 | 
				
			||||||
 | 
					  # # Nonrandom
 | 
				
			||||||
 | 
					  # start = 1
 | 
				
			||||||
 | 
					  # end = 10
 | 
				
			||||||
 | 
					  # Random
 | 
				
			||||||
 | 
					  start = 1
 | 
				
			||||||
 | 
					  end = random.randint(start + 1, 100)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Landmarks
 | 
				
			||||||
 | 
					  # # Nonrandom
 | 
				
			||||||
 | 
					  # lmdks = np.array([1, 3, 5, 8])
 | 
				
			||||||
 | 
					  # Random
 | 
				
			||||||
 | 
					  size = random.randint(start, end - 1)
 | 
				
			||||||
 | 
					  lmdks = np.array(random.sample(range(start, end), size))
 | 
				
			||||||
 | 
					  lmdks.sort()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Print the parameters
 | 
				
			||||||
 | 
					  print('Start    : %d\n'
 | 
				
			||||||
 | 
					        'End      : %d\n'
 | 
				
			||||||
 | 
					        'Size     : %d\n'
 | 
				
			||||||
 | 
					        'Landmarks: %s'
 | 
				
			||||||
 | 
					        %(start, end, len(lmdks), str(lmdks)))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Get the point sequence
 | 
				
			||||||
 | 
					  seq = get_seq(start, end)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Almost optimal solution
 | 
				
			||||||
 | 
					  # print('\nOptimal...')
 | 
				
			||||||
 | 
					  # t = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # opts_optim = get_opts_optim(seq, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # print('Time:', time.time() - t, 'secs\n')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # print_rslt(seq, opts_optim, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Top to bottom approach
 | 
				
			||||||
 | 
					  print('\nTop to bottom heuristic...')
 | 
				
			||||||
 | 
					  t = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  opts = get_opts_from_top(seq, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  print('Time:', time.time() - t, 'secs')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # print_rslt(seq, opts, lmdks)
 | 
				
			||||||
 | 
					  diff_opt = print_diff(seq, opts, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  print('Non optimal version...')
 | 
				
			||||||
 | 
					  non_opts = get_non_opts_from_top(seq, lmdks)
 | 
				
			||||||
 | 
					  diff_non_opt = print_diff(seq, non_opts, lmdks)
 | 
				
			||||||
 | 
					  print('Non optimal is %.2f%% different ([+]: worse | [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # Bottom to top approach
 | 
				
			||||||
 | 
					  # Seems to perform worse
 | 
				
			||||||
 | 
					  print('\nBottom to top heuristic...')
 | 
				
			||||||
 | 
					  t = time.time()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  opts = get_opts_from_bottom(seq, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  print('Time:', time.time() - t, 'secs')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # print_rslt(seq, opts, lmdks)
 | 
				
			||||||
 | 
					  diff_opt = print_diff(seq, opts, lmdks)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  print('Non optimal version...')
 | 
				
			||||||
 | 
					  non_opts = get_non_opts_from_bottom(seq, lmdks)
 | 
				
			||||||
 | 
					  diff_non_opt = print_diff(seq, non_opts, lmdks)
 | 
				
			||||||
 | 
					  print('Non optimal is %.2f%% different ([+]: worse | [-]: better).' %(100*(diff_non_opt - diff_opt)/diff_opt))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  # # Debugging
 | 
				
			||||||
 | 
					  # # Number of desired actual and dummy landmarks
 | 
				
			||||||
 | 
					  # k = len(lmdks) + 5
 | 
				
			||||||
 | 
					  # # Number of dummy landmarks
 | 
				
			||||||
 | 
					  # n = k - len(lmdks)
 | 
				
			||||||
 | 
					  # combs = get_combs(reg, n)
 | 
				
			||||||
 | 
					  # print_rslt(seq, combs, lmdks)
 | 
				
			||||||
 | 
					  # exit()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if __name__ == '__main__':
 | 
				
			||||||
 | 
					  try:
 | 
				
			||||||
 | 
					    test()
 | 
				
			||||||
 | 
					  except KeyboardInterrupt:
 | 
				
			||||||
 | 
					    print('Interrupted by user.')
 | 
				
			||||||
 | 
					    exit()
 | 
				
			||||||
		Reference in New Issue
	
	Block a user