code: Added libraries
This commit is contained in:
		
							
								
								
									
										479
									
								
								code/lib/lmdk_bgt.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										479
									
								
								code/lib/lmdk_bgt.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,479 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
from geopy.distance import distance
 | 
			
		||||
import math
 | 
			
		||||
import numpy as np
 | 
			
		||||
import random
 | 
			
		||||
import sympy as sp
 | 
			
		||||
import time
 | 
			
		||||
import lmdk_lib
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def draw_bgts(title, bgts):
 | 
			
		||||
  '''
 | 
			
		||||
    Plots the allocated budget.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      title - The title of the plot.
 | 
			
		||||
      bgts - The allocated privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  lmdk_lib.plot_init()
 | 
			
		||||
  p = np.arange(1, len(bgts) + 1, 1)
 | 
			
		||||
  plt.plot(
 | 
			
		||||
    p,
 | 
			
		||||
    bgts,
 | 
			
		||||
    linewidth=lmdk_lib.line_width,
 | 
			
		||||
    markersize=lmdk_lib.marker_size,
 | 
			
		||||
    markeredgewidth=0,
 | 
			
		||||
    label=r'$\varepsilon$',
 | 
			
		||||
    marker='s'
 | 
			
		||||
  )
 | 
			
		||||
  lmdk_lib.plot_legend()
 | 
			
		||||
  # Set plot box
 | 
			
		||||
  plt.axis([1, len(bgts), .0, max(bgts) +  max(bgts)/2])
 | 
			
		||||
  # Set x axis label
 | 
			
		||||
  plt.xlabel('Timestamp')
 | 
			
		||||
  # Set y axis label
 | 
			
		||||
  plt.ylabel('Privacy budget')
 | 
			
		||||
  plt.title(title.replace('_', '-'))
 | 
			
		||||
  plt.show()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validate_bgts(seq, lmdks, epsilon, bgts):
 | 
			
		||||
  '''
 | 
			
		||||
    Budget allocation validation.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
    Returns:
 | 
			
		||||
      The index of the privacy budget that caused the failure or -1 if successful.
 | 
			
		||||
  '''
 | 
			
		||||
  bgts_sum = .0
 | 
			
		||||
  # Landmarks
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    if np.any(lmdks[:] == p):
 | 
			
		||||
      bgts_sum += bgts[i]
 | 
			
		||||
  # Regular events
 | 
			
		||||
  bgts_max = bgts_sum
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    if not np.any(lmdks[:] == p):
 | 
			
		||||
      bgts_cur = bgts_sum + bgts[i]
 | 
			
		||||
      if bgts_cur > bgts_max:
 | 
			
		||||
        bgts_max = bgts_cur
 | 
			
		||||
      if bgts_cur > epsilon and not math.isclose(bgts_cur, epsilon, rel_tol=.001):
 | 
			
		||||
        print('Budget allocation validation failed: %.2f%% (%.4f/%.4f)' %(100*bgts_max/epsilon, bgts_max, epsilon))
 | 
			
		||||
        return i
 | 
			
		||||
  print(
 | 
			
		||||
    'Landmark budget allocation: %.2f%% (%.4f/%.4f)\n' 
 | 
			
		||||
    'Overall budget allocation : %.2f%% (%.4f/%.4f)\n' 
 | 
			
		||||
    %(100*bgts_max/epsilon, bgts_max, epsilon,
 | 
			
		||||
    100*sum(bgts)/epsilon, sum(bgts), epsilon,))
 | 
			
		||||
  return -1
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def uniform(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Uniform budget allocation.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
  '''
 | 
			
		||||
  bgts = np.zeros(len(seq))
 | 
			
		||||
  # Allocate enough budget for all landmarks and one regular point
 | 
			
		||||
  k = len(lmdks) + 1
 | 
			
		||||
  # All points are landmarks
 | 
			
		||||
  if k > len(seq):
 | 
			
		||||
    k = len(lmdks)
 | 
			
		||||
  # Allocate the budget
 | 
			
		||||
  for i, _ in enumerate(bgts):
 | 
			
		||||
    bgts[i] = epsilon/k
 | 
			
		||||
  return bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def geometric(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Geometric budget allocation.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      bgts - The privacy budget distribution.
 | 
			
		||||
  '''
 | 
			
		||||
  bgts = np.zeros([len(seq)])
 | 
			
		||||
  epsilon_avail = epsilon
 | 
			
		||||
  # Landmarks
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    if np.any(lmdks[:] == p):
 | 
			
		||||
      bgts[i] = epsilon_avail/2
 | 
			
		||||
      epsilon_avail = epsilon_avail - epsilon_avail/2
 | 
			
		||||
  # Regular events
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    if not np.any(lmdks[:] == p):
 | 
			
		||||
      bgts[i] = epsilon_avail
 | 
			
		||||
  return bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def exponential(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Exponential uniform budget allocation (max to user-level).
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
  '''
 | 
			
		||||
  # Fallback to uniform if zero or max landmarks
 | 
			
		||||
  if len(lmdks) == 0 or len(lmdks) == len(seq):
 | 
			
		||||
    return uniform(seq, lmdks, epsilon)
 | 
			
		||||
  # # In case of seq == lmdks
 | 
			
		||||
  # l = 0
 | 
			
		||||
  # N = epsilon/len(seq)
 | 
			
		||||
  # Otherwise
 | 
			
		||||
  bgts = np.zeros([len(seq)])
 | 
			
		||||
  if len(seq) != len(lmdks):
 | 
			
		||||
    # Find worst case regural point
 | 
			
		||||
    p_sel = 0
 | 
			
		||||
    for p in seq:
 | 
			
		||||
      if not np.any(lmdks[:] == p):
 | 
			
		||||
        p_sel = p
 | 
			
		||||
        break
 | 
			
		||||
    # List all landmark timestamps with the worst regular
 | 
			
		||||
    points = np.append(lmdks, [p_sel])
 | 
			
		||||
    points = np.sort(points)
 | 
			
		||||
    # epsilon_t = N*e^-l*t
 | 
			
		||||
    l = sp.symbols('l', real=True)
 | 
			
		||||
    # Cumulative allocation at landmarks and one extra point, i.e., L union {t}
 | 
			
		||||
    T = seq.max()
 | 
			
		||||
    # Bounding the privacy budgets at L union {t}
 | 
			
		||||
    # epsilon = sum(N*e^-l*t) for t in L union {t}
 | 
			
		||||
    t = sp.symbols('t')
 | 
			
		||||
    eq = 0
 | 
			
		||||
    for t in points:
 | 
			
		||||
      eq += (((epsilon/len(seq))/(sp.exp(-1*l*T)))*sp.exp(-1*l*t))
 | 
			
		||||
    try:
 | 
			
		||||
      l = sp.solve(eq - epsilon, simplify=False, rational=False)[0]
 | 
			
		||||
      # l = sp.solveset(eq <= epsilon, l, sp.S.Reals).sup
 | 
			
		||||
    except Exception:
 | 
			
		||||
      return bgts
 | 
			
		||||
    # Allocate to the last point T epsilon/len(seq), i.e., user-level
 | 
			
		||||
    N = (epsilon/len(seq))/sp.exp(-1*l*T)
 | 
			
		||||
  # Allocate the budget
 | 
			
		||||
  for i, t in enumerate(seq):
 | 
			
		||||
    bgts[i] = N*sp.exp(-1*l*t)
 | 
			
		||||
  return bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def linear_zero(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Linear uniform budget allocation (max to zero).
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
  '''
 | 
			
		||||
  bgts = np.zeros([len(seq)])
 | 
			
		||||
  # Find worst case regural point
 | 
			
		||||
  p_sel = 0
 | 
			
		||||
  for p in seq:
 | 
			
		||||
    if not np.any(lmdks[:] == p):
 | 
			
		||||
      p_sel = p
 | 
			
		||||
      break
 | 
			
		||||
  # Sum all landmark timestamps with the worst regular
 | 
			
		||||
  sum_cur = p_sel
 | 
			
		||||
  for p in lmdks:
 | 
			
		||||
    sum_cur += p
 | 
			
		||||
  # epsilon_t = a*t + b
 | 
			
		||||
  b = sp.symbols('b')
 | 
			
		||||
  # Cumulative allocation at landmarks and one extra point
 | 
			
		||||
  T = seq[len(seq) - 1]
 | 
			
		||||
  b = sp.solve(b - ((b/T)*sum_cur + epsilon)/(len(lmdks) + 1))[0]
 | 
			
		||||
  # Allocate to the last point 0
 | 
			
		||||
  a = -b/T
 | 
			
		||||
  # Allocate the budget
 | 
			
		||||
  for i, t in enumerate(seq):
 | 
			
		||||
    bgts[i] = a*t + b
 | 
			
		||||
  return bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def linear(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Linear uniform budget allocation (max to user-level).
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
  '''
 | 
			
		||||
  # Fallback to uniform if zero or max landmarks
 | 
			
		||||
  if len(lmdks) == 0 or len(lmdks) == len(seq):
 | 
			
		||||
    return uniform(seq, lmdks, epsilon)
 | 
			
		||||
  # Find worst case regural point
 | 
			
		||||
  p_sel = 0
 | 
			
		||||
  for p in seq:
 | 
			
		||||
    if not np.any(lmdks[:] == p):
 | 
			
		||||
      p_sel = p
 | 
			
		||||
      break
 | 
			
		||||
  # Sum all landmark timestamps with the worst regular
 | 
			
		||||
  sum_cur = p_sel + np.sum(lmdks)
 | 
			
		||||
  # epsilon_t = a*t + b
 | 
			
		||||
  b = sp.symbols('b', real=True)
 | 
			
		||||
  # Cumulative allocation at landmarks and one extra point, i.e., L union {t}
 | 
			
		||||
  T = seq.max()
 | 
			
		||||
  # Number of points to take into account
 | 
			
		||||
  k = len(lmdks) + 1
 | 
			
		||||
  # if len(lmdks) == len(seq):
 | 
			
		||||
  #   k = len(lmdks)
 | 
			
		||||
  # Bounding the privacy budgets at L union {t}
 | 
			
		||||
  # epsilon = a*sum(L union {t}) + |L union {t}|*b
 | 
			
		||||
  b = sp.solve(b + (((epsilon/len(seq) - b)/T)*sum_cur - epsilon)/k, simplify=False, rational=False)[0]
 | 
			
		||||
  # Allocate to the last point T epsilon/len(seq), i.e., user-level
 | 
			
		||||
  a = (epsilon/len(seq) - b)/T
 | 
			
		||||
  # Allocate the budget
 | 
			
		||||
  bgts = np.zeros([len(seq)])
 | 
			
		||||
  for i, t in enumerate(seq):
 | 
			
		||||
    bgts[i] = a*t + b
 | 
			
		||||
  return bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def plot_line(x_i, arr, label, marker, line):
 | 
			
		||||
  plt.plot(x_i,
 | 
			
		||||
          arr,
 | 
			
		||||
          label=label,
 | 
			
		||||
          color='black',
 | 
			
		||||
          marker=marker,
 | 
			
		||||
          linestyle=line,
 | 
			
		||||
          # linewidth=1.0,
 | 
			
		||||
          markerfacecolor='none')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def stepped(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Stepped budget allocation.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
  '''
 | 
			
		||||
  bgts = np.zeros([len(seq)])
 | 
			
		||||
  epsilon_avail = epsilon/2
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    # Reduce the available budget when a landmark is reached
 | 
			
		||||
    if np.any(lmdks[:] == p):
 | 
			
		||||
      epsilon_avail = epsilon_avail/2
 | 
			
		||||
    bgts[i] = epsilon_avail
 | 
			
		||||
  return bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def adaptive(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Adaptive budget allocation.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      rls_data - The perturbed data.
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
      skipped - The number of skipped releases.
 | 
			
		||||
  '''
 | 
			
		||||
  # Uniform budget allocation
 | 
			
		||||
  bgts = uniform(seq, lmdks, epsilon)
 | 
			
		||||
  # Released
 | 
			
		||||
  rls_data = [None]*len(seq)
 | 
			
		||||
  # The sampling rate
 | 
			
		||||
  samp_rt = 1
 | 
			
		||||
  # Track landmarks
 | 
			
		||||
  lmdk_cur = 0
 | 
			
		||||
  # Track skipped releases
 | 
			
		||||
  skipped = 0
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    # Check if current point is a landmark
 | 
			
		||||
    if lmdk_lib.is_landmark(p, lmdks):
 | 
			
		||||
      lmdk_cur += 1
 | 
			
		||||
    # Get coordinates
 | 
			
		||||
    loc = (p[1], p[2])
 | 
			
		||||
    if lmdk_lib.should_sample(samp_rt) or i == 0:
 | 
			
		||||
      # Add noise to original data
 | 
			
		||||
      new_loc = lmdk_lib.add_polar_noise(loc, bgts[i])
 | 
			
		||||
      rls_data[i] = [p[0], new_loc[0], new_loc[1], p[3]]
 | 
			
		||||
      # Adjust sampling rate
 | 
			
		||||
      if i > 0:
 | 
			
		||||
        if distance((rls_data[i - 1][1], rls_data[i - 1][2]), new_loc).km*1000 < 1/bgts[i]:
 | 
			
		||||
          # Decrease
 | 
			
		||||
          # samp_rt -= samp_rt*.9
 | 
			
		||||
          # samp_rt -= samp_rt*.75
 | 
			
		||||
          samp_rt -= samp_rt*.5
 | 
			
		||||
          # samp_rt -= samp_rt*.25
 | 
			
		||||
          # samp_rt -= samp_rt*.1
 | 
			
		||||
        else:
 | 
			
		||||
          # Increase
 | 
			
		||||
          # samp_rt += (1 - samp_rt)*.9
 | 
			
		||||
          # samp_rt += (1 - samp_rt)*.75
 | 
			
		||||
          samp_rt += (1 - samp_rt)*.5
 | 
			
		||||
          # samp_rt += (1 - samp_rt)*.25
 | 
			
		||||
          # samp_rt += (1 - samp_rt)*.1
 | 
			
		||||
    else:
 | 
			
		||||
      skipped += 1
 | 
			
		||||
      # Skip current release and approximate with previous
 | 
			
		||||
      rls_data[i] = rls_data[i - 1]
 | 
			
		||||
      if lmdk_lib.is_landmark(p, lmdks):
 | 
			
		||||
        # Allocate the current budget to the following releases uniformly
 | 
			
		||||
        for j in range(i + 1, len(seq)):
 | 
			
		||||
          bgts[j] += bgts[i]/(len(lmdks) - lmdk_cur + 1)
 | 
			
		||||
      # No budget was spent
 | 
			
		||||
      bgts[i] = 0
 | 
			
		||||
  return rls_data, bgts, skipped
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def skip(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Skip landmarks.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      rls_data - The perturbed data.
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
  '''
 | 
			
		||||
  # Event-level budget allocation
 | 
			
		||||
  bgts = np.array(len(seq)*[epsilon])
 | 
			
		||||
  # Released
 | 
			
		||||
  rls_data = [None]*len(seq)
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    # Get coordinates
 | 
			
		||||
    loc = (p[1], p[2])
 | 
			
		||||
    # Add noise to original data
 | 
			
		||||
    new_loc = lmdk_lib.add_polar_noise(loc, bgts[i])
 | 
			
		||||
    # Check if current point is a landmark
 | 
			
		||||
    if lmdk_lib.is_landmark(p, lmdks):
 | 
			
		||||
      if i > 0:
 | 
			
		||||
        # Approximate with previous location
 | 
			
		||||
        new_loc = (rls_data[i - 1][1], rls_data[i - 1][2])
 | 
			
		||||
      bgts[i] = 0
 | 
			
		||||
    rls_data[i] = [p[0], new_loc[0], new_loc[1], p[3]]
 | 
			
		||||
  return rls_data, bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def sample(seq, lmdks, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Publish randomly.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      rls_data - The perturbed data.
 | 
			
		||||
      bgts - The privacy budget allocation.
 | 
			
		||||
      skipped - The number of skipped releases.
 | 
			
		||||
  '''
 | 
			
		||||
  # Uniform budget allocation
 | 
			
		||||
  bgts = uniform(seq, lmdks, epsilon)
 | 
			
		||||
  # Released
 | 
			
		||||
  rls_data = [None]*len(seq)
 | 
			
		||||
  # The sampling rate
 | 
			
		||||
  # (publish with 50% chance)
 | 
			
		||||
  samp_rt = .5
 | 
			
		||||
  # Track landmarks
 | 
			
		||||
  lmdk_cur = 0
 | 
			
		||||
  # Track skipped releases
 | 
			
		||||
  skipped = 0
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    # Check if current point is a landmark
 | 
			
		||||
    if lmdk_lib.is_landmark(p, lmdks):
 | 
			
		||||
      lmdk_cur += 1
 | 
			
		||||
    # Get coordinates
 | 
			
		||||
    loc = (p[1], p[2])
 | 
			
		||||
    if i == 0 or lmdk_lib.should_sample(samp_rt):
 | 
			
		||||
      # Add noise to original data
 | 
			
		||||
      new_loc = lmdk_lib.add_polar_noise(loc, bgts[i])
 | 
			
		||||
      rls_data[i] = [p[0], new_loc[0], new_loc[1], p[3]]
 | 
			
		||||
    else:
 | 
			
		||||
      skipped += 1
 | 
			
		||||
      # Skip current release and approximate with previous
 | 
			
		||||
      rls_data[i] = rls_data[i - 1]
 | 
			
		||||
      if lmdk_lib.is_landmark(p, lmdks):
 | 
			
		||||
        # Allocate the current budget to the following releases uniformly
 | 
			
		||||
        for j in range(i + 1, len(seq)):
 | 
			
		||||
          bgts[j] += bgts[i]/(len(lmdks) - lmdk_cur + 1)
 | 
			
		||||
      # No budget was spent
 | 
			
		||||
      bgts[i] = 0
 | 
			
		||||
  return rls_data, bgts, skipped
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def uniform_r(seq, lmdks, epsilon):
 | 
			
		||||
  # Released
 | 
			
		||||
  rls_data = [None]*len(seq)
 | 
			
		||||
  # Budgets
 | 
			
		||||
  bgts = uniform(seq, lmdks, epsilon)
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    # Get coordinates
 | 
			
		||||
    loc = (p[1], p[2])
 | 
			
		||||
    # Add noise to original data
 | 
			
		||||
    new_loc = lmdk_lib.add_polar_noise(loc, bgts[i])
 | 
			
		||||
    rls_data[i] = [p[0], new_loc[0], new_loc[1], p[3]]
 | 
			
		||||
  return rls_data, bgts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def utility_analysis(seq, lmdks, o, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Analyze the utility.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq     - The point sequence.
 | 
			
		||||
      lmdks   - The landmarks.
 | 
			
		||||
      o       - The perturbed data.
 | 
			
		||||
      epsilon - The available privacy budget.
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  # Estimate Mean Absolute Error
 | 
			
		||||
  mae = 0
 | 
			
		||||
  # Compare with uniform
 | 
			
		||||
  mae_u = 0
 | 
			
		||||
  bgts_u = uniform(seq, lmdks, epsilon)
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    mae += distance((p[1], p[2]), (o[i][1], o[i][2])).km*1000/len(seq)
 | 
			
		||||
    new_loc = lmdk_lib.add_polar_noise((p[1], p[2]), bgts_u[i])
 | 
			
		||||
    mae_u += distance((p[1], p[2]), (new_loc[0], new_loc[1])).km*1000/len(seq)
 | 
			
		||||
    
 | 
			
		||||
  print(
 | 
			
		||||
    '\n########## Analysis ##########\n'
 | 
			
		||||
    'MAE uniform : %.2fm\n'
 | 
			
		||||
    'MAE current : %.2fm\n'
 | 
			
		||||
    'Difference  : %.2f%%\n'
 | 
			
		||||
    '##############################\n'
 | 
			
		||||
    %(mae_u, mae, 100*(mae - mae_u)/mae_u)
 | 
			
		||||
  )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def mae(seq, o):
 | 
			
		||||
  mae = 0
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    mae += distance((p[1], p[2]), (o[i][1], o[i][2])).km*1000/len(seq)
 | 
			
		||||
  return mae
 | 
			
		||||
							
								
								
									
										967
									
								
								code/lib/lmdk_lib.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										967
									
								
								code/lib/lmdk_lib.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,967 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from geopy.distance import distance
 | 
			
		||||
import heapq
 | 
			
		||||
import itertools
 | 
			
		||||
from scipy.special import lambertw
 | 
			
		||||
import math
 | 
			
		||||
import numpy as np
 | 
			
		||||
import os
 | 
			
		||||
import random
 | 
			
		||||
import scipy.stats as stats
 | 
			
		||||
import time
 | 
			
		||||
from matplotlib import pyplot as plt
 | 
			
		||||
import zipfile
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Plot globals
 | 
			
		||||
dpi = 300
 | 
			
		||||
font_size = 24.0
 | 
			
		||||
line_width = 2.0
 | 
			
		||||
marker_size = 14.0
 | 
			
		||||
tick_length = 8.0
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def add_polar_noise(loc, epsilon):
 | 
			
		||||
  '''
 | 
			
		||||
    Add noise from planar Laplace.
 | 
			
		||||
    [https://github.com/chatziko/location-guard/blob/master/src/js/common/laplace.js]
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      loc     - The original location 
 | 
			
		||||
    loc     - The original location 
 | 
			
		||||
      loc     - The original location 
 | 
			
		||||
      epsilon - The privacy budget
 | 
			
		||||
    Returns:
 | 
			
		||||
      The perturbed location (lat, lng)
 | 
			
		||||
  '''
 | 
			
		||||
  # The bearing in radians.
 | 
			
		||||
  # Random number in [0, 2*pi),
 | 
			
		||||
  theta = np.random.uniform(low = 0, high = np.pi*2)
 | 
			
		||||
  # The distance in meters.
 | 
			
		||||
  # Use the inverse cumulative polar laplacian distribution function.
 | 
			
		||||
  r = -np.real((lambertw((np.random.uniform(low = 0, high = 1) - 1)/np.e, k = -1) + 1)/epsilon)
 | 
			
		||||
  new_loc = None
 | 
			
		||||
  while not is_valid(new_loc):
 | 
			
		||||
    new_loc = distance(kilometers = r/1000).destination(point = loc, bearing = np.degrees(theta))
 | 
			
		||||
  return new_loc
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def draw_line(line, x, label, marker):
 | 
			
		||||
  axis_x = list(range(len(x)))
 | 
			
		||||
  # plt.xticks(axis_x, x)
 | 
			
		||||
  plt.plot(axis_x, x, label=label, marker=marker)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def eval_seq(dists):
 | 
			
		||||
  '''
 | 
			
		||||
    Calculate the standard deviation of a list of distances.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      dists - A list of distances.
 | 
			
		||||
    Returns:
 | 
			
		||||
      The standard deviation of the distances.
 | 
			
		||||
  '''
 | 
			
		||||
  return np.std(dists)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_abs_dists(seq, comb, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the distances of the points in a combination from the start and end of the sequence.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      comb - A possible point combination for a specified size.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      dists - The distances of the points in a combination.
 | 
			
		||||
  '''
 | 
			
		||||
  cur = np.append(comb, lmdks)
 | 
			
		||||
  cur.sort()
 | 
			
		||||
  start = seq[0]
 | 
			
		||||
  end = seq[len(seq) - 1]
 | 
			
		||||
  dists = np.zeros([len(cur)*2])
 | 
			
		||||
  # Check if there are any points
 | 
			
		||||
  if cur.size:
 | 
			
		||||
    for i in range(0, len(cur)):
 | 
			
		||||
      # From the start
 | 
			
		||||
      dists[i*2] = abs(cur[i] - start)
 | 
			
		||||
      # From the end
 | 
			
		||||
      dists[i*2 + 1] = abs(cur[i] - end)
 | 
			
		||||
  return dists
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_combs(seq, size):
 | 
			
		||||
  '''
 | 
			
		||||
    Get all the possible landmark point combinations for a specified size.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      size - The desired number of landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      All the possible landmark combinations for a specified size.
 | 
			
		||||
  '''
 | 
			
		||||
  return itertools.combinations(seq, size)
 | 
			
		||||
 | 
			
		||||
def get_combs_h(h, hist, max):
 | 
			
		||||
  combs = []
 | 
			
		||||
  for comb in itertools.product(range(h + 1), repeat=len(hist)):
 | 
			
		||||
    check = True
 | 
			
		||||
    for i, c in enumerate(comb):
 | 
			
		||||
      if sum(comb) > max or c < hist[i] or c > h:
 | 
			
		||||
        check = False
 | 
			
		||||
        break
 | 
			
		||||
    if check:
 | 
			
		||||
      combs.append(comb)
 | 
			
		||||
  return combs
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_emd(h1, h2):
 | 
			
		||||
  '''
 | 
			
		||||
    The earth mover's distance (EMD), or Wasserstein distance, of two histograms.
 | 
			
		||||
    [https://stats.stackexchange.com/a/151362]
 | 
			
		||||
    Pele et al., The quadratic-chi histogram distance family
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      h1 - A histogram.
 | 
			
		||||
      h2 - Another histogram.
 | 
			
		||||
    Return:
 | 
			
		||||
      The EMD of the histograms.
 | 
			
		||||
  '''
 | 
			
		||||
  return stats.wasserstein_distance(h1,h2)
 | 
			
		||||
  # return stats.wasserstein_distance(get_norm_hist(h1), get_norm_hist(h2))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_hist(seq, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Create a histogram from a set of landmarks.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Return:
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
  '''
 | 
			
		||||
  # # Create a landmark sequence relative to the entire sequence
 | 
			
		||||
  # lmdks_rel = np.array(lmdks)
 | 
			
		||||
  # # Add the start of the sequence if not in landmarks
 | 
			
		||||
  # if not np.any(lmdks_rel[:] == start):
 | 
			
		||||
  #   lmdks_rel = np.insert(lmdks_rel, 0, start)
 | 
			
		||||
  # # Add the end of the sequence if not in landmarks
 | 
			
		||||
  # if not np.any(lmdks_rel[:] == end):
 | 
			
		||||
  #   lmdks_rel = np.append(lmdks_rel, end)
 | 
			
		||||
 | 
			
		||||
  # Dealing with zeros.
 | 
			
		||||
  if len(seq) == 0 or len(lmdks) == 0:
 | 
			
		||||
    return np.zeros(math.ceil(max(seq))), 1
 | 
			
		||||
 | 
			
		||||
  # Interquartile range (IQR) is a measure of statistical dispersion, being equal to the difference between 75th and 25th percentiles, or between upper and lower quartiles.
 | 
			
		||||
  # https://en.wikipedia.org/wiki/Interquartile_range
 | 
			
		||||
  iqr = stats.iqr(lmdks)
 | 
			
		||||
 | 
			
		||||
  # Use the Freedman–Diaconis rule to select the width of the bins to be used in the histogram.
 | 
			
		||||
  # Robust (resilient to outliers) estimator that takes into account data variability and data size.
 | 
			
		||||
  # https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule
 | 
			
		||||
  h = 2*iqr*len(lmdks)**(-1/3)
 | 
			
		||||
  if h < 1:
 | 
			
		||||
    h = 1
 | 
			
		||||
  # On the number of bins and width:
 | 
			
		||||
  # https://en.wikipedia.org/wiki/Histogram#Number_of_bins_and_width
 | 
			
		||||
 | 
			
		||||
  # Normalize the interval
 | 
			
		||||
  h = math.ceil(max(seq)/math.ceil(max(seq)/math.ceil(h)))
 | 
			
		||||
 | 
			
		||||
  # Create an empty histogram with intervals of size h
 | 
			
		||||
  hist = np.zeros(math.ceil(max(seq)/h))
 | 
			
		||||
 | 
			
		||||
  for lmdk in lmdks:
 | 
			
		||||
    hist[int(lmdk/h) - 1] += 1
 | 
			
		||||
 | 
			
		||||
  return hist, h
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_lmdks(seq, size, dist):
 | 
			
		||||
  '''
 | 
			
		||||
    Get a landmark set for a distribution type.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq  - The point sequence.
 | 
			
		||||
      size - The landmarks' size.
 | 
			
		||||
      dist - The distribution type code.
 | 
			
		||||
            -1 - Left-skewed
 | 
			
		||||
              0 - Symmetric
 | 
			
		||||
            +1 - Right-skewed
 | 
			
		||||
            +2 - Bimodal
 | 
			
		||||
            +3 - Uniform
 | 
			
		||||
    Return:
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
  '''
 | 
			
		||||
  scale = len(seq)/10
 | 
			
		||||
  if dist == -1 or dist == 0 or dist == 1:
 | 
			
		||||
    return MixtureModel([truncnorm(min(seq), max(seq), get_loc(seq, dist), scale)]).sample(min(seq), max(seq), [], size)
 | 
			
		||||
  elif dist == 2:
 | 
			
		||||
    return MixtureModel([
 | 
			
		||||
      truncnorm(min(seq), max(seq), get_loc(seq, -1), scale),
 | 
			
		||||
      truncnorm(min(seq), max(seq), get_loc(seq, +1), scale)
 | 
			
		||||
    ]).sample(min(seq), max(seq), [], size)
 | 
			
		||||
  else:
 | 
			
		||||
    return np.sort(np.random.choice(np.arange(min(seq), max(seq) + 1, 1), size=size, replace=False))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_lmdks_rand(seq, size, skew):
 | 
			
		||||
  '''
 | 
			
		||||
    Get a landmark set with a given skewness.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      size - The landmarks' size.
 | 
			
		||||
      skew - The desired skewness.
 | 
			
		||||
    Return:
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
  '''
 | 
			
		||||
  # Get all possible landmark combinations
 | 
			
		||||
  lmdk_combs = get_combs(seq, size)
 | 
			
		||||
  # Find a set of landmarks for the given requirements
 | 
			
		||||
  lmdks = ()
 | 
			
		||||
  for comb in lmdk_combs:
 | 
			
		||||
    skew_cur = get_skew(comb)
 | 
			
		||||
    # Check if skewness is close to the requirement or have the same sign
 | 
			
		||||
    if math.isclose(skew_cur, skew, rel_tol=.1) or ((skew_cur < 0) == (skew < 0)):
 | 
			
		||||
      lmdks = comb
 | 
			
		||||
      break
 | 
			
		||||
  return lmdks
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_loc(seq, skew):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the location of the sequence with a given skewness.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq  - The point sequence.
 | 
			
		||||
      skew - The desired skewness.
 | 
			
		||||
    Return:
 | 
			
		||||
      The location.
 | 
			
		||||
  '''
 | 
			
		||||
  loc_min = min(seq)
 | 
			
		||||
  loc_max = max(seq)
 | 
			
		||||
  if skew < 0:
 | 
			
		||||
    loc_min = (max(seq) - min(seq))/2
 | 
			
		||||
  elif skew > 0:
 | 
			
		||||
    loc_max = (max(seq) - min(seq))/2
 | 
			
		||||
  return int(loc_min + (loc_max - loc_min)/2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_mean(seq, hist, h):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the mean of the histogram.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
    Return:
 | 
			
		||||
      The mean of the histogram.
 | 
			
		||||
  '''
 | 
			
		||||
  sum = 0
 | 
			
		||||
  for idx, count in enumerate(hist):
 | 
			
		||||
    # Find bin limits
 | 
			
		||||
    start = min(seq) + h*idx
 | 
			
		||||
    end = min(seq) + h*(idx + 1)
 | 
			
		||||
    if(end > max(seq)):
 | 
			
		||||
      end = max(seq)
 | 
			
		||||
    
 | 
			
		||||
    sum += (start + (end - start)/2)*count
 | 
			
		||||
  
 | 
			
		||||
  return sum/np.sum(hist)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_norm(h1, h2):
 | 
			
		||||
  '''
 | 
			
		||||
    The Euclidean distance of two histograms.
 | 
			
		||||
    [https://stackoverflow.com/a/1401828/13123075]
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      h1 - A histogram.
 | 
			
		||||
      h2 - Another histogram.
 | 
			
		||||
    Return:
 | 
			
		||||
      The Euclidean distance of the histograms.
 | 
			
		||||
  '''
 | 
			
		||||
  return np.linalg.norm(h1 - h2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_norm_hist(hist):
 | 
			
		||||
  '''
 | 
			
		||||
    Normalize the histogram.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      hist - The original histogram.
 | 
			
		||||
    Return:
 | 
			
		||||
      The normalized histogram.
 | 
			
		||||
  '''
 | 
			
		||||
  # In-place type conversion
 | 
			
		||||
  norm_hist = hist.astype(np.float32)
 | 
			
		||||
  n = np.sum(norm_hist)
 | 
			
		||||
  for i, a in enumerate(norm_hist):
 | 
			
		||||
    if a:
 | 
			
		||||
      norm_hist[i] = a/n
 | 
			
		||||
  return norm_hist
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_opts(seq, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Finds all the possible valid options.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      A list with all the possible options.
 | 
			
		||||
 | 
			
		||||
    Requirements:
 | 
			
		||||
      n = Regular points
 | 
			
		||||
      r = The size of a combination
 | 
			
		||||
      Time  - O(C(n, r) + 2^C(n, r)), because O(n*C(n, r)) = O(n*n^min(r, n - r))
 | 
			
		||||
      Space - O(r*C(n, r))
 | 
			
		||||
  '''
 | 
			
		||||
  reg = get_reg(seq, lmdks)
 | 
			
		||||
  # Find all possible combinations for every k
 | 
			
		||||
  combs = []
 | 
			
		||||
  for k in range(len(lmdks) + 1, seq[len(seq) - 1] + 1):
 | 
			
		||||
    combs.append(get_combs(reg, k - len(lmdks)))
 | 
			
		||||
 | 
			
		||||
  # Find all possible options for all combinations
 | 
			
		||||
  opts = itertools.product(*combs)
 | 
			
		||||
 | 
			
		||||
  # Keep only valid options, i.e., larger sets must contain every element of every smaller set
 | 
			
		||||
  rslts = []
 | 
			
		||||
  for opt in opts:
 | 
			
		||||
    for i, _ in enumerate(opt):
 | 
			
		||||
      if i and not set(opt[i - 1]).issubset(opt[i]):
 | 
			
		||||
        break
 | 
			
		||||
    if i == len(opt) - 1:
 | 
			
		||||
      rslts.append(opt)
 | 
			
		||||
 | 
			
		||||
  return rslts
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_reg(seq, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the regular points, i.e., non-landmarks, in a sequence.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      The non-landmark points in a sequence.
 | 
			
		||||
  '''
 | 
			
		||||
  return np.array([i for i in seq if i not in lmdks])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_rel_dists(seq, comb, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the distances of the points in a combination from the nearest point of reference.
 | 
			
		||||
    That is, the previous/next point or the start/end if closer.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      comb - A possible point combination for a specified size.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      dists - The distances of the points in a combination.
 | 
			
		||||
  '''
 | 
			
		||||
  # TODO: Review distances. Maybe take into account both ways?
 | 
			
		||||
  cur = np.append(comb, lmdks)
 | 
			
		||||
  cur.sort()
 | 
			
		||||
  start = seq[0]
 | 
			
		||||
  end = seq[len(seq) - 1]
 | 
			
		||||
  dists = np.zeros([len(cur) + 1])
 | 
			
		||||
  # Check if there are any points
 | 
			
		||||
  if cur.size:
 | 
			
		||||
    # First
 | 
			
		||||
    dists[0] = abs(cur[0] - start)
 | 
			
		||||
    # In-between
 | 
			
		||||
    for i in range(0, len(cur) - 1):
 | 
			
		||||
      dists[i + 1] = abs(cur[i] - cur[i + 1])
 | 
			
		||||
    # Last
 | 
			
		||||
    dists[len(cur)] = abs(cur[len(cur) - 1] - end)
 | 
			
		||||
  return dists
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_seq(start, end):
 | 
			
		||||
  '''
 | 
			
		||||
    Get a sequence of points.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      start - The starting point of the sequence.
 | 
			
		||||
      end - The ending point of the sequence.
 | 
			
		||||
    Returns:
 | 
			
		||||
      A point sequence.
 | 
			
		||||
  '''
 | 
			
		||||
  return np.arange(start, end + 1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_shannon_ent(a):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the Shannon entropy of an array.
 | 
			
		||||
    It can be used to calculate the uniformity of a histogram.
 | 
			
		||||
    Uniform probability yields maximum uncertainty and therefore maximum entropy.
 | 
			
		||||
    Entropy, then, can only decrease from the value associated with uniform probability.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      a - An array of integers
 | 
			
		||||
    Return:
 | 
			
		||||
      The Shannon entropy.
 | 
			
		||||
  '''
 | 
			
		||||
  # Keep only the non-zero elements of array a because log2(0) is -inf.
 | 
			
		||||
  # The entropy of an event with zero probability is 0.
 | 
			
		||||
  a = a[a != 0]
 | 
			
		||||
  # Histograms are discrete probability distributions.
 | 
			
		||||
  # We convert the counts to probabilities.
 | 
			
		||||
  p_a = a/np.sum(a)
 | 
			
		||||
  # Base 2 gives the unit of bits (or 'shannons').
 | 
			
		||||
  return -np.sum(p_a*np.log2(p_a))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_shannon_ent_norm(a):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the normalized Shannon entropy of an array.
 | 
			
		||||
    It ranges from 0 (high entropy - close to uniform) to 1 (low entropy). 
 | 
			
		||||
  It ranges from 0 (high entropy - close to uniform) to 1 (low entropy). 
 | 
			
		||||
    It ranges from 0 (high entropy - close to uniform) to 1 (low entropy). 
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      a - An array of integers
 | 
			
		||||
    Return:
 | 
			
		||||
      The normalized Shannon entropy.
 | 
			
		||||
  '''
 | 
			
		||||
  return 1 - get_shannon_ent(a)/np.log2(len(a))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_skew(lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Fisher-Pearson coefficient of skewness.
 | 
			
		||||
    negative - left (more data items right)
 | 
			
		||||
    zero     - symmetric data
 | 
			
		||||
    positive - right (more data items left) 
 | 
			
		||||
  positive - right (more data items left) 
 | 
			
		||||
    positive - right (more data items left) 
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
    Return:
 | 
			
		||||
      Fisher-Pearson coefficient of skewness.
 | 
			
		||||
  '''
 | 
			
		||||
# def get_skew(seq, hist, h):
 | 
			
		||||
#   return get_third(seq, hist, h)/get_std(seq, hist, h)**3
 | 
			
		||||
  return stats.skew(lmdks)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_std(seq, hist, h):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the standard deviation.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
    Return:
 | 
			
		||||
      The standard deviation.
 | 
			
		||||
  '''
 | 
			
		||||
  hist_mean = get_mean(seq, hist, h)
 | 
			
		||||
 | 
			
		||||
  sum = 0
 | 
			
		||||
  for idx, count in enumerate(hist):
 | 
			
		||||
    # Find bin limits
 | 
			
		||||
    start = min(seq) + h*idx
 | 
			
		||||
    end = min(seq) + h*(idx + 1)
 | 
			
		||||
    if(end > max(seq)):
 | 
			
		||||
      end = max(seq)
 | 
			
		||||
    
 | 
			
		||||
    sum += (((start + (end - start)/2) - hist_mean)**2)*count
 | 
			
		||||
 | 
			
		||||
  return sum/np.sum(hist)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_third(seq, hist, h):
 | 
			
		||||
  '''
 | 
			
		||||
    Get the third central moment.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      hist - The histogram of landmarks.
 | 
			
		||||
      h - The bin size.
 | 
			
		||||
    Return:
 | 
			
		||||
      The third central moment.
 | 
			
		||||
  '''
 | 
			
		||||
  hist_mean = get_mean(seq, hist, h)
 | 
			
		||||
 | 
			
		||||
  sum = 0
 | 
			
		||||
  for idx, count in enumerate(hist):
 | 
			
		||||
    # Find bin limits
 | 
			
		||||
    start = min(seq) + h*idx
 | 
			
		||||
    end = min(seq) + h*(idx + 1)
 | 
			
		||||
    if(end > max(seq)):
 | 
			
		||||
      end = max(seq)
 | 
			
		||||
    
 | 
			
		||||
    sum += (((start + (end - start)/2) - hist_mean)**3)*count
 | 
			
		||||
 | 
			
		||||
  return sum/np.sum(hist)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def dist_type_to_str(d):
 | 
			
		||||
  '''
 | 
			
		||||
    Convert the distribution type code to string.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      t - The distribution type code.
 | 
			
		||||
          -1 - Left-skewed
 | 
			
		||||
          0 - Symmetric
 | 
			
		||||
          +1 - Right-skewed
 | 
			
		||||
          +2 - Bimodal
 | 
			
		||||
          +3 - Uniform
 | 
			
		||||
    Return:
 | 
			
		||||
      The distribution type.
 | 
			
		||||
  '''
 | 
			
		||||
  if d == -1:
 | 
			
		||||
    return 'Left-skewed'
 | 
			
		||||
  elif d == 0:
 | 
			
		||||
    return 'Symmetric'
 | 
			
		||||
  elif d == 1:
 | 
			
		||||
    return 'Right-skewed'
 | 
			
		||||
  elif d == 2:
 | 
			
		||||
    return 'Bimodal'
 | 
			
		||||
  elif d == 3:
 | 
			
		||||
    return 'Uniform'
 | 
			
		||||
  else:
 | 
			
		||||
    return 'Undefined'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MixtureModel(stats.rv_continuous):
 | 
			
		||||
  '''
 | 
			
		||||
    A mixture model of continuous probability distributions
 | 
			
		||||
    [https://stackoverflow.com/a/47763145/13123075]
 | 
			
		||||
  '''
 | 
			
		||||
  def __init__(self, submodels, *args, **kwargs):
 | 
			
		||||
    super().__init__(*args, **kwargs)
 | 
			
		||||
    self.submodels = submodels
 | 
			
		||||
 | 
			
		||||
  def _pdf(self, x):
 | 
			
		||||
    pdf = self.submodels[0].pdf(x)
 | 
			
		||||
    for submodel in self.submodels[1:]:
 | 
			
		||||
      pdf += submodel.pdf(x)
 | 
			
		||||
    pdf /= len(self.submodels)
 | 
			
		||||
    return pdf
 | 
			
		||||
 | 
			
		||||
  def rvs(self, size):
 | 
			
		||||
    submodel_choices = np.random.randint(len(self.submodels), size=size)
 | 
			
		||||
    submodel_samples = [submodel.rvs(size=size) for submodel in self.submodels]
 | 
			
		||||
    rvs = np.choose(submodel_choices, submodel_samples)
 | 
			
		||||
    return rvs
 | 
			
		||||
 | 
			
		||||
  def sample(self, lower, upper, sample, size):
 | 
			
		||||
    '''
 | 
			
		||||
      Sample from the mixture model without replacement.
 | 
			
		||||
      [https://stackoverflow.com/a/20548895/13123075]
 | 
			
		||||
      
 | 
			
		||||
      Parameters:
 | 
			
		||||
        self   - The mixture model.
 | 
			
		||||
        lower  - The lower bound of the range.
 | 
			
		||||
        upper  - The upper bound of the range.
 | 
			
		||||
        sample - Items to be excluded.
 | 
			
		||||
        size   - The sample size.
 | 
			
		||||
      Returns:
 | 
			
		||||
        A sample.
 | 
			
		||||
    '''
 | 
			
		||||
    w = self.pdf(range(int(lower), int(upper) + 1))
 | 
			
		||||
    idx = []
 | 
			
		||||
    for i in range(int(lower), int(upper) + 1):
 | 
			
		||||
      if i not in sample:
 | 
			
		||||
        idx.append(i)
 | 
			
		||||
    elt = [(math.log(random.random()) / w[i - 1], i) for i in idx]
 | 
			
		||||
    return np.sort([x[1] for x in heapq.nlargest(size, elt)])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def plot_init():
 | 
			
		||||
  '''
 | 
			
		||||
    Initialize the plot.
 | 
			
		||||
  '''
 | 
			
		||||
  # Reset
 | 
			
		||||
  plt.close('all')
 | 
			
		||||
  # Style
 | 
			
		||||
  plt.style.use('classic')
 | 
			
		||||
  # DPI
 | 
			
		||||
  plt.figure(dpi=dpi)
 | 
			
		||||
  # Font
 | 
			
		||||
  plt.rc('font', family='sans-serif')
 | 
			
		||||
  plt.rc('font', **{'sans-serif':['Liberation Sans']})
 | 
			
		||||
  plt.rc('font', size=font_size)
 | 
			
		||||
  # Grid
 | 
			
		||||
  plt.setp(plt.figure().add_subplot(111).spines.values(), linewidth=line_width)
 | 
			
		||||
  plt.grid(True, axis='y', linewidth=line_width)
 | 
			
		||||
  # Ticks
 | 
			
		||||
  plt.gca().tick_params(which='both', width=line_width)
 | 
			
		||||
  plt.gca().tick_params(which='major', length=tick_length)
 | 
			
		||||
  plt.gca().tick_params(which='minor', length=tick_length/2)
 | 
			
		||||
  # Colors
 | 
			
		||||
  plt.subplot(111).set_prop_cycle('color', ['#212121', '#616161', '#9e9e9e', '#bdbdbd', '#e0e0e0', 'f5f5f5'])
 | 
			
		||||
  # Layout
 | 
			
		||||
  plt.tight_layout()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def plot_legend():
 | 
			
		||||
  '''
 | 
			
		||||
    Initialize the plot legend.
 | 
			
		||||
  '''
 | 
			
		||||
  plt.legend(
 | 
			
		||||
    loc='best',
 | 
			
		||||
    fontsize=font_size,
 | 
			
		||||
    numpoints=1,
 | 
			
		||||
    borderpad=.2,      # default: 0.4
 | 
			
		||||
    labelspacing=.25,  # default: 0.5
 | 
			
		||||
    handlelength=2.0,  # default: 2.0
 | 
			
		||||
    handletextpad=.4,  # default: 0.8
 | 
			
		||||
    borderaxespad=.5,  # default: 0.5
 | 
			
		||||
  )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def save_plot(path):
 | 
			
		||||
  '''
 | 
			
		||||
    Save the plot to a file.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      path - The desired location.
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  # Save plot
 | 
			
		||||
  os.makedirs(os.path.dirname(path), exist_ok=True)
 | 
			
		||||
  plt.savefig(path, bbox_inches='tight')
 | 
			
		||||
  # Clean up
 | 
			
		||||
  plt.clf()
 | 
			
		||||
  plt.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def print_lmdks(seq, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Print the landmarks.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq - The point sequence.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  print('\n######### Landmarks ##########')
 | 
			
		||||
  print(lmdks)
 | 
			
		||||
  print('[', end='', flush=True)
 | 
			
		||||
  for p in seq:
 | 
			
		||||
    if np.any(lmdks[:] == p):
 | 
			
		||||
      print('\'', end='', flush=True)
 | 
			
		||||
    else:
 | 
			
		||||
      print('.', end='', flush=True)
 | 
			
		||||
  print(']', end='', flush=True)
 | 
			
		||||
  print('\n##############################\n')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def plot_dist(seq, dist):
 | 
			
		||||
  '''
 | 
			
		||||
    Plot the probability distribution.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq  - The point sequence.
 | 
			
		||||
      size - The landmarks' size.
 | 
			
		||||
      dist - The distribution type code.
 | 
			
		||||
            -1 - Left-skewed
 | 
			
		||||
              0 - Symmetric
 | 
			
		||||
            +1 - Right-skewed
 | 
			
		||||
            +2 - Bimodal
 | 
			
		||||
            +3 - Uniform
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  scale = len(seq)/10
 | 
			
		||||
  p = stats.uniform.pdf(seq)
 | 
			
		||||
  if dist == -1 or dist == 0 or dist == 1:
 | 
			
		||||
    p = MixtureModel([truncnorm(min(seq), max(seq), get_loc(seq, dist), scale)]).pdf(seq)
 | 
			
		||||
  elif dist == 2:
 | 
			
		||||
    p = MixtureModel([
 | 
			
		||||
      truncnorm(min(seq), max(seq), get_loc(seq, -1), scale),
 | 
			
		||||
      truncnorm(min(seq), max(seq), get_loc(seq, +1), scale)
 | 
			
		||||
    ]).pdf(seq)
 | 
			
		||||
  plt.plot(seq, p)
 | 
			
		||||
  plt.show()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def simplify_data(seq, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Get synthetic from real data.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      seq   - The trajectory.
 | 
			
		||||
      lmdks - The landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      The simplified sequence and landmarks.
 | 
			
		||||
  '''
 | 
			
		||||
  seq_s = get_seq(1, len(seq))
 | 
			
		||||
  lmdks_s = []
 | 
			
		||||
  for i, p in enumerate(seq):
 | 
			
		||||
    if is_landmark(p, lmdks):
 | 
			
		||||
      lmdks_s.append(i + 1)
 | 
			
		||||
  return seq_s, np.array(lmdks_s)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def truncnorm(lower, upper, loc, scale):
 | 
			
		||||
  '''
 | 
			
		||||
    A truncated normal continuous random variable.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      lower - The lower bound of the range.
 | 
			
		||||
      upper - The upper bound of the range.
 | 
			
		||||
      loc - The location of the distribution.
 | 
			
		||||
      scale - The spread of the distribution.
 | 
			
		||||
    Returns:
 | 
			
		||||
      A truncated normal continuous random variable.
 | 
			
		||||
  '''
 | 
			
		||||
  return stats.truncnorm((lower - loc)/scale, (upper - loc)/scale, loc, scale)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def is_valid(coord):
 | 
			
		||||
  '''
 | 
			
		||||
    Check if coordinates are valid.
 | 
			
		||||
    
 | 
			
		||||
    Parameters:
 | 
			
		||||
      coord - The coordinates to be checked.
 | 
			
		||||
    Returns:
 | 
			
		||||
      True  - Valid coordinates.
 | 
			
		||||
      False - Invalid coordinates.
 | 
			
		||||
  '''
 | 
			
		||||
  if coord is None or (float(coord[0]) > 90 or float(coord[0]) < -90  # lat
 | 
			
		||||
  or float(coord[1]) > 180 or float(coord[1]) < -180):  #lng
 | 
			
		||||
    return False
 | 
			
		||||
  return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def save_data(args, data, name):
 | 
			
		||||
  '''
 | 
			
		||||
    Save to file and compress.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      args - The execution arguments.
 | 
			
		||||
      data - The data.
 | 
			
		||||
      name - The name.
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  try:
 | 
			
		||||
    path = os.path.dirname(os.path.abspath(args.res)) + '/' + name
 | 
			
		||||
    if 'lmdks' in name:
 | 
			
		||||
      path += '(dis=' + str(args.dist) + ', per=' + str(args.per) + ')'
 | 
			
		||||
    print('Saving to %s... ' %(path), end='', flush=True)
 | 
			
		||||
    with open(path, 'wb') as file:
 | 
			
		||||
      _, idx = np.unique(data, axis=0, return_index=True)
 | 
			
		||||
      np.save(file, data[np.sort(idx)], allow_pickle=False)
 | 
			
		||||
    print('[OK]')
 | 
			
		||||
    # Compress file
 | 
			
		||||
    print('Compressing in %s... ' %(args.res), end='', flush=True)
 | 
			
		||||
    with zipfile.ZipFile(args.res, 'a') as zip_file:
 | 
			
		||||
      zip_file.write(path, os.path.basename(path))
 | 
			
		||||
    print('[OK]')
 | 
			
		||||
    # Delete file
 | 
			
		||||
    print('Deleting %s... ' %(path), end='', flush=True)
 | 
			
		||||
    os.remove(path)
 | 
			
		||||
    print('[OK]')
 | 
			
		||||
  except Exception as e:
 | 
			
		||||
    print('[Error: %s]' %(e))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def load_data(args, name):
 | 
			
		||||
  '''
 | 
			
		||||
    Load data from file.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      args - The execution arguments.
 | 
			
		||||
      data - The data.
 | 
			
		||||
      name - The name.
 | 
			
		||||
    Returns:
 | 
			
		||||
      data - The data set.
 | 
			
		||||
             0: uid, 1: lat, 2: lng, 3: tim
 | 
			
		||||
  '''
 | 
			
		||||
  data = np.array([])
 | 
			
		||||
  try:
 | 
			
		||||
    path = os.path.dirname(os.path.abspath(args.res)) + '/' + name
 | 
			
		||||
    if 'lmdks' in name:
 | 
			
		||||
      path += '(dis=' + str(args.dist) + ', per=' + str(args.per) + ')'
 | 
			
		||||
    print('Loading %s... ' %(path), end='', flush=True)
 | 
			
		||||
    with zipfile.ZipFile(args.res, 'r') as zip_file:
 | 
			
		||||
      data = np.load(zip_file.open(os.path.basename(path)))
 | 
			
		||||
      print('[OK]')
 | 
			
		||||
  except Exception as e:
 | 
			
		||||
    print('[Error: %s]' %(e))
 | 
			
		||||
  return data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def find_lmdks(usrs_data, args):
 | 
			
		||||
  '''
 | 
			
		||||
    Find users' landmarks.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      args      - The execution arguments.
 | 
			
		||||
      usrs_data - The users' data.
 | 
			
		||||
                  0: uid, 1: lat, 2: lng, 3: tim
 | 
			
		||||
    Returns:
 | 
			
		||||
      usrs_lmdks - The users' landmarks.
 | 
			
		||||
                  0: lid, 1: uid, 2: lat, 3: lng, 4: tim
 | 
			
		||||
  '''
 | 
			
		||||
  usrs_lmdks = np.empty((0,5), np.float32)
 | 
			
		||||
  traj_cur = 0
 | 
			
		||||
  lmdk_id = 0
 | 
			
		||||
  usrs = np.unique(usrs_data[:,0])
 | 
			
		||||
  for usr_i, usr in enumerate(usrs):
 | 
			
		||||
    # Initialize user's landmarks list
 | 
			
		||||
    lmdks = []
 | 
			
		||||
    traj = usrs_data[usrs_data[:,0]==usr, :]
 | 
			
		||||
    traj_cur += len(traj)
 | 
			
		||||
    print(
 | 
			
		||||
      '[%d%%] Points: %d/%d | Users: %d/%d... '
 | 
			
		||||
      %((traj_cur/usrs_data.shape[0])*100, traj_cur, usrs_data.shape[0], usr, usrs[len(usrs) - 1]),
 | 
			
		||||
      end='', flush=True
 | 
			
		||||
    )
 | 
			
		||||
    # Check the actual points
 | 
			
		||||
    i = 0
 | 
			
		||||
    while i < len(traj) - 1:
 | 
			
		||||
      lmdk_cur = []
 | 
			
		||||
      if is_valid((traj[i][1], traj[i][2])):
 | 
			
		||||
        for j in range(i + 1, len(traj)):
 | 
			
		||||
          if is_valid((traj[j][1], traj[j][2])):
 | 
			
		||||
            # Add the beginning only the first time
 | 
			
		||||
            if j == i + 1:
 | 
			
		||||
              lmdk_cur.append([lmdk_id, usr, traj[i][1], traj[i][2], traj[i][3]])
 | 
			
		||||
            # Add the new point
 | 
			
		||||
            lmdk_cur.append([lmdk_id, usr, traj[j][1], traj[j][2], traj[j][3]])
 | 
			
		||||
            # Distance in meters
 | 
			
		||||
            dist = distance((traj[i][1], traj[i][2]), (traj[j][1], traj[j][2])).km*1000
 | 
			
		||||
            # Distance exceeded or reached end of iteration
 | 
			
		||||
            if dist > args.dist or j == len(traj) - 1:
 | 
			
		||||
              # Period in minutes
 | 
			
		||||
              per = abs(datetime.fromtimestamp(int(traj[i][3])) - datetime.fromtimestamp(int(traj[j][3]))).total_seconds()/60
 | 
			
		||||
              # Check if enough time passed
 | 
			
		||||
              if per > args.per:
 | 
			
		||||
                # usrs_id starts from 1
 | 
			
		||||
                lmdk_id += 1
 | 
			
		||||
                # Assign id to current landmark
 | 
			
		||||
                for l in lmdk_cur:
 | 
			
		||||
                  l[0] = lmdk_id
 | 
			
		||||
                # Append current landmark
 | 
			
		||||
                lmdks += lmdk_cur
 | 
			
		||||
              # Continue checking from the current point
 | 
			
		||||
              i = j
 | 
			
		||||
              break
 | 
			
		||||
      # No landmark was found, continue from next point
 | 
			
		||||
      if i == 0 or not is_valid((traj[i][1], traj[i][2])) or 'j' not in vars() or i != j:
 | 
			
		||||
        i += 1
 | 
			
		||||
    print('[OK]')
 | 
			
		||||
    if lmdks:
 | 
			
		||||
      usrs_lmdks = np.append(usrs_lmdks, np.asarray(lmdks, dtype=np.float32), axis=0)
 | 
			
		||||
  return usrs_lmdks
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def lmdks_stats(args, usrs_lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Generate landmarks' stats.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      args       - The execution arguments.
 | 
			
		||||
      usrs_lmdks - The user' landmarks.
 | 
			
		||||
                  0: lid, 1: uid, 2: lat, 3: lng, 4: tim
 | 
			
		||||
    Returns:
 | 
			
		||||
      Nothing.
 | 
			
		||||
  '''
 | 
			
		||||
  lmdks_stats = {
 | 
			
		||||
    'total': 0,
 | 
			
		||||
    'len': 0,
 | 
			
		||||
    'count': {},
 | 
			
		||||
  }
 | 
			
		||||
  # Check every user
 | 
			
		||||
  usrs = np.unique(usrs_lmdks[:, 1])
 | 
			
		||||
  for usr in usrs:
 | 
			
		||||
    print(
 | 
			
		||||
      '[%d%%] Calculating landmark stats for user %d/%d... ' 
 | 
			
		||||
      %(usr*100/len(usrs), usr, np.max(usrs)),
 | 
			
		||||
      end='', flush=True
 | 
			
		||||
    )
 | 
			
		||||
    # Check each user's landmarks
 | 
			
		||||
    for lmdk in np.unique(usrs_lmdks[usrs_lmdks[:, 1] == usr, 0]):
 | 
			
		||||
      lmdk_traj = usrs_lmdks[usrs_lmdks[:, 0] == lmdk, :]
 | 
			
		||||
      lmdks_stats['total'] += 1
 | 
			
		||||
      lmdks_stats['len'] += len(lmdk_traj)
 | 
			
		||||
      if lmdks_stats['count'].get(len(lmdk_traj)) == None:
 | 
			
		||||
        lmdks_stats['count'][len(lmdk_traj)] = 1
 | 
			
		||||
      else:
 | 
			
		||||
        lmdks_stats['count'][len(lmdk_traj)] += 1
 | 
			
		||||
    print('[OK]')
 | 
			
		||||
  hist_min, hist_max = min(lmdks_stats['count'], key=int, default=0), max(lmdks_stats['count'], key=int,  default=0)
 | 
			
		||||
  # Make histogram
 | 
			
		||||
  x = []
 | 
			
		||||
  for i in range(hist_min, hist_max + 1):
 | 
			
		||||
    if lmdks_stats['count'].get(i) != None:
 | 
			
		||||
      x.extend([i]*lmdks_stats['count'].get(i))
 | 
			
		||||
  # Show stats
 | 
			
		||||
  print(
 | 
			
		||||
    '\n############ Stats ###########\n'
 | 
			
		||||
    'Landmarks   : %d\n'
 | 
			
		||||
    '  Length\n'
 | 
			
		||||
    '    Total   : %d (%.2f%%)\n'
 | 
			
		||||
    '    Minimum : %d\n'
 | 
			
		||||
    '    Maximum : %d\n'
 | 
			
		||||
    '##############################\n'
 | 
			
		||||
    %(lmdks_stats['total'], lmdks_stats['len'], (lmdks_stats['len']/args.time)*100, min(lmdks_stats['count'], key=int, default=0), max(lmdks_stats['count'], key=int, default=0))
 | 
			
		||||
  )
 | 
			
		||||
  # # Initialize plot
 | 
			
		||||
  # plot_init()
 | 
			
		||||
  # # Set x axis
 | 
			
		||||
  # plt.xlabel('Landmarks sequence length')  
 | 
			
		||||
  # plt.xticks(rotation='vertical')
 | 
			
		||||
  # # The y axis
 | 
			
		||||
  # plt.ylabel('Number of sequences')
 | 
			
		||||
  # plt.yscale('log')
 | 
			
		||||
  # Create histogram
 | 
			
		||||
  # plt.hist(x, bins=(hist_max - hist_min))
 | 
			
		||||
  # Show plot
 | 
			
		||||
  # plt.show()
 | 
			
		||||
  # Save plot
 | 
			
		||||
  # save_plot(os.path.dirname(args.arc) + '/' + 'results' + '(dis=' + str(args.dist) + ', per=' + str(args.per) + ').pdf')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def should_sample(samp_rt):
 | 
			
		||||
  '''
 | 
			
		||||
    Randomly decide to release with noise.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      samp_rt - The sampling rate (0, 1]
 | 
			
		||||
    Returns:
 | 
			
		||||
      True/False
 | 
			
		||||
  '''
 | 
			
		||||
  return random.choices(population=[True, False], weights=[samp_rt, 1 - samp_rt], k=1)[0]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def is_landmark(p, lmdks):
 | 
			
		||||
  '''
 | 
			
		||||
    Check is a point is a landmark.
 | 
			
		||||
 | 
			
		||||
    Parameters:
 | 
			
		||||
      p     - The point to check.
 | 
			
		||||
      lmdks - A 2d array of landmarks.
 | 
			
		||||
    Returns:
 | 
			
		||||
      True/False
 | 
			
		||||
  '''
 | 
			
		||||
  if len(lmdks) and any(np.equal(lmdks[:, 1:5], p).all(1)):
 | 
			
		||||
    return True
 | 
			
		||||
  return False
 | 
			
		||||
		Reference in New Issue
	
	Block a user