code: WIP

2021-10-06 01:44:50 +02:00
parent 0a45c9d1bd
commit 0aca4ec9c1
4 changed files with 599 additions and 0 deletions
--- a/code/expt/copenhagen-sel.py
+++ b/code/expt/copenhagen-sel.py
@ -0,0 +1,195 @@
 #!/usr/bin/env python3
 import sys
 sys.path.insert(1, '../lib')
 import argparse
 import ast
 from datetime import datetime
 from geopy.distance import distance
 import lmdk_bgt
 import lmdk_lib
 import math
 import numpy as np
 from matplotlib import pyplot as plt
 import time
 def main(args):
  '''
    Compare the skip, uniform, and adaptive budget allocation schemes on
    the contacts of one Copenhagen user, for several landmark
    percentages, when the landmarks are first selected privately.

    For every percentage other than 0 and 100, a share of the privacy
    budget (eps_sel) is spent on selecting the landmarks and the
    remainder is handed to the allocation schemes.  The mean absolute
    error of every scheme is averaged over `args.iter` iterations,
    drawn as grouped bars next to the 'event' and 'user' reference
    lines (uniform allocation with 0% and 100% landmarks and the whole
    budget), and saved as '../../rslt/bgt_cmp/Copenhagen-sel.pdf'.

    Parameters:
      args - The parsed arguments; `iter` is read here and the whole
             object is forwarded to `lmdk_lib.load_data`.
  '''
  # The file-level imports do not bring these two modules into scope,
  # although the selection step below needs them.
  # NOTE(review): expected to be found in '../lib' like lmdk_lib - confirm.
  import exp_mech
  import lmdk_sel
  # Contacts for all users
  cont_data = lmdk_lib.load_data(args, 'cont')
  # Contacts for landmark's percentages for all users
  lmdk_data = lmdk_lib.load_data(args, 'usrs_data')
  # The name of the dataset
  d = 'Copenhagen'
  # The user's id
  uid = '449'
  # The landmarks percentages
  lmdks_pct = [0, 20, 40, 60, 80, 100]
  # The privacy budget
  epsilon = 1.0
  # Number of methods
  n = 3
  # Width of bars
  bar_width = 1/(n + 1)
  # The x axis
  x_i = np.arange(len(lmdks_pct))
  x_margin = bar_width*(n/2 + 1)
  print('\n##############################', d, '\n')
  # Get user's contacts sequence (at most the first 1000 rows)
  seq = cont_data[cont_data[:, 1] == float(uid)][:1000]
  # Initialize plot
  lmdk_lib.plot_init()
  # The x axis
  plt.xticks(x_i, np.array(lmdks_pct, int))
  plt.xlabel('Landmarks (%)')  # Set x axis label.
  plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
  # The y axis
  plt.ylabel('Mean absolute error (%)')  # Set y axis label.
  # plt.yscale('log')
  plt.ylim(0, 100)
  # Bar offset, so that the n bars are centered on every tick
  x_offset = -(bar_width/2)*(n - 1)
  mae_u = np.zeros(len(lmdks_pct))
  mae_s = np.zeros(len(lmdks_pct))
  mae_a = np.zeros(len(lmdks_pct))
  mae_evt = 0
  mae_usr = 0
  for i, pct in enumerate(lmdks_pct):
    # Find landmarks
    lmdks = lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, pct)
    for _ in range(args.iter):
      # No budget is spent on selection when there is nothing to select
      eps_sel = 0
      if pct != 0 and pct != 100:
        # NOTE(review): `lmdks` is overwritten below, so from the second
        # iteration on the selection starts from the previously selected
        # landmarks instead of the actual ones - confirm this is intended.
        # Get landmarks timestamps in sequence
        lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
        # Turn landmarks to histogram
        hist, _ = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
        # Find all possible options
        opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
        # Landmarks selection budget
        eps_sel = epsilon/(len(lmdks_seq) + 1)
        # Get private landmarks timestamps
        lmdks_seq, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
        # Get actual landmarks values
        # NOTE(review): the timestamps appear to start from 1, while this
        # indexing starts from 0 - verify against exponential_pareto.
        lmdks = seq[lmdks_seq]
      # Skip
      rls_data_s, bgts_s = lmdk_bgt.skip_cont(seq, lmdks, epsilon - eps_sel)
      # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s)
      mae_s[i] += lmdk_bgt.mae_cont(rls_data_s)/args.iter
      # Uniform
      rls_data_u, bgts_u = lmdk_bgt.uniform_cont(seq, lmdks, epsilon - eps_sel)
      # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_u)
      mae_u[i] += lmdk_bgt.mae_cont(rls_data_u)/args.iter
      # Adaptive
      rls_data_a, _, _ = lmdk_bgt.adaptive_cont(seq, lmdks, epsilon - eps_sel, .5, .5)
      mae_a[i] += lmdk_bgt.mae_cont(rls_data_a)/args.iter
      # Calculate once (the reference lines do not depend on pct)
      if i == 0:
        # Event
        rls_data_evt, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 0), epsilon)
        mae_evt += lmdk_bgt.mae_cont(rls_data_evt)/args.iter
        # User
        rls_data_usr, _ = lmdk_bgt.uniform_cont(seq, lmdk_lib.find_lmdks_cont(lmdk_data, seq, uid, 100), epsilon)
        mae_usr += lmdk_bgt.mae_cont(rls_data_usr)/args.iter
  # Scale to match the percentage y axis
  mae_u *= 100
  mae_s *= 100
  mae_a *= 100
  mae_evt *= 100
  mae_usr *= 100
  # Plot lines
  plt.axhline(
    y = mae_evt,
    color = '#212121',
    linewidth=lmdk_lib.line_width
  )
  plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.05, 'event')
  plt.axhline(
    y = mae_usr,
    color = '#616161',
    linewidth=lmdk_lib.line_width
  )
  plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.05, 'user')
  # Plot bars
  plt.bar(
    x_i + x_offset,
    mae_s,
    bar_width,
    label='Skip',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
  plt.bar(
    x_i + x_offset,
    mae_u,
    bar_width,
    label='Uniform',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
  plt.bar(
    x_i + x_offset,
    mae_a,
    bar_width,
    label='Adaptive',
    linewidth=lmdk_lib.line_width
  )
  path = str('../../rslt/bgt_cmp/' + d)
  # Plot legend
  lmdk_lib.plot_legend()
  # # Show plot
  # plt.show()
  # Save plot
  lmdk_lib.save_plot(path + '-sel.pdf')
  print('[OK]', flush=True)
 def parse_args():
  '''
    Build the command-line parser and parse the arguments.
    Optional:
      res  - The results archive file.
      iter - The total iterations.
    Returns:
      The namespace holding the parsed arguments.
  '''
  cli = argparse.ArgumentParser()
  # There are no mandatory arguments; every entry below is optional and
  # is given as (flags, keyword arguments).
  options = (
    (('-r', '--res'), dict(help='The results archive file.', type=str, default='/home/manos/Cloud/Data/Copenhagen/Results.zip')),
    (('-i', '--iter'), dict(help='The total iterations.', type=int, default=1)),
  )
  for flags, settings in options:
    cli.add_argument(*flags, **settings)
  return cli.parse_args()
 # Script entry point: run the experiment and report the elapsed time.
 if __name__ == '__main__':
  try:
    start_time = time.time()
    main(parse_args())
    end_time = time.time()
    print('##############################')
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # The bare `exit` is a helper that the `site` module adds for the
    # interactive interpreter; programs should call sys.exit instead.
    sys.exit()
--- a/code/expt/hue-sel.py
+++ b/code/expt/hue-sel.py
@ -0,0 +1,185 @@
 #!/usr/bin/env python3
 import sys
 sys.path.insert(1, '../lib')
 import argparse
 import ast
 from datetime import datetime
 from geopy.distance import distance
 import lmdk_bgt
 import lmdk_lib
 import math
 import numpy as np
 from matplotlib import pyplot as plt
 import time
 def main(args):
  '''
    Compare the skip, uniform, and adaptive budget allocation schemes on
    the HUE consumption sequence, for several landmark percentages, when
    the landmarks are first selected privately.

    The landmarks of every percentage are the rows whose second column
    is below the matching threshold in `lmdks_th`.  For every percentage
    other than 0 and 100, a share of the privacy budget (eps_sel) is
    spent on selecting the landmarks and the remainder is handed to the
    allocation schemes.  The mean absolute error of every scheme is
    averaged over `args.iter` iterations, drawn as grouped bars next to
    the 'event' and 'user' reference lines (uniform allocation with the
    first and the last threshold and the whole budget), and saved as
    '../../rslt/bgt_cmp/HUE-sel.pdf'.

    Parameters:
      args - The parsed arguments; `iter` is read here and the whole
             object is forwarded to `lmdk_lib.load_data`.
  '''
  # The file-level imports do not bring these two modules into scope,
  # although the selection step below needs them.
  # NOTE(review): expected to be found in '../lib' like lmdk_lib - confirm.
  import exp_mech
  import lmdk_sel
  # User's consumption
  seq = lmdk_lib.load_data(args, 'cons')
  # The name of the dataset
  d = 'HUE'
  # The landmarks percentages
  lmdks_pct = [0, 20, 40, 60, 80, 100]
  # Landmarks' thresholds (one per percentage)
  lmdks_th = [0, .54, .68, .88, 1.12, 10]
  # The privacy budget
  epsilon = 10.0
  # Number of methods
  n = 3
  # Width of bars
  bar_width = 1/(n + 1)
  # The x axis
  x_i = np.arange(len(lmdks_pct))
  x_margin = bar_width*(n/2 + 1)
  print('\n##############################', d, '\n')
  # Initialize plot
  lmdk_lib.plot_init()
  # The x axis
  plt.xticks(x_i, np.array(lmdks_pct, int))
  plt.xlabel('Landmarks (%)')  # Set x axis label.
  plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
  # The y axis
  plt.ylabel('Mean absolute error (kWh)')  # Set y axis label.
  plt.yscale('log')
  # plt.ylim(.01, 10000)
  # Bar offset, so that the n bars are centered on every tick
  x_offset = -(bar_width/2)*(n - 1)
  mae_u = np.zeros(len(lmdks_pct))
  mae_s = np.zeros(len(lmdks_pct))
  mae_a = np.zeros(len(lmdks_pct))
  mae_evt = 0
  mae_usr = 0
  for i, pct in enumerate(lmdks_pct):
    # Find landmarks
    lmdks = seq[seq[:, 1] < lmdks_th[i]]
    for _ in range(args.iter):
      # No budget is spent on selection when there is nothing to select
      eps_sel = 0
      if pct != 0 and pct != 100:
        # NOTE(review): `lmdks` is overwritten below, so from the second
        # iteration on the selection starts from the previously selected
        # landmarks instead of the actual ones - confirm this is intended.
        # Get landmarks timestamps in sequence
        lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
        # Turn landmarks to histogram
        hist, _ = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
        # Find all possible options
        opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
        # Landmarks selection budget
        eps_sel = epsilon/(len(lmdks_seq) + 1)
        # Get private landmarks timestamps
        lmdks_seq, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
        # Get actual landmarks values
        # NOTE(review): the timestamps appear to start from 1, while this
        # indexing starts from 0 - verify against exponential_pareto.
        lmdks = seq[lmdks_seq]
      # Skip
      rls_data_s, bgts_s = lmdk_bgt.skip_cons(seq, lmdks, epsilon - eps_sel)
      # lmdk_bgt.validate_bgts(seq, lmdks, epsilon, bgts_s)
      mae_s[i] += lmdk_bgt.mae_cons(seq, rls_data_s)/args.iter
      # Uniform
      rls_data_u, bgts_u = lmdk_bgt.uniform_cons(seq, lmdks, epsilon - eps_sel)
      mae_u[i] += lmdk_bgt.mae_cons(seq, rls_data_u)/args.iter
      # Adaptive
      rls_data_a, _, _ = lmdk_bgt.adaptive_cons(seq, lmdks, epsilon - eps_sel, .5, .5)
      mae_a[i] += lmdk_bgt.mae_cons(seq, rls_data_a)/args.iter
      # Calculate once (the reference lines do not depend on pct)
      if i == 0:
        # Event
        rls_data_evt, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[0]], epsilon)
        mae_evt += lmdk_bgt.mae_cons(seq, rls_data_evt)/args.iter
        # User
        rls_data_usr, _ = lmdk_bgt.uniform_cons(seq, seq[seq[:, 1] < lmdks_th[-1]], epsilon)
        mae_usr += lmdk_bgt.mae_cons(seq, rls_data_usr)/args.iter
  # Plot lines
  plt.axhline(
    y = mae_evt,
    color = '#212121',
    linewidth=lmdk_lib.line_width
  )
  plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
  plt.axhline(
    y = mae_usr,
    color = '#616161',
    linewidth=lmdk_lib.line_width
  )
  plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
  # Plot bars
  plt.bar(
    x_i + x_offset,
    mae_s,
    bar_width,
    label='Skip',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
  plt.bar(
    x_i + x_offset,
    mae_u,
    bar_width,
    label='Uniform',
    linewidth=lmdk_lib.line_width
  )
  x_offset += bar_width
  plt.bar(
    x_i + x_offset,
    mae_a,
    bar_width,
    label='Adaptive',
    linewidth=lmdk_lib.line_width
  )
  path = str('../../rslt/bgt_cmp/' + d)
  # Plot legend
  lmdk_lib.plot_legend()
  # Show plot
  # plt.show()
  # Save plot
  lmdk_lib.save_plot(path + '-sel.pdf')
  print('[OK]', flush=True)
 def parse_args():
  '''
    Build the command-line parser and parse the arguments.
    Optional:
      res  - The results archive file.
      iter - The total iterations.
    Returns:
      The namespace holding the parsed arguments.
  '''
  cli = argparse.ArgumentParser()
  # There are no mandatory arguments; every entry below is optional and
  # is given as (flags, keyword arguments).
  options = (
    (('-r', '--res'), dict(help='The results archive file.', type=str, default='/home/manos/Cloud/Data/HUE/Results.zip')),
    (('-i', '--iter'), dict(help='The total iterations.', type=int, default=1)),
  )
  for flags, settings in options:
    cli.add_argument(*flags, **settings)
  return cli.parse_args()
 # Script entry point: run the experiment and report the elapsed time.
 if __name__ == '__main__':
  try:
    start_time = time.time()
    main(parse_args())
    end_time = time.time()
    print('##############################')
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # The bare `exit` is a helper that the `site` module adds for the
    # interactive interpreter; programs should call sys.exit instead.
    sys.exit()
--- a/code/expt/t-drive-sel.py
+++ b/code/expt/t-drive-sel.py
@ -0,0 +1,211 @@
 #!/usr/bin/env python3
 import sys
 sys.path.insert(1, '../lib')
 import argparse
 from datetime import datetime
 from geopy.distance import distance
 import lmdk_bgt
 import lmdk_lib
 import numpy as np
 from matplotlib import pyplot as plt
 import time
 def main(args):
  '''
    Compare the skip, uniform, and adaptive budget allocation schemes on
    the trajectory of one T-drive user, for several landmark
    percentages, when the landmarks are first selected privately.

    The landmarks of every percentage are found by `lmdk_lib.find_lmdks`
    with the distance and period thresholds listed in `data_info`.  For
    every percentage other than 0 and 100, a share of the privacy budget
    (eps_sel) is spent on selecting the landmarks and the remainder is
    handed to the allocation schemes.  The mean absolute error of every
    scheme is averaged over `args.iter` iterations, drawn as grouped
    bars next to the 'event' and 'user' reference lines (uniform
    allocation with 0% and 100% landmarks and the whole budget), and
    saved as '../../rslt/bgt_cmp/<data set>-sel.pdf'.

    Parameters:
      args - The parsed arguments; `time` and `iter` are read, while
             `res`, `dist`, and `per` are overwritten here with the
             values of the current data set and landmark percentage.
  '''
  # The file-level imports do not bring these two modules into scope,
  # although the selection step below needs them.
  # NOTE(review): expected to be found in '../lib' like lmdk_lib - confirm.
  import exp_mech
  import lmdk_sel
  # The data files
  data_files = {
    'T-drive': '/home/manos/Cloud/Data/T-drive/Results.zip',
  }
  # Data related info
  data_info = {
    'T-drive': {
      'uid': 2,
      'lmdks': {
          0: {'dist': 0, 'per': 1000},   #   0.0%
         20: {'dist': 2095, 'per': 30},  #  19.6%
         40: {'dist': 2790, 'per': 30},  #  40.2%
         60: {'dist': 3590, 'per': 30},  #  59.9%
         80: {'dist': 4825, 'per': 30},  #  79.4%
        100: {'dist': 10350, 'per': 30}  # 100.0%
      }
    }
  }
  # The data sets
  data_sets = {}
  # Load data sets
  for df in data_files:
    args.res = data_files[df]
    data_sets[df] = lmdk_lib.load_data(args, 'usrs_data')
  # Geo-I configuration
  # epsilon = level/radius
  # Radius is in meters
  bgt_conf = [
    {'epsilon': 1},
  ]
  # Number of methods
  n = 3
  # Width of bars
  bar_width = 1/(n + 1)
  # The x axis (sized after the landmark percentages of the first data set)
  x_i = np.arange(len(list(data_info.values())[0]['lmdks']))
  x_margin = bar_width*(n/2 + 1)
  for d in data_sets:
    print('\n##############################', d, '\n')
    args.res = data_files[d]
    data = data_sets[d]
    # Truncate trajectory according to arguments
    seq = data[data[:,0]==data_info[d]['uid'], :][:args.time]
    # Initialize plot
    lmdk_lib.plot_init()
    # The x axis
    plt.xticks(x_i, np.array([key for key in data_info[d]['lmdks']]).astype(int))
    plt.xlabel('Landmarks (%)')  # Set x axis label.
    plt.xlim(x_i.min() - x_margin, x_i.max() + x_margin)
    # The y axis
    plt.ylabel('Mean absolute error (m)')  # Set y axis label.
    plt.yscale('log')
    # plt.ylim(1, 100000000)
    # Bar offset, so that the n bars are centered on every tick
    x_offset = -(bar_width/2)*(n - 1)
    mae_u = np.zeros(len(data_info[d]['lmdks']))
    mae_s = np.zeros(len(data_info[d]['lmdks']))
    mae_a = np.zeros(len(data_info[d]['lmdks']))
    mae_evt = 0
    mae_usr = 0
    for i, lmdk in enumerate(data_info[d]['lmdks']):
      # Find landmarks
      args.dist = data_info[d]['lmdks'][lmdk]['dist']
      args.per = data_info[d]['lmdks'][lmdk]['per']
      lmdks = lmdk_lib.find_lmdks(seq, args)[:args.time]
      for bgt in bgt_conf:
        for _ in range(args.iter):
          # No budget is spent on selection when there is nothing to select
          eps_sel = 0
          if lmdk != 0 and lmdk != 100:
            # NOTE(review): `lmdks` is overwritten below, so from the
            # second iteration on the selection starts from the
            # previously selected landmarks instead of the actual ones -
            # confirm this is intended.
            # Get landmarks timestamps in sequence
            lmdks_seq = lmdk_lib.find_lmdks_seq(seq, lmdks)
            # Turn landmarks to histogram
            hist, _ = lmdk_lib.get_hist(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
            # Find all possible options
            opts = lmdk_sel.get_opts_from_top_h(lmdk_lib.get_seq(1, len(seq)), lmdks_seq)
            # Landmarks selection budget
            eps_sel = bgt['epsilon']/(len(lmdks_seq) + 1)
            # Get private landmarks timestamps
            lmdks_seq, _ = exp_mech.exponential_pareto(hist, opts, exp_mech.score, 1.0, eps_sel)
            # Get actual landmarks values
            # NOTE(review): the timestamps appear to start from 1, while
            # this indexing starts from 0 - verify against
            # exponential_pareto.
            lmdks = seq[lmdks_seq]
          # Skip
          rls_data_s, _ = lmdk_bgt.skip(seq, lmdks, bgt['epsilon'] - eps_sel)
          mae_s[i] += lmdk_bgt.mae(seq, rls_data_s)/args.iter
          # Uniform
          rls_data_u, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon'] - eps_sel)
          mae_u[i] += lmdk_bgt.mae(seq, rls_data_u)/args.iter
          # Adaptive
          rls_data_a, _, _ = lmdk_bgt.adaptive(seq, lmdks, bgt['epsilon'] - eps_sel, .5, .5)
          mae_a[i] += lmdk_bgt.mae(seq, rls_data_a)/args.iter
          # Event
          if lmdk == 0:
            rls_data_evt, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon'])
            mae_evt += lmdk_bgt.mae(seq, rls_data_evt)/args.iter
          # User
          if lmdk == 100:
            rls_data_usr, _ = lmdk_bgt.uniform_r(seq, lmdks, bgt['epsilon'])
            mae_usr += lmdk_bgt.mae(seq, rls_data_usr)/args.iter
    # Plot lines
    plt.axhline(
      y = mae_evt,
      color = '#212121',
      linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_evt - mae_evt*.14, 'event')
    plt.axhline(
      y = mae_usr,
      color = '#616161',
      linewidth=lmdk_lib.line_width
    )
    plt.text(x_i[-1] + x_i[-1]*.14, mae_usr - mae_usr*.14, 'user')
    # Plot bars
    plt.bar(
      x_i + x_offset,
      mae_s,
      bar_width,
      label='Skip',
      linewidth=lmdk_lib.line_width
    )
    x_offset += bar_width
    plt.bar(
      x_i + x_offset,
      mae_u,
      bar_width,
      label='Uniform',
      linewidth=lmdk_lib.line_width
    )
    x_offset += bar_width
    plt.bar(
      x_i + x_offset,
      mae_a,
      bar_width,
      label='Adaptive',
      linewidth=lmdk_lib.line_width
    )
    path = str('../../rslt/bgt_cmp/' + d)
    # Plot legend
    lmdk_lib.plot_legend()
    # Show plot
    # plt.show()
    # Save plot
    lmdk_lib.save_plot(path + '-sel.pdf')
    print('[OK]', flush=True)
 def parse_args():
  '''
    Build the command-line parser and parse the arguments.
    Optional:
      dist - The coordinates distance threshold in meters.
      per  - The timestamps period threshold in minutes.
      res  - The results archive file.
      time - The total timestamps.
      iter - The total iterations.
    Returns:
      The namespace holding the parsed arguments.
  '''
  cli = argparse.ArgumentParser()
  # There are no mandatory arguments; every entry below is optional and
  # is given as (flags, keyword arguments).
  options = (
    (('-l', '--dist'), dict(help='The coordinates distance threshold in meters.', type=int, default=200)),
    (('-p', '--per'), dict(help='The timestaps period threshold in mimutes.', type=int, default=30)),
    (('-r', '--res'), dict(help='The results archive file.', type=str, default='/home/manos/Cloud/Data/T-drive/Results.zip')),
    (('-t', '--time'), dict(help='The total timestamps.', type=int, default=1000)),
    (('-i', '--iter'), dict(help='The total iterations.', type=int, default=1)),
  )
  for flags, settings in options:
    cli.add_argument(*flags, **settings)
  return cli.parse_args()
 # Script entry point: run the experiment and report the elapsed time.
 if __name__ == '__main__':
  try:
    start_time = time.time()
    main(parse_args())
    end_time = time.time()
    print('##############################')
    print('Time elapsed: %s' % (time.strftime('%H:%M:%S', time.gmtime(end_time - start_time))))
    print('##############################')
  except KeyboardInterrupt:
    print('Interrupted by user.')
    # The bare `exit` is a helper that the `site` module adds for the
    # interactive interpreter; programs should call sys.exit instead.
    sys.exit()
--- a/code/lib/lmdk_lib.py
+++ b/code/lib/lmdk_lib.py
@ -907,6 +907,14 @@ def find_lmdks(usrs_data, args):
  return usrs_lmdks
 def find_lmdks_seq(seq, lmdks):
  '''
    Find the timestamps of the landmarks within a sequence.

    Parameters:
      seq   - The sequence; iterated row by row.
      lmdks - The landmarks; a 2-dimensional array whose rows are
              compared with every row of the sequence.
    Returns:
      An integer array with the positions, counting from 1, of the
      rows of the sequence that are equal to at least one landmark.
  '''
  lmdks_seq = [
    t
    for t, p in enumerate(seq, start = 1)
    # A row is a landmark when it matches some landmark in every column
    if np.equal(lmdks, p).all(1).any()
  ]
  # np.numpy does not exist; the array constructor is np.array
  return np.array(lmdks_seq, dtype = int)
 def find_lmdks_tim(lmdk_data, seq, uid, pct):
  '''
    Find user's landmarks timestamps.