1578 lines
48 KiB
Python
1578 lines
48 KiB
Python
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import collections
|
|
import csv
|
|
import datetime
|
|
import math
|
|
from matplotlib import pyplot as plt
|
|
import numpy as np
|
|
import os
|
|
import random
|
|
from scipy.sparse.linalg import eigs
|
|
from scipy.stats import laplace
|
|
from scipy.stats import norm
|
|
import sympy as sp
|
|
import time
|
|
import xxhash
|
|
|
|
|
|
# Debugging
|
|
DEBUG = False # Variable to check.
|
|
# When enabled print something.
|
|
|
|
|
|
# Memoization
|
|
MEM = {} # The cache.
|
|
MISS = 0 # Number of additions to the cache.
|
|
TOTAL = 0 # Number of cache accesses.
|
|
|
|
|
|
def load_data(path):
|
|
'''
|
|
Read data from a file.
|
|
|
|
Parameters:
|
|
path - The relative path to the data file.
|
|
Returns:
|
|
data - A list of tuples [uid, timestamp, lng, lat, loc].
|
|
'''
|
|
print('Loading data from', os.path.abspath(path), '... ', end='')
|
|
data = []
|
|
try:
|
|
with open(path, 'r') as f:
|
|
reader = csv.reader(f, delimiter=',')
|
|
data = list(reader)
|
|
print('OK.')
|
|
return data
|
|
except IOError:
|
|
print('Oops!')
|
|
exit()
|
|
|
|
|
|
def save_output(path, t, e, a_b, a_f, a):
|
|
'''
|
|
Save output to a file.
|
|
|
|
Parameters:
|
|
path - The relative path to the output file.
|
|
t - The number of timestamps.
|
|
e - The privacy budget at each timestamp.
|
|
a_b - The backward privacy loss at each timestamp.
|
|
a_f - The forward privacy loss at each timestamp.
|
|
a - The temporal privacy loss at each timestamp.
|
|
Returns:
|
|
Nothing.
|
|
'''
|
|
# timestamp = time.strftime('%Y%m%d%H%M%S')
|
|
print('Saving output to %s... ' %(path), end='', flush=True)
|
|
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
with open(path, 'w') as f:
|
|
w = csv.writer(f, delimiter=';')
|
|
w.writerow(['t', 'e', 'bpl', 'fpl', 'tpl'])
|
|
w.writerows(zip(list(range(1, t + 1)), e, a_b, a_f, a))
|
|
print('OK.', flush=True)
|
|
|
|
|
|
def get_timestamps(data):
|
|
'''
|
|
Get all the timestamps from the input data.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
timestamps - An ndarray of all of the timestamps from the input data.
|
|
'''
|
|
print('Getting a list of all timestamps... ', end='', flush=True)
|
|
timestamps = np.sort(np.unique(np.array(data)[:, 1]))
|
|
if not len(timestamps):
|
|
print('No timestamps found.', flush=True)
|
|
exit()
|
|
# Check timestamps' order.
|
|
prev = None
|
|
for t in timestamps:
|
|
curr = time.mktime(datetime.datetime.strptime(t, '%Y-%m-%d %H:%M').timetuple())
|
|
if prev:
|
|
if curr < prev:
|
|
print('Wrong order.', flush=True)
|
|
exit()
|
|
prev = curr
|
|
print('OK.', flush=True)
|
|
if DEBUG:
|
|
print(timestamps, len(timestamps))
|
|
return timestamps
|
|
|
|
|
|
def get_locs(data):
|
|
'''
|
|
Get all the unique locations from the input data.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
locs - A sorted ndarray of all the unique locations int the input data.
|
|
'''
|
|
print('Getting a list of all locations... ', end='', flush=True)
|
|
locs = np.sort(np.unique(np.array(data)[:, 4].astype(np.int)))
|
|
if not len(locs):
|
|
print('No locations found.', flush=True)
|
|
exit()
|
|
print('OK.', flush=True)
|
|
if DEBUG:
|
|
print(list(map(str, locs)), len(locs))
|
|
return list(map(str, locs))
|
|
|
|
|
|
def get_cnts(data, t):
|
|
'''
|
|
Get the counts at every location for a specific timestamp.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
t - The timestamp of interest.
|
|
Returns:
|
|
cnts - A dict {loc:cnt} with the counts at every location for a specific timestamp.
|
|
'''
|
|
print('Getting all counts at %s... ' %(t), end='', flush=True)
|
|
locs = get_locs(data)
|
|
cnts = dict.fromkeys(locs, 0)
|
|
for d in data:
|
|
if d[1] == t:
|
|
cnts[int(d[4])] += 1
|
|
print('OK.', flush=True)
|
|
if DEBUG:
|
|
print(cnts)
|
|
return cnts
|
|
|
|
|
|
def get_all_cnts(data):
|
|
'''
|
|
Get the counts at every location for every timestamp.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
cnts - A dict {timestamp:loc} with all the counts at every location for every timestamp.
|
|
'''
|
|
cnts = {}
|
|
for d in data:
|
|
key = d[1] + '@' + d[4]
|
|
cnts[key] = cnts.get(key, 0) + 1
|
|
if DEBUG:
|
|
print(cnts)
|
|
return cnts
|
|
|
|
|
|
def get_usrs(data):
|
|
'''
|
|
Get a list of unique users in the input data set.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
users - An ndarray of all unique users.
|
|
'''
|
|
users = np.sort(np.unique(np.array(data)[:, 0].astype(np.int)))
|
|
if not len(users):
|
|
print('No users found.')
|
|
exit()
|
|
if DEBUG:
|
|
print(users, len(users))
|
|
return users
|
|
|
|
|
|
def get_usr_data(data, id):
|
|
'''
|
|
Get the data of a particular user from a data set.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
id - The user identifier.
|
|
Returns:
|
|
output - A list of the data of the targeted user.
|
|
'''
|
|
output = []
|
|
for d in data:
|
|
if (d[0] == str(id)):
|
|
output.append(d)
|
|
if DEBUG:
|
|
print(id, output)
|
|
return output
|
|
|
|
|
|
def get_usrs_data(data):
|
|
'''
|
|
Get the data of every user in a data set.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
output - A dict {usr, [usr_data]} with the data of each user.
|
|
'''
|
|
output = {}
|
|
for d in data:
|
|
output[d[0]] = output.get(d[0], []) + [d]
|
|
return output
|
|
|
|
|
|
def get_usr_traj(data):
|
|
'''
|
|
Get the trajectory of a user from her data.
|
|
|
|
Parameters:
|
|
data - The data of the user.
|
|
Returns:
|
|
traj - A list [(timestamp, loc)] with the locations and corresponding timestamps that the user was at.
|
|
'''
|
|
traj = []
|
|
for d in data:
|
|
traj.append((d[1], d[4]))
|
|
if DEBUG:
|
|
print(traj)
|
|
return traj
|
|
|
|
|
|
def get_poss_trans(data):
|
|
'''
|
|
Get all the possible transitions.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
trans - A set with all the possible forward transitions in the input.
|
|
'''
|
|
print('Getting possible transitions... ', end='', flush=True)
|
|
trans = set()
|
|
for u, u_data in data.items():
|
|
traj = get_usr_traj(u_data)
|
|
trans.update(list(zip(traj, traj[1:])))
|
|
if len(trans) == 0:
|
|
print('None.', flush=True)
|
|
exit()
|
|
print('OK.', flush=True)
|
|
return trans
|
|
|
|
|
|
def get_bwd_trans(data):
|
|
'''
|
|
Get all backward transitions in a data set.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
trans - A dict {(t, t-1):[transitions]} with all the backward transitions
|
|
at every sequential timestamp pair in the input data set.
|
|
'''
|
|
print('Getting all backward transitions... ', end='', flush=True)
|
|
trans = {}
|
|
for u, u_data in data.items():
|
|
traj = get_usr_traj(u_data)
|
|
for i, j in zip(traj[1:], traj):
|
|
trans[(i[0], j[0])] = trans.get((i[0], j[0]), []) + [(i[1], j[1])]
|
|
if len(trans) == 0:
|
|
print('None.', flush=True)
|
|
exit()
|
|
print('OK.', flush=True)
|
|
return trans
|
|
|
|
|
|
def get_fwd_trans(data):
|
|
'''
|
|
Get all forward transitions in a data set.
|
|
|
|
Parameters:
|
|
data - The input data set.
|
|
Returns:
|
|
trans - A dict {(t-1, t):[transitions]} with all the forward transitions
|
|
at every sequential timestamp pair in the input data set.
|
|
'''
|
|
print('Getting all forward transitions... ', end='', flush=True)
|
|
trans = {}
|
|
for u, u_data in data.items():
|
|
traj = get_usr_traj(u_data)
|
|
for i, j in zip(traj, traj[1:]):
|
|
trans[(i[0], j[0])] = trans.get((i[0], j[0]), []) + [(i[1], j[1])]
|
|
if len(trans) == 0:
|
|
print('None.', flush=True)
|
|
exit()
|
|
print('OK.', flush=True)
|
|
return trans
|
|
|
|
|
|
def safe_div(a, b):
|
|
'''
|
|
Divide two numbers. If the divisor is 0 return inf.
|
|
|
|
Parameters:
|
|
a - The dividend.
|
|
b - The divisor.
|
|
Returns:
|
|
The float result of the division.
|
|
'''
|
|
if b == 0:
|
|
return math.inf
|
|
return float(a/b)
|
|
|
|
|
|
def max_val(q, d, a):
|
|
'''
|
|
Calculate the maximum value of the objective function.
|
|
|
|
Parameters:
|
|
q - A row from the transition matrix.
|
|
d - Another row from the transition matrix.
|
|
a - The backward/forward privacy loss of the previous/next
|
|
timestamp.
|
|
Returns:
|
|
The maximum value of the objective function.
|
|
'''
|
|
if a == math.inf:
|
|
return math.nan
|
|
return (q*(math.exp(a) - 1) + 1)/(d*(math.exp(a) - 1) + 1)
|
|
|
|
|
|
def find_qd(p, a):
|
|
'''
|
|
Find two different rows (q and d) of a transition matrix (p)
|
|
that maximize the product of the objective function and return
|
|
their sums.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward/forward
|
|
correlations.
|
|
a - The backward/forward privacy loss of the previous/next
|
|
timestamp.
|
|
Returns:
|
|
sum_q - The sum of the elements of q.
|
|
sum_d - The sum of the elements of d.
|
|
'''
|
|
res = 0.0
|
|
sum_q, sum_d = 0.0, 0.0
|
|
for q in p: # A row from the transition matrix.
|
|
for d in p: # Another row from the transition matrix.
|
|
if not np.array_equal(q, d) and sum(q) and sum(d):
|
|
plus = [] # Indexes of the elements of q+ and d+.
|
|
for i in range(np.shape(p)[1]): # Iterate each column.
|
|
if q[i] > d[i]:
|
|
plus.append(i)
|
|
s_q, s_d = 0.0, 0.0
|
|
update = True
|
|
while update:
|
|
update = False
|
|
for i in plus:
|
|
s_q += q[i]
|
|
s_d += d[i]
|
|
max_tmp = max_val(s_q, s_d, a)
|
|
for i in plus:
|
|
if safe_div(q[i], d[i]) <= max_tmp:
|
|
plus.remove(i)
|
|
update = True
|
|
res_tmp = math.log(max_val(s_q, s_d, a))
|
|
if res < res_tmp:
|
|
res = res_tmp
|
|
sum_q, sum_d = s_q, s_d
|
|
return sum_q, sum_d
|
|
|
|
|
|
def gen_data(usrs, timestamps, locs):
|
|
'''
|
|
Generate data.
|
|
|
|
Parameters:
|
|
usrs - The number of users.
|
|
timestamps - The number of timestamps.
|
|
locs - The numner of locations.
|
|
Returns:
|
|
data - The generated data.
|
|
'''
|
|
print('Generating data... ', end='', flush=True)
|
|
# Generate timestamps.
|
|
ts = []
|
|
t_start = datetime.datetime.now()
|
|
for t in range(timestamps):
|
|
t_start += datetime.timedelta(minutes=t)
|
|
ts.append(str(t_start.strftime('%Y-%m-%d %H:%M')))
|
|
# Randomly generate unique possible transitions.
|
|
trans = set()
|
|
for i in range(locs**2):
|
|
trans.add((str(random.randint(1, locs)), str(random.randint(1, locs))))
|
|
# Generate the data.
|
|
data = []
|
|
for u in range(1, usrs + 1):
|
|
# Choose a random starting location.
|
|
l_start = random.sample(trans, 1)[0][0]
|
|
for t in ts:
|
|
# Choose a random next location.
|
|
while True:
|
|
l_nxt = str(random.randint(1, locs))
|
|
if (l_start, l_nxt) in trans:
|
|
data.append([str(u), str(t), str(l_nxt), str(l_nxt), str(l_nxt)])
|
|
break
|
|
print('OK.', flush=True)
|
|
return data
|
|
|
|
|
|
def gen_trans_mt(n, s):
|
|
'''
|
|
Generate a transition matrix.
|
|
|
|
Parameters:
|
|
n - The dimension of the matrix.
|
|
s - The correlation degree of each row [0, 1].
|
|
The lower its value, the lower the degree of
|
|
uniformity of each row.
|
|
Returns:
|
|
p_ - The transition matrix.
|
|
'''
|
|
if DEBUG:
|
|
print('Generating transition matrix %dx%d with s = %.4f... ' %(n, n, s), end='', flush=True)
|
|
p = np.zeros((n, n), float)
|
|
np.fill_diagonal(p, 1.0)
|
|
p_ = np.zeros((n, n), float)
|
|
for j, p_j in enumerate(p[:, ]):
|
|
for k, p_jk in enumerate(p_j):
|
|
p_[j, k] = round((p[j, k] + s) / (sum(p_j) + len(p_j)*s), 4)
|
|
if DEBUG:
|
|
print('OK.', flush=True)
|
|
if DEBUG:
|
|
print(p_)
|
|
return p_
|
|
|
|
|
|
def get_trans_mt(locs, trans):
|
|
'''
|
|
Get the transition matrix
|
|
|
|
Parameters:
|
|
locs - A list of all the locations.
|
|
trans - A list of all transitions.
|
|
Returns:
|
|
p - A 2d dict {{locs}{locs}} containing the
|
|
corresponding location transition probabilities.
|
|
'''
|
|
if DEBUG:
|
|
print('Generating the transition matrix... ', end='', flush=True)
|
|
# Initialize the transition matrix.
|
|
p = collections.defaultdict(dict)
|
|
for i in locs:
|
|
for j in locs:
|
|
p[i][j] = 0.0
|
|
# Populate the transition matrix.
|
|
for i in locs:
|
|
for j in locs:
|
|
# Count the transitions from i to j.
|
|
cnt = collections.Counter(trans)[(i, j)]
|
|
# Count the transitions from i to any location.
|
|
sum_cnts = len([tr for tr in trans if tr[0] == i])
|
|
res = 0.0
|
|
if cnt != 0 and sum_cnts != 0:
|
|
res = cnt/sum_cnts
|
|
# Update transition matrix.
|
|
p[i][j] = res
|
|
if DEBUG:
|
|
print('OK.', flush=True)
|
|
for i in locs:
|
|
print(p[i])
|
|
return p
|
|
|
|
|
|
def get_entropy(mt):
|
|
'''
|
|
Calculate the measure-theoretic (Kolmogorov-Sinai) entropy
|
|
of a transition matrix.
|
|
|
|
Parameters:
|
|
mt - A 2d dict transition matrix.
|
|
Returns:
|
|
h - The Kolmogorov-Sinai entropy of the matrix.
|
|
'''
|
|
if DEBUG:
|
|
print('Calculating the measure-theoretic entropy... ', end='', flush=True)
|
|
h = 0.0
|
|
# Get the 2d array of the transition matrix.
|
|
a = get_2darray(mt)
|
|
# Check if the transition matrix is empty.
|
|
if np.allclose(a, np.zeros((len(a), len(a)), float)):
|
|
return h
|
|
# Find the stationary distribution (v), written as a row
|
|
# vector, that does not change under the application of
|
|
# the transition matrix. In other words, the left eigenvector
|
|
# associated with eigenvalue (w) 1. It is also called
|
|
# Perron-Frobenius eigenvector, since it is a stochastic
|
|
# matrix and the largest magnitude of the eigenvalues must
|
|
# be 1. We use the transpose of the transition matrix to get
|
|
# the left eigenvector.
|
|
w, v = eigs(a.T, k=1, which='LM')
|
|
# Normalize the eigenvector. The vector is a probability
|
|
# distribution, and therefore its sum must be 1. Thus, we use
|
|
# its absolute elements' values and its l1-norm to normalize it.
|
|
p = np.absolute(v[:,0].real)
|
|
p_norm = np.linalg.norm(p, ord=1)
|
|
if p_norm:
|
|
p = p/p_norm
|
|
# Calculate the entropy.
|
|
if np.isclose(w.real[0], 1, atol=1e-03):
|
|
for i in range(len(a)):
|
|
for j in range(len(a)):
|
|
# When the transition probability is 0 use the limiting value (0).
|
|
if not np.isclose(a[i][j], 0, atol=1e-03) and not np.isclose(p[i], 0, atol=1e-03):
|
|
# Binary logarithm measures entropy in bits.
|
|
h += -p[i]*a[i][j]*math.log2(a[i][j])
|
|
if DEBUG:
|
|
print(h, flush=True)
|
|
return h
|
|
|
|
|
|
def get_2darray(mt):
|
|
'''
|
|
Convert a 2d dict to a 2d array.
|
|
|
|
Parameters:
|
|
mt - The 2d dict.
|
|
Returns:
|
|
p - The 2d numpy array.
|
|
'''
|
|
if type(mt) == type(np.array([])):
|
|
return mt
|
|
p = np.zeros((len(mt), len(mt)), float)
|
|
for i, mt_i in enumerate(mt.items()):
|
|
p[i] = list(mt_i[1].values())
|
|
return p
|
|
|
|
|
|
def get_laplace_pd(ts, t, sc):
|
|
'''
|
|
Get a Laplace probability distribution.
|
|
|
|
Parameters:
|
|
ts - The points of the distribution.
|
|
t - The location of the distribution.
|
|
sc - The scale of the distribution.
|
|
Returns:
|
|
The probability distribution.
|
|
'''
|
|
x = np.arange(0, len(ts), 1)
|
|
loc = np.where(ts == t)
|
|
return laplace.pdf(x, loc=loc, scale=sc)[0]
|
|
|
|
|
|
def get_norm_pd(ts, t, sc):
|
|
'''
|
|
Get a Gaussian probability distribution.
|
|
|
|
Parameters:
|
|
ts - The points of the distribution.
|
|
t - The location of the distribution.
|
|
sc - The scale of the distribution.
|
|
Returns:
|
|
The probability distribution.
|
|
'''
|
|
x = np.arange(0, len(ts), 1)
|
|
loc = np.where(ts == t)
|
|
return norm.pdf(x, loc=loc, scale=sc)[0]
|
|
|
|
|
|
def get_sample(ts, t, pct, pd):
|
|
'''
|
|
Get a sample from the time series.
|
|
|
|
Parameters:
|
|
ts - An ndarray of the timestamps.
|
|
t - The current timestamp.
|
|
pd - The probability distribution.
|
|
ptn - The desired portion [0, 1] of the non-zero elements
|
|
of the probability distribution to be sampled.
|
|
Returns:
|
|
spl - An ndarray of the sampled timestamps.
|
|
'''
|
|
if DEBUG:
|
|
print('Sampling %.2f%% of %s at %s... ' %(pct*100, ts, t), end='', flush=True)
|
|
# Check that it is a valid timestamp.
|
|
if not t in ts:
|
|
print('%s is not in the time series.' %(t))
|
|
exit()
|
|
# Remove the current timestamp from the possible sample.
|
|
i = np.where(ts == t)
|
|
ts = np.delete(ts, i)
|
|
pd = np.delete(pd, i)
|
|
# Make sure that the probabilities sum to 1.
|
|
pd_norm = pd/np.linalg.norm(pd, ord=1)
|
|
# Get the sorted sample.
|
|
spl = np.sort(np.random.choice(ts, size=int(np.count_nonzero(pd)*pct), replace=False, p=pd_norm))
|
|
if DEBUG:
|
|
print(spl, flush=True)
|
|
return spl
|
|
|
|
|
|
def priv_l(p, a, e):
|
|
'''
|
|
Calculate the backward/forward privacy loss at the current
|
|
timestamp.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward/forward
|
|
temporal correlations.
|
|
a - The privacy loss of the previous/next timestamp.
|
|
e - The privacy budget for data publishing.
|
|
Returns:
|
|
The backward/forward privacy loss at the current
|
|
timestamp.
|
|
'''
|
|
sum_q, sum_d = find_qd(p, a)
|
|
return math.log(max_val(sum_q, sum_d, a)) + e
|
|
|
|
|
|
def priv_l_m(p, a, e):
|
|
'''
|
|
Calculate the backward/forward privacy loss at the current
|
|
timestamp using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward/forward
|
|
temporal correlations.
|
|
a - The privacy loss of the previous/next timestamp.
|
|
e - The privacy budget for data publishing.
|
|
Returns:
|
|
The backward/forward privacy loss at the current
|
|
timestamp.
|
|
'''
|
|
key = xxhash.xxh64(p).hexdigest() + str(a) + str(e)
|
|
global MEM, TOTAL, MISS
|
|
TOTAL += 1
|
|
result = MEM.get(key)
|
|
if not result:
|
|
MISS += 1
|
|
sum_q, sum_d = find_qd(p, a)
|
|
result = math.log(max_val(sum_q, sum_d, a)) + e
|
|
MEM[key] = result
|
|
return result
|
|
|
|
|
|
def bpl(p, a, e, t):
|
|
'''
|
|
Calculate the total backward privacy loss at every timestamp.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
a - The backward privacy loss of every release.
|
|
e - The privacy budget for data publishing.
|
|
t - The time limit.
|
|
Returns:
|
|
a - The backward privacy loss at every timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
a[0] = e[0]
|
|
for i in range(1, t):
|
|
a[i] = priv_l(p, a[i - 1], e[i])
|
|
return a
|
|
|
|
|
|
def bpl_m(p, a, e, t):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp with memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
a - The backward privacy loss of the current release
|
|
at all previous timestamps.
|
|
e - The privacy budget for data publishing.
|
|
t - The time limit.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
a[0] = e[0]
|
|
for i in range(1, t):
|
|
a[i] = priv_l_m(p, a[i - 1], e[i])
|
|
return a
|
|
|
|
|
|
def bpl_lmdk_mem(p, a, e, t, lmdk):
|
|
# t is (near) the landmark
|
|
if lmdk == t - 1 or t == lmdk:
|
|
return np.array([0]*len(a))
|
|
# There is no landmark before
|
|
if lmdk < 1:
|
|
lmdk = 1
|
|
a[0] = e[0]
|
|
for i in range(lmdk, t):
|
|
a[i] = priv_l_m(p, a[i - 1], e[i])
|
|
return a
|
|
|
|
|
|
def bpl_s(p, e, i, w):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp using the static model, i.e., previous releases
|
|
are grouped in a window of static size.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group previous releases.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
if i - w > 1:
|
|
# print('bpl_s: %d - %d [%d]' %(i, i - w, w))
|
|
return priv_l(np.linalg.matrix_power(p, w), bpl_s(p, e, i - w, w), e[i - 1])
|
|
elif i - w <= 1:
|
|
# print('bpl_s: %d - %d [%d]' %(i, 1, i - 1))
|
|
return priv_l(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
|
|
elif i == 1:
|
|
# print('bpl_s: %d' %(i))
|
|
return e[0]
|
|
|
|
|
|
def bpl_s_m(p, e, i, w):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp using the static model, i.e., previous releases
|
|
are grouped in a window of static size, using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group previous releases.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
if i - w > 1:
|
|
return priv_l_m(np.linalg.matrix_power(p, w), bpl_s_m(p, e, i - w, w), e[i - 1])
|
|
elif i - w <= 1:
|
|
return priv_l_m(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
|
|
elif i == 1:
|
|
return e[0]
|
|
|
|
|
|
def bpl_l(p, e, i, w, l):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp using the linear model, i.e., previous releases
|
|
are grouped in a window of a size that increases linearly.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group previous releases.
|
|
l - The linearly increasing coefficient that affects the
|
|
window size.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
if i - w*l > 1:
|
|
# print('bpl_l: %d - %d [%d]' %(i, i - w*l, w*l))
|
|
return priv_l(np.linalg.matrix_power(p, w*l), bpl_l(p, e, i - w*l, w, l + 1), e[i - 1])
|
|
elif i - w*l <= 1:
|
|
# print('bpl_l: %d - %d [%d]' %(i, 1, i - 1))
|
|
return priv_l(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
|
|
elif i == 1:
|
|
# print('bpl_l: %d' %(1))
|
|
return e[0]
|
|
|
|
|
|
def bpl_l_m(p, e, i, w, l):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp using the linear model, i.e., previous releases
|
|
are grouped in a window of a size that increases linearly,
|
|
using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group previous releases.
|
|
l - The linearly increasing coefficient that affects the
|
|
window size.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
if i - w*l > 1:
|
|
return priv_l_m(np.linalg.matrix_power(p, w*l), bpl_l_m(p, e, i - w*l, w, l + 1), e[i - 1])
|
|
elif i - w*l <= 1:
|
|
return priv_l_m(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
|
|
elif i == 1:
|
|
return e[0]
|
|
|
|
|
|
def bpl_e(p, e, i, w, h):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp using the exponential model, i.e., previous releases
|
|
are grouped in a window of a size that increases exponentially.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group previous releases.
|
|
h - The exponentially increasing coefficient that affects
|
|
the window size.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
if i - w**h > 1:
|
|
# print('bpl_e: %d - %d [%d]' %(i, i - w**h, w**h))
|
|
return priv_l(np.linalg.matrix_power(p, w**h), bpl_e(p, e, i - w**h, w, h + 1), e[i - 1])
|
|
elif i - w**h <= 1:
|
|
# print('bpl_e: %d - %d [%d]' %(i, 1, i - 1))
|
|
return priv_l(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
|
|
elif i == 1:
|
|
# print('bpl_e: %d' %(1))
|
|
return e[0]
|
|
|
|
|
|
def bpl_e_m(p, e, i, w, h):
|
|
'''
|
|
Calculate the total backward privacy loss at the current
|
|
timestamp using the exponential model, i.e., previous releases
|
|
are grouped in a window of a size that increases exponentially,
|
|
using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the backward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group previous releases.
|
|
h - The exponentially increasing coefficient that affects
|
|
the window size.
|
|
Returns:
|
|
a - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
'''
|
|
if i - w**h > 1:
|
|
return priv_l_m(np.linalg.matrix_power(p, w**h), bpl_e_m(p, e, i - w**h, w, h + 1), e[i - 1])
|
|
elif i - w**h <= 1:
|
|
return priv_l_m(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
|
|
elif i == 1:
|
|
return e[0]
|
|
|
|
|
|
def fpl(p, a, e, t):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
a - The forward privacy loss of the current release
|
|
at all next timestamps.
|
|
e - The privacy budget for data publishing.
|
|
t - The time limit.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
a[t - 1] = e[t - 1]
|
|
for i in range(t - 2, -1, -1):
|
|
a[i] = priv_l(p, a[i + 1], e[i])
|
|
return a
|
|
|
|
|
|
def fpl_m(p, a, e, t):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp, using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
a - The forward privacy loss of the current release
|
|
at all next timestamps.
|
|
e - The privacy budget for data publishing.
|
|
t - The time limit.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
a[t - 1] = e[t - 1]
|
|
for i in range(t - 2, -1, -1):
|
|
a[i] = priv_l_m(p, a[i + 1], e[i])
|
|
return a
|
|
|
|
def fpl_lmdk_mem(p, a, e, t, lmdk):
|
|
# t is (near) the landmark
|
|
if lmdk == t + 1 or t == lmdk:
|
|
return np.array([0]*len(a))
|
|
# There is no landmark after
|
|
if lmdk > len(e):
|
|
lmdk = len(e)
|
|
a[lmdk - 1] = e[lmdk - 1]
|
|
for i in range(lmdk - 2, t - 2, -1):
|
|
a[i] = priv_l_m(p, a[i + 1], e[i])
|
|
return a
|
|
|
|
|
|
def fpl_s(p, e, i, t, w):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp using the static model, i.e., next releases
|
|
are grouped in a window of static size.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group next releases.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
if i + w < t:
|
|
# print('fpl_s: %d - %d [%d]' %(i, i + w, w))
|
|
return priv_l(np.linalg.matrix_power(p, w), fpl_s(p, e, i + w, t, w), e[i - 1])
|
|
elif i + w >= t:
|
|
# print('fpl_s: %d - %d [%d]' %(i, t, t - i))
|
|
return priv_l(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
|
|
elif i == t:
|
|
# print('fpl_s: %d' %(t))
|
|
return e[t - 1]
|
|
|
|
|
|
def fpl_s_m(p, e, i, t, w):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp using the static model, i.e., next releases
|
|
are grouped in a window of static size, using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group next releases.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
if i + w < t:
|
|
return priv_l_m(np.linalg.matrix_power(p, w), fpl_s_m(p, e, i + w, t, w), e[i - 1])
|
|
elif i + w >= t:
|
|
return priv_l_m(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
|
|
elif i == t:
|
|
return e[t - 1]
|
|
|
|
|
|
def fpl_l(p, e, i, t, w, l):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp using the linear model, i.e., next releases
|
|
are grouped in a window of a size that increases linearly.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group next releases.
|
|
l - The linearly increasing coefficient that affects the
|
|
window size.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
if i + w*l < t:
|
|
# print('fpl_l: %d - %d [%d]' %(i, i + w*l, w*l))
|
|
return priv_l(np.linalg.matrix_power(p, w*l), fpl_l(p, e, i + w*l, t, w, l + 1), e[i - 1])
|
|
elif i + w*l >= t:
|
|
# print('fpl_l: %d - %d [%d]' %(i, t, t - i))
|
|
return priv_l(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
|
|
elif i == t:
|
|
# print('fpl_l: %d' %(t))
|
|
return e[t - 1]
|
|
|
|
|
|
def fpl_l_m(p, e, i, t, w, l):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp using the linear model, i.e., next releases
|
|
are grouped in a window of a size that increases linearly,
|
|
using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group next releases.
|
|
l - The linearly increasing coefficient that affects the
|
|
window size.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
if i + w*l < t:
|
|
return priv_l_m(np.linalg.matrix_power(p, w*l), fpl_l_m(p, e, i + w*l, t, w, l + 1), e[i - 1])
|
|
elif i + w*l >= t:
|
|
return priv_l_m(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
|
|
elif i == t:
|
|
return e[t - 1]
|
|
|
|
|
|
def fpl_e(p, e, i, t, w, h):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp using the exponential model, i.e., next releases
|
|
are grouped in a window of a size that increases exponentially.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group next releases.
|
|
h - The exponentially increasing coefficient that affects
|
|
the window size.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
if i + w**h < t:
|
|
# print('fpl_e: %d - %d [%d]' %(i, i + w**h, w**h))
|
|
return priv_l(np.linalg.matrix_power(p, w**h), fpl_e(p, e, i + w**h, t, w, h + 1), e[i - 1])
|
|
elif i + w**h >= t:
|
|
# print('fpl_e: %d - %d [%d]' %(i, t, t - i))
|
|
return priv_l(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
|
|
elif i == t:
|
|
# print('fpl_e: %d' %(t))
|
|
return e[t - 1]
|
|
|
|
|
|
def fpl_e_m(p, e, i, t, w, h):
|
|
'''
|
|
Calculate the total forward privacy loss at the current
|
|
timestamp using the exponential model, i.e., next releases
|
|
are grouped in a window of a size that increases exponentially,
|
|
using memoization.
|
|
|
|
Parameters:
|
|
p - The transition matrix representing the forward
|
|
temporal correlations.
|
|
e - The privacy budget for data publishing.
|
|
i - The timestamp of interest.
|
|
w - The window size to group next releases.
|
|
h - The exponentially increasing coefficient that affects
|
|
the window size.
|
|
Returns:
|
|
a - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
'''
|
|
if i + w**h < t:
|
|
return priv_l_m(np.linalg.matrix_power(p, w**h), fpl_e_m(p, e, i + w**h, t, w, h + 1), e[i - 1])
|
|
elif i + w**h >= t:
|
|
return priv_l_m(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
|
|
elif i == t:
|
|
return e[t - 1]
|
|
|
|
|
|
def tpl(bpl, fpl, e):
|
|
'''
|
|
Calculate the total privacy loss at every timestamp.
|
|
|
|
Parameters:
|
|
bpl - The backward privacy loss.
|
|
fpl - The forward privacy loss.
|
|
e - The privacy budget for data publishing.
|
|
Returns:
|
|
The list of total privacy loss at every timestamp.
|
|
'''
|
|
return [x + y - z for (x, y, z) in zip(bpl, fpl, e)]
|
|
|
|
|
|
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
|
'''
|
|
Calculate the temporal privacy loss at every timestamp
|
|
taking into account landmarks.
|
|
|
|
Parameters:
|
|
e - The privacy budget for data publishing.
|
|
p_b - The transition matrix representing the backward
|
|
temporal correlations.
|
|
p_f - The transition matrix representing the forward
|
|
temporal correlations.
|
|
seq - The point sequence.
|
|
lmdks - The landmarks.
|
|
Returns:
|
|
a_b - The backward privacy loss at the current timestamp
|
|
due to the previous data releases.
|
|
a_f - The forward privacy loss at the current timestamp
|
|
due to the next data releases.
|
|
a - The total privacy loss at every timestamp
|
|
taking into account landmarks.
|
|
'''
|
|
a_b = np.zeros(len(seq))
|
|
a_f = np.zeros(len(seq))
|
|
a = np.zeros(len(seq))
|
|
for t in seq:
|
|
t_prv, t_nxt = get_limits(t, seq, lmdks)
|
|
a_b[t - 1] = bpl_lmdk_mem(p_b, np.zeros(max(seq)), e, t, t_prv)[t - 1]
|
|
a_f[t - 1] = fpl_lmdk_mem(p_f, np.zeros(max(seq)), e, t, t_nxt)[t - 1]
|
|
if a_b[t - 1] > 0 and a_f[t - 1] > 0:
|
|
a[t - 1] = a_b[t - 1] + a_f[t - 1] - e[t - 1]
|
|
elif a_b[t - 1] > 0 or a_f[t - 1] > 0:
|
|
a[t - 1] = a_b[t - 1] + a_f[t - 1]
|
|
else:
|
|
a[t - 1] = e[t - 1]
|
|
return a_b, a_f, a
|
|
|
|
|
|
def get_limits(t, seq, lmdks):
|
|
'''
|
|
Get the limits for the calculation of temporal privacy loss.
|
|
|
|
Parameters:
|
|
t - The current timestamp.
|
|
seq - The point sequence.
|
|
lmdks - The landmarks.
|
|
Returns:
|
|
t_prv - The previous landmark.
|
|
t_nxt - The next landmark.
|
|
'''
|
|
# Add landmark limits.
|
|
seq_lmdks = np.copy(lmdks)
|
|
# if seq[0] not in seq_lmdks:
|
|
# seq_lmdks = np.append(seq_lmdks, seq[0])
|
|
# if seq[len(seq) - 1] not in seq_lmdks:
|
|
# seq_lmdks = np.append(seq_lmdks, seq[len(seq) - 1])
|
|
# seq_lmdks = np.sort(seq_lmdks)
|
|
# Initialize previous and next landmarks.
|
|
t_prv = 0
|
|
t_nxt = len(seq) + 1
|
|
# Add current timestamp to landmarks.
|
|
seq_lmdks_tmp = np.copy(seq_lmdks)
|
|
if t not in seq_lmdks_tmp[:]:
|
|
seq_lmdks_tmp = np.append(seq_lmdks, t)
|
|
seq_lmdks_tmp = np.sort(seq_lmdks_tmp)
|
|
# Find the index of the current timestamp in the landmarks.
|
|
idx, = np.where(seq_lmdks_tmp == t)
|
|
i = idx[0]
|
|
# Find previous landmark.
|
|
if i != 0:
|
|
t_prv = seq_lmdks_tmp[i - 1]
|
|
# Find next landmark.
|
|
if i != len(seq_lmdks_tmp) - 1:
|
|
t_nxt = seq_lmdks_tmp[i + 1]
|
|
return t_prv, t_nxt
|
|
|
|
|
|
def plot_loss(title, e, a_b, a_f, a):
|
|
'''
|
|
Plots the privacy loss of the time series.
|
|
|
|
Parameters:
|
|
title - The title of the plot.
|
|
e - The privacy budget for data publishing.
|
|
a_b - The backward privacy loss.
|
|
a_f - The forward privacy loss.
|
|
a - The total privacy loss.
|
|
Returns:
|
|
Nothing.
|
|
'''
|
|
plt.rc('font', family='serif')
|
|
plt.rc('font', size=10)
|
|
plt.rc('text', usetex=True)
|
|
p = np.arange(1.0, len(e) + 1, 1)
|
|
plt.plot(p,
|
|
e,
|
|
label=r'$\varepsilon$',
|
|
color='gray',
|
|
marker='s')
|
|
plt.plot(p,
|
|
a_b,
|
|
label=r'$\alpha^B$',
|
|
color='#e52e4e', # red
|
|
marker='<')
|
|
plt.plot(p,
|
|
a_f,
|
|
label=r'$\alpha^F$',
|
|
color='#009874', # green
|
|
marker='>')
|
|
plt.plot(p,
|
|
a,
|
|
label=r'$\alpha$',
|
|
color='#6082b6', # blue
|
|
marker='d')
|
|
plt.axis([1.0, len(e),
|
|
0.0, 1.5*max(a)])
|
|
plt.legend(loc='best', frameon=True)
|
|
plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
|
|
plt.xlabel('time') # Set x axis label.
|
|
plt.ylabel('privacy loss') # Set y axis label.
|
|
plt.title(title)
|
|
plt.show()
|
|
|
|
|
|
def cmp_loss(title, a, a_s, a_e, a_l):
|
|
'''
|
|
Plots a comparison of the privacy loss of all models.
|
|
|
|
Parameters:
|
|
title - The title of the plot.
|
|
a - The privacy loss of the basic model.
|
|
a_s - The privacy loss of the static model.
|
|
a_e - The privacy loss of the exponential model.
|
|
a_l - The privacy loss of the linear model.
|
|
Returns:
|
|
Nothing.
|
|
'''
|
|
plt.rc('font', family='serif')
|
|
plt.rc('font', size=10)
|
|
plt.rc('text', usetex=True)
|
|
p = np.arange(1.0, len(a) + 1, 1)
|
|
plt.plot(p,
|
|
a,
|
|
label=r'$\alpha$',
|
|
color='red',
|
|
marker='s')
|
|
plt.plot(p,
|
|
a_s,
|
|
label=r'$\alpha_s$',
|
|
color='green',
|
|
marker='>')
|
|
plt.plot(p,
|
|
a_e,
|
|
label=r'$\alpha_e$',
|
|
color='blue',
|
|
marker='d')
|
|
plt.plot(p,
|
|
a_l,
|
|
label=r'$\alpha_l$',
|
|
color='yellow',
|
|
marker='<')
|
|
plt.axis([1.0, len(a),
|
|
0.0, 1.5*max(a)])
|
|
plt.legend(loc='best', frameon=True)
|
|
plt.grid(axis='y', alpha=1.0) # Add grid on y axis.
|
|
plt.xlabel('time') # Set x axis label.
|
|
plt.ylabel('privacy loss') # Set y axis label.
|
|
plt.title(title)
|
|
plt.show()
|
|
|
|
|
|
def parse_args():
|
|
'''
|
|
Parse arguments.
|
|
|
|
Mandatory:
|
|
model, The model to be used: (b)asic,
|
|
(s)tatic, (l)inear, (e)xponential.
|
|
|
|
Optional:
|
|
-c, --correlation, The correlation degree.
|
|
-d, --debug, Enable debugging mode.
|
|
-e, --epsilon, The available privacy budget.
|
|
-m, --matrix, The size of the transition matrix.
|
|
-o, --output, The path to the output directory.
|
|
-t, --time, The time limit.
|
|
-w, --window, The size of the event protection window.
|
|
'''
|
|
# Create argument parser.
|
|
parser = argparse.ArgumentParser()
|
|
|
|
# Mandatory arguments.
|
|
parser.add_argument('model', help='The model to be used.', choices=['b', 's', 'l', 'e'], type=str)
|
|
|
|
# Optional arguments.
|
|
parser.add_argument('-c', '--correlation', help='The correlation degree.', type=float, default=0.1)
|
|
parser.add_argument('-d', '--debug', help='Enable debugging mode.', action='store_true')
|
|
parser.add_argument('-e', '--epsilon', help='The available privacy budget.', type=float, default=1.0)
|
|
parser.add_argument('-m', '--matrix', help='The matrix size.', type=int, default=2)
|
|
parser.add_argument('-o', '--output', help='The path to the output directory.', type=str, default='')
|
|
parser.add_argument('-t', '--time', help='The time limit.', type=int, default=1)
|
|
parser.add_argument('-w', '--window', help='The size of the event protection window.', type=int, default=1)
|
|
|
|
# Parse arguments.
|
|
args = parser.parse_args()
|
|
|
|
return args
|
|
|
|
|
|
def main(args):
|
|
|
|
global DEBUG, MISS, TOTAL
|
|
DEBUG = args.debug
|
|
MISS, TOTAL = 0, 0
|
|
|
|
e = [args.epsilon]*args.time
|
|
|
|
# Generate transition matrices.
|
|
p_b = gen_trans_mt(args.matrix, args.correlation)
|
|
p_f = gen_trans_mt(args.matrix, args.correlation)
|
|
|
|
if DEBUG:
|
|
# Run basic for comparison.
|
|
a_b = bpl(p_b, [0.0] * args.time, e, args.time)
|
|
a_f = fpl(p_f, [0.0] * args.time, e, args.time)
|
|
a = tpl(a_b, a_f, e)
|
|
|
|
output_path = args.output +\
|
|
'/' + args.model +\
|
|
'_c' + str(args.correlation) +\
|
|
'_e' + str(args.epsilon) +\
|
|
'_m' + str(args.matrix) +\
|
|
'_t' + str(args.time) +\
|
|
'_w' + str(args.window)
|
|
|
|
if args.model == 'b':
|
|
# Basic
|
|
|
|
# start_time = time.process_time()
|
|
a_b = bpl_m(p_b, [0.0] * args.time, e, args.time)
|
|
a_f = fpl_m(p_f, [0.0] * args.time, e, args.time)
|
|
a = tpl(a_b, a_f, e)
|
|
# duration1 = time.process_time() - start_time
|
|
|
|
if DEBUG:
|
|
print('\Basic\n'
|
|
't\tbpl\t\tfpl\t\ttpl')
|
|
for i in range(0, args.time):
|
|
print('%d\t%f\t%f\t%f' %(i + 1, a_b[i], a_f[i], a[i]))
|
|
print('\nSUM\t%f\t%f\t%f' %(sum(a_b), sum(a_f), sum(a)))
|
|
print('AVG\t%f\t%f\t%f' %(sum(a_b)/args.time, sum(a_f)/args.time, sum(a)/args.time))
|
|
plot_loss('Basic', e, a_b, a_f, a)
|
|
if args.Returns:
|
|
save_output(output_path, args.time, e, a_b, a_f, a)
|
|
|
|
# start_time = time.process_time()
|
|
# a_b_m = bpl_m(p_b, [0.0] * args.time, e, args.time)
|
|
# a_f_m = fpl_m(p_f, [0.0] * args.time, e, args.time)
|
|
# a_m = tpl(a_b_m, a_f_m, e)
|
|
# duration2 = time.process_time() - start_time
|
|
|
|
# diff = ((duration2 - duration1)/duration1)*100
|
|
# success = safe_div((TOTAL - MISS)*100, TOTAL)
|
|
# print('##############################\n'
|
|
# 'basic : %.4fs\n'
|
|
# 'w/ mem: %.4fs\n'
|
|
# '- succ: %d/%d (%.2f%%)\n'
|
|
# 'diff : %.4fs (%.2f%%)\n'
|
|
# 'OK : %r'
|
|
# %(duration1,
|
|
# duration2,
|
|
# TOTAL - MISS, TOTAL, success,
|
|
# duration2 - duration1, diff,
|
|
# np.array_equal(a, a_m)))
|
|
# MISS, TOTAL = 0, 0
|
|
elif args.model == 's':
|
|
# Static
|
|
|
|
# start_time = time.process_time()
|
|
a_s_b, a_s_f = [None]*args.time, [None]*args.time
|
|
for i in range(1, args.time + 1):
|
|
a_s_b[i - 1] = bpl_s_m(p_b, e, i, args.window)
|
|
a_s_f[i - 1] = fpl_s_m(p_f, e, i, args.time, args.window)
|
|
a_s = tpl(a_s_b, a_s_f, e)
|
|
# duration1 = time.process_time() - start_time
|
|
|
|
if DEBUG:
|
|
print('\nStatic\n'
|
|
't\tbpl\t\t\tfpl\t\t\ttpl')
|
|
for i in range(0, args.time):
|
|
print('%d\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(i + 1,
|
|
a_s_b[i], (a_s_b[i] - a_b[i])/a_b[i]*100,
|
|
a_s_f[i], (a_s_f[i] - a_f[i])/a_f[i]*100,
|
|
a_s[i], (a_s[i] - a[i])/a[i]*100))
|
|
print('\nSUM\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(sum(a_s_b), ((sum(a_s_b) - sum(a_b))/sum(a_b))*100,
|
|
sum(a_s_f), ((sum(a_s_f) - sum(a_f))/sum(a_f))*100,
|
|
sum(a_s), ((sum(a_s) - sum(a))/sum(a))*100))
|
|
print('AVG\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(sum(a_s_b)/args.time, ((sum(a_s_b)/args.time - sum(a_b)/args.time)/(sum(a_b)/args.time))*100,
|
|
sum(a_s_f)/args.time, ((sum(a_s_f)/args.time - sum(a_f)/args.time)/(sum(a_f)/args.time))*100,
|
|
sum(a_s)/args.time, ((sum(a_s)/args.time - sum(a)/args.time)/(sum(a)/args.time))*100))
|
|
plot_loss('Static', e, a_s_b, a_s_f, a_s)
|
|
if args.Returns:
|
|
save_output(output_path, args.time, e, a_s_b, a_s_f, a_s)
|
|
|
|
# start_time = time.process_time()
|
|
# a_s_b_m, a_s_f_m = [None]*args.time, [None]*args.time
|
|
# for i in range(1, args.time + 1):
|
|
# a_s_b_m[i - 1] = bpl_s_m(p_b, e, i, args.window)
|
|
# a_s_f_m[i - 1] = fpl_s_m(p_f, e, i, args.time, args.window)
|
|
# a_s_m = tpl(a_s_b_m, a_s_f_m, e)
|
|
# duration2 = time.process_time() - start_time
|
|
|
|
# diff = ((duration2 - duration1)/duration1)*100
|
|
# success = safe_div((TOTAL - MISS)*100, TOTAL)
|
|
# print('##############################\n'
|
|
# 'static: %.4fs\n'
|
|
# 'w/ mem: %.4fs\n'
|
|
# '- succ: %d/%d (%.2f%%)\n'
|
|
# 'diff : %.4fs (%.2f%%)\n'
|
|
# 'OK : %r'
|
|
# %(duration1,
|
|
# duration2,
|
|
# TOTAL - MISS, TOTAL, success,
|
|
# duration2 - duration1, diff,
|
|
# np.array_equal(a_s, a_s_m)))
|
|
# MISS, TOTAL = 0, 0
|
|
elif args.model == 'l':
|
|
# Linear
|
|
l = 1
|
|
|
|
# start_time = time.process_time()
|
|
a_l_b, a_l_f = [None]*args.time, [None]*args.time
|
|
for i in range(1, args.time + 1):
|
|
a_l_b[i - 1] = bpl_l_m(p_b, e, i, args.window, l)
|
|
a_l_f[i - 1] = fpl_l_m(p_f, e, i, args.time, args.window, l)
|
|
a_l = tpl(a_l_b, a_l_f, e)
|
|
# duration1 = time.process_time() - start_time
|
|
|
|
if DEBUG:
|
|
print('\nLinear\n'
|
|
't\tbpl\t\t\tfpl\t\t\ttpl')
|
|
for i in range(0, args.time):
|
|
print('%d\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(i + 1,
|
|
a_l_b[i], (a_l_b[i] - a_b[i])/a_b[i]*100,
|
|
a_l_f[i], (a_l_f[i] - a_f[i])/a_f[i]*100,
|
|
a_l[i], (a_l[i] - a[i])/a[i]*100))
|
|
print('\nSUM\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(sum(a_l_b), ((sum(a_l_b) - sum(a_b))/sum(a_b))*100,
|
|
sum(a_l_f), ((sum(a_l_f) - sum(a_f))/sum(a_f))*100,
|
|
sum(a_l), ((sum(a_l) - sum(a))/sum(a))*100))
|
|
print('AVG\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(sum(a_l_b)/args.time, ((sum(a_l_b)/args.time - sum(a_b)/args.time)/(sum(a_b)/args.time))*100,
|
|
sum(a_l_f)/args.time, ((sum(a_l_f)/args.time - sum(a_f)/args.time)/(sum(a_f)/args.time))*100,
|
|
sum(a_l)/args.time, ((sum(a_l)/args.time - sum(a)/args.time)/(sum(a)/args.time))*100))
|
|
plot_loss('Linear', e, a_l_b, a_l_f, a_l)
|
|
if args.Returns:
|
|
save_output(output_path, args.time, e, a_l_b, a_l_f, a_l)
|
|
|
|
# start_time = time.process_time()
|
|
# a_l_b_m, a_l_f_m = [None]*args.time, [None]*args.time
|
|
# for i in range(1, args.time + 1):
|
|
# a_l_b_m[i - 1] = bpl_l_m(p_b, e, i, args.window, l)
|
|
# a_l_f_m[i - 1] = fpl_l_m(p_f, e, i, args.time, args.window, l)
|
|
# a_l_m = tpl(a_l_b_m, a_l_f_m, e)
|
|
# duration2 = time.process_time() - start_time
|
|
|
|
# diff = ((duration2 - duration1)/duration1)*100
|
|
# success = safe_div((TOTAL - MISS)*100, TOTAL)
|
|
# print('##############################\n'
|
|
# 'linear: %.4fs\n'
|
|
# 'w/ mem: %.4fs\n'
|
|
# '- succ: %d/%d (%.2f%%)\n'
|
|
# 'diff : %.4fs (%.2f%%)\n'
|
|
# 'OK : %r'
|
|
# %(duration1,
|
|
# duration2,
|
|
# TOTAL - MISS, TOTAL, success,
|
|
# duration2 - duration1, diff,
|
|
# np.array_equal(a_l, a_l_m)))
|
|
# MISS, TOTAL = 0, 0
|
|
elif args.model == 'e':
|
|
# Exponential
|
|
h = 1
|
|
|
|
# start_time = time.process_time()
|
|
a_e_b, a_e_f = [None]*args.time, [None]*args.time
|
|
for i in range(1, args.time + 1):
|
|
a_e_b[i - 1] = bpl_e_m(p_b, e, i, args.window, h)
|
|
a_e_f[i - 1] = fpl_e_m(p_f, e, i, args.time, args.window, h)
|
|
a_e = tpl(a_e_b, a_e_f, e)
|
|
# duration1 = time.process_time() - start_time
|
|
|
|
if DEBUG:
|
|
print('\nExponential\n'
|
|
't\tbpl\t\t\tfpl\t\t\ttpl')
|
|
for i in range(0, args.time):
|
|
print('%d\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(i + 1,
|
|
a_e_b[i], (a_e_b[i] - a_b[i])/a_b[i]*100,
|
|
a_e_f[i], (a_e_f[i] - a_f[i])/a_f[i]*100,
|
|
a_e[i], (a_e[i] - a[i])/a[i]*100))
|
|
print('\nSUM\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(sum(a_e_b), ((sum(a_e_b) - sum(a_b))/sum(a_b))*100,
|
|
sum(a_e_f), ((sum(a_e_f) - sum(a_f))/sum(a_f))*100,
|
|
sum(a_e), ((sum(a_e) - sum(a))/sum(a))*100))
|
|
print('AVG\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
|
|
%(sum(a_e_b)/args.time, ((sum(a_e_b)/args.time - sum(a_b)/args.time)/(sum(a_b)/args.time))*100,
|
|
sum(a_e_f)/args.time, ((sum(a_e_f)/args.time - sum(a_f)/args.time)/(sum(a_f)/args.time))*100,
|
|
sum(a_e)/args.time, ((sum(a_e)/args.time - sum(a)/args.time)/(sum(a)/args.time))*100))
|
|
plot_loss('Exponential', e, a_e_b, a_e_f, a_e)
|
|
if args.Returns:
|
|
save_output(output_path, args.time, e, a_e_b, a_e_f, a_e)
|
|
|
|
# start_time = time.process_time()
|
|
# a_e_b_m, a_e_f_m = [None]*args.time, [None]*args.time
|
|
# for i in range(1, args.time + 1):
|
|
# a_e_b_m[i - 1] = bpl_e_m(p_b, e, i, args.window, h)
|
|
# a_e_f_m[i - 1] = fpl_e_m(p_f, e, i, args.time, args.window, h)
|
|
# a_e_m = tpl(a_e_b_m, a_e_f_m, e)
|
|
# duration2 = time.process_time() - start_time
|
|
|
|
# diff = ((duration2 - duration1)/duration1)*100
|
|
# success = safe_div((TOTAL - MISS)*100, TOTAL)
|
|
# print('##############################\n'
|
|
# 'exp : %.4fs\n'
|
|
# 'w/ mem: %.4fs\n'
|
|
# '- succ: %d/%d (%.2f%%)\n'
|
|
# 'diff : %.4fs (%.2f%%)\n'
|
|
# 'OK : %r'
|
|
# %(duration1,
|
|
# duration2,
|
|
# TOTAL - MISS, TOTAL, success,
|
|
# duration2 - duration1, diff,
|
|
# np.array_equal(a_e, a_e_m)))
|
|
# MISS, TOTAL = 0, 0
|
|
|
|
# if DEBUG:
|
|
# cmp_loss('Backward', a_b, a_s_b, a_l_b, a_e_b)
|
|
# cmp_loss('Forward', a_f, a_s_f, a_l_f, a_e_f)
|
|
# cmp_loss('Total', a, a_s, a_l, a_e)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
try:
|
|
args = parse_args()
|
|
print('##############################\n'
|
|
'Correlation: %f\n'
|
|
'Debug : %r\n'
|
|
'Epsilon : %f\n'
|
|
'Matrix : %d\n'
|
|
'Model : %s\n'
|
|
'Output : %s\n'
|
|
'Time : %d\n'
|
|
'Window : %d\n'
|
|
'##############################'
|
|
%(args.correlation,
|
|
args.debug,
|
|
args.epsilon,
|
|
args.matrix,
|
|
args.model,
|
|
args.output,
|
|
args.time,
|
|
args.window), flush=True)
|
|
start_time = time.process_time()
|
|
main(args)
|
|
end_time = time.process_time()
|
|
print('##############################\n'
|
|
'Time : %.4fs\n'
|
|
'##############################\n'
|
|
%(end_time - start_time), flush=True)
|
|
except KeyboardInterrupt:
|
|
print('Interrupted by user.')
|
|
exit()
|