the-last-thing/code/lib/gdp.py

1578 lines
48 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/env python3
import argparse
import collections
import csv
import datetime
import math
from matplotlib import pyplot as plt
import numpy as np
import os
import random
from scipy.sparse.linalg import eigs
from scipy.stats import laplace
from scipy.stats import norm
import sympy as sp
import time
import xxhash
# Debugging
DEBUG = False # Variable to check.
# When enabled print something.
# Memoization
MEM = {} # The cache.
MISS = 0 # Number of additions to the cache.
TOTAL = 0 # Number of cache accesses.
def load_data(path):
    '''
    Read data from a CSV file.

    Parameters:
        path - The relative path to the data file.
    Returns:
        data - A list of records [uid, timestamp, lng, lat, loc].
    '''
    print('Loading data from', os.path.abspath(path), '... ', end='')
    try:
        with open(path, 'r') as f:
            records = [row for row in csv.reader(f, delimiter=',')]
        print('OK.')
        return records
    except IOError:
        # Unreadable/missing file: report and abort, matching the
        # command-line-tool style used throughout this module.
        print('Oops!')
        exit()
def save_output(path, t, e, a_b, a_f, a):
    '''
    Save output to a semicolon-separated file.

    Parameters:
        path - The relative path to the output file (must contain a directory part).
        t - The number of timestamps.
        e - The privacy budget at each timestamp.
        a_b - The backward privacy loss at each timestamp.
        a_f - The forward privacy loss at each timestamp.
        a - The temporal privacy loss at each timestamp.
    Returns:
        Nothing.
    '''
    print('Saving output to %s... ' %(path), end='', flush=True)
    # Create the target directory if it does not exist yet.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as out:
        writer = csv.writer(out, delimiter=';')
        writer.writerow(['t', 'e', 'bpl', 'fpl', 'tpl'])
        writer.writerows(zip(range(1, t + 1), e, a_b, a_f, a))
    print('OK.', flush=True)
def get_timestamps(data):
    '''
    Get all the timestamps from the input data.

    Parameters:
        data - The input data set.
    Returns:
        stamps - A sorted ndarray of all unique timestamps in the input data.
    '''
    print('Getting a list of all timestamps... ', end='', flush=True)
    stamps = np.sort(np.unique(np.array(data)[:, 1]))
    if not len(stamps):
        print('No timestamps found.', flush=True)
        exit()
    # Verify that the (lexicographically sorted) timestamps are also in
    # chronological order when parsed as '%Y-%m-%d %H:%M'.
    previous = None
    for stamp in stamps:
        current = time.mktime(datetime.datetime.strptime(stamp, '%Y-%m-%d %H:%M').timetuple())
        if previous and current < previous:
            print('Wrong order.', flush=True)
            exit()
        previous = current
    print('OK.', flush=True)
    if DEBUG:
        print(stamps, len(stamps))
    return stamps
def get_locs(data):
    '''
    Get all the unique locations from the input data.

    Parameters:
        data - The input data set.
    Returns:
        locs - A numerically sorted list of all the unique locations
               (as strings) in the input data.
    '''
    print('Getting a list of all locations... ', end='', flush=True)
    # Cast to the builtin int: the np.int alias was deprecated in
    # NumPy 1.20 and removed in 1.24, so .astype(np.int) now raises.
    locs = np.sort(np.unique(np.array(data)[:, 4].astype(int)))
    if not len(locs):
        print('No locations found.', flush=True)
        exit()
    print('OK.', flush=True)
    if DEBUG:
        print(list(map(str, locs)), len(locs))
    return list(map(str, locs))
def get_cnts(data, t):
    '''
    Get the counts at every location for a specific timestamp.

    Parameters:
        data - The input data set.
        t - The timestamp of interest.
    Returns:
        cnts - A dict {loc: cnt} with the counts at every location for
               the given timestamp. Keys are the string locations
               returned by get_locs.
    '''
    print('Getting all counts at %s... ' %(t), end='', flush=True)
    locs = get_locs(data)
    cnts = dict.fromkeys(locs, 0)
    for d in data:
        if d[1] == t:
            # get_locs returns string keys normalized through int();
            # index with the same normalized string form. (The original
            # indexed with int(d[4]), which raised KeyError because the
            # dict keys are strings.)
            cnts[str(int(d[4]))] += 1
    print('OK.', flush=True)
    if DEBUG:
        print(cnts)
    return cnts
def get_all_cnts(data):
    '''
    Get the counts at every location for every timestamp.

    Parameters:
        data - The input data set.
    Returns:
        cnts - A dict {'timestamp@loc': cnt} with the count at every
               location for every timestamp.
    '''
    cnts = {}
    for record in data:
        # Composite key: timestamp and location joined with '@'.
        key = '%s@%s' % (record[1], record[4])
        cnts[key] = cnts.get(key, 0) + 1
    if DEBUG:
        print(cnts)
    return cnts
def get_usrs(data):
    '''
    Get a list of unique users in the input data set.

    Parameters:
        data - The input data set.
    Returns:
        users - A sorted ndarray of all unique user identifiers (ints).
    '''
    # Cast to the builtin int: the np.int alias was deprecated in
    # NumPy 1.20 and removed in 1.24, so .astype(np.int) now raises.
    users = np.sort(np.unique(np.array(data)[:, 0].astype(int)))
    if not len(users):
        print('No users found.')
        exit()
    if DEBUG:
        print(users, len(users))
    return users
def get_usr_data(data, id):
    '''
    Get the data of a particular user from a data set.

    Parameters:
        data - The input data set.
        id - The user identifier (compared as a string).
    Returns:
        output - A list of the records of the targeted user.
    '''
    # Keep every record whose uid column matches the requested user.
    output = [record for record in data if record[0] == str(id)]
    if DEBUG:
        print(id, output)
    return output
def get_usrs_data(data):
    '''
    Get the data of every user in a data set.

    Parameters:
        data - The input data set.
    Returns:
        output - A dict {usr: [usr_data]} with the records of each user.
    '''
    output = {}
    for record in data:
        # Group records by the uid column.
        output.setdefault(record[0], []).append(record)
    return output
def get_usr_traj(data):
    '''
    Get the trajectory of a user from her data.

    Parameters:
        data - The data of the user.
    Returns:
        traj - A list [(timestamp, loc)] with the locations and the
               corresponding timestamps that the user was at.
    '''
    traj = [(record[1], record[4]) for record in data]
    if DEBUG:
        print(traj)
    return traj
def get_poss_trans(data):
    '''
    Get all the possible transitions.

    Parameters:
        data - The input data set, as a dict {usr: [usr_data]}.
    Returns:
        trans - A set with all the possible forward transitions in the input.
    '''
    print('Getting possible transitions... ', end='', flush=True)
    trans = set()
    for usr, usr_data in data.items():
        traj = get_usr_traj(usr_data)
        # Every consecutive pair of trajectory points is a transition.
        trans.update(zip(traj, traj[1:]))
    if not trans:
        print('None.', flush=True)
        exit()
    print('OK.', flush=True)
    return trans
def get_bwd_trans(data):
    '''
    Get all backward transitions in a data set.

    Parameters:
        data - The input data set, as a dict {usr: [usr_data]}.
    Returns:
        trans - A dict {(t, t-1): [transitions]} with all the backward
                transitions at every sequential timestamp pair.
    '''
    print('Getting all backward transitions... ', end='', flush=True)
    trans = {}
    for usr, usr_data in data.items():
        traj = get_usr_traj(usr_data)
        # Pair each point with its predecessor: key on the timestamps,
        # collect the location pairs.
        for curr, prev in zip(traj[1:], traj):
            trans.setdefault((curr[0], prev[0]), []).append((curr[1], prev[1]))
    if not trans:
        print('None.', flush=True)
        exit()
    print('OK.', flush=True)
    return trans
def get_fwd_trans(data):
    '''
    Get all forward transitions in a data set.

    Parameters:
        data - The input data set, as a dict {usr: [usr_data]}.
    Returns:
        trans - A dict {(t-1, t): [transitions]} with all the forward
                transitions at every sequential timestamp pair.
    '''
    print('Getting all forward transitions... ', end='', flush=True)
    trans = {}
    for usr, usr_data in data.items():
        traj = get_usr_traj(usr_data)
        # Pair each point with its successor: key on the timestamps,
        # collect the location pairs.
        for curr, nxt in zip(traj, traj[1:]):
            trans.setdefault((curr[0], nxt[0]), []).append((curr[1], nxt[1]))
    if not trans:
        print('None.', flush=True)
        exit()
    print('OK.', flush=True)
    return trans
def safe_div(a, b):
    '''
    Divide two numbers. If the divisor is 0 return inf.

    Parameters:
        a - The dividend.
        b - The divisor.
    Returns:
        The float result of the division, or math.inf when b is 0.
    '''
    return math.inf if b == 0 else float(a/b)
def max_val(q, d, a):
    '''
    Calculate the maximum value of the objective function.

    Parameters:
        q - A row (sum) from the transition matrix.
        d - Another row (sum) from the transition matrix.
        a - The backward/forward privacy loss of the previous/next
            timestamp.
    Returns:
        The maximum value of the objective function, or nan when the
        loss is infinite.
    '''
    if a == math.inf:
        return math.nan
    # exp(a) - 1 is shared by numerator and denominator.
    growth = math.exp(a) - 1
    return (q*growth + 1)/(d*growth + 1)
def find_qd(p, a):
    '''
    Find two different rows (q and d) of a transition matrix (p)
    that maximize the objective function (see max_val) and return
    their sums.

    Parameters:
        p - The transition matrix representing the backward/forward
            correlations.
        a - The backward/forward privacy loss of the previous/next
            timestamp.
    Returns:
        sum_q - The sum of the selected elements of q.
        sum_d - The sum of the selected elements of d.
    '''
    res = 0.0
    sum_q, sum_d = 0.0, 0.0
    for q in p: # A row from the transition matrix.
        for d in p: # Another row from the transition matrix.
            # Skip identical rows and all-zero rows.
            if not np.array_equal(q, d) and sum(q) and sum(d):
                plus = [] # Indexes of the elements of q+ and d+.
                for i in range(np.shape(p)[1]): # Iterate each column.
                    if q[i] > d[i]:
                        plus.append(i)
                s_q, s_d = 0.0, 0.0
                update = True
                # Iteratively prune indexes whose elementwise ratio
                # q[i]/d[i] cannot increase the objective, until the
                # index set stabilizes.
                # NOTE(review): s_q/s_d keep accumulating across pruning
                # rounds without being reset — verify against the
                # intended definition of the q+/d+ sums.
                while update:
                    update = False
                    for i in plus:
                        s_q += q[i]
                        s_d += d[i]
                    max_tmp = max_val(s_q, s_d, a)
                    # NOTE(review): plus is mutated (plus.remove(i)) while
                    # being iterated, which skips the element following
                    # each removal — confirm this pruning order is intended.
                    for i in plus:
                        if safe_div(q[i], d[i]) <= max_tmp:
                            plus.remove(i)
                            update = True
                # Keep the best (q, d) pair seen so far.
                res_tmp = math.log(max_val(s_q, s_d, a))
                if res < res_tmp:
                    res = res_tmp
                    sum_q, sum_d = s_q, s_d
    return sum_q, sum_d
def gen_data(usrs, timestamps, locs):
2021-10-18 07:32:38 +02:00
'''
Generate data.
Parameters:
usrs - The number of users.
timestamps - The number of timestamps.
locs - The numner of locations.
Returns:
data - The generated data.
'''
print('Generating data... ', end='', flush=True)
# Generate timestamps.
ts = []
t_start = datetime.datetime.now()
for t in range(timestamps):
t_start += datetime.timedelta(minutes=t)
ts.append(str(t_start.strftime('%Y-%m-%d %H:%M')))
# Randomly generate unique possible transitions.
trans = set()
for i in range(locs**2):
trans.add((str(random.randint(1, locs)), str(random.randint(1, locs))))
# Generate the data.
data = []
for u in range(1, usrs + 1):
# Choose a random starting location.
l_start = random.sample(trans, 1)[0][0]
for t in ts:
# Choose a random next location.
while True:
l_nxt = str(random.randint(1, locs))
if (l_start, l_nxt) in trans:
data.append([str(u), str(t), str(l_nxt), str(l_nxt), str(l_nxt)])
break
print('OK.', flush=True)
return data
def gen_trans_mt(n, s):
    '''
    Generate a transition matrix.

    Parameters:
        n - The dimension of the matrix.
        s - The correlation degree of each row [0, 1]. The lower its
            value, the lower the degree of uniformity of each row.
    Returns:
        result - The n x n transition matrix (rows rounded to 4 decimals).
    '''
    if DEBUG:
        print('Generating transition matrix %dx%d with s = %.4f... ' %(n, n, s), end='', flush=True)
    # Start from the identity and smooth each row with the additive term s.
    eye = np.zeros((n, n), float)
    np.fill_diagonal(eye, 1.0)
    result = np.zeros((n, n), float)
    for row in range(n):
        denom = sum(eye[row]) + len(eye[row])*s
        for col in range(n):
            result[row, col] = round((eye[row, col] + s) / denom, 4)
    if DEBUG:
        print('OK.', flush=True)
    if DEBUG:
        print(result)
    return result
def get_trans_mt(locs, trans):
    '''
    Get the transition matrix.

    Parameters:
        locs - A list of all the locations.
        trans - A list of all transitions (pairs of locations).
    Returns:
        p - A 2d dict {{locs}{locs}} containing the corresponding
            location transition probabilities.
    '''
    if DEBUG:
        print('Generating the transition matrix... ', end='', flush=True)
    # Count every transition pair and every origin exactly once, instead
    # of rebuilding the Counter and rescanning the transition list inside
    # the double loop (which was O(|locs|^2 * |trans|)).
    pair_cnts = collections.Counter(trans)
    orig_cnts = collections.Counter(tr[0] for tr in trans)
    # Initialize and populate the transition matrix.
    p = collections.defaultdict(dict)
    for i in locs:
        for j in locs:
            cnt = pair_cnts[(i, j)]          # Transitions from i to j.
            sum_cnts = orig_cnts[i]          # Transitions from i to anywhere.
            res = 0.0
            if cnt != 0 and sum_cnts != 0:
                res = cnt/sum_cnts
            p[i][j] = res
    if DEBUG:
        print('OK.', flush=True)
        for i in locs:
            print(p[i])
    return p
def get_entropy(mt):
    '''
    Calculate the measure-theoretic (Kolmogorov-Sinai) entropy
    of a transition matrix.

    Parameters:
        mt - A 2d dict transition matrix (or an ndarray).
    Returns:
        h - The Kolmogorov-Sinai entropy of the matrix, in bits.
            0.0 when the matrix is all zeros or the dominant
            eigenvalue is not (close to) 1.
    '''
    if DEBUG:
        print('Calculating the measure-theoretic entropy... ', end='', flush=True)
    h = 0.0
    # Get the 2d array of the transition matrix.
    a = get_2darray(mt)
    # Check if the transition matrix is empty.
    if np.allclose(a, np.zeros((len(a), len(a)), float)):
        return h
    # Find the stationary distribution (v), written as a row
    # vector, that does not change under the application of
    # the transition matrix. In other words, the left eigenvector
    # associated with eigenvalue (w) 1. It is also called
    # Perron-Frobenius eigenvector, since it is a stochastic
    # matrix and the largest magnitude of the eigenvalues must
    # be 1. We use the transpose of the transition matrix to get
    # the left eigenvector.
    w, v = eigs(a.T, k=1, which='LM')
    # Normalize the eigenvector. The vector is a probability
    # distribution, and therefore its sum must be 1. Thus, we use
    # its absolute elements' values and its l1-norm to normalize it.
    p = np.absolute(v[:,0].real)
    p_norm = np.linalg.norm(p, ord=1)
    if p_norm:
        p = p/p_norm
    # Calculate the entropy only if the dominant eigenvalue is ~1.
    if np.isclose(w.real[0], 1, atol=1e-03):
        for i in range(len(a)):
            for j in range(len(a)):
                # When the transition probability is 0 use the limiting value (0).
                if not np.isclose(a[i][j], 0, atol=1e-03) and not np.isclose(p[i], 0, atol=1e-03):
                    # Binary logarithm measures entropy in bits.
                    h += -p[i]*a[i][j]*math.log2(a[i][j])
    if DEBUG:
        print(h, flush=True)
    return h
def get_2darray(mt):
    '''
    Convert a 2d dict to a 2d array.

    Parameters:
        mt - The 2d dict (returned unchanged if it is already an ndarray).
    Returns:
        p - The 2d numpy array.
    '''
    # Pass ndarrays straight through.
    if type(mt) == type(np.array([])):
        return mt
    p = np.zeros((len(mt), len(mt)), float)
    for i, (_, row) in enumerate(mt.items()):
        p[i] = list(row.values())
    return p
def get_laplace_pd(ts, t, sc):
    '''
    Get a Laplace probability distribution over the time series indexes.

    Parameters:
        ts - The points of the distribution (ndarray of timestamps).
        t - The location of the distribution (a timestamp in ts).
        sc - The scale of the distribution.
    Returns:
        The probability distribution, centered at the index of t.
    '''
    points = np.arange(len(ts))
    center = np.where(ts == t)
    return laplace.pdf(points, loc=center, scale=sc)[0]
def get_norm_pd(ts, t, sc):
    '''
    Get a Gaussian probability distribution over the time series indexes.

    Parameters:
        ts - The points of the distribution (ndarray of timestamps).
        t - The location of the distribution (a timestamp in ts).
        sc - The scale of the distribution.
    Returns:
        The probability distribution, centered at the index of t.
    '''
    points = np.arange(len(ts))
    center = np.where(ts == t)
    return norm.pdf(points, loc=center, scale=sc)[0]
def get_sample(ts, t, pct, pd):
    '''
    Get a sample from the time series, excluding the current timestamp.

    Parameters:
        ts - An ndarray of the timestamps.
        t - The current timestamp.
        pct - The desired portion [0, 1] of the non-zero elements of
              the probability distribution to be sampled.
        pd - The probability distribution over the timestamps.
    Returns:
        spl - A sorted ndarray of the sampled timestamps.
    '''
    if DEBUG:
        print('Sampling %.2f%% of %s at %s... ' %(pct*100, ts, t), end='', flush=True)
    # Check that it is a valid timestamp.
    if t not in ts:
        print('%s is not in the time series.' %(t))
        exit()
    # Drop the current timestamp from the candidate pool.
    idx = np.where(ts == t)
    candidates = np.delete(ts, idx)
    weights = np.delete(pd, idx)
    # Normalize (l1) so that the probabilities sum to 1.
    weights_norm = weights/np.linalg.norm(weights, ord=1)
    # Sample without replacement and return in sorted order.
    size = int(np.count_nonzero(weights)*pct)
    spl = np.sort(np.random.choice(candidates, size=size, replace=False, p=weights_norm))
    if DEBUG:
        print(spl, flush=True)
    return spl
def priv_l(p, a, e):
    '''
    Calculate the backward/forward privacy loss at the current
    timestamp.

    Parameters:
        p - The transition matrix representing the backward/forward
            temporal correlations.
        a - The privacy loss of the previous/next timestamp.
        e - The privacy budget for data publishing.
    Returns:
        The backward/forward privacy loss at the current timestamp.
    '''
    sum_q, sum_d = find_qd(p, a)
    # Correlation-induced loss plus the budget spent at this release.
    loss = math.log(max_val(sum_q, sum_d, a))
    return loss + e
def priv_l_m(p, a, e):
    '''
    Calculate the backward/forward privacy loss at the current
    timestamp, memoizing results in the module-level MEM cache.

    Parameters:
        p - The transition matrix representing the backward/forward
            temporal correlations.
        a - The privacy loss of the previous/next timestamp.
        e - The privacy budget for data publishing.
    Returns:
        The backward/forward privacy loss at the current timestamp.
    '''
    # Key on the matrix contents plus the scalar arguments.
    key = xxhash.xxh64(p).hexdigest() + str(a) + str(e)
    global MEM, TOTAL, MISS
    TOTAL += 1
    result = MEM.get(key)
    # Compare against None explicitly: a legitimately cached value of
    # 0.0 is falsy and the original `if not result` recomputed it on
    # every call, defeating the cache.
    if result is None:
        MISS += 1
        sum_q, sum_d = find_qd(p, a)
        result = math.log(max_val(sum_q, sum_d, a)) + e
        MEM[key] = result
    return result
def bpl(p, a, e, t):
    '''
    Calculate the total backward privacy loss at every timestamp.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        a - The backward privacy loss of every release (filled in place).
        e - The privacy budget for data publishing.
        t - The time limit.
    Returns:
        a - The backward privacy loss at every timestamp due to the
            previous data releases.
    '''
    # The first release leaks only its own budget; each later one
    # compounds the loss of its predecessor.
    a[0] = e[0]
    i = 1
    while i < t:
        a[i] = priv_l(p, a[i - 1], e[i])
        i += 1
    return a
def bpl_m(p, a, e, t):
    '''
    Calculate the total backward privacy loss at the current
    timestamp, with memoization.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        a - The backward privacy loss of the current release at all
            previous timestamps (filled in place).
        e - The privacy budget for data publishing.
        t - The time limit.
    Returns:
        a - The backward privacy loss at the current timestamp due to
            the previous data releases.
    '''
    a[0] = e[0]
    # Same recurrence as bpl(), but each step hits the memoized kernel.
    for idx in range(1, t):
        a[idx] = priv_l_m(p, a[idx - 1], e[idx])
    return a
2021-10-18 07:32:38 +02:00
def bpl_lmdk_mem(p, a, e, t, lmdk):
    '''
    Calculate the backward privacy loss at every timestamp from the
    previous landmark up to the time limit, using memoization.

    Parameters:
        p - The transition matrix representing the backward temporal
            correlations.
        a - The backward privacy loss array (overwritten in place).
        e - The privacy budget for data publishing.
        t - The time limit.
        lmdk - The timestamp of the previous landmark.
    Returns:
        a - The backward privacy loss at every timestamp, or an
            all-zero array when t coincides with (or immediately
            follows) the landmark.
    '''
    # t is (near) the landmark: no previous release contributes.
    if lmdk == t - 1 or t == lmdk:
        return np.array([0]*len(a))
    # There is no landmark before: start from the first timestamp.
    if lmdk < 1:
        lmdk = 1
    a[0] = e[0]
    # NOTE(review): the recurrence starts at index lmdk and reads
    # a[lmdk - 1], which is only initialized here when lmdk == 1 —
    # confirm that callers pre-fill a (e.g. with np.zeros).
    for i in range(lmdk, t):
        a[i] = priv_l_m(p, a[i - 1], e[i])
    return a
def bpl_s(p, e, i, w):
    '''
    Calculate the total backward privacy loss at the current
    timestamp using the static model, i.e., previous releases
    are grouped in a window of static size.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        w - The window size to group previous releases.
    Returns:
        The backward privacy loss at the current timestamp
        due to the previous data releases.
    '''
    if i - w > 1:
        # Compress the previous w releases into one w-step transition
        # matrix and recurse on the release w steps back.
        return priv_l(np.linalg.matrix_power(p, w), bpl_s(p, e, i - w, w), e[i - 1])
    elif i - w <= 1:
        # Base window: fewer than w releases remain before timestamp 1.
        return priv_l(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
    elif i == 1:
        # NOTE(review): unreachable — i - w <= 1 already holds when
        # i == 1 (for w >= 0), so e[0] is never returned directly here;
        # confirm whether the i == 1 case should be checked first.
        return e[0]
def bpl_s_m(p, e, i, w):
    '''
    Calculate the total backward privacy loss at the current
    timestamp using the static model, i.e., previous releases
    are grouped in a window of static size, using memoization.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        w - The window size to group previous releases.
    Returns:
        The backward privacy loss at the current timestamp
        due to the previous data releases.
    '''
    if i - w > 1:
        # Compress the previous w releases into one w-step transition
        # matrix and recurse on the release w steps back.
        return priv_l_m(np.linalg.matrix_power(p, w), bpl_s_m(p, e, i - w, w), e[i - 1])
    elif i - w <= 1:
        # Base window: fewer than w releases remain before timestamp 1.
        return priv_l_m(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
    elif i == 1:
        # NOTE(review): unreachable — i - w <= 1 already holds when
        # i == 1 (for w >= 0); see bpl_s.
        return e[0]
def bpl_l(p, e, i, w, l):
    '''
    Calculate the total backward privacy loss at the current
    timestamp using the linear model, i.e., previous releases
    are grouped in a window of a size that increases linearly.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        w - The window size to group previous releases.
        l - The linearly increasing coefficient that affects the
            window size.
    Returns:
        The backward privacy loss at the current timestamp
        due to the previous data releases.
    '''
    if i - w*l > 1:
        # Compress the previous w*l releases into one transition matrix
        # and recurse with the next (larger) window w*(l + 1).
        return priv_l(np.linalg.matrix_power(p, w*l), bpl_l(p, e, i - w*l, w, l + 1), e[i - 1])
    elif i - w*l <= 1:
        # Base window: fewer than w*l releases remain before timestamp 1.
        return priv_l(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
    elif i == 1:
        # NOTE(review): unreachable — i - w*l <= 1 already holds when
        # i == 1 (for w, l >= 0); see bpl_s.
        return e[0]
def bpl_l_m(p, e, i, w, l):
    '''
    Calculate the total backward privacy loss at the current
    timestamp using the linear model, i.e., previous releases
    are grouped in a window of a size that increases linearly,
    using memoization.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        w - The window size to group previous releases.
        l - The linearly increasing coefficient that affects the
            window size.
    Returns:
        The backward privacy loss at the current timestamp
        due to the previous data releases.
    '''
    if i - w*l > 1:
        # Compress the previous w*l releases and recurse with the next
        # (larger) window w*(l + 1).
        return priv_l_m(np.linalg.matrix_power(p, w*l), bpl_l_m(p, e, i - w*l, w, l + 1), e[i - 1])
    elif i - w*l <= 1:
        # Base window: fewer than w*l releases remain before timestamp 1.
        return priv_l_m(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
    elif i == 1:
        # NOTE(review): unreachable — i - w*l <= 1 already holds when
        # i == 1 (for w, l >= 0); see bpl_s.
        return e[0]
def bpl_e(p, e, i, w, h):
    '''
    Calculate the total backward privacy loss at the current
    timestamp using the exponential model, i.e., previous releases
    are grouped in a window of a size that increases exponentially.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        w - The window size to group previous releases.
        h - The exponentially increasing coefficient that affects
            the window size.
    Returns:
        The backward privacy loss at the current timestamp
        due to the previous data releases.
    '''
    if i - w**h > 1:
        # Compress the previous w**h releases into one transition matrix
        # and recurse with the next (larger) exponent h + 1.
        return priv_l(np.linalg.matrix_power(p, w**h), bpl_e(p, e, i - w**h, w, h + 1), e[i - 1])
    elif i - w**h <= 1:
        # Base window: fewer than w**h releases remain before timestamp 1.
        return priv_l(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
    elif i == 1:
        # NOTE(review): unreachable — i - w**h <= 1 already holds when
        # i == 1 (for w, h >= 0); see bpl_s.
        return e[0]
def bpl_e_m(p, e, i, w, h):
    '''
    Calculate the total backward privacy loss at the current
    timestamp using the exponential model, i.e., previous releases
    are grouped in a window of a size that increases exponentially,
    using memoization.

    Parameters:
        p - The transition matrix representing the backward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        w - The window size to group previous releases.
        h - The exponentially increasing coefficient that affects
            the window size.
    Returns:
        The backward privacy loss at the current timestamp
        due to the previous data releases.
    '''
    if i - w**h > 1:
        # Compress the previous w**h releases and recurse with the next
        # (larger) exponent h + 1.
        return priv_l_m(np.linalg.matrix_power(p, w**h), bpl_e_m(p, e, i - w**h, w, h + 1), e[i - 1])
    elif i - w**h <= 1:
        # Base window: fewer than w**h releases remain before timestamp 1.
        return priv_l_m(np.linalg.matrix_power(p, i - 1), e[0], e[i - 1])
    elif i == 1:
        # NOTE(review): unreachable — i - w**h <= 1 already holds when
        # i == 1 (for w, h >= 0); see bpl_s.
        return e[0]
def fpl(p, a, e, t):
    '''
    Calculate the total forward privacy loss at the current timestamp.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        a - The forward privacy loss of the current release at all
            next timestamps (filled in place).
        e - The privacy budget for data publishing.
        t - The time limit.
    Returns:
        a - The forward privacy loss at the current timestamp due to
            the next data releases.
    '''
    # The last release leaks only its own budget; walk backwards
    # compounding the loss of each successor.
    a[t - 1] = e[t - 1]
    i = t - 2
    while i >= 0:
        a[i] = priv_l(p, a[i + 1], e[i])
        i -= 1
    return a
def fpl_m(p, a, e, t):
    '''
    Calculate the total forward privacy loss at the current
    timestamp, using memoization.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        a - The forward privacy loss of the current release at all
            next timestamps (filled in place).
        e - The privacy budget for data publishing.
        t - The time limit.
    Returns:
        a - The forward privacy loss at the current timestamp due to
            the next data releases.
    '''
    a[t - 1] = e[t - 1]
    # Same recurrence as fpl(), but each step hits the memoized kernel.
    for idx in reversed(range(t - 1)):
        a[idx] = priv_l_m(p, a[idx + 1], e[idx])
    return a
def fpl_lmdk_mem(p, a, e, t, lmdk):
    '''
    Calculate the forward privacy loss at every timestamp from the
    next landmark back down to the current timestamp, using memoization.

    Parameters:
        p - The transition matrix representing the forward temporal
            correlations.
        a - The forward privacy loss array (overwritten in place).
        e - The privacy budget for data publishing.
        t - The current timestamp.
        lmdk - The timestamp of the next landmark.
    Returns:
        a - The forward privacy loss at every timestamp, or an
            all-zero array when t coincides with (or immediately
            precedes) the landmark.
    '''
    # t is (near) the landmark: no future release contributes.
    if lmdk == t + 1 or t == lmdk:
        return np.array([0]*len(a))
    # There is no landmark after: stop at the last timestamp.
    if lmdk > len(e):
        lmdk = len(e)
    a[lmdk - 1] = e[lmdk - 1]
    # Propagate the loss backwards from the landmark towards t.
    for i in range(lmdk - 2, t - 2, -1):
        a[i] = priv_l_m(p, a[i + 1], e[i])
    return a
def fpl_s(p, e, i, t, w):
    '''
    Calculate the total forward privacy loss at the current
    timestamp using the static model, i.e., next releases
    are grouped in a window of static size.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        t - The time limit (last timestamp).
        w - The window size to group next releases.
    Returns:
        The forward privacy loss at the current timestamp
        due to the next data releases.
    '''
    if i + w < t:
        # Compress the next w releases into one w-step transition
        # matrix and recurse on the release w steps ahead.
        return priv_l(np.linalg.matrix_power(p, w), fpl_s(p, e, i + w, t, w), e[i - 1])
    elif i + w >= t:
        # Base window: fewer than w releases remain up to the time limit.
        return priv_l(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
    elif i == t:
        # NOTE(review): unreachable — i + w >= t already holds when
        # i == t (for w >= 0), so e[t - 1] is never returned directly
        # here; confirm whether the i == t case should be checked first.
        return e[t - 1]
def fpl_s_m(p, e, i, t, w):
    '''
    Calculate the total forward privacy loss at the current
    timestamp using the static model, i.e., next releases
    are grouped in a window of static size, using memoization.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        t - The time limit (last timestamp).
        w - The window size to group next releases.
    Returns:
        The forward privacy loss at the current timestamp
        due to the next data releases.
    '''
    if i + w < t:
        # Compress the next w releases and recurse on the release
        # w steps ahead.
        return priv_l_m(np.linalg.matrix_power(p, w), fpl_s_m(p, e, i + w, t, w), e[i - 1])
    elif i + w >= t:
        # Base window: fewer than w releases remain up to the time limit.
        return priv_l_m(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
    elif i == t:
        # NOTE(review): unreachable — i + w >= t already holds when
        # i == t (for w >= 0); see fpl_s.
        return e[t - 1]
def fpl_l(p, e, i, t, w, l):
    '''
    Calculate the total forward privacy loss at the current
    timestamp using the linear model, i.e., next releases
    are grouped in a window of a size that increases linearly.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        t - The time limit (last timestamp).
        w - The window size to group next releases.
        l - The linearly increasing coefficient that affects the
            window size.
    Returns:
        The forward privacy loss at the current timestamp
        due to the next data releases.
    '''
    if i + w*l < t:
        # Compress the next w*l releases into one transition matrix and
        # recurse with the next (larger) window w*(l + 1).
        return priv_l(np.linalg.matrix_power(p, w*l), fpl_l(p, e, i + w*l, t, w, l + 1), e[i - 1])
    elif i + w*l >= t:
        # Base window: fewer than w*l releases remain up to the time limit.
        return priv_l(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
    elif i == t:
        # NOTE(review): unreachable — i + w*l >= t already holds when
        # i == t (for w, l >= 0); see fpl_s.
        return e[t - 1]
def fpl_l_m(p, e, i, t, w, l):
    '''
    Calculate the total forward privacy loss at the current
    timestamp using the linear model, i.e., next releases
    are grouped in a window of a size that increases linearly,
    using memoization.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        t - The time limit (last timestamp).
        w - The window size to group next releases.
        l - The linearly increasing coefficient that affects the
            window size.
    Returns:
        The forward privacy loss at the current timestamp
        due to the next data releases.
    '''
    if i + w*l < t:
        # Compress the next w*l releases and recurse with the next
        # (larger) window w*(l + 1).
        return priv_l_m(np.linalg.matrix_power(p, w*l), fpl_l_m(p, e, i + w*l, t, w, l + 1), e[i - 1])
    elif i + w*l >= t:
        # Base window: fewer than w*l releases remain up to the time limit.
        return priv_l_m(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
    elif i == t:
        # NOTE(review): unreachable — i + w*l >= t already holds when
        # i == t (for w, l >= 0); see fpl_s.
        return e[t - 1]
def fpl_e(p, e, i, t, w, h):
    '''
    Calculate the total forward privacy loss at the current
    timestamp using the exponential model, i.e., next releases
    are grouped in a window of a size that increases exponentially.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        t - The time limit (last timestamp).
        w - The window size to group next releases.
        h - The exponentially increasing coefficient that affects
            the window size.
    Returns:
        The forward privacy loss at the current timestamp
        due to the next data releases.
    '''
    if i + w**h < t:
        # Compress the next w**h releases into one transition matrix and
        # recurse with the next (larger) exponent h + 1.
        return priv_l(np.linalg.matrix_power(p, w**h), fpl_e(p, e, i + w**h, t, w, h + 1), e[i - 1])
    elif i + w**h >= t:
        # Base window: fewer than w**h releases remain up to the time limit.
        return priv_l(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
    elif i == t:
        # NOTE(review): unreachable — i + w**h >= t already holds when
        # i == t (for w, h >= 0); see fpl_s.
        return e[t - 1]
def fpl_e_m(p, e, i, t, w, h):
    '''
    Calculate the total forward privacy loss at the current
    timestamp using the exponential model, i.e., next releases
    are grouped in a window of a size that increases exponentially,
    using memoization.

    Parameters:
        p - The transition matrix representing the forward
            temporal correlations.
        e - The privacy budget for data publishing.
        i - The timestamp of interest.
        t - The time limit (last timestamp).
        w - The window size to group next releases.
        h - The exponentially increasing coefficient that affects
            the window size.
    Returns:
        The forward privacy loss at the current timestamp
        due to the next data releases.
    '''
    if i + w**h < t:
        # Compress the next w**h releases and recurse with the next
        # (larger) exponent h + 1.
        return priv_l_m(np.linalg.matrix_power(p, w**h), fpl_e_m(p, e, i + w**h, t, w, h + 1), e[i - 1])
    elif i + w**h >= t:
        # Base window: fewer than w**h releases remain up to the time limit.
        return priv_l_m(np.linalg.matrix_power(p, t - i), e[t - 1], e[i - 1])
    elif i == t:
        # NOTE(review): unreachable — i + w**h >= t already holds when
        # i == t (for w, h >= 0); see fpl_s.
        return e[t - 1]
def tpl(bpl, fpl, e):
    '''
    Calculate the total privacy loss at every timestamp.

    Parameters:
        bpl - The backward privacy loss.
        fpl - The forward privacy loss.
        e - The privacy budget for data publishing.
    Returns:
        The list of total privacy loss at every timestamp.
    '''
    total = []
    # The budget is counted in both directions, so subtract it once.
    for backward, forward, budget in zip(bpl, fpl, e):
        total.append(backward + forward - budget)
    return total
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
    '''
    Calculate the temporal privacy loss at every timestamp
    taking into account landmarks.

    Parameters:
        e - The privacy budget for data publishing.
        p_b - The transition matrix representing the backward
              temporal correlations.
        p_f - The transition matrix representing the forward
              temporal correlations.
        seq - The point sequence (assumed 1-based timestamps; values
              are used directly as array indexes minus one).
        lmdks - The landmarks.
    Returns:
        a_b - The backward privacy loss at the current timestamp
              due to the previous data releases.
        a_f - The forward privacy loss at the current timestamp
              due to the next data releases.
        a - The total privacy loss at every timestamp
            taking into account landmarks.
    '''
    a_b = np.zeros(len(seq))
    a_f = np.zeros(len(seq))
    a = np.zeros(len(seq))
    for t in seq:
        # Bound the loss computation by the surrounding landmarks.
        t_prv, t_nxt = get_limits(t, seq, lmdks)
        a_b[t - 1] = bpl_lmdk_mem(p_b, np.zeros(max(seq)), e, t, t_prv)[t - 1]
        a_f[t - 1] = fpl_lmdk_mem(p_f, np.zeros(max(seq)), e, t, t_nxt)[t - 1]
        # Combine the two directions: when both contribute, the budget
        # at t is counted twice and is subtracted once; when neither
        # contributes, the loss is just the budget itself.
        if a_b[t - 1] > 0 and a_f[t - 1] > 0:
            a[t - 1] = a_b[t - 1] + a_f[t - 1] - e[t - 1]
        elif a_b[t - 1] > 0 or a_f[t - 1] > 0:
            a[t - 1] = a_b[t - 1] + a_f[t - 1]
        else:
            a[t - 1] = e[t - 1]
    return a_b, a_f, a
def get_limits(t, seq, lmdks):
    '''
    Get the limits for the calculation of temporal privacy loss.

    Parameters:
        t - The current timestamp.
        seq - The point sequence.
        lmdks - The landmarks (assumed sorted).
    Returns:
        t_prv - The previous landmark (0 when none precedes t).
        t_nxt - The next landmark (len(seq) + 1 when none follows t).
    '''
    # Defaults when no landmark precedes/follows t.
    t_prv = 0
    t_nxt = len(seq) + 1
    # Work on a copy of the landmarks that also contains t itself.
    marks = np.copy(lmdks)
    if t not in marks:
        marks = np.sort(np.append(lmdks, t))
    # Locate t among the landmarks.
    pos = np.where(marks == t)[0][0]
    # The neighbors of t in the augmented landmark array are the limits.
    if pos > 0:
        t_prv = marks[pos - 1]
    if pos < len(marks) - 1:
        t_nxt = marks[pos + 1]
    return t_prv, t_nxt
def plot_loss(title, e, a_b, a_f, a):
    '''
    Plot the privacy loss of the time series.
    Parameters:
        title - The title of the plot.
        e     - The privacy budget for data publishing.
        a_b   - The backward privacy loss.
        a_f   - The forward privacy loss.
        a     - The total privacy loss.
    Returns:
        Nothing.
    '''
    plt.rc('font', family='serif')
    plt.rc('font', size=10)
    plt.rc('text', usetex=True)
    # One x position per timestamp (1-based).
    p = np.arange(1.0, len(e) + 1, 1)
    plt.plot(p,
             e,
             label=r'$\varepsilon$',
             color='gray',
             marker='s')
    plt.plot(p,
             a_b,
             label=r'$\alpha^B$',
             color='#e52e4e',  # red
             marker='<')
    plt.plot(p,
             a_f,
             label=r'$\alpha^F$',
             color='#009874',  # green
             marker='>')
    plt.plot(p,
             a,
             label=r'$\alpha$',
             color='#6082b6',  # blue
             marker='d')
    # Leave 50% headroom above the maximum total loss.
    plt.axis([1.0, len(e),
              0.0, 1.5*max(a)])
    plt.legend(loc='best', frameon=True)
    plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
    plt.xlabel('time')  # Set x axis label.
    plt.ylabel('privacy loss')  # Set y axis label.
    plt.title(title)
    plt.show()
def cmp_loss(title, a, a_s, a_e, a_l):
    '''
    Plot a comparison of the privacy loss of all models.
    Parameters:
        title - The title of the plot.
        a     - The privacy loss of the basic model.
        a_s   - The privacy loss of the static model.
        a_e   - The privacy loss of the exponential model.
        a_l   - The privacy loss of the linear model.
    Returns:
        Nothing.
    '''
    plt.rc('font', family='serif')
    plt.rc('font', size=10)
    plt.rc('text', usetex=True)
    # One x position per timestamp (1-based).
    p = np.arange(1.0, len(a) + 1, 1)
    plt.plot(p,
             a,
             label=r'$\alpha$',
             color='red',
             marker='s')
    plt.plot(p,
             a_s,
             label=r'$\alpha_s$',
             color='green',
             marker='>')
    plt.plot(p,
             a_e,
             label=r'$\alpha_e$',
             color='blue',
             marker='d')
    plt.plot(p,
             a_l,
             label=r'$\alpha_l$',
             color='yellow',
             marker='<')
    # Leave 50% headroom above the maximum basic-model loss.
    plt.axis([1.0, len(a),
              0.0, 1.5*max(a)])
    plt.legend(loc='best', frameon=True)
    plt.grid(axis='y', alpha=1.0)  # Add grid on y axis.
    plt.xlabel('time')  # Set x axis label.
    plt.ylabel('privacy loss')  # Set y axis label.
    plt.title(title)
    plt.show()
def parse_args(argv=None):
    '''
    Parse command-line arguments.
    Mandatory:
        model,             The model to be used: (b)asic,
                           (s)tatic, (l)inear, (e)xponential.
    Optional:
        -c, --correlation, The correlation degree.
        -d, --debug,       Enable debugging mode.
        -e, --epsilon,     The available privacy budget.
        -m, --matrix,      The size of the transition matrix.
        -o, --output,      The path to the output directory.
        -t, --time,        The time limit.
        -w, --window,      The size of the event protection window.
    Parameters:
        argv - The argument list to parse; defaults to sys.argv[1:]
               when None (backward compatible with the no-arg call).
    Returns:
        args - The parsed argument namespace.
    '''
    # Create argument parser.
    parser = argparse.ArgumentParser()
    # Mandatory arguments.
    parser.add_argument('model', help='The model to be used.', choices=['b', 's', 'l', 'e'], type=str)
    # Optional arguments.
    parser.add_argument('-c', '--correlation', help='The correlation degree.', type=float, default=0.1)
    parser.add_argument('-d', '--debug', help='Enable debugging mode.', action='store_true')
    parser.add_argument('-e', '--epsilon', help='The available privacy budget.', type=float, default=1.0)
    parser.add_argument('-m', '--matrix', help='The matrix size.', type=int, default=2)
    parser.add_argument('-o', '--output', help='The path to the output directory.', type=str, default='')
    parser.add_argument('-t', '--time', help='The time limit.', type=int, default=1)
    parser.add_argument('-w', '--window', help='The size of the event protection window.', type=int, default=1)
    # Parse arguments.
    args = parser.parse_args(argv)
    return args
def _report_model(title, t, a_b, a_f, a, m_b, m_f, m):
    '''
    Print the per-timestamp, total, and average backward/forward/total
    privacy loss of a model variant, each with its relative difference
    (in %) from the basic-model baseline.
    Parameters:
        t             - The number of timestamps.
        a_b, a_f, a   - The baseline (basic model) losses.
        m_b, m_f, m   - The model variant's losses.
    Returns:
        Nothing.
    '''
    print('\n' + title + '\n'
          't\tbpl\t\t\tfpl\t\t\ttpl')
    for i in range(0, t):
        print('%d\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
              %(i + 1,
                m_b[i], (m_b[i] - a_b[i])/a_b[i]*100,
                m_f[i], (m_f[i] - a_f[i])/a_f[i]*100,
                m[i], (m[i] - a[i])/a[i]*100))
    print('\nSUM\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
          %(sum(m_b), ((sum(m_b) - sum(a_b))/sum(a_b))*100,
            sum(m_f), ((sum(m_f) - sum(a_f))/sum(a_f))*100,
            sum(m), ((sum(m) - sum(a))/sum(a))*100))
    print('AVG\t%f (%.2f%%)\t%f (%.2f%%)\t%f (%.2f%%)'
          %(sum(m_b)/t, ((sum(m_b)/t - sum(a_b)/t)/(sum(a_b)/t))*100,
            sum(m_f)/t, ((sum(m_f)/t - sum(a_f)/t)/(sum(a_f)/t))*100,
            sum(m)/t, ((sum(m)/t - sum(a)/t)/(sum(a)/t))*100))
def main(args):
    '''
    Run the selected privacy-loss model over the whole time horizon;
    in debug mode report and plot the losses, and save the results
    when an output directory is given.
    Parameters:
        args - The parsed command-line arguments (see parse_args).
    Returns:
        Nothing.
    '''
    global DEBUG, MISS, TOTAL
    DEBUG = args.debug
    MISS, TOTAL = 0, 0
    # Uniform privacy budget at every timestamp.
    e = [args.epsilon]*args.time
    # Generate transition matrices.
    p_b = gen_trans_mt(args.matrix, args.correlation)
    p_f = gen_trans_mt(args.matrix, args.correlation)
    if DEBUG:
        # Run the basic model as a baseline for the comparisons below.
        a_b = bpl(p_b, [0.0] * args.time, e, args.time)
        a_f = fpl(p_f, [0.0] * args.time, e, args.time)
        a = tpl(a_b, a_f, e)
    output_path = args.output +\
        '/' + args.model +\
        '_c' + str(args.correlation) +\
        '_e' + str(args.epsilon) +\
        '_m' + str(args.matrix) +\
        '_t' + str(args.time) +\
        '_w' + str(args.window)
    if args.model == 'b':
        # Basic (memoized variants).
        a_b = bpl_m(p_b, [0.0] * args.time, e, args.time)
        a_f = fpl_m(p_f, [0.0] * args.time, e, args.time)
        a = tpl(a_b, a_f, e)
        if DEBUG:
            # Fixed: header was '\Basic' (literal backslash-B), not a newline.
            print('\nBasic\n'
                  't\tbpl\t\tfpl\t\ttpl')
            for i in range(0, args.time):
                print('%d\t%f\t%f\t%f' %(i + 1, a_b[i], a_f[i], a[i]))
            print('\nSUM\t%f\t%f\t%f' %(sum(a_b), sum(a_f), sum(a)))
            print('AVG\t%f\t%f\t%f' %(sum(a_b)/args.time, sum(a_f)/args.time, sum(a)/args.time))
            plot_loss('Basic', e, a_b, a_f, a)
        # Fixed: was 'if args.Returns:' — no such attribute (AttributeError).
        if args.output:
            save_output(output_path, args.time, e, a_b, a_f, a)
    elif args.model == 's':
        # Static
        a_s_b, a_s_f = [None]*args.time, [None]*args.time
        for i in range(1, args.time + 1):
            a_s_b[i - 1] = bpl_s_m(p_b, e, i, args.window)
            a_s_f[i - 1] = fpl_s_m(p_f, e, i, args.time, args.window)
        a_s = tpl(a_s_b, a_s_f, e)
        if DEBUG:
            _report_model('Static', args.time, a_b, a_f, a, a_s_b, a_s_f, a_s)
            plot_loss('Static', e, a_s_b, a_s_f, a_s)
        # Fixed: was 'if args.Returns:'.
        if args.output:
            save_output(output_path, args.time, e, a_s_b, a_s_f, a_s)
    elif args.model == 'l':
        # Linear
        l = 1  # Model parameter forwarded to bpl_l_m/fpl_l_m.
        a_l_b, a_l_f = [None]*args.time, [None]*args.time
        for i in range(1, args.time + 1):
            a_l_b[i - 1] = bpl_l_m(p_b, e, i, args.window, l)
            a_l_f[i - 1] = fpl_l_m(p_f, e, i, args.time, args.window, l)
        a_l = tpl(a_l_b, a_l_f, e)
        if DEBUG:
            _report_model('Linear', args.time, a_b, a_f, a, a_l_b, a_l_f, a_l)
            plot_loss('Linear', e, a_l_b, a_l_f, a_l)
        # Fixed: was 'if args.Returns:'.
        if args.output:
            save_output(output_path, args.time, e, a_l_b, a_l_f, a_l)
    elif args.model == 'e':
        # Exponential
        h = 1  # Model parameter forwarded to bpl_e_m/fpl_e_m.
        a_e_b, a_e_f = [None]*args.time, [None]*args.time
        for i in range(1, args.time + 1):
            a_e_b[i - 1] = bpl_e_m(p_b, e, i, args.window, h)
            a_e_f[i - 1] = fpl_e_m(p_f, e, i, args.time, args.window, h)
        a_e = tpl(a_e_b, a_e_f, e)
        if DEBUG:
            _report_model('Exponential', args.time, a_b, a_f, a, a_e_b, a_e_f, a_e)
            plot_loss('Exponential', e, a_e_b, a_e_f, a_e)
        # Fixed: was 'if args.Returns:'.
        if args.output:
            save_output(output_path, args.time, e, a_e_b, a_e_f, a_e)
if __name__ == '__main__':
    try:
        args = parse_args()
        # Echo the effective configuration before running.
        banner = ('##############################\n'
                  f'Correlation: {args.correlation:f}\n'
                  f'Debug : {args.debug!r}\n'
                  f'Epsilon : {args.epsilon:f}\n'
                  f'Matrix : {args.matrix:d}\n'
                  f'Model : {args.model}\n'
                  f'Output : {args.output}\n'
                  f'Time : {args.time:d}\n'
                  f'Window : {args.window:d}\n'
                  '##############################')
        print(banner, flush=True)
        # Time the run in CPU seconds.
        start_time = time.process_time()
        main(args)
        elapsed = time.process_time() - start_time
        print('##############################\n'
              f'Time : {elapsed:.4f}s\n'
              '##############################\n', flush=True)
    except KeyboardInterrupt:
        print('Interrupted by user.')
        exit()