Merge branch 'master' of git.delkappa.com:manos/the-last-thing
This commit is contained in:
commit
b93328bcea
289
code/lib/gdp.py
289
code/lib/gdp.py
@ -28,15 +28,15 @@ MISS = 0 # Number of additions to the cache.
|
||||
TOTAL = 0 # Number of cache accesses.
|
||||
|
||||
|
||||
'''
|
||||
def load_data(path):
|
||||
'''
|
||||
Read data from a file.
|
||||
|
||||
Parameters:
|
||||
path - The relative path to the data file.
|
||||
Returns:
|
||||
data - A list of tuples [uid, timestamp, lng, lat, loc].
|
||||
'''
|
||||
def load_data(path):
|
||||
'''
|
||||
print('Loading data from', os.path.abspath(path), '... ', end='')
|
||||
data = []
|
||||
try:
|
||||
@ -50,7 +50,8 @@ def load_data(path):
|
||||
exit()
|
||||
|
||||
|
||||
'''
|
||||
def save_output(path, t, e, a_b, a_f, a):
|
||||
'''
|
||||
Save output to a file.
|
||||
|
||||
Parameters:
|
||||
@ -62,8 +63,7 @@ def load_data(path):
|
||||
a - The temporal privacy loss at each timestamp.
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def save_output(path, t, e, a_b, a_f, a):
|
||||
'''
|
||||
# timestamp = time.strftime('%Y%m%d%H%M%S')
|
||||
print('Saving output to %s... ' %(path), end='', flush=True)
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
@ -74,15 +74,15 @@ def save_output(path, t, e, a_b, a_f, a):
|
||||
print('OK.', flush=True)
|
||||
|
||||
|
||||
'''
|
||||
def get_timestamps(data):
|
||||
'''
|
||||
Get all the timestamps from the input data.
|
||||
|
||||
Parameters:
|
||||
data - The input data set.
|
||||
Returns:
|
||||
timestamps - An ndarray of all of the timestamps from the input data.
|
||||
'''
|
||||
def get_timestamps(data):
|
||||
'''
|
||||
print('Getting a list of all timestamps... ', end='', flush=True)
|
||||
timestamps = np.sort(np.unique(np.array(data)[:, 1]))
|
||||
if not len(timestamps):
|
||||
@ -103,15 +103,15 @@ def get_timestamps(data):
|
||||
return timestamps
|
||||
|
||||
|
||||
'''
|
||||
def get_locs(data):
|
||||
'''
|
||||
Get all the unique locations from the input data.
|
||||
|
||||
Parameters:
|
||||
data - The input data set.
|
||||
Returns:
|
||||
locs - A sorted ndarray of all the unique locations in the input data.
|
||||
'''
|
||||
def get_locs(data):
|
||||
'''
|
||||
print('Getting a list of all locations... ', end='', flush=True)
|
||||
locs = np.sort(np.unique(np.array(data)[:, 4].astype(np.int)))
|
||||
if not len(locs):
|
||||
@ -123,7 +123,8 @@ def get_locs(data):
|
||||
return list(map(str, locs))
|
||||
|
||||
|
||||
'''
|
||||
def get_cnts(data, t):
|
||||
'''
|
||||
Get the counts at every location for a specific timestamp.
|
||||
|
||||
Parameters:
|
||||
@ -131,8 +132,7 @@ def get_locs(data):
|
||||
t - The timestamp of interest.
|
||||
Returns:
|
||||
cnts - A dict {loc:cnt} with the counts at every location for a specific timestamp.
|
||||
'''
|
||||
def get_cnts(data, t):
|
||||
'''
|
||||
print('Getting all counts at %s... ' %(t), end='', flush=True)
|
||||
locs = get_locs(data)
|
||||
cnts = dict.fromkeys(locs, 0)
|
||||
@ -145,15 +145,15 @@ def get_cnts(data, t):
|
||||
return cnts
|
||||
|
||||
|
||||
'''
|
||||
def get_all_cnts(data):
|
||||
'''
|
||||
Get the counts at every location for every timestamp.
|
||||
|
||||
Parameters:
|
||||
data - The input data set.
|
||||
Returns:
|
||||
cnts - A dict {timestamp:loc} with all the counts at every location for every timestamp.
|
||||
'''
|
||||
def get_all_cnts(data):
|
||||
'''
|
||||
cnts = {}
|
||||
for d in data:
|
||||
key = d[1] + '@' + d[4]
|
||||
@ -163,15 +163,15 @@ def get_all_cnts(data):
|
||||
return cnts
|
||||
|
||||
|
||||
'''
|
||||
def get_usrs(data):
|
||||
'''
|
||||
Get a list of unique users in the input data set.
|
||||
|
||||
Parameters:
|
||||
data - The input data set.
|
||||
Returns:
|
||||
users - An ndarray of all unique users.
|
||||
'''
|
||||
def get_usrs(data):
|
||||
'''
|
||||
users = np.sort(np.unique(np.array(data)[:, 0].astype(np.int)))
|
||||
if not len(users):
|
||||
print('No users found.')
|
||||
@ -181,7 +181,8 @@ def get_usrs(data):
|
||||
return users
|
||||
|
||||
|
||||
'''
|
||||
def get_usr_data(data, id):
|
||||
'''
|
||||
Get the data of a particular user from a data set.
|
||||
|
||||
Parameters:
|
||||
@ -189,8 +190,7 @@ def get_usrs(data):
|
||||
id - The user identifier.
|
||||
Returns:
|
||||
output - A list of the data of the targeted user.
|
||||
'''
|
||||
def get_usr_data(data, id):
|
||||
'''
|
||||
output = []
|
||||
for d in data:
|
||||
if (d[0] == str(id)):
|
||||
@ -200,30 +200,30 @@ def get_usr_data(data, id):
|
||||
return output
|
||||
|
||||
|
||||
'''
|
||||
def get_usrs_data(data):
|
||||
'''
|
||||
Get the data of every user in a data set.
|
||||
|
||||
Parameters:
|
||||
data - The input data set.
|
||||
Returns:
|
||||
output - A dict {usr, [usr_data]} with the data of each user.
|
||||
'''
|
||||
def get_usrs_data(data):
|
||||
'''
|
||||
output = {}
|
||||
for d in data:
|
||||
output[d[0]] = output.get(d[0], []) + [d]
|
||||
return output
|
||||
|
||||
|
||||
'''
|
||||
def get_usr_traj(data):
|
||||
'''
|
||||
Get the trajectory of a user from her data.
|
||||
|
||||
Parameters:
|
||||
data - The data of the user.
|
||||
Returns:
|
||||
traj - A list [(timestamp, loc)] with the locations and corresponding timestamps that the user was at.
|
||||
'''
|
||||
def get_usr_traj(data):
|
||||
'''
|
||||
traj = []
|
||||
for d in data:
|
||||
traj.append((d[1], d[4]))
|
||||
@ -232,15 +232,15 @@ def get_usr_traj(data):
|
||||
return traj
|
||||
|
||||
|
||||
'''
|
||||
def get_poss_trans(data):
|
||||
'''
|
||||
Get all the possible transitions.
|
||||
|
||||
Parameters:
|
||||
data - The input data set.
|
||||
Returns:
|
||||
trans - A set with all the possible forward transitions in the input.
|
||||
'''
|
||||
def get_poss_trans(data):
|
||||
'''
|
||||
print('Getting possible transitions... ', end='', flush=True)
|
||||
trans = set()
|
||||
for u, u_data in data.items():
|
||||
@ -253,7 +253,8 @@ def get_poss_trans(data):
|
||||
return trans
|
||||
|
||||
|
||||
'''
|
||||
def get_bwd_trans(data):
|
||||
'''
|
||||
Get all backward transitions in a data set.
|
||||
|
||||
Parameters:
|
||||
@ -261,8 +262,7 @@ def get_poss_trans(data):
|
||||
Returns:
|
||||
trans - A dict {(t, t-1):[transitions]} with all the backward transitions
|
||||
at every sequential timestamp pair in the input data set.
|
||||
'''
|
||||
def get_bwd_trans(data):
|
||||
'''
|
||||
print('Getting all backward transitions... ', end='', flush=True)
|
||||
trans = {}
|
||||
for u, u_data in data.items():
|
||||
@ -276,7 +276,8 @@ def get_bwd_trans(data):
|
||||
return trans
|
||||
|
||||
|
||||
'''
|
||||
def get_fwd_trans(data):
|
||||
'''
|
||||
Get all forward transitions in a data set.
|
||||
|
||||
Parameters:
|
||||
@ -284,8 +285,7 @@ def get_bwd_trans(data):
|
||||
Returns:
|
||||
trans - A dict {(t-1, t):[transitions]} with all the forward transitions
|
||||
at every sequential timestamp pair in the input data set.
|
||||
'''
|
||||
def get_fwd_trans(data):
|
||||
'''
|
||||
print('Getting all forward transitions... ', end='', flush=True)
|
||||
trans = {}
|
||||
for u, u_data in data.items():
|
||||
@ -299,7 +299,8 @@ def get_fwd_trans(data):
|
||||
return trans
|
||||
|
||||
|
||||
'''
|
||||
def safe_div(a, b):
|
||||
'''
|
||||
Divide two numbers. If the divisor is 0 return inf.
|
||||
|
||||
Parameters:
|
||||
@ -307,14 +308,14 @@ def get_fwd_trans(data):
|
||||
b - The divisor.
|
||||
Returns:
|
||||
The float result of the division.
|
||||
'''
|
||||
def safe_div(a, b):
|
||||
'''
|
||||
if b == 0:
|
||||
return math.inf
|
||||
return float(a/b)
|
||||
|
||||
|
||||
'''
|
||||
def max_val(q, d, a):
|
||||
'''
|
||||
Calculate the maximum value of the objective function.
|
||||
|
||||
Parameters:
|
||||
@ -324,14 +325,14 @@ def safe_div(a, b):
|
||||
timestamp.
|
||||
Returns:
|
||||
The maximum value of the objective function.
|
||||
'''
|
||||
def max_val(q, d, a):
|
||||
'''
|
||||
if a == math.inf:
|
||||
return math.nan
|
||||
return (q*(math.exp(a) - 1) + 1)/(d*(math.exp(a) - 1) + 1)
|
||||
|
||||
|
||||
'''
|
||||
def find_qd(p, a):
|
||||
'''
|
||||
Find two different rows (q and d) of a transition matrix (p)
|
||||
that maximize the product of the objective function and return
|
||||
their sums.
|
||||
@ -344,8 +345,7 @@ def max_val(q, d, a):
|
||||
Returns:
|
||||
sum_q - The sum of the elements of q.
|
||||
sum_d - The sum of the elements of d.
|
||||
'''
|
||||
def find_qd(p, a):
|
||||
'''
|
||||
res = 0.0
|
||||
sum_q, sum_d = 0.0, 0.0
|
||||
for q in p: # A row from the transition matrix.
|
||||
@ -374,7 +374,8 @@ def find_qd(p, a):
|
||||
return sum_q, sum_d
|
||||
|
||||
|
||||
'''
|
||||
def gen_data(usrs, timestamps, locs):
|
||||
'''
|
||||
Generate data.
|
||||
|
||||
Parameters:
|
||||
@ -383,8 +384,7 @@ def find_qd(p, a):
|
||||
locs - The number of locations.
|
||||
Returns:
|
||||
data - The generated data.
|
||||
'''
|
||||
def gen_data(usrs, timestamps, locs):
|
||||
'''
|
||||
print('Generating data... ', end='', flush=True)
|
||||
# Generate timestamps.
|
||||
ts = []
|
||||
@ -412,7 +412,8 @@ def gen_data(usrs, timestamps, locs):
|
||||
return data
|
||||
|
||||
|
||||
'''
|
||||
def gen_trans_mt(n, s):
|
||||
'''
|
||||
Generate a transition matrix.
|
||||
|
||||
Parameters:
|
||||
@ -422,8 +423,7 @@ def gen_data(usrs, timestamps, locs):
|
||||
uniformity of each row.
|
||||
Returns:
|
||||
p_ - The transition matrix.
|
||||
'''
|
||||
def gen_trans_mt(n, s):
|
||||
'''
|
||||
if DEBUG:
|
||||
print('Generating transition matrix %dx%d with s = %.4f... ' %(n, n, s), end='', flush=True)
|
||||
p = np.zeros((n, n), float)
|
||||
@ -439,7 +439,8 @@ def gen_trans_mt(n, s):
|
||||
return p_
|
||||
|
||||
|
||||
'''
|
||||
def get_trans_mt(locs, trans):
|
||||
'''
|
||||
Get the transition matrix
|
||||
|
||||
Parameters:
|
||||
@ -448,8 +449,7 @@ def gen_trans_mt(n, s):
|
||||
Returns:
|
||||
p - A 2d dict {{locs}{locs}} containing the
|
||||
corresponding location transition probabilities.
|
||||
'''
|
||||
def get_trans_mt(locs, trans):
|
||||
'''
|
||||
if DEBUG:
|
||||
print('Generating the transition matrix... ', end='', flush=True)
|
||||
# Initialize the transition matrix.
|
||||
@ -476,7 +476,8 @@ def get_trans_mt(locs, trans):
|
||||
return p
|
||||
|
||||
|
||||
'''
|
||||
def get_entropy(mt):
|
||||
'''
|
||||
Calculate the measure-theoretic (Kolmogorov-Sinai) entropy
|
||||
of a transition matrix.
|
||||
|
||||
@ -484,8 +485,7 @@ def get_trans_mt(locs, trans):
|
||||
mt - A 2d dict transition matrix.
|
||||
Returns:
|
||||
h - The Kolmogorov-Sinai entropy of the matrix.
|
||||
'''
|
||||
def get_entropy(mt):
|
||||
'''
|
||||
if DEBUG:
|
||||
print('Calculating the measure-theoretic entropy... ', end='', flush=True)
|
||||
h = 0.0
|
||||
@ -523,15 +523,15 @@ def get_entropy(mt):
|
||||
return h
|
||||
|
||||
|
||||
'''
|
||||
def get_2darray(mt):
|
||||
'''
|
||||
Convert a 2d dict to a 2d array.
|
||||
|
||||
Parameters:
|
||||
mt - The 2d dict.
|
||||
Returns:
|
||||
p - The 2d numpy array.
|
||||
'''
|
||||
def get_2darray(mt):
|
||||
'''
|
||||
if type(mt) == type(np.array([])):
|
||||
return mt
|
||||
p = np.zeros((len(mt), len(mt)), float)
|
||||
@ -540,7 +540,8 @@ def get_2darray(mt):
|
||||
return p
|
||||
|
||||
|
||||
'''
|
||||
def get_laplace_pd(ts, t, sc):
|
||||
'''
|
||||
Get a Laplace probability distribution.
|
||||
|
||||
Parameters:
|
||||
@ -549,14 +550,14 @@ def get_2darray(mt):
|
||||
sc - The scale of the distribution.
|
||||
Returns:
|
||||
The probability distribution.
|
||||
'''
|
||||
def get_laplace_pd(ts, t, sc):
|
||||
'''
|
||||
x = np.arange(0, len(ts), 1)
|
||||
loc = np.where(ts == t)
|
||||
return laplace.pdf(x, loc=loc, scale=sc)[0]
|
||||
|
||||
|
||||
'''
|
||||
def get_norm_pd(ts, t, sc):
|
||||
'''
|
||||
Get a Gaussian probability distribution.
|
||||
|
||||
Parameters:
|
||||
@ -565,14 +566,14 @@ def get_laplace_pd(ts, t, sc):
|
||||
sc - The scale of the distribution.
|
||||
Returns:
|
||||
The probability distribution.
|
||||
'''
|
||||
def get_norm_pd(ts, t, sc):
|
||||
'''
|
||||
x = np.arange(0, len(ts), 1)
|
||||
loc = np.where(ts == t)
|
||||
return norm.pdf(x, loc=loc, scale=sc)[0]
|
||||
|
||||
|
||||
'''
|
||||
def get_sample(ts, t, pct, pd):
|
||||
'''
|
||||
Get a sample from the time series.
|
||||
|
||||
Parameters:
|
||||
@ -583,8 +584,7 @@ def get_norm_pd(ts, t, sc):
|
||||
of the probability distribution to be sampled.
|
||||
Returns:
|
||||
spl - An ndarray of the sampled timestamps.
|
||||
'''
|
||||
def get_sample(ts, t, pct, pd):
|
||||
'''
|
||||
if DEBUG:
|
||||
print('Sampling %.2f%% of %s at %s... ' %(pct*100, ts, t), end='', flush=True)
|
||||
# Check that it is a valid timestamp.
|
||||
@ -604,7 +604,8 @@ def get_sample(ts, t, pct, pd):
|
||||
return spl
|
||||
|
||||
|
||||
'''
|
||||
def priv_l(p, a, e):
|
||||
'''
|
||||
Calculate the backward/forward privacy loss at the current
|
||||
timestamp.
|
||||
|
||||
@ -616,13 +617,13 @@ def get_sample(ts, t, pct, pd):
|
||||
Returns:
|
||||
The backward/forward privacy loss at the current
|
||||
timestamp.
|
||||
'''
|
||||
def priv_l(p, a, e):
|
||||
'''
|
||||
sum_q, sum_d = find_qd(p, a)
|
||||
return math.log(max_val(sum_q, sum_d, a)) + e
|
||||
|
||||
|
||||
'''
|
||||
def priv_l_m(p, a, e):
|
||||
'''
|
||||
Calculate the backward/forward privacy loss at the current
|
||||
timestamp using memoization.
|
||||
|
||||
@ -634,8 +635,7 @@ def priv_l(p, a, e):
|
||||
Returns:
|
||||
The backward/forward privacy loss at the current
|
||||
timestamp.
|
||||
'''
|
||||
def priv_l_m(p, a, e):
|
||||
'''
|
||||
key = xxhash.xxh64(p).hexdigest() + str(a) + str(e)
|
||||
global MEM, TOTAL, MISS
|
||||
TOTAL += 1
|
||||
@ -648,7 +648,8 @@ def priv_l_m(p, a, e):
|
||||
return result
|
||||
|
||||
|
||||
'''
|
||||
def bpl(p, a, e, t):
|
||||
'''
|
||||
Calculate the total backward privacy loss at every timestamp.
|
||||
|
||||
Parameters:
|
||||
@ -660,15 +661,15 @@ def priv_l_m(p, a, e):
|
||||
Returns:
|
||||
a - The backward privacy loss at every timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl(p, a, e, t):
|
||||
'''
|
||||
a[0] = e[0]
|
||||
for i in range(1, t):
|
||||
a[i] = priv_l(p, a[i - 1], e[i])
|
||||
return a
|
||||
|
||||
|
||||
'''
|
||||
def bpl_m(p, a, e, t):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp with memoization.
|
||||
|
||||
@ -682,13 +683,13 @@ def bpl(p, a, e, t):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_m(p, a, e, t):
|
||||
'''
|
||||
a[0] = e[0]
|
||||
for i in range(1, t):
|
||||
a[i] = priv_l_m(p, a[i - 1], e[i])
|
||||
return a
|
||||
|
||||
|
||||
def bpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
# t is (near) the landmark
|
||||
if lmdk == t - 1 or t == lmdk:
|
||||
@ -702,7 +703,8 @@ def bpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
return a
|
||||
|
||||
|
||||
'''
|
||||
def bpl_s(p, e, i, w):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the static model, i.e., previous releases
|
||||
are grouped in a window of static size.
|
||||
@ -716,8 +718,7 @@ def bpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_s(p, e, i, w):
|
||||
'''
|
||||
if i - w > 1:
|
||||
# print('bpl_s: %d - %d [%d]' %(i, i - w, w))
|
||||
return priv_l(np.linalg.matrix_power(p, w), bpl_s(p, e, i - w, w), e[i - 1])
|
||||
@ -729,7 +730,8 @@ def bpl_s(p, e, i, w):
|
||||
return e[0]
|
||||
|
||||
|
||||
'''
|
||||
def bpl_s_m(p, e, i, w):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the static model, i.e., previous releases
|
||||
are grouped in a window of static size, using memoization.
|
||||
@ -743,8 +745,7 @@ def bpl_s(p, e, i, w):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_s_m(p, e, i, w):
|
||||
'''
|
||||
if i - w > 1:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w), bpl_s_m(p, e, i - w, w), e[i - 1])
|
||||
elif i - w <= 1:
|
||||
@ -753,7 +754,8 @@ def bpl_s_m(p, e, i, w):
|
||||
return e[0]
|
||||
|
||||
|
||||
'''
|
||||
def bpl_l(p, e, i, w, l):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the linear model, i.e., previous releases
|
||||
are grouped in a window of a size that increases linearly.
|
||||
@ -769,8 +771,7 @@ def bpl_s_m(p, e, i, w):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_l(p, e, i, w, l):
|
||||
'''
|
||||
if i - w*l > 1:
|
||||
# print('bpl_l: %d - %d [%d]' %(i, i - w*l, w*l))
|
||||
return priv_l(np.linalg.matrix_power(p, w*l), bpl_l(p, e, i - w*l, w, l + 1), e[i - 1])
|
||||
@ -782,7 +783,8 @@ def bpl_l(p, e, i, w, l):
|
||||
return e[0]
|
||||
|
||||
|
||||
'''
|
||||
def bpl_l_m(p, e, i, w, l):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the linear model, i.e., previous releases
|
||||
are grouped in a window of a size that increases linearly,
|
||||
@ -799,8 +801,7 @@ def bpl_l(p, e, i, w, l):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_l_m(p, e, i, w, l):
|
||||
'''
|
||||
if i - w*l > 1:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w*l), bpl_l_m(p, e, i - w*l, w, l + 1), e[i - 1])
|
||||
elif i - w*l <= 1:
|
||||
@ -809,7 +810,8 @@ def bpl_l_m(p, e, i, w, l):
|
||||
return e[0]
|
||||
|
||||
|
||||
'''
|
||||
def bpl_e(p, e, i, w, h):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., previous releases
|
||||
are grouped in a window of a size that increases exponentially.
|
||||
@ -825,8 +827,7 @@ def bpl_l_m(p, e, i, w, l):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_e(p, e, i, w, h):
|
||||
'''
|
||||
if i - w**h > 1:
|
||||
# print('bpl_e: %d - %d [%d]' %(i, i - w**h, w**h))
|
||||
return priv_l(np.linalg.matrix_power(p, w**h), bpl_e(p, e, i - w**h, w, h + 1), e[i - 1])
|
||||
@ -838,7 +839,8 @@ def bpl_e(p, e, i, w, h):
|
||||
return e[0]
|
||||
|
||||
|
||||
'''
|
||||
def bpl_e_m(p, e, i, w, h):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., previous releases
|
||||
are grouped in a window of a size that increases exponentially,
|
||||
@ -855,8 +857,7 @@ def bpl_e(p, e, i, w, h):
|
||||
Returns:
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_e_m(p, e, i, w, h):
|
||||
'''
|
||||
if i - w**h > 1:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w**h), bpl_e_m(p, e, i - w**h, w, h + 1), e[i - 1])
|
||||
elif i - w**h <= 1:
|
||||
@ -865,7 +866,8 @@ def bpl_e_m(p, e, i, w, h):
|
||||
return e[0]
|
||||
|
||||
|
||||
'''
|
||||
def fpl(p, a, e, t):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp.
|
||||
|
||||
@ -879,15 +881,15 @@ def bpl_e_m(p, e, i, w, h):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl(p, a, e, t):
|
||||
'''
|
||||
a[t - 1] = e[t - 1]
|
||||
for i in range(t - 2, -1, -1):
|
||||
a[i] = priv_l(p, a[i + 1], e[i])
|
||||
return a
|
||||
|
||||
|
||||
'''
|
||||
def fpl_m(p, a, e, t):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp, using memoization.
|
||||
|
||||
@ -901,8 +903,7 @@ def fpl(p, a, e, t):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_m(p, a, e, t):
|
||||
'''
|
||||
a[t - 1] = e[t - 1]
|
||||
for i in range(t - 2, -1, -1):
|
||||
a[i] = priv_l_m(p, a[i + 1], e[i])
|
||||
@ -921,7 +922,8 @@ def fpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
return a
|
||||
|
||||
|
||||
'''
|
||||
def fpl_s(p, e, i, t, w):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the static model, i.e., next releases
|
||||
are grouped in a window of static size.
|
||||
@ -935,8 +937,7 @@ def fpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_s(p, e, i, t, w):
|
||||
'''
|
||||
if i + w < t:
|
||||
# print('fpl_s: %d - %d [%d]' %(i, i + w, w))
|
||||
return priv_l(np.linalg.matrix_power(p, w), fpl_s(p, e, i + w, t, w), e[i - 1])
|
||||
@ -948,7 +949,8 @@ def fpl_s(p, e, i, t, w):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
'''
|
||||
def fpl_s_m(p, e, i, t, w):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the static model, i.e., next releases
|
||||
are grouped in a window of static size, using memoization.
|
||||
@ -962,8 +964,7 @@ def fpl_s(p, e, i, t, w):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_s_m(p, e, i, t, w):
|
||||
'''
|
||||
if i + w < t:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w), fpl_s_m(p, e, i + w, t, w), e[i - 1])
|
||||
elif i + w >= t:
|
||||
@ -972,7 +973,8 @@ def fpl_s_m(p, e, i, t, w):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
'''
|
||||
def fpl_l(p, e, i, t, w, l):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the linear model, i.e., next releases
|
||||
are grouped in a window of a size that increases linearly.
|
||||
@ -988,8 +990,7 @@ def fpl_s_m(p, e, i, t, w):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_l(p, e, i, t, w, l):
|
||||
'''
|
||||
if i + w*l < t:
|
||||
# print('fpl_l: %d - %d [%d]' %(i, i + w*l, w*l))
|
||||
return priv_l(np.linalg.matrix_power(p, w*l), fpl_l(p, e, i + w*l, t, w, l + 1), e[i - 1])
|
||||
@ -1001,7 +1002,8 @@ def fpl_l(p, e, i, t, w, l):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
'''
|
||||
def fpl_l_m(p, e, i, t, w, l):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the linear model, i.e., next releases
|
||||
are grouped in a window of a size that increases linearly,
|
||||
@ -1018,8 +1020,7 @@ def fpl_l(p, e, i, t, w, l):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_l_m(p, e, i, t, w, l):
|
||||
'''
|
||||
if i + w*l < t:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w*l), fpl_l_m(p, e, i + w*l, t, w, l + 1), e[i - 1])
|
||||
elif i + w*l >= t:
|
||||
@ -1028,7 +1029,8 @@ def fpl_l_m(p, e, i, t, w, l):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
'''
|
||||
def fpl_e(p, e, i, t, w, h):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., next releases
|
||||
are grouped in a window of a size that increases exponentially.
|
||||
@ -1044,8 +1046,7 @@ def fpl_l_m(p, e, i, t, w, l):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_e(p, e, i, t, w, h):
|
||||
'''
|
||||
if i + w**h < t:
|
||||
# print('fpl_e: %d - %d [%d]' %(i, i + w**h, w**h))
|
||||
return priv_l(np.linalg.matrix_power(p, w**h), fpl_e(p, e, i + w**h, t, w, h + 1), e[i - 1])
|
||||
@ -1057,7 +1058,8 @@ def fpl_e(p, e, i, t, w, h):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
'''
|
||||
def fpl_e_m(p, e, i, t, w, h):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., next releases
|
||||
are grouped in a window of a size that increases exponentially,
|
||||
@ -1074,8 +1076,7 @@ def fpl_e(p, e, i, t, w, h):
|
||||
Returns:
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_e_m(p, e, i, t, w, h):
|
||||
'''
|
||||
if i + w**h < t:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w**h), fpl_e_m(p, e, i + w**h, t, w, h + 1), e[i - 1])
|
||||
elif i + w**h >= t:
|
||||
@ -1084,7 +1085,8 @@ def fpl_e_m(p, e, i, t, w, h):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
'''
|
||||
def tpl(bpl, fpl, e):
|
||||
'''
|
||||
Calculate the total privacy loss at every timestamp.
|
||||
|
||||
Parameters:
|
||||
@ -1093,12 +1095,12 @@ def fpl_e_m(p, e, i, t, w, h):
|
||||
e - The privacy budget for data publishing.
|
||||
Returns:
|
||||
The list of total privacy loss at every timestamp.
|
||||
'''
|
||||
def tpl(bpl, fpl, e):
|
||||
'''
|
||||
return [x + y - z for (x, y, z) in zip(bpl, fpl, e)]
|
||||
|
||||
|
||||
'''
|
||||
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
'''
|
||||
Calculate the temporal privacy loss at every timestamp
|
||||
taking into account landmarks.
|
||||
|
||||
@ -1117,8 +1119,7 @@ def tpl(bpl, fpl, e):
|
||||
due to the next data releases.
|
||||
a - The total privacy loss at every timestamp
|
||||
taking into account landmarks.
|
||||
'''
|
||||
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
'''
|
||||
a_b = np.zeros(len(seq))
|
||||
a_f = np.zeros(len(seq))
|
||||
a = np.zeros(len(seq))
|
||||
@ -1135,7 +1136,8 @@ def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
return a_b, a_f, a
|
||||
|
||||
|
||||
'''
|
||||
def get_limits(t, seq, lmdks):
|
||||
'''
|
||||
Get the limits for the calculation of temporal privacy loss.
|
||||
|
||||
Parameters:
|
||||
@ -1145,8 +1147,7 @@ def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
Returns:
|
||||
t_prv - The previous landmark.
|
||||
t_nxt - The next landmark.
|
||||
'''
|
||||
def get_limits(t, seq, lmdks):
|
||||
'''
|
||||
# Add landmark limits.
|
||||
seq_lmdks = np.copy(lmdks)
|
||||
# if seq[0] not in seq_lmdks:
|
||||
@ -1174,7 +1175,8 @@ def get_limits(t, seq, lmdks):
|
||||
return t_prv, t_nxt
|
||||
|
||||
|
||||
'''
|
||||
def plot_loss(title, e, a_b, a_f, a):
|
||||
'''
|
||||
Plots the privacy loss of the time series.
|
||||
|
||||
Parameters:
|
||||
@ -1185,8 +1187,7 @@ def get_limits(t, seq, lmdks):
|
||||
a - The total privacy loss.
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def plot_loss(title, e, a_b, a_f, a):
|
||||
'''
|
||||
plt.rc('font', family='serif')
|
||||
plt.rc('font', size=10)
|
||||
plt.rc('text', usetex=True)
|
||||
@ -1221,7 +1222,8 @@ def plot_loss(title, e, a_b, a_f, a):
|
||||
plt.show()
|
||||
|
||||
|
||||
'''
|
||||
def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
'''
|
||||
Plots a comparison of the privacy loss of all models.
|
||||
|
||||
Parameters:
|
||||
@ -1232,8 +1234,7 @@ def plot_loss(title, e, a_b, a_f, a):
|
||||
a_l - The privacy loss of the linear model.
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
'''
|
||||
plt.rc('font', family='serif')
|
||||
plt.rc('font', size=10)
|
||||
plt.rc('text', usetex=True)
|
||||
@ -1268,7 +1269,8 @@ def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
plt.show()
|
||||
|
||||
|
||||
'''
|
||||
def parse_args():
|
||||
'''
|
||||
Parse arguments.
|
||||
|
||||
Mandatory:
|
||||
@ -1283,8 +1285,7 @@ def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
-o, --output, The path to the output directory.
|
||||
-t, --time, The time limit.
|
||||
-w, --window, The size of the event protection window.
|
||||
'''
|
||||
def parse_args():
|
||||
'''
|
||||
# Create argument parser.
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
@ -7,4 +7,4 @@ The {\thething} selection module introduces a reasonable data utility decline to
|
||||
% \kat{it would be nice to see it clearly on Figure 5.5. (eg, by including another bar that shows adaptive without landmark selection)}
|
||||
% \mk{Done.}
|
||||
In terms of temporal correlation, we observe that under moderate and strong temporal correlation, a greater average regular--{\thething} event distance in a {\thething} distribution causes greater overall privacy loss.
|
||||
Finally, the contribution of the {\thething} privacy on enhancing the data utility, while preserving $\epsilon$-differential privacy, is demonstrated by the fact that the selected Adaptive scheme provides better data utility than the user-level privacy protection.
|
||||
Finally, the contribution of {\thething} privacy to enhancing data utility, while preserving $\varepsilon$-differential privacy, is demonstrated by the fact that the selected Adaptive scheme provides better data utility than user-level privacy protection.
|
||||
|
@ -22,7 +22,7 @@ Take for example the scenario in Figure~\ref{fig:st-cont}, where {\thethings} ar
|
||||
If we want to protect the {\thething} points, we have to allocate at most a budget of $\varepsilon$ to the {\thethings}, while saving some for the release of regular events.
|
||||
Essentially, the more budget we allocate to an event the less we protect it, but at the same time we maintain its utility.
|
||||
With {\thething} privacy we propose to distribute the budget taking into account only the existence of the {\thethings} when we release an event of the time series, i.e.,~allocating $\frac{\varepsilon}{5}$ ($4\ \text{\thethings} + 1\ \text{regular point}$) to each event (see Figure~\ref{fig:st-cont}).
|
||||
This way, we still guarantee\footnote{$\epsilon$-differential privacy guarantees that the allocated budget should be less or equal to $\epsilon$, and not precisely how much.\kat{Mano check.}} that the {\thethings} are adequately protected, as they receive a total budget of $\frac{4\varepsilon}{5}<\varepsilon$.
|
||||
This way, we still guarantee\footnote{$\varepsilon$-differential privacy requires only that the allocated budget be less than or equal to $\varepsilon$; it does not prescribe the exact amount.\kat{Mano check.}} that the {\thethings} are adequately protected, as they receive a total budget of $\frac{4\varepsilon}{5}<\varepsilon$.
|
||||
At the same time, we avoid over-perturbing the regular events, as we allocate to them a higher total budget ($\frac{4\varepsilon}{5}$) compared to the user-level scenario ($\frac{\varepsilon}{2}$), and thus less noise.
|
||||
|
||||
|
||||
|
@ -77,7 +77,7 @@ Intuitively, knowing the data set at timestamp $t$ stops the propagation of the
|
||||
%\kat{do we see this in the formula 1 ?}
|
||||
%when calculating the forward or backward privacy loss respectively.
|
||||
|
||||
Cao et al.~\cite{cao2017quantifying} propose a method for computing the total temporal privacy loss $\alpha_t$ at a timestamp $t$ as the sum of the backward and forward privacy loss, $\alpha^B_t$ and $\alpha^F_t$, minus the privacy budget $\varepsilon_t$
|
||||
Cao et al.~\cite{cao2017quantifying} propose a method for computing the temporal privacy loss $\alpha_t$ at a timestamp $t$ as the sum of the backward and forward privacy loss, $\alpha^B_t$ and $\alpha^F_t$, minus the privacy budget $\varepsilon_t$
|
||||
to account for the extra privacy loss due to previous and next releases $\pmb{o}$ of $\mathcal{M}$ under temporal correlation.
|
||||
By Theorem~\ref{theor:thething-prv}, at every timestamp $t$ we consider the data at $t$ and at the {\thething} timestamps $L$.
|
||||
%According to the Definitions~{\ref{def:bpl} and \ref{def:fpl}}, we calculate the backward and forward privacy loss by taking into account the privacy budget at previous and next data releases respectively.
|
||||
|
Loading…
Reference in New Issue
Block a user