Merge branch 'master' of git.delkappa.com:manos/the-last-thing
This commit is contained in:
commit
b93328bcea
@ -28,6 +28,7 @@ MISS = 0 # Number of additions to the cache.
|
||||
TOTAL = 0 # Number of cache accesses.
|
||||
|
||||
|
||||
def load_data(path):
|
||||
'''
|
||||
Read data from a file.
|
||||
|
||||
@ -36,7 +37,6 @@ TOTAL = 0 # Number of cache accesses.
|
||||
Returns:
|
||||
data - A list of tuples [uid, timestamp, lng, lat, loc].
|
||||
'''
|
||||
def load_data(path):
|
||||
print('Loading data from', os.path.abspath(path), '... ', end='')
|
||||
data = []
|
||||
try:
|
||||
@ -50,6 +50,7 @@ def load_data(path):
|
||||
exit()
|
||||
|
||||
|
||||
def save_output(path, t, e, a_b, a_f, a):
|
||||
'''
|
||||
Save output to a file.
|
||||
|
||||
@ -63,7 +64,6 @@ def load_data(path):
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def save_output(path, t, e, a_b, a_f, a):
|
||||
# timestamp = time.strftime('%Y%m%d%H%M%S')
|
||||
print('Saving output to %s... ' %(path), end='', flush=True)
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
@ -74,6 +74,7 @@ def save_output(path, t, e, a_b, a_f, a):
|
||||
print('OK.', flush=True)
|
||||
|
||||
|
||||
def get_timestamps(data):
|
||||
'''
|
||||
Get all the timestamps from the input data.
|
||||
|
||||
@ -82,7 +83,6 @@ def save_output(path, t, e, a_b, a_f, a):
|
||||
Returns:
|
||||
timestamps - An ndarray of all of the timestamps from the input data.
|
||||
'''
|
||||
def get_timestamps(data):
|
||||
print('Getting a list of all timestamps... ', end='', flush=True)
|
||||
timestamps = np.sort(np.unique(np.array(data)[:, 1]))
|
||||
if not len(timestamps):
|
||||
@ -103,6 +103,7 @@ def get_timestamps(data):
|
||||
return timestamps
|
||||
|
||||
|
||||
def get_locs(data):
|
||||
'''
|
||||
Get all the unique locations from the input data.
|
||||
|
||||
@ -111,7 +112,6 @@ def get_timestamps(data):
|
||||
Returns:
|
||||
locs - A sorted ndarray of all the unique locations in the input data.
|
||||
'''
|
||||
def get_locs(data):
|
||||
print('Getting a list of all locations... ', end='', flush=True)
|
||||
locs = np.sort(np.unique(np.array(data)[:, 4].astype(np.int)))
|
||||
if not len(locs):
|
||||
@ -123,6 +123,7 @@ def get_locs(data):
|
||||
return list(map(str, locs))
|
||||
|
||||
|
||||
def get_cnts(data, t):
|
||||
'''
|
||||
Get the counts at every location for a specific timestamp.
|
||||
|
||||
@ -132,7 +133,6 @@ def get_locs(data):
|
||||
Returns:
|
||||
cnts - A dict {loc:cnt} with the counts at every location for a specific timestamp.
|
||||
'''
|
||||
def get_cnts(data, t):
|
||||
print('Getting all counts at %s... ' %(t), end='', flush=True)
|
||||
locs = get_locs(data)
|
||||
cnts = dict.fromkeys(locs, 0)
|
||||
@ -145,6 +145,7 @@ def get_cnts(data, t):
|
||||
return cnts
|
||||
|
||||
|
||||
def get_all_cnts(data):
|
||||
'''
|
||||
Get the counts at every location for every timestamp.
|
||||
|
||||
@ -153,7 +154,6 @@ def get_cnts(data, t):
|
||||
Returns:
|
||||
cnts - A dict {'timestamp@loc': cnt} with the count at every location for every timestamp.
|
||||
'''
|
||||
def get_all_cnts(data):
|
||||
cnts = {}
|
||||
for d in data:
|
||||
key = d[1] + '@' + d[4]
|
||||
@ -163,6 +163,7 @@ def get_all_cnts(data):
|
||||
return cnts
|
||||
|
||||
|
||||
def get_usrs(data):
|
||||
'''
|
||||
Get a list of unique users in the input data set.
|
||||
|
||||
@ -171,7 +172,6 @@ def get_all_cnts(data):
|
||||
Returns:
|
||||
users - An ndarray of all unique users.
|
||||
'''
|
||||
def get_usrs(data):
|
||||
users = np.sort(np.unique(np.array(data)[:, 0].astype(np.int)))
|
||||
if not len(users):
|
||||
print('No users found.')
|
||||
@ -181,6 +181,7 @@ def get_usrs(data):
|
||||
return users
|
||||
|
||||
|
||||
def get_usr_data(data, id):
|
||||
'''
|
||||
Get the data of a particular user from a data set.
|
||||
|
||||
@ -190,7 +191,6 @@ def get_usrs(data):
|
||||
Returns:
|
||||
output - A list of the data of the targeted user.
|
||||
'''
|
||||
def get_usr_data(data, id):
|
||||
output = []
|
||||
for d in data:
|
||||
if (d[0] == str(id)):
|
||||
@ -200,6 +200,7 @@ def get_usr_data(data, id):
|
||||
return output
|
||||
|
||||
|
||||
def get_usrs_data(data):
|
||||
'''
|
||||
Get the data of every user in a data set.
|
||||
|
||||
@ -208,13 +209,13 @@ def get_usr_data(data, id):
|
||||
Returns:
|
||||
output - A dict {usr: [usr_data]} with the data of each user.
|
||||
'''
|
||||
def get_usrs_data(data):
|
||||
output = {}
|
||||
for d in data:
|
||||
output[d[0]] = output.get(d[0], []) + [d]
|
||||
return output
|
||||
|
||||
|
||||
def get_usr_traj(data):
|
||||
'''
|
||||
Get the trajectory of a user from her data.
|
||||
|
||||
@ -223,7 +224,6 @@ def get_usrs_data(data):
|
||||
Returns:
|
||||
traj - A list [(timestamp, loc)] with the locations and corresponding timestamps that the user was at.
|
||||
'''
|
||||
def get_usr_traj(data):
|
||||
traj = []
|
||||
for d in data:
|
||||
traj.append((d[1], d[4]))
|
||||
@ -232,6 +232,7 @@ def get_usr_traj(data):
|
||||
return traj
|
||||
|
||||
|
||||
def get_poss_trans(data):
|
||||
'''
|
||||
Get all the possible transitions.
|
||||
|
||||
@ -240,7 +241,6 @@ def get_usr_traj(data):
|
||||
Returns:
|
||||
trans - A set with all the possible forward transitions in the input.
|
||||
'''
|
||||
def get_poss_trans(data):
|
||||
print('Getting possible transitions... ', end='', flush=True)
|
||||
trans = set()
|
||||
for u, u_data in data.items():
|
||||
@ -253,6 +253,7 @@ def get_poss_trans(data):
|
||||
return trans
|
||||
|
||||
|
||||
def get_bwd_trans(data):
|
||||
'''
|
||||
Get all backward transitions in a data set.
|
||||
|
||||
@ -262,7 +263,6 @@ def get_poss_trans(data):
|
||||
trans - A dict {(t, t-1):[transitions]} with all the backward transitions
|
||||
at every sequential timestamp pair in the input data set.
|
||||
'''
|
||||
def get_bwd_trans(data):
|
||||
print('Getting all backward transitions... ', end='', flush=True)
|
||||
trans = {}
|
||||
for u, u_data in data.items():
|
||||
@ -276,6 +276,7 @@ def get_bwd_trans(data):
|
||||
return trans
|
||||
|
||||
|
||||
def get_fwd_trans(data):
|
||||
'''
|
||||
Get all forward transitions in a data set.
|
||||
|
||||
@ -285,7 +286,6 @@ def get_bwd_trans(data):
|
||||
trans - A dict {(t-1, t):[transitions]} with all the forward transitions
|
||||
at every sequential timestamp pair in the input data set.
|
||||
'''
|
||||
def get_fwd_trans(data):
|
||||
print('Getting all forward transitions... ', end='', flush=True)
|
||||
trans = {}
|
||||
for u, u_data in data.items():
|
||||
@ -299,6 +299,7 @@ def get_fwd_trans(data):
|
||||
return trans
|
||||
|
||||
|
||||
def safe_div(a, b):
|
||||
'''
|
||||
Divide two numbers. If the divisor is 0 return inf.
|
||||
|
||||
@ -308,12 +309,12 @@ def get_fwd_trans(data):
|
||||
Returns:
|
||||
The float result of the division.
|
||||
'''
|
||||
def safe_div(a, b):
|
||||
if b == 0:
|
||||
return math.inf
|
||||
return float(a/b)
|
||||
|
||||
|
||||
def max_val(q, d, a):
|
||||
'''
|
||||
Calculate the maximum value of the objective function.
|
||||
|
||||
@ -325,12 +326,12 @@ def safe_div(a, b):
|
||||
Returns:
|
||||
The maximum value of the objective function.
|
||||
'''
|
||||
def max_val(q, d, a):
|
||||
if a == math.inf:
|
||||
return math.nan
|
||||
return (q*(math.exp(a) - 1) + 1)/(d*(math.exp(a) - 1) + 1)
|
||||
|
||||
|
||||
def find_qd(p, a):
|
||||
'''
|
||||
Find two different rows (q and d) of a transition matrix (p)
|
||||
that maximize the product of the objective function and return
|
||||
@ -345,7 +346,6 @@ def max_val(q, d, a):
|
||||
sum_q - The sum of the elements of q.
|
||||
sum_d - The sum of the elements of d.
|
||||
'''
|
||||
def find_qd(p, a):
|
||||
res = 0.0
|
||||
sum_q, sum_d = 0.0, 0.0
|
||||
for q in p: # A row from the transition matrix.
|
||||
@ -374,6 +374,7 @@ def find_qd(p, a):
|
||||
return sum_q, sum_d
|
||||
|
||||
|
||||
def gen_data(usrs, timestamps, locs):
|
||||
'''
|
||||
Generate data.
|
||||
|
||||
@ -384,7 +385,6 @@ def find_qd(p, a):
|
||||
Returns:
|
||||
data - The generated data.
|
||||
'''
|
||||
def gen_data(usrs, timestamps, locs):
|
||||
print('Generating data... ', end='', flush=True)
|
||||
# Generate timestamps.
|
||||
ts = []
|
||||
@ -412,6 +412,7 @@ def gen_data(usrs, timestamps, locs):
|
||||
return data
|
||||
|
||||
|
||||
def gen_trans_mt(n, s):
|
||||
'''
|
||||
Generate a transition matrix.
|
||||
|
||||
@ -423,7 +424,6 @@ def gen_data(usrs, timestamps, locs):
|
||||
Returns:
|
||||
p_ - The transition matrix.
|
||||
'''
|
||||
def gen_trans_mt(n, s):
|
||||
if DEBUG:
|
||||
print('Generating transition matrix %dx%d with s = %.4f... ' %(n, n, s), end='', flush=True)
|
||||
p = np.zeros((n, n), float)
|
||||
@ -439,6 +439,7 @@ def gen_trans_mt(n, s):
|
||||
return p_
|
||||
|
||||
|
||||
def get_trans_mt(locs, trans):
|
||||
'''
|
||||
Get the transition matrix
|
||||
|
||||
@ -449,7 +450,6 @@ def gen_trans_mt(n, s):
|
||||
p - A 2d dict {{locs}{locs}} containing the
|
||||
corresponding location transition probabilities.
|
||||
'''
|
||||
def get_trans_mt(locs, trans):
|
||||
if DEBUG:
|
||||
print('Generating the transition matrix... ', end='', flush=True)
|
||||
# Initialize the transition matrix.
|
||||
@ -476,6 +476,7 @@ def get_trans_mt(locs, trans):
|
||||
return p
|
||||
|
||||
|
||||
def get_entropy(mt):
|
||||
'''
|
||||
Calculate the measure-theoretic (Kolmogorov-Sinai) entropy
|
||||
of a transition matrix.
|
||||
@ -485,7 +486,6 @@ def get_trans_mt(locs, trans):
|
||||
Returns:
|
||||
h - The Kolmogorov-Sinai entropy of the matrix.
|
||||
'''
|
||||
def get_entropy(mt):
|
||||
if DEBUG:
|
||||
print('Calculating the measure-theoretic entropy... ', end='', flush=True)
|
||||
h = 0.0
|
||||
@ -523,6 +523,7 @@ def get_entropy(mt):
|
||||
return h
|
||||
|
||||
|
||||
def get_2darray(mt):
|
||||
'''
|
||||
Convert a 2d dict to a 2d array.
|
||||
|
||||
@ -531,7 +532,6 @@ def get_entropy(mt):
|
||||
Returns:
|
||||
p - The 2d numpy array.
|
||||
'''
|
||||
def get_2darray(mt):
|
||||
if type(mt) == type(np.array([])):
|
||||
return mt
|
||||
p = np.zeros((len(mt), len(mt)), float)
|
||||
@ -540,6 +540,7 @@ def get_2darray(mt):
|
||||
return p
|
||||
|
||||
|
||||
def get_laplace_pd(ts, t, sc):
|
||||
'''
|
||||
Get a Laplace probability distribution.
|
||||
|
||||
@ -550,12 +551,12 @@ def get_2darray(mt):
|
||||
Returns:
|
||||
The probability distribution.
|
||||
'''
|
||||
def get_laplace_pd(ts, t, sc):
|
||||
x = np.arange(0, len(ts), 1)
|
||||
loc = np.where(ts == t)
|
||||
return laplace.pdf(x, loc=loc, scale=sc)[0]
|
||||
|
||||
|
||||
def get_norm_pd(ts, t, sc):
|
||||
'''
|
||||
Get a Gaussian probability distribution.
|
||||
|
||||
@ -566,12 +567,12 @@ def get_laplace_pd(ts, t, sc):
|
||||
Returns:
|
||||
The probability distribution.
|
||||
'''
|
||||
def get_norm_pd(ts, t, sc):
|
||||
x = np.arange(0, len(ts), 1)
|
||||
loc = np.where(ts == t)
|
||||
return norm.pdf(x, loc=loc, scale=sc)[0]
|
||||
|
||||
|
||||
def get_sample(ts, t, pct, pd):
|
||||
'''
|
||||
Get a sample from the time series.
|
||||
|
||||
@ -584,7 +585,6 @@ def get_norm_pd(ts, t, sc):
|
||||
Returns:
|
||||
spl - An ndarray of the sampled timestamps.
|
||||
'''
|
||||
def get_sample(ts, t, pct, pd):
|
||||
if DEBUG:
|
||||
print('Sampling %.2f%% of %s at %s... ' %(pct*100, ts, t), end='', flush=True)
|
||||
# Check that it is a valid timestamp.
|
||||
@ -604,6 +604,7 @@ def get_sample(ts, t, pct, pd):
|
||||
return spl
|
||||
|
||||
|
||||
def priv_l(p, a, e):
|
||||
'''
|
||||
Calculate the backward/forward privacy loss at the current
|
||||
timestamp.
|
||||
@ -617,11 +618,11 @@ def get_sample(ts, t, pct, pd):
|
||||
The backward/forward privacy loss at the current
|
||||
timestamp.
|
||||
'''
|
||||
def priv_l(p, a, e):
|
||||
sum_q, sum_d = find_qd(p, a)
|
||||
return math.log(max_val(sum_q, sum_d, a)) + e
|
||||
|
||||
|
||||
def priv_l_m(p, a, e):
|
||||
'''
|
||||
Calculate the backward/forward privacy loss at the current
|
||||
timestamp using memoization.
|
||||
@ -635,7 +636,6 @@ def priv_l(p, a, e):
|
||||
The backward/forward privacy loss at the current
|
||||
timestamp.
|
||||
'''
|
||||
def priv_l_m(p, a, e):
|
||||
key = xxhash.xxh64(p).hexdigest() + str(a) + str(e)
|
||||
global MEM, TOTAL, MISS
|
||||
TOTAL += 1
|
||||
@ -648,6 +648,7 @@ def priv_l_m(p, a, e):
|
||||
return result
|
||||
|
||||
|
||||
def bpl(p, a, e, t):
|
||||
'''
|
||||
Calculate the total backward privacy loss at every timestamp.
|
||||
|
||||
@ -661,13 +662,13 @@ def priv_l_m(p, a, e):
|
||||
a - The backward privacy loss at every timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl(p, a, e, t):
|
||||
a[0] = e[0]
|
||||
for i in range(1, t):
|
||||
a[i] = priv_l(p, a[i - 1], e[i])
|
||||
return a
|
||||
|
||||
|
||||
def bpl_m(p, a, e, t):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp with memoization.
|
||||
@ -683,12 +684,12 @@ def bpl(p, a, e, t):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_m(p, a, e, t):
|
||||
a[0] = e[0]
|
||||
for i in range(1, t):
|
||||
a[i] = priv_l_m(p, a[i - 1], e[i])
|
||||
return a
|
||||
|
||||
|
||||
def bpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
# t is (near) the landmark
|
||||
if lmdk == t - 1 or t == lmdk:
|
||||
@ -702,6 +703,7 @@ def bpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
return a
|
||||
|
||||
|
||||
def bpl_s(p, e, i, w):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the static model, i.e., previous releases
|
||||
@ -717,7 +719,6 @@ def bpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_s(p, e, i, w):
|
||||
if i - w > 1:
|
||||
# print('bpl_s: %d - %d [%d]' %(i, i - w, w))
|
||||
return priv_l(np.linalg.matrix_power(p, w), bpl_s(p, e, i - w, w), e[i - 1])
|
||||
@ -729,6 +730,7 @@ def bpl_s(p, e, i, w):
|
||||
return e[0]
|
||||
|
||||
|
||||
def bpl_s_m(p, e, i, w):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the static model, i.e., previous releases
|
||||
@ -744,7 +746,6 @@ def bpl_s(p, e, i, w):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_s_m(p, e, i, w):
|
||||
if i - w > 1:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w), bpl_s_m(p, e, i - w, w), e[i - 1])
|
||||
elif i - w <= 1:
|
||||
@ -753,6 +754,7 @@ def bpl_s_m(p, e, i, w):
|
||||
return e[0]
|
||||
|
||||
|
||||
def bpl_l(p, e, i, w, l):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the linear model, i.e., previous releases
|
||||
@ -770,7 +772,6 @@ def bpl_s_m(p, e, i, w):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_l(p, e, i, w, l):
|
||||
if i - w*l > 1:
|
||||
# print('bpl_l: %d - %d [%d]' %(i, i - w*l, w*l))
|
||||
return priv_l(np.linalg.matrix_power(p, w*l), bpl_l(p, e, i - w*l, w, l + 1), e[i - 1])
|
||||
@ -782,6 +783,7 @@ def bpl_l(p, e, i, w, l):
|
||||
return e[0]
|
||||
|
||||
|
||||
def bpl_l_m(p, e, i, w, l):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the linear model, i.e., previous releases
|
||||
@ -800,7 +802,6 @@ def bpl_l(p, e, i, w, l):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_l_m(p, e, i, w, l):
|
||||
if i - w*l > 1:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w*l), bpl_l_m(p, e, i - w*l, w, l + 1), e[i - 1])
|
||||
elif i - w*l <= 1:
|
||||
@ -809,6 +810,7 @@ def bpl_l_m(p, e, i, w, l):
|
||||
return e[0]
|
||||
|
||||
|
||||
def bpl_e(p, e, i, w, h):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., previous releases
|
||||
@ -826,7 +828,6 @@ def bpl_l_m(p, e, i, w, l):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_e(p, e, i, w, h):
|
||||
if i - w**h > 1:
|
||||
# print('bpl_e: %d - %d [%d]' %(i, i - w**h, w**h))
|
||||
return priv_l(np.linalg.matrix_power(p, w**h), bpl_e(p, e, i - w**h, w, h + 1), e[i - 1])
|
||||
@ -838,6 +839,7 @@ def bpl_e(p, e, i, w, h):
|
||||
return e[0]
|
||||
|
||||
|
||||
def bpl_e_m(p, e, i, w, h):
|
||||
'''
|
||||
Calculate the total backward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., previous releases
|
||||
@ -856,7 +858,6 @@ def bpl_e(p, e, i, w, h):
|
||||
a - The backward privacy loss at the current timestamp
|
||||
due to the previous data releases.
|
||||
'''
|
||||
def bpl_e_m(p, e, i, w, h):
|
||||
if i - w**h > 1:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w**h), bpl_e_m(p, e, i - w**h, w, h + 1), e[i - 1])
|
||||
elif i - w**h <= 1:
|
||||
@ -865,6 +866,7 @@ def bpl_e_m(p, e, i, w, h):
|
||||
return e[0]
|
||||
|
||||
|
||||
def fpl(p, a, e, t):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp.
|
||||
@ -880,13 +882,13 @@ def bpl_e_m(p, e, i, w, h):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl(p, a, e, t):
|
||||
a[t - 1] = e[t - 1]
|
||||
for i in range(t - 2, -1, -1):
|
||||
a[i] = priv_l(p, a[i + 1], e[i])
|
||||
return a
|
||||
|
||||
|
||||
def fpl_m(p, a, e, t):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp, using memoization.
|
||||
@ -902,7 +904,6 @@ def fpl(p, a, e, t):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_m(p, a, e, t):
|
||||
a[t - 1] = e[t - 1]
|
||||
for i in range(t - 2, -1, -1):
|
||||
a[i] = priv_l_m(p, a[i + 1], e[i])
|
||||
@ -921,6 +922,7 @@ def fpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
return a
|
||||
|
||||
|
||||
def fpl_s(p, e, i, t, w):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the static model, i.e., next releases
|
||||
@ -936,7 +938,6 @@ def fpl_lmdk_mem(p, a, e, t, lmdk):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_s(p, e, i, t, w):
|
||||
if i + w < t:
|
||||
# print('fpl_s: %d - %d [%d]' %(i, i + w, w))
|
||||
return priv_l(np.linalg.matrix_power(p, w), fpl_s(p, e, i + w, t, w), e[i - 1])
|
||||
@ -948,6 +949,7 @@ def fpl_s(p, e, i, t, w):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
def fpl_s_m(p, e, i, t, w):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the static model, i.e., next releases
|
||||
@ -963,7 +965,6 @@ def fpl_s(p, e, i, t, w):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_s_m(p, e, i, t, w):
|
||||
if i + w < t:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w), fpl_s_m(p, e, i + w, t, w), e[i - 1])
|
||||
elif i + w >= t:
|
||||
@ -972,6 +973,7 @@ def fpl_s_m(p, e, i, t, w):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
def fpl_l(p, e, i, t, w, l):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the linear model, i.e., next releases
|
||||
@ -989,7 +991,6 @@ def fpl_s_m(p, e, i, t, w):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_l(p, e, i, t, w, l):
|
||||
if i + w*l < t:
|
||||
# print('fpl_l: %d - %d [%d]' %(i, i + w*l, w*l))
|
||||
return priv_l(np.linalg.matrix_power(p, w*l), fpl_l(p, e, i + w*l, t, w, l + 1), e[i - 1])
|
||||
@ -1001,6 +1002,7 @@ def fpl_l(p, e, i, t, w, l):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
def fpl_l_m(p, e, i, t, w, l):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the linear model, i.e., next releases
|
||||
@ -1019,7 +1021,6 @@ def fpl_l(p, e, i, t, w, l):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_l_m(p, e, i, t, w, l):
|
||||
if i + w*l < t:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w*l), fpl_l_m(p, e, i + w*l, t, w, l + 1), e[i - 1])
|
||||
elif i + w*l >= t:
|
||||
@ -1028,6 +1029,7 @@ def fpl_l_m(p, e, i, t, w, l):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
def fpl_e(p, e, i, t, w, h):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., next releases
|
||||
@ -1045,7 +1047,6 @@ def fpl_l_m(p, e, i, t, w, l):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_e(p, e, i, t, w, h):
|
||||
if i + w**h < t:
|
||||
# print('fpl_e: %d - %d [%d]' %(i, i + w**h, w**h))
|
||||
return priv_l(np.linalg.matrix_power(p, w**h), fpl_e(p, e, i + w**h, t, w, h + 1), e[i - 1])
|
||||
@ -1057,6 +1058,7 @@ def fpl_e(p, e, i, t, w, h):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
def fpl_e_m(p, e, i, t, w, h):
|
||||
'''
|
||||
Calculate the total forward privacy loss at the current
|
||||
timestamp using the exponential model, i.e., next releases
|
||||
@ -1075,7 +1077,6 @@ def fpl_e(p, e, i, t, w, h):
|
||||
a - The forward privacy loss at the current timestamp
|
||||
due to the next data releases.
|
||||
'''
|
||||
def fpl_e_m(p, e, i, t, w, h):
|
||||
if i + w**h < t:
|
||||
return priv_l_m(np.linalg.matrix_power(p, w**h), fpl_e_m(p, e, i + w**h, t, w, h + 1), e[i - 1])
|
||||
elif i + w**h >= t:
|
||||
@ -1084,6 +1085,7 @@ def fpl_e_m(p, e, i, t, w, h):
|
||||
return e[t - 1]
|
||||
|
||||
|
||||
def tpl(bpl, fpl, e):
|
||||
'''
|
||||
Calculate the total privacy loss at every timestamp.
|
||||
|
||||
@ -1094,10 +1096,10 @@ def fpl_e_m(p, e, i, t, w, h):
|
||||
Returns:
|
||||
The list of total privacy loss at every timestamp.
|
||||
'''
|
||||
def tpl(bpl, fpl, e):
|
||||
return [x + y - z for (x, y, z) in zip(bpl, fpl, e)]
|
||||
|
||||
|
||||
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
'''
|
||||
Calculate the temporal privacy loss at every timestamp
|
||||
taking into account landmarks.
|
||||
@ -1118,7 +1120,6 @@ def tpl(bpl, fpl, e):
|
||||
a - The total privacy loss at every timestamp
|
||||
taking into account landmarks.
|
||||
'''
|
||||
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
a_b = np.zeros(len(seq))
|
||||
a_f = np.zeros(len(seq))
|
||||
a = np.zeros(len(seq))
|
||||
@ -1135,6 +1136,7 @@ def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
return a_b, a_f, a
|
||||
|
||||
|
||||
def get_limits(t, seq, lmdks):
|
||||
'''
|
||||
Get the limits for the calculation of temporal privacy loss.
|
||||
|
||||
@ -1146,7 +1148,6 @@ def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
|
||||
t_prv - The previous landmark.
|
||||
t_nxt - The next landmark.
|
||||
'''
|
||||
def get_limits(t, seq, lmdks):
|
||||
# Add landmark limits.
|
||||
seq_lmdks = np.copy(lmdks)
|
||||
# if seq[0] not in seq_lmdks:
|
||||
@ -1174,6 +1175,7 @@ def get_limits(t, seq, lmdks):
|
||||
return t_prv, t_nxt
|
||||
|
||||
|
||||
def plot_loss(title, e, a_b, a_f, a):
|
||||
'''
|
||||
Plots the privacy loss of the time series.
|
||||
|
||||
@ -1186,7 +1188,6 @@ def get_limits(t, seq, lmdks):
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def plot_loss(title, e, a_b, a_f, a):
|
||||
plt.rc('font', family='serif')
|
||||
plt.rc('font', size=10)
|
||||
plt.rc('text', usetex=True)
|
||||
@ -1221,6 +1222,7 @@ def plot_loss(title, e, a_b, a_f, a):
|
||||
plt.show()
|
||||
|
||||
|
||||
def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
'''
|
||||
Plots a comparison of the privacy loss of all models.
|
||||
|
||||
@ -1233,7 +1235,6 @@ def plot_loss(title, e, a_b, a_f, a):
|
||||
Returns:
|
||||
Nothing.
|
||||
'''
|
||||
def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
plt.rc('font', family='serif')
|
||||
plt.rc('font', size=10)
|
||||
plt.rc('text', usetex=True)
|
||||
@ -1268,6 +1269,7 @@ def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
plt.show()
|
||||
|
||||
|
||||
def parse_args():
|
||||
'''
|
||||
Parse arguments.
|
||||
|
||||
@ -1284,7 +1286,6 @@ def cmp_loss(title, a, a_s, a_e, a_l):
|
||||
-t, --time, The time limit.
|
||||
-w, --window, The size of the event protection window.
|
||||
'''
|
||||
def parse_args():
|
||||
# Create argument parser.
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
|
@ -7,4 +7,4 @@ The {\thething} selection module introduces a reasonable data utility decline to
|
||||
% \kat{it would be nice to see it clearly on Figure 5.5. (eg, by including another bar that shows adaptive without landmark selection)}
|
||||
% \mk{Done.}
|
||||
In terms of temporal correlation, we observe that under moderate and strong temporal correlation, a greater average regular--{\thething} event distance in a {\thething} distribution causes greater overall privacy loss.
|
||||
Finally, the contribution of the {\thething} privacy on enhancing the data utility, while preserving $\epsilon$-differential privacy, is demonstrated by the fact that the selected Adaptive scheme provides better data utility than the user-level privacy protection.
|
||||
Finally, the contribution of {\thething} privacy to enhancing data utility, while preserving $\varepsilon$-differential privacy, is demonstrated by the fact that the selected Adaptive scheme provides better data utility than the user-level privacy protection.
|
||||
|
@ -22,7 +22,7 @@ Take for example the scenario in Figure~\ref{fig:st-cont}, where {\thethings} ar
|
||||
If we want to protect the {\thething} points, we have to allocate at most a budget of $\varepsilon$ to the {\thethings}, while saving some for the release of regular events.
|
||||
Essentially, the more budget we allocate to an event the less we protect it, but at the same time we maintain its utility.
|
||||
With {\thething} privacy we propose to distribute the budget taking into account only the existence of the {\thethings} when we release an event of the time series, i.e.,~allocating $\frac{\varepsilon}{5}$ ($4\ \text{\thethings} + 1\ \text{regular point}$) to each event (see Figure~\ref{fig:st-cont}).
|
||||
This way, we still guarantee\footnote{$\epsilon$-differential privacy guarantees that the allocated budget should be less or equal to $\epsilon$, and not precisely how much.\kat{Mano check.}} that the {\thethings} are adequately protected, as they receive a total budget of $\frac{4\varepsilon}{5}<\varepsilon$.
|
||||
This way, we still guarantee\footnote{$\varepsilon$-differential privacy requires only that the allocated budget be less than or equal to $\varepsilon$; it does not prescribe precisely how much is allocated.\kat{Mano check.}} that the {\thethings} are adequately protected, as they receive a total budget of $\frac{4\varepsilon}{5}<\varepsilon$.
|
||||
At the same time, we avoid over-perturbing the regular events, as we allocate to them a higher total budget ($\frac{4\varepsilon}{5}$) compared to the user-level scenario ($\frac{\varepsilon}{2}$), and thus less noise.
|
||||
|
||||
|
||||
|
@ -77,7 +77,7 @@ Intuitively, knowing the data set at timestamp $t$ stops the propagation of the
|
||||
%\kat{do we see this in the formula 1 ?}
|
||||
%when calculating the forward or backward privacy loss respectively.
|
||||
|
||||
Cao et al.~\cite{cao2017quantifying} propose a method for computing the total temporal privacy loss $\alpha_t$ at a timestamp $t$ as the sum of the backward and forward privacy loss, $\alpha^B_t$ and $\alpha^F_t$, minus the privacy budget $\varepsilon_t$
|
||||
Cao et al.~\cite{cao2017quantifying} propose a method for computing the temporal privacy loss $\alpha_t$ at a timestamp $t$ as the sum of the backward and forward privacy loss, $\alpha^B_t$ and $\alpha^F_t$, minus the privacy budget $\varepsilon_t$
|
||||
to account for the extra privacy loss due to previous and next releases $\pmb{o}$ of $\mathcal{M}$ under temporal correlation.
|
||||
By Theorem~\ref{theor:thething-prv}, at every timestamp $t$ we consider the data at $t$ and at the {\thething} timestamps $L$.
|
||||
%According to the Definitions~{\ref{def:bpl} and \ref{def:fpl}}, we calculate the backward and forward privacy loss by taking into account the privacy budget at previous and next data releases respectively.
|
||||
|
Loading…
Reference in New Issue
Block a user