Merge branch 'master' of git.delkappa.com:manos/the-last-thing

This commit is contained in:
Manos Katsomallos 2021-10-19 03:44:04 +02:00
commit b93328bcea
4 changed files with 596 additions and 595 deletions

View File

@ -28,6 +28,7 @@ MISS = 0 # Number of additions to the cache.
TOTAL = 0 # Number of cache accesses.
def load_data(path):
'''
Read data from a file.
@ -36,7 +37,6 @@ TOTAL = 0 # Number of cache accesses.
Returns:
data - A list of tuples [uid, timestamp, lng, lat, loc].
'''
def load_data(path):
print('Loading data from', os.path.abspath(path), '... ', end='')
data = []
try:
@ -50,6 +50,7 @@ def load_data(path):
exit()
def save_output(path, t, e, a_b, a_f, a):
'''
Save output to a file.
@ -63,7 +64,6 @@ def load_data(path):
Returns:
Nothing.
'''
def save_output(path, t, e, a_b, a_f, a):
# timestamp = time.strftime('%Y%m%d%H%M%S')
print('Saving output to %s... ' %(path), end='', flush=True)
os.makedirs(os.path.dirname(path), exist_ok=True)
@ -74,6 +74,7 @@ def save_output(path, t, e, a_b, a_f, a):
print('OK.', flush=True)
def get_timestamps(data):
'''
Get all the timestamps from the input data.
@ -82,7 +83,6 @@ def save_output(path, t, e, a_b, a_f, a):
Returns:
timestamps - An ndarray of all of the timestamps from the input data.
'''
def get_timestamps(data):
print('Getting a list of all timestamps... ', end='', flush=True)
timestamps = np.sort(np.unique(np.array(data)[:, 1]))
if not len(timestamps):
@ -103,6 +103,7 @@ def get_timestamps(data):
return timestamps
def get_locs(data):
'''
Get all the unique locations from the input data.
@ -111,7 +112,6 @@ def get_timestamps(data):
Returns:
locs - A sorted ndarray of all the unique locations int the input data.
'''
def get_locs(data):
print('Getting a list of all locations... ', end='', flush=True)
locs = np.sort(np.unique(np.array(data)[:, 4].astype(np.int)))
if not len(locs):
@ -123,6 +123,7 @@ def get_locs(data):
return list(map(str, locs))
def get_cnts(data, t):
'''
Get the counts at every location for a specific timestamp.
@ -132,7 +133,6 @@ def get_locs(data):
Returns:
cnts - A dict {loc:cnt} with the counts at every location for a specific timestamp.
'''
def get_cnts(data, t):
print('Getting all counts at %s... ' %(t), end='', flush=True)
locs = get_locs(data)
cnts = dict.fromkeys(locs, 0)
@ -145,6 +145,7 @@ def get_cnts(data, t):
return cnts
def get_all_cnts(data):
'''
Get the counts at every location for every timestamp.
@ -153,7 +154,6 @@ def get_cnts(data, t):
Returns:
cnts - A dict {timestamp:loc} with all the counts at every location for every timestamp.
'''
def get_all_cnts(data):
cnts = {}
for d in data:
key = d[1] + '@' + d[4]
@ -163,6 +163,7 @@ def get_all_cnts(data):
return cnts
def get_usrs(data):
'''
Get a list of unique users in the input data set.
@ -171,7 +172,6 @@ def get_all_cnts(data):
Returns:
users - An ndarray of all unique users.
'''
def get_usrs(data):
users = np.sort(np.unique(np.array(data)[:, 0].astype(np.int)))
if not len(users):
print('No users found.')
@ -181,6 +181,7 @@ def get_usrs(data):
return users
def get_usr_data(data, id):
'''
Get the data of a particular user from a data set.
@ -190,7 +191,6 @@ def get_usrs(data):
Returns:
output - A list of the data of the targeted user.
'''
def get_usr_data(data, id):
output = []
for d in data:
if (d[0] == str(id)):
@ -200,6 +200,7 @@ def get_usr_data(data, id):
return output
def get_usrs_data(data):
'''
Get the data of every user in a data set.
@ -208,13 +209,13 @@ def get_usr_data(data, id):
Returns:
output - A dict {usr, [usr_data]} with the data of each user.
'''
def get_usrs_data(data):
    '''
    Get the data of every user in a data set.

    Parameters:
        data - A list of records whose first element is the user id.
    Returns:
        A dict {usr: [usr_data]} with the data of each user.
    '''
    grouped = {}
    for record in data:
        grouped.setdefault(record[0], []).append(record)
    return grouped
def get_usr_traj(data):
'''
Get the trajectory of a user from her data.
@ -223,7 +224,6 @@ def get_usrs_data(data):
Returns:
traj - A list [(timestamp, loc)] with the locations and corresponding timestamps that the user was at.
'''
def get_usr_traj(data):
traj = []
for d in data:
traj.append((d[1], d[4]))
@ -232,6 +232,7 @@ def get_usr_traj(data):
return traj
def get_poss_trans(data):
'''
Get all the possible transitions.
@ -240,7 +241,6 @@ def get_usr_traj(data):
Returns:
trans - A set with all the possible forward transitions in the input.
'''
def get_poss_trans(data):
print('Getting possible transitions... ', end='', flush=True)
trans = set()
for u, u_data in data.items():
@ -253,6 +253,7 @@ def get_poss_trans(data):
return trans
def get_bwd_trans(data):
'''
Get all backward transitions in a data set.
@ -262,7 +263,6 @@ def get_poss_trans(data):
trans - A dict {(t, t-1):[transitions]} with all the backward transitions
at every sequential timestamp pair in the input data set.
'''
def get_bwd_trans(data):
print('Getting all backward transitions... ', end='', flush=True)
trans = {}
for u, u_data in data.items():
@ -276,6 +276,7 @@ def get_bwd_trans(data):
return trans
def get_fwd_trans(data):
'''
Get all forward transitions in a data set.
@ -285,7 +286,6 @@ def get_bwd_trans(data):
trans - A dict {(t-1, t):[transitions]} with all the forward transitions
at every sequential timestamp pair in the input data set.
'''
def get_fwd_trans(data):
print('Getting all forward transitions... ', end='', flush=True)
trans = {}
for u, u_data in data.items():
@ -299,6 +299,7 @@ def get_fwd_trans(data):
return trans
def safe_div(a, b):
'''
Divide two numbers. If the divisor is 0 return inf.
@ -308,12 +309,12 @@ def get_fwd_trans(data):
Returns:
The float result of the division.
'''
def safe_div(a, b):
    '''
    Divide two numbers. If the divisor is 0 return inf.

    Parameters:
        a - The dividend.
        b - The divisor.
    Returns:
        The float result of the division, or math.inf when b is 0.
    '''
    return math.inf if b == 0 else float(a / b)
def max_val(q, d, a):
'''
Calculate the maximum value of the objective function.
@ -325,12 +326,12 @@ def safe_div(a, b):
Returns:
The maximum value of the objective function.
'''
def max_val(q, d, a):
    '''
    Calculate the maximum value of the objective function.

    Parameters:
        q - The sum of the elements of one transition-matrix row.
        d - The sum of the elements of another transition-matrix row.
        a - The privacy loss parameter.
    Returns:
        The maximum value of the objective function, or nan when a is inf.
    '''
    if a == math.inf:
        return math.nan
    # exp(a) - 1 appears in both numerator and denominator; compute it once.
    growth = math.exp(a) - 1
    return (q * growth + 1) / (d * growth + 1)
def find_qd(p, a):
'''
Find two different rows (q and d) of a transition matrix (p)
that maximize the product of the objective function and return
@ -345,7 +346,6 @@ def max_val(q, d, a):
sum_q - The sum of the elements of q.
sum_d - The sum of the elements of d.
'''
def find_qd(p, a):
res = 0.0
sum_q, sum_d = 0.0, 0.0
for q in p: # A row from the transition matrix.
@ -374,6 +374,7 @@ def find_qd(p, a):
return sum_q, sum_d
def gen_data(usrs, timestamps, locs):
'''
Generate data.
@ -384,7 +385,6 @@ def find_qd(p, a):
Returns:
data - The generated data.
'''
def gen_data(usrs, timestamps, locs):
print('Generating data... ', end='', flush=True)
# Generate timestamps.
ts = []
@ -412,6 +412,7 @@ def gen_data(usrs, timestamps, locs):
return data
def gen_trans_mt(n, s):
'''
Generate a transition matrix.
@ -423,7 +424,6 @@ def gen_data(usrs, timestamps, locs):
Returns:
p_ - The transition matrix.
'''
def gen_trans_mt(n, s):
if DEBUG:
print('Generating transition matrix %dx%d with s = %.4f... ' %(n, n, s), end='', flush=True)
p = np.zeros((n, n), float)
@ -439,6 +439,7 @@ def gen_trans_mt(n, s):
return p_
def get_trans_mt(locs, trans):
'''
Get the transition matrix
@ -449,7 +450,6 @@ def gen_trans_mt(n, s):
p - A 2d dict {{locs}{locs}} containing the
corresponding location transition probabilities.
'''
def get_trans_mt(locs, trans):
if DEBUG:
print('Generating the transition matrix... ', end='', flush=True)
# Initialize the transition matrix.
@ -476,6 +476,7 @@ def get_trans_mt(locs, trans):
return p
def get_entropy(mt):
'''
Calculate the measure-theoretic (Kolmogorov-Sinai) entropy
of a transition matrix.
@ -485,7 +486,6 @@ def get_trans_mt(locs, trans):
Returns:
h - The Kolmogorov-Sinai entropy of the matrix.
'''
def get_entropy(mt):
if DEBUG:
print('Calculating the measure-theoretic entropy... ', end='', flush=True)
h = 0.0
@ -523,6 +523,7 @@ def get_entropy(mt):
return h
def get_2darray(mt):
'''
Convert a 2d dict to a 2d array.
@ -531,7 +532,6 @@ def get_entropy(mt):
Returns:
p - The 2d numpy array.
'''
def get_2darray(mt):
if type(mt) == type(np.array([])):
return mt
p = np.zeros((len(mt), len(mt)), float)
@ -540,6 +540,7 @@ def get_2darray(mt):
return p
def get_laplace_pd(ts, t, sc):
'''
Get a Laplace probability distribution.
@ -550,12 +551,12 @@ def get_2darray(mt):
Returns:
The probability distribution.
'''
def get_laplace_pd(ts, t, sc):
    '''
    Get a Laplace probability distribution over the timestamp indexes,
    peaking at the index of timestamp t.

    Parameters:
        ts - The ndarray of timestamps.
        t  - The timestamp where the distribution is centered.
        sc - The scale of the distribution.
    Returns:
        The probability distribution as an ndarray over the indexes of ts.
    '''
    indexes = np.arange(len(ts))
    # np.where yields the index of t in ts; laplace.pdf broadcasts over it.
    center = np.where(ts == t)
    return laplace.pdf(indexes, loc=center, scale=sc)[0]
def get_norm_pd(ts, t, sc):
'''
Get a Gaussian probability distribution.
@ -566,12 +567,12 @@ def get_laplace_pd(ts, t, sc):
Returns:
The probability distribution.
'''
def get_norm_pd(ts, t, sc):
    '''
    Get a Gaussian probability distribution over the timestamp indexes,
    peaking at the index of timestamp t.

    Parameters:
        ts - The ndarray of timestamps.
        t  - The timestamp where the distribution is centered.
        sc - The scale (standard deviation) of the distribution.
    Returns:
        The probability distribution as an ndarray over the indexes of ts.
    '''
    indexes = np.arange(len(ts))
    # np.where yields the index of t in ts; norm.pdf broadcasts over it.
    center = np.where(ts == t)
    return norm.pdf(indexes, loc=center, scale=sc)[0]
def get_sample(ts, t, pct, pd):
'''
Get a sample from the time series.
@ -584,7 +585,6 @@ def get_norm_pd(ts, t, sc):
Returns:
spl - An ndarray of the sampled timestamps.
'''
def get_sample(ts, t, pct, pd):
if DEBUG:
print('Sampling %.2f%% of %s at %s... ' %(pct*100, ts, t), end='', flush=True)
# Check that it is a valid timestamp.
@ -604,6 +604,7 @@ def get_sample(ts, t, pct, pd):
return spl
def priv_l(p, a, e):
'''
Calculate the backward/forward privacy loss at the current
timestamp.
@ -617,11 +618,11 @@ def get_sample(ts, t, pct, pd):
The backward/forward privacy loss at the current
timestamp.
'''
def priv_l(p, a, e):
    '''
    Calculate the backward/forward privacy loss at the current timestamp.

    Parameters:
        p - The transition matrix.
        a - The privacy loss of the previous/next timestamp.
        e - The privacy budget for the release at the current timestamp.
    Returns:
        The backward/forward privacy loss at the current timestamp.
    '''
    q_sum, d_sum = find_qd(p, a)
    return e + math.log(max_val(q_sum, d_sum, a))
def priv_l_m(p, a, e):
'''
Calculate the backward/forward privacy loss at the current
timestamp using memoization.
@ -635,7 +636,6 @@ def priv_l(p, a, e):
The backward/forward privacy loss at the current
timestamp.
'''
def priv_l_m(p, a, e):
key = xxhash.xxh64(p).hexdigest() + str(a) + str(e)
global MEM, TOTAL, MISS
TOTAL += 1
@ -648,6 +648,7 @@ def priv_l_m(p, a, e):
return result
def bpl(p, a, e, t):
'''
Calculate the total backward privacy loss at every timestamp.
@ -661,13 +662,13 @@ def priv_l_m(p, a, e):
a - The backward privacy loss at every timestamp
due to the previous data releases.
'''
def bpl(p, a, e, t):
    '''
    Calculate the total backward privacy loss at every timestamp.

    Parameters:
        p - The transition matrix.
        a - The per-timestamp backward privacy loss accumulator.
        e - The privacy budget per timestamp.
        t - The number of timestamps to process.
    Returns:
        a - The backward privacy loss at every timestamp
            due to the previous data releases.
    '''
    # Each loss builds on the loss of the preceding timestamp.
    a[0] = e[0]
    idx = 1
    while idx < t:
        a[idx] = priv_l(p, a[idx - 1], e[idx])
        idx += 1
    return a
def bpl_m(p, a, e, t):
'''
Calculate the total backward privacy loss at the current
timestamp with memoization.
@ -683,12 +684,12 @@ def bpl(p, a, e, t):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_m(p, a, e, t):
    '''
    Calculate the total backward privacy loss at the current timestamp
    with memoization.

    Parameters:
        p - The transition matrix.
        a - The per-timestamp backward privacy loss accumulator.
        e - The privacy budget per timestamp.
        t - The number of timestamps to process.
    Returns:
        a - The backward privacy loss at the current timestamp
            due to the previous data releases.
    '''
    # Same recurrence as bpl, but through the memoized loss function.
    a[0] = e[0]
    idx = 1
    while idx < t:
        a[idx] = priv_l_m(p, a[idx - 1], e[idx])
        idx += 1
    return a
def bpl_lmdk_mem(p, a, e, t, lmdk):
# t is (near) the landmark
if lmdk == t - 1 or t == lmdk:
@ -702,6 +703,7 @@ def bpl_lmdk_mem(p, a, e, t, lmdk):
return a
def bpl_s(p, e, i, w):
'''
Calculate the total backward privacy loss at the current
timestamp using the static model, i.e., previous releases
@ -717,7 +719,6 @@ def bpl_lmdk_mem(p, a, e, t, lmdk):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_s(p, e, i, w):
if i - w > 1:
# print('bpl_s: %d - %d [%d]' %(i, i - w, w))
return priv_l(np.linalg.matrix_power(p, w), bpl_s(p, e, i - w, w), e[i - 1])
@ -729,6 +730,7 @@ def bpl_s(p, e, i, w):
return e[0]
def bpl_s_m(p, e, i, w):
'''
Calculate the total backward privacy loss at the current
timestamp using the static model, i.e., previous releases
@ -744,7 +746,6 @@ def bpl_s(p, e, i, w):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_s_m(p, e, i, w):
if i - w > 1:
return priv_l_m(np.linalg.matrix_power(p, w), bpl_s_m(p, e, i - w, w), e[i - 1])
elif i - w <= 1:
@ -753,6 +754,7 @@ def bpl_s_m(p, e, i, w):
return e[0]
def bpl_l(p, e, i, w, l):
'''
Calculate the total backward privacy loss at the current
timestamp using the linear model, i.e., previous releases
@ -770,7 +772,6 @@ def bpl_s_m(p, e, i, w):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_l(p, e, i, w, l):
if i - w*l > 1:
# print('bpl_l: %d - %d [%d]' %(i, i - w*l, w*l))
return priv_l(np.linalg.matrix_power(p, w*l), bpl_l(p, e, i - w*l, w, l + 1), e[i - 1])
@ -782,6 +783,7 @@ def bpl_l(p, e, i, w, l):
return e[0]
def bpl_l_m(p, e, i, w, l):
'''
Calculate the total backward privacy loss at the current
timestamp using the linear model, i.e., previous releases
@ -800,7 +802,6 @@ def bpl_l(p, e, i, w, l):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_l_m(p, e, i, w, l):
if i - w*l > 1:
return priv_l_m(np.linalg.matrix_power(p, w*l), bpl_l_m(p, e, i - w*l, w, l + 1), e[i - 1])
elif i - w*l <= 1:
@ -809,6 +810,7 @@ def bpl_l_m(p, e, i, w, l):
return e[0]
def bpl_e(p, e, i, w, h):
'''
Calculate the total backward privacy loss at the current
timestamp using the exponential model, i.e., previous releases
@ -826,7 +828,6 @@ def bpl_l_m(p, e, i, w, l):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_e(p, e, i, w, h):
if i - w**h > 1:
# print('bpl_e: %d - %d [%d]' %(i, i - w**h, w**h))
return priv_l(np.linalg.matrix_power(p, w**h), bpl_e(p, e, i - w**h, w, h + 1), e[i - 1])
@ -838,6 +839,7 @@ def bpl_e(p, e, i, w, h):
return e[0]
def bpl_e_m(p, e, i, w, h):
'''
Calculate the total backward privacy loss at the current
timestamp using the exponential model, i.e., previous releases
@ -856,7 +858,6 @@ def bpl_e(p, e, i, w, h):
a - The backward privacy loss at the current timestamp
due to the previous data releases.
'''
def bpl_e_m(p, e, i, w, h):
if i - w**h > 1:
return priv_l_m(np.linalg.matrix_power(p, w**h), bpl_e_m(p, e, i - w**h, w, h + 1), e[i - 1])
elif i - w**h <= 1:
@ -865,6 +866,7 @@ def bpl_e_m(p, e, i, w, h):
return e[0]
def fpl(p, a, e, t):
'''
Calculate the total forward privacy loss at the current
timestamp.
@ -880,13 +882,13 @@ def bpl_e_m(p, e, i, w, h):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl(p, a, e, t):
    '''
    Calculate the total forward privacy loss at the current timestamp.

    Parameters:
        p - The transition matrix.
        a - The per-timestamp forward privacy loss accumulator.
        e - The privacy budget per timestamp.
        t - The number of timestamps to process.
    Returns:
        a - The forward privacy loss at the current timestamp
            due to the next data releases.
    '''
    # Walk backwards from the last timestamp: each loss builds on its successor.
    a[t - 1] = e[t - 1]
    for idx in reversed(range(t - 1)):
        a[idx] = priv_l(p, a[idx + 1], e[idx])
    return a
def fpl_m(p, a, e, t):
'''
Calculate the total forward privacy loss at the current
timestamp, using memoization.
@ -902,7 +904,6 @@ def fpl(p, a, e, t):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_m(p, a, e, t):
a[t - 1] = e[t - 1]
for i in range(t - 2, -1, -1):
a[i] = priv_l_m(p, a[i + 1], e[i])
@ -921,6 +922,7 @@ def fpl_lmdk_mem(p, a, e, t, lmdk):
return a
def fpl_s(p, e, i, t, w):
'''
Calculate the total forward privacy loss at the current
timestamp using the static model, i.e., next releases
@ -936,7 +938,6 @@ def fpl_lmdk_mem(p, a, e, t, lmdk):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_s(p, e, i, t, w):
if i + w < t:
# print('fpl_s: %d - %d [%d]' %(i, i + w, w))
return priv_l(np.linalg.matrix_power(p, w), fpl_s(p, e, i + w, t, w), e[i - 1])
@ -948,6 +949,7 @@ def fpl_s(p, e, i, t, w):
return e[t - 1]
def fpl_s_m(p, e, i, t, w):
'''
Calculate the total forward privacy loss at the current
timestamp using the static model, i.e., next releases
@ -963,7 +965,6 @@ def fpl_s(p, e, i, t, w):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_s_m(p, e, i, t, w):
if i + w < t:
return priv_l_m(np.linalg.matrix_power(p, w), fpl_s_m(p, e, i + w, t, w), e[i - 1])
elif i + w >= t:
@ -972,6 +973,7 @@ def fpl_s_m(p, e, i, t, w):
return e[t - 1]
def fpl_l(p, e, i, t, w, l):
'''
Calculate the total forward privacy loss at the current
timestamp using the linear model, i.e., next releases
@ -989,7 +991,6 @@ def fpl_s_m(p, e, i, t, w):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_l(p, e, i, t, w, l):
if i + w*l < t:
# print('fpl_l: %d - %d [%d]' %(i, i + w*l, w*l))
return priv_l(np.linalg.matrix_power(p, w*l), fpl_l(p, e, i + w*l, t, w, l + 1), e[i - 1])
@ -1001,6 +1002,7 @@ def fpl_l(p, e, i, t, w, l):
return e[t - 1]
def fpl_l_m(p, e, i, t, w, l):
'''
Calculate the total forward privacy loss at the current
timestamp using the linear model, i.e., next releases
@ -1019,7 +1021,6 @@ def fpl_l(p, e, i, t, w, l):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_l_m(p, e, i, t, w, l):
if i + w*l < t:
return priv_l_m(np.linalg.matrix_power(p, w*l), fpl_l_m(p, e, i + w*l, t, w, l + 1), e[i - 1])
elif i + w*l >= t:
@ -1028,6 +1029,7 @@ def fpl_l_m(p, e, i, t, w, l):
return e[t - 1]
def fpl_e(p, e, i, t, w, h):
'''
Calculate the total forward privacy loss at the current
timestamp using the exponential model, i.e., next releases
@ -1045,7 +1047,6 @@ def fpl_l_m(p, e, i, t, w, l):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_e(p, e, i, t, w, h):
if i + w**h < t:
# print('fpl_e: %d - %d [%d]' %(i, i + w**h, w**h))
return priv_l(np.linalg.matrix_power(p, w**h), fpl_e(p, e, i + w**h, t, w, h + 1), e[i - 1])
@ -1057,6 +1058,7 @@ def fpl_e(p, e, i, t, w, h):
return e[t - 1]
def fpl_e_m(p, e, i, t, w, h):
'''
Calculate the total forward privacy loss at the current
timestamp using the exponential model, i.e., next releases
@ -1075,7 +1077,6 @@ def fpl_e(p, e, i, t, w, h):
a - The forward privacy loss at the current timestamp
due to the next data releases.
'''
def fpl_e_m(p, e, i, t, w, h):
if i + w**h < t:
return priv_l_m(np.linalg.matrix_power(p, w**h), fpl_e_m(p, e, i + w**h, t, w, h + 1), e[i - 1])
elif i + w**h >= t:
@ -1084,6 +1085,7 @@ def fpl_e_m(p, e, i, t, w, h):
return e[t - 1]
def tpl(bpl, fpl, e):
'''
Calculate the total privacy loss at every timestamp.
@ -1094,10 +1096,10 @@ def fpl_e_m(p, e, i, t, w, h):
Returns:
The list of total privacy loss at every timestamp.
'''
def tpl(bpl, fpl, e):
    '''
    Calculate the total privacy loss at every timestamp.

    Parameters:
        bpl - The backward privacy loss at every timestamp.
        fpl - The forward privacy loss at every timestamp.
        e   - The privacy budget per timestamp.
    Returns:
        The list of total privacy loss at every timestamp
        (backward + forward - budget, so the budget is not counted twice).
    '''
    total = []
    for bwd, fwd, eps in zip(bpl, fpl, e):
        total.append(bwd + fwd - eps)
    return total
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
'''
Calculate the temporal privacy loss at every timestamp
taking into account landmarks.
@ -1118,7 +1120,6 @@ def tpl(bpl, fpl, e):
a - The total privacy loss at every timestamp
taking into account landmarks.
'''
def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
a_b = np.zeros(len(seq))
a_f = np.zeros(len(seq))
a = np.zeros(len(seq))
@ -1135,6 +1136,7 @@ def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
return a_b, a_f, a
def get_limits(t, seq, lmdks):
'''
Get the limits for the calculation of temporal privacy loss.
@ -1146,7 +1148,6 @@ def tpl_lmdk_mem(e, p_b, p_f, seq, lmdks):
t_prv - The previous landmark.
t_nxt - The next landmark.
'''
def get_limits(t, seq, lmdks):
# Add landmark limits.
seq_lmdks = np.copy(lmdks)
# if seq[0] not in seq_lmdks:
@ -1174,6 +1175,7 @@ def get_limits(t, seq, lmdks):
return t_prv, t_nxt
def plot_loss(title, e, a_b, a_f, a):
'''
Plots the privacy loss of the time series.
@ -1186,7 +1188,6 @@ def get_limits(t, seq, lmdks):
Returns:
Nothing.
'''
def plot_loss(title, e, a_b, a_f, a):
plt.rc('font', family='serif')
plt.rc('font', size=10)
plt.rc('text', usetex=True)
@ -1221,6 +1222,7 @@ def plot_loss(title, e, a_b, a_f, a):
plt.show()
def cmp_loss(title, a, a_s, a_e, a_l):
'''
Plots a comparison of the privacy loss of all models.
@ -1233,7 +1235,6 @@ def plot_loss(title, e, a_b, a_f, a):
Returns:
Nothing.
'''
def cmp_loss(title, a, a_s, a_e, a_l):
plt.rc('font', family='serif')
plt.rc('font', size=10)
plt.rc('text', usetex=True)
@ -1268,6 +1269,7 @@ def cmp_loss(title, a, a_s, a_e, a_l):
plt.show()
def parse_args():
'''
Parse arguments.
@ -1284,7 +1286,6 @@ def cmp_loss(title, a, a_s, a_e, a_l):
-t, --time, The time limit.
-w, --window, The size of the event protection window.
'''
def parse_args():
# Create argument parser.
parser = argparse.ArgumentParser()

View File

@ -7,4 +7,4 @@ The {\thething} selection module introduces a reasonable data utility decline to
% \kat{it would be nice to see it clearly on Figure 5.5. (eg, by including another bar that shows adaptive without landmark selection)}
% \mk{Done.}
In terms of temporal correlation, we observe that under moderate and strong temporal correlation, a greater average regular--{\thething} event distance in a {\thething} distribution causes greater overall privacy loss.
Finally, the contribution of the {\thething} privacy on enhancing the data utility, while preserving $\epsilon$-differential privacy, is demonstrated by the fact that the selected Adaptive scheme provides better data utility than the user-level privacy protection.
Finally, the contribution of {\thething} privacy to enhancing the data utility, while preserving $\varepsilon$-differential privacy, is demonstrated by the fact that the selected Adaptive scheme provides better data utility than the user-level privacy protection.

View File

@ -22,7 +22,7 @@ Take for example the scenario in Figure~\ref{fig:st-cont}, where {\thethings} ar
If we want to protect the {\thething} points, we have to allocate at most a budget of $\varepsilon$ to the {\thethings}, while saving some for the release of regular events.
Essentially, the more budget we allocate to an event the less we protect it, but at the same time we maintain its utility.
With {\thething} privacy we propose to distribute the budget taking into account only the existence of the {\thethings} when we release an event of the time series, i.e.,~allocating $\frac{\varepsilon}{5}$ ($4\ \text{\thethings} + 1\ \text{regular point}$) to each event (see Figure~\ref{fig:st-cont}).
This way, we still guarantee\footnote{$\epsilon$-differential privacy guarantees that the allocated budget should be less or equal to $\epsilon$, and not precisely how much.\kat{Mano check.}} that the {\thethings} are adequately protected, as they receive a total budget of $\frac{4\varepsilon}{5}<\varepsilon$.
This way, we still guarantee\footnote{$\varepsilon$-differential privacy guarantees that the allocated budget should be less or equal to $\varepsilon$, and not precisely how much.\kat{Mano check.}} that the {\thethings} are adequately protected, as they receive a total budget of $\frac{4\varepsilon}{5}<\varepsilon$.
At the same time, we avoid over-perturbing the regular events, as we allocate to them a higher total budget ($\frac{4\varepsilon}{5}$) compared to the user-level scenario ($\frac{\varepsilon}{2}$), and thus less noise.

View File

@ -77,7 +77,7 @@ Intuitively, knowing the data set at timestamp $t$ stops the propagation of the
%\kat{do we see this in the formula 1 ?}
%when calculating the forward or backward privacy loss respectively.
Cao et al.~\cite{cao2017quantifying} propose a method for computing the total temporal privacy loss $\alpha_t$ at a timestamp $t$ as the sum of the backward and forward privacy loss, $\alpha^B_t$ and $\alpha^F_t$, minus the privacy budget $\varepsilon_t$
Cao et al.~\cite{cao2017quantifying} propose a method for computing the temporal privacy loss $\alpha_t$ at a timestamp $t$ as the sum of the backward and forward privacy loss, $\alpha^B_t$ and $\alpha^F_t$, minus the privacy budget $\varepsilon_t$
to account for the extra privacy loss due to previous and next releases $\pmb{o}$ of $\mathcal{M}$ under temporal correlation.
By Theorem~\ref{theor:thething-prv}, at every timestamp $t$ we consider the data at $t$ and at the {\thething} timestamps $L$.
%According to the Definitions~{\ref{def:bpl} and \ref{def:fpl}}, we calculate the backward and forward privacy loss by taking into account the privacy budget at previous and next data releases respectively.