From 75ba2518873206eb3732725e9d9aa883c1576597 Mon Sep 17 00:00:00 2001
From: Manos Katsomallos
Date: Mon, 25 Oct 2021 01:47:03 +0200
Subject: [PATCH] text: References to theorems and definitions

---
 text/preliminaries/correlation.tex |  6 +++---
 text/preliminaries/privacy.tex     | 20 ++++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/text/preliminaries/correlation.tex b/text/preliminaries/correlation.tex
index 7f490ef..30026ad 100644
--- a/text/preliminaries/correlation.tex
+++ b/text/preliminaries/correlation.tex
@@ -80,7 +80,7 @@ This calculation is done for each individual that is included in the original da
 The backward/forward privacy loss at any timestamp depends on the backward/forward privacy loss at the previous/next timestamp, the backward/forward temporal correlation, and $\varepsilon$.
 
 \begin{definition}
-  [Temporal privacy loss (TPL)]
+  [Temporal privacy loss (TPL)~\cite{cao2018quantifying}]
   \label{def:tpl}
   The potential privacy loss of a privacy mechanism at a timestamp $t \in T$ due to a series of outputs $(\pmb{o}_i)_{i \in T}$ and temporal correlation in its input $D_t$ with respect to any adversary, targeting an individual with potential data items $x_t$ (or $x'_t$) and having knowledge $\mathbb{D}_t$ equal to $D_t - \{x_t\}$ (or $D'_t - \{x'_t\}$), is defined as:
@@ -100,7 +100,7 @@ By analyzing Equation~\ref{eq:tpl} we get the following:
 \end{align}
 
 \begin{definition}
-  [Backward privacy loss (BPL)]
+  [Backward privacy loss (BPL)~\cite{cao2018quantifying}]
   \label{def:bpl}
   The potential privacy loss of a privacy mechanism at a timestamp $t \in T$ due to outputs $(\pmb{o}_i)_{i \in [\min(T), t]}$ and temporal correlation in its input $D_t$ with respect to any adversary, targeting an individual with potential data items $x_t$ (or $x'_t$) and having knowledge $\mathbb{D}_t$ equal to $D_t - \{x_t\}$ (or $D'_t - \{x'_t\}$), is called backward privacy loss and is defined as:
@@ -174,7 +174,7 @@ $x_{t - 1}$ (or $x'_{t - 1}$), and thus Equation~\ref{eq:bpl-4} can be written a
 \end{align}
 
 \begin{definition}
-  [Forward privacy loss (FPL)]
+  [Forward privacy loss (FPL)~\cite{cao2018quantifying}]
   \label{def:fpl}
   The potential privacy loss of a privacy mechanism at a timestamp $t \in T$ due to outputs $(\pmb{o}_i)_{i \in [t, \max(T)]}$ and temporal correlation in its input $D_t$ with respect to any adversary, targeting an individual with potential data item $x_t$ (or $x'_t$) and having knowledge $\mathbb{D}_t$ equal to $D_t - \{x_t\}$ (or $D'_t - \{x'_t\}$), is called forward privacy loss and is defined as:
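For intuition on Definitions~\ref{def:bpl} and~\ref{def:fpl} above, the following minimal Python sketch (illustrative only, not part of the patched text) tracks the two extreme cases of backward privacy loss from~\cite{cao2018quantifying}: with no temporal correlation each release leaks only its own $\varepsilon_t$, while under full correlation the losses telescope, i.e.,~$\alpha^B_t \leq \varepsilon_t + \alpha^B_{t-1}$. The exact loss for a given Markov model lies between these bounds and requires optimizing over the transition probabilities, which the sketch deliberately skips.

    # A coarse sketch of how backward privacy loss (BPL) accumulates.
    # Only the two extremes from the quantification of Cao et al. are
    # tracked here:
    #   - no temporal correlation:   BPL_t = eps_t
    #   - full temporal correlation: BPL_t = eps_t + BPL_{t-1}
    # The exact value for a concrete Markov model lies in between.

    def bpl_bounds(epsilons):
        lower, upper = [], []
        prev_upper = 0.0
        for eps in epsilons:
            lower.append(eps)               # independent releases
            prev_upper = eps + prev_upper   # fully correlated releases
            upper.append(prev_upper)
        return lower, upper

    lo, hi = bpl_bounds([0.1, 0.1, 0.1])
    print(lo)  # [0.1, 0.1, 0.1]
    print(hi)  # ~[0.1, 0.2, 0.3]: the losses telescope
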
diff --git a/text/preliminaries/privacy.tex b/text/preliminaries/privacy.tex
index 2a1fd2a..db9921e 100644
--- a/text/preliminaries/privacy.tex
+++ b/text/preliminaries/privacy.tex
@@ -231,7 +231,7 @@ It ensures that any adversary observing a privacy-protected output, no matter th
 
 \begin{definition}
-  [Neighboring data sets]
+  [Neighboring data sets~\cite{dwork2006calibrating}]
   \label{def:nb-d-s}
   Two data sets are neighboring (or adjacent) when they differ by at most one tuple, i.e.,~one can be obtained by adding/removing the data of an individual to/from the other.
 \end{definition}
@@ -255,7 +255,7 @@ Formally, differential privacy is given in Definition~\ref{def:dp}.
 % \kat{you already said this. Moreover, it is irrelevant to the neighboring datasets and thus does not fit here..}
 % \kat{Say what is a mechanism and how it is connected to the query, what are their differences? In the next section that you speak about the examples, we are still not sure about what is a mechanism in general.}
 \begin{definition}
-  [Differential privacy]
+  [Differential privacy~\cite{dwork2006calibrating}]
   \label{def:dp}
   A privacy mechanism $\mathcal{M}$, with domain $\mathcal{D}$ and range $\mathcal{O}$, satisfies $\varepsilon$-differential privacy, for a given privacy budget $\varepsilon$, if for every pair of neighboring data sets $D, D' \in \mathcal{D}$ and all sets $O \subseteq \mathcal{O}$:
   $$\Pr[\mathcal{M}(D) \in O] \leq e^\varepsilon \Pr[\mathcal{M}(D') \in O]$$
@@ -292,7 +292,7 @@ queries can be problematic, since a single, outlier value could change the outpu
 % \kat{introduce and link to the previous text the following definition }
 \begin{definition}
-  [Query function sensitivity]
+  [Query function sensitivity~\cite{dwork2006calibrating}]
   \label{def:qry-sens}
   The sensitivity of a query function $f$ for all neighboring data sets $D, D' \in \mathcal{D}$ is:
   $$\Delta f = \max_{D, D' \in \mathcal{D}} \lVert {f(D) - f(D')} \rVert_{1}$$
@@ -376,7 +376,7 @@ Mechanisms that satisfy differential privacy are \emph{composable}, i.e.,~the co
 In this section, we provide an overview of the most prominent composition theorems that instruct data publishers \emph{how} to estimate the overall privacy protection when utilizing a series of differential privacy mechanisms.
 
 \begin{theorem}
-  [Composition]
+  [Composition~\cite{mcsherry2009privacy}]
   \label{theor:compo}
   Any combination of a set of independent differential privacy mechanisms satisfying a corresponding set of privacy guarantees shall satisfy differential privacy as well, i.e.,~provide a differentially private output.
 \end{theorem}
@@ -384,7 +384,7 @@ In this section, we provide an overview of the most prominent composition theore
 Generally, when we apply a series of independent (i.e.,~in the way that they inject noise) differential privacy mechanisms on independent data, we can calculate the privacy level of the resulting output according to the \emph{sequential} composition property~\cite{mcsherry2009privacy, soria2016big}.
 
 \begin{theorem}
-  [Sequential composition on independent data]
+  [Sequential composition on independent data~\cite{mcsherry2009privacy}]
   \label{theor:compo-seq-ind}
   The privacy guarantee of $m \in \mathbb{Z}^+$ independent privacy mechanisms, satisfying $\varepsilon_1$-, $\varepsilon_2$-, \dots, $\varepsilon_m$-differential privacy respectively, when applied over the same data set equals to $\sum_{i = 1}^m \varepsilon_i$.
 \end{theorem}
@@ -401,7 +401,7 @@ Notice that the sequential composition corresponds to the worst case scenario wh
 In the special case that we query disjoint data sets, we can take advantage of the \emph{parallel} composition property~\cite{mcsherry2009privacy, soria2016big}, and thus spare some of the available privacy budget.
 
 \begin{theorem}
-  [Parallel composition on independent data]
+  [Parallel composition on independent data~\cite{mcsherry2009privacy}]
   \label{theor:compo-par-ind}
   When $m \in \mathbb{Z}^+$ independent privacy mechanisms, satisfying $\varepsilon_1$-, $\varepsilon_2$-,\dots, $\varepsilon_m$-differential privacy respectively, are applied over disjoint independent subsets of a data set, they provide a privacy guarantee equal to $\max_{i \in [1, m]} \varepsilon_i$.
 \end{theorem}
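As a concrete illustration of Definitions~\ref{def:dp} and~\ref{def:qry-sens}, here is a minimal sketch of the Laplace mechanism of~\cite{dwork2006calibrating}, assuming NumPy and a count query, whose sensitivity between neighboring data sets is $\Delta f = 1$; the true answer and the budget are made-up values.

    import numpy as np

    def laplace_mechanism(true_answer, sensitivity, epsilon):
        # Calibrate noise to the query sensitivity (Definition def:qry-sens):
        # Laplace noise with scale Delta f / epsilon satisfies
        # epsilon-differential privacy (Definition def:dp).
        scale = sensitivity / epsilon
        return true_answer + np.random.laplace(loc=0.0, scale=scale)

    # A count query ("how many records satisfy a predicate?") changes by
    # at most 1 when one individual's data is added/removed, so Delta f = 1.
    noisy_count = laplace_mechanism(true_answer=42, sensitivity=1.0, epsilon=0.5)
    print(noisy_count)
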
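Theorems~\ref{theor:compo-seq-ind} and~\ref{theor:compo-par-ind} amount to simple budget bookkeeping, contrasted in the sketch below with illustrative $\varepsilon_i$ values.

    def sequential_budget(epsilons):
        # Same data set queried repeatedly: the budgets add up
        # (sequential composition on independent data).
        return sum(epsilons)

    def parallel_budget(epsilons):
        # Disjoint subsets, one mechanism each: the largest budget
        # dominates (parallel composition on independent data).
        return max(epsilons)

    eps = [0.1, 0.3, 0.2]
    print(sequential_budget(eps))  # ~0.6, i.e., sum of the eps_i
    print(parallel_budget(eps))    # 0.3, i.e., max of the eps_i
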
@@ -409,7 +409,7 @@ In the special case that we query disjoint data sets, we can take advantage of t
 When the users consider recent data releases more privacy-sensitive than distant ones, we estimate the overall privacy loss in a time fading manner according to a temporal discounting function, e.g.,~exponential, hyperbolic,~\cite{farokhi2020temporally}.
 
 \begin{theorem}
-  [Sequential composition with temporal discounting]
+  [Sequential composition with temporal discounting~\cite{farokhi2020temporally}]
   \label{theor:compo-seq-disc}
   A set of $m \in \mathbb{Z}^+$ independent privacy mechanisms, satisfying $\varepsilon_1$-, $\varepsilon_2$-,\dots, $\varepsilon_m$-differential privacy respectively, satisfy $\sum_{i = 1}^m g(i) \varepsilon_i$ differential privacy for a discount function $g$.
 \end{theorem}
@@ -419,7 +419,7 @@ The first ($m - 1$ if $w \leq 2$ or $m - w + 1$ if $w > 2$) and last ($m$) mecha
 When $w$ is greater than $2$, the rest of the mechanisms (between $m - w + 2$ and $m - 1$) contribute only to the privacy loss that is corresponding to the publication of the relevant data.
 
 \begin{theorem}
-  [Sequential composition under temporal correlations]
+  [Sequential composition under temporal correlations~\cite{cao2018quantifying}]
   \label{theor:compo-seq-cor}
   When a set of $w \leq t \in \mathbb{Z}^+$ independent privacy mechanisms, satisfying $\varepsilon_{m \in [1, t]}$-differential privacy, is applied over a sequence of an equal number of temporally correlated data sets, it provides a privacy guarantee equal to:
   $$
@@ -444,8 +444,8 @@ Every time a data publisher interacts with (any part of) the original data set,
 However, the \emph{post-processing} of a perturbed data set can be done without using any additional privacy budget.
 
 \begin{theorem}
-  [Post-processing]
-  \label{theor:post-processing}
+  [Post-processing~\cite{mcsherry2009privacy}]
+  \label{theor:p-proc}
   The post-processing of any output of an $\varepsilon$-differential privacy mechanism shall not deteriorate its privacy guarantee.
 \end{theorem}
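To make Theorem~\ref{theor:compo-seq-disc} concrete, the sketch below uses a hypothetical exponential discount $g(i) = \gamma^{m - i}$, which weighs the most recent of $m$ releases fully and earlier ones progressively less; both $\gamma$ and the budgets are assumptions for illustration, not values taken from~\cite{farokhi2020temporally}.

    def discounted_budget(epsilons, gamma=0.9):
        # Exponential discounting: the i-th (1-indexed) of m releases is
        # weighed by g(i) = gamma**(m - i), so older releases count less
        # toward the overall loss sum_i g(i) * eps_i.
        m = len(epsilons)
        return sum(gamma ** (m - i) * eps
                   for i, eps in enumerate(epsilons, start=1))

    eps = [0.2, 0.2, 0.2]
    print(discounted_budget(eps))  # 0.2 * (0.81 + 0.9 + 1.0) = 0.542
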
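Finally, Theorem~\ref{theor:p-proc} is what permits, for instance, rounding or clamping a noisy count after release at no extra budget; a small sketch reusing the hypothetical laplace_mechanism from above.

    def post_process(noisy_answer):
        # Any data-independent transformation of a differentially private
        # output, e.g., rounding and clamping to a plausible range, keeps
        # the epsilon guarantee intact (post-processing theorem).
        return max(0, round(noisy_answer))

    print(post_process(laplace_mechanism(42, 1.0, 0.5)))
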