From 0650b710b5390bf6531985fc4172870f9f9b26d9 Mon Sep 17 00:00:00 2001
From: Manos Katsomallos
Date: Tue, 31 Aug 2021 13:15:31 +0300
Subject: [PATCH] correlation: Reviewed wang2021current

---
 text/bibliography.bib              | 10 ++++++++++
 text/preliminaries/correlation.tex |  4 ++++
 2 files changed, 14 insertions(+)

diff --git a/text/bibliography.bib b/text/bibliography.bib
index a1454df..27a1746 100644
--- a/text/bibliography.bib
+++ b/text/bibliography.bib
@@ -1664,6 +1664,16 @@
   organization = {IEEE}
 }
 
+@article{wang2021current,
+  title = {Why current differential privacy schemes are inapplicable for correlated data publishing?},
+  author = {Wang, Hao and Xu, Zhengquan and Jia, Shan and Xia, Ying and Zhang, Xu},
+  journal = {World Wide Web},
+  volume = {24},
+  pages = {1--23},
+  year = {2021},
+  publisher = {Springer}
+}
+
 @article{warner1965randomized,
   title = {Randomized response: A survey technique for eliminating evasive answer bias},
   author = {Warner, Stanley L},
diff --git a/text/preliminaries/correlation.tex b/text/preliminaries/correlation.tex
index a33f15b..cf01257 100644
--- a/text/preliminaries/correlation.tex
+++ b/text/preliminaries/correlation.tex
@@ -60,6 +60,10 @@ A negative value shows that the behavior of one variable is the \emph{opposite} of the other,
 Zero means that the variables are not linked and are \emph{independent} of each other.
 A positive correlation indicates that the variables behave in a \emph{similar} manner, e.g.,~when the one decreases the other decreases as well.
+Wang et al.~\cite{wang2021current} examined why current differential privacy methods, which either increase the noise magnitude to offset the privacy leakage caused by the correlation (model-based) or transform the correlated data into independent series in another domain and process them independently (transform-based), are inapplicable for correlated data publishing.
+They prove that, after filtering out the independent and identically distributed noise from the correlated data by exploiting the data correlation (correlation-distinguishability attack), the remaining privacy distortion, which they quantify using entropy, is equal to that of conditional probability inference.
+They conclude that the problem stems from the mismatch between the correlation of the noise that the current methods inject and the correlation of the output data.
+
 \subsection{Privacy loss under temporal correlation}
 \label{subsec:cor-temp}