Merge branch 'master' of https://git.delkappa.com/manos/the-last-thing
This commit is contained in:
commit
a104beb365
@ -11,7 +11,8 @@ from matplotlib import pyplot as plt
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
'''
|
def score(data, option):
|
||||||
|
'''
|
||||||
The scoring function.
|
The scoring function.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
@ -19,12 +20,13 @@ import time
|
|||||||
option - The option to evaluate.
|
option - The option to evaluate.
|
||||||
Returns:
|
Returns:
|
||||||
The score for the option.
|
The score for the option.
|
||||||
'''
|
'''
|
||||||
def score(data, option):
|
|
||||||
return (option.sum() - data.sum())
|
return (option.sum() - data.sum())
|
||||||
|
# return lmdk_lib.get_norm(data, option)
|
||||||
|
|
||||||
|
|
||||||
'''
|
def exponential(x, R, u, delta, epsilon):
|
||||||
|
'''
|
||||||
The exponential mechanism.
|
The exponential mechanism.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
@ -36,8 +38,7 @@ def score(data, option):
|
|||||||
Returns:
|
Returns:
|
||||||
res - A randomly sampled output.
|
res - A randomly sampled output.
|
||||||
pr - The PDF of all possible outputs.
|
pr - The PDF of all possible outputs.
|
||||||
'''
|
'''
|
||||||
def exponential(x, R, u, delta, epsilon):
|
|
||||||
# Calculate the score for each element of R
|
# Calculate the score for each element of R
|
||||||
scores = [u(x, r) for r in R]
|
scores = [u(x, r) for r in R]
|
||||||
# Normalize the scores between 0 and 1
|
# Normalize the scores between 0 and 1
|
||||||
|
Binary file not shown.
Binary file not shown.
BIN
rslt/bgt_cmp/T-drive-sel.pdf
Normal file
BIN
rslt/bgt_cmp/T-drive-sel.pdf
Normal file
Binary file not shown.
@ -1761,6 +1761,15 @@
|
|||||||
year = {2017}
|
year = {2017}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@inproceedings{meshgi2015expanding,
|
||||||
|
title={Expanding histogram of colors with gridding to improve tracking accuracy},
|
||||||
|
author={Meshgi, Kourosh and Ishii, Shin},
|
||||||
|
booktitle={2015 14th IAPR International Conference on Machine Vision Applications (MVA)},
|
||||||
|
pages={475--479},
|
||||||
|
year={2015},
|
||||||
|
organization={IEEE}
|
||||||
|
}
|
||||||
|
|
||||||
@inproceedings{wang2017privacy,
|
@inproceedings{wang2017privacy,
|
||||||
title = {Privacy Preserving Anonymity for Periodical SRS Data Publishing},
|
title = {Privacy Preserving Anonymity for Periodical SRS Data Publishing},
|
||||||
author = {Wang, Jie-Teng and Lin, Wen-Yang},
|
author = {Wang, Jie-Teng and Lin, Wen-Yang},
|
||||||
|
@ -39,10 +39,16 @@ In Example~\ref{ex:lmdk-risk}, we demonstrate the extreme case of the applicatio
|
|||||||
\SetKwData{evalCur}{evalCur}
|
\SetKwData{evalCur}{evalCur}
|
||||||
\SetKwData{evalOrig}{evalOrig}
|
\SetKwData{evalOrig}{evalOrig}
|
||||||
\SetKwData{evalSum}{evalSum}
|
\SetKwData{evalSum}{evalSum}
|
||||||
|
\SetKwData{h}{h}
|
||||||
|
\SetKwData{hi}{h$_i$}
|
||||||
|
\SetKwData{hist}{hist}
|
||||||
|
\SetKwData{histCur}{histCur}
|
||||||
|
\SetKwData{histTmp}{histTmp}
|
||||||
\SetKwData{metricCur}{metricCur}
|
\SetKwData{metricCur}{metricCur}
|
||||||
\SetKwData{metricOrig}{metricOrig}
|
\SetKwData{metricOrig}{metricOrig}
|
||||||
\SetKwData{opt}{opt}
|
\SetKwData{opt}{opt}
|
||||||
\SetKwData{opti}{opt$_i$}
|
\SetKwData{opti}{opt$_i$}
|
||||||
|
\SetKwData{opts}{opts}
|
||||||
\SetKwData{optim}{optim}
|
\SetKwData{optim}{optim}
|
||||||
\SetKwData{optimi}{optim$_i$}
|
\SetKwData{optimi}{optim$_i$}
|
||||||
\SetKwData{opts}{opts}
|
\SetKwData{opts}{opts}
|
||||||
@ -51,7 +57,10 @@ In Example~\ref{ex:lmdk-risk}, we demonstrate the extreme case of the applicatio
|
|||||||
\SetKwFunction{calcMetric}{calcMetric}
|
\SetKwFunction{calcMetric}{calcMetric}
|
||||||
\SetKwFunction{evalSeq}{evalSeq}
|
\SetKwFunction{evalSeq}{evalSeq}
|
||||||
\SetKwFunction{getCombs}{getCombs}
|
\SetKwFunction{getCombs}{getCombs}
|
||||||
|
\SetKwFunction{getDiff}{getDiff}
|
||||||
|
\SetKwFunction{getHist}{getHist}
|
||||||
\SetKwFunction{getOpts}{getOpts}
|
\SetKwFunction{getOpts}{getOpts}
|
||||||
|
\SetKwFunction{getNorm}{getNorm}
|
||||||
|
|
||||||
\input{problem/theotherthing/contribution}
|
\input{problem/theotherthing/contribution}
|
||||||
\input{problem/theotherthing/problem}
|
\input{problem/theotherthing/problem}
|
||||||
|
@ -42,16 +42,13 @@ It finds the option that is the most \emph{similar} to the original (Lines~{\ref
|
|||||||
% Evaluate the original
|
% Evaluate the original
|
||||||
\evalOrig $\leftarrow$ \evalSeq{$T, \emptyset, L$}\;
|
\evalOrig $\leftarrow$ \evalSeq{$T, \emptyset, L$}\;
|
||||||
|
|
||||||
% Get all possible option combinations
|
|
||||||
\opts $\leftarrow$ \getOpts{$T, L$}\;
|
|
||||||
|
|
||||||
% Track the minimum (best) evaluation
|
% Track the minimum (best) evaluation
|
||||||
\diffMin $\leftarrow$ $\infty$\;
|
\diffMin $\leftarrow$ $\infty$\;
|
||||||
|
|
||||||
% Track the optimal sequence (the one with the best evaluation)
|
% Track the optimal sequence (the one with the best evaluation)
|
||||||
\optim $\leftarrow$ $[]$\;
|
\opts $\leftarrow$ $[]$\;
|
||||||
|
|
||||||
\ForEach{\opt $\in$ \opts}{ \label{algo:lmdk-sel-opt-for-each}
|
\ForEach{\opt $\in$ \getOpts{$T, L$}}{ \label{algo:lmdk-sel-opt-for-each}
|
||||||
\evalCur $\leftarrow 0$\;
|
\evalCur $\leftarrow 0$\;
|
||||||
\ForEach{\opti $\in$ \opt}{
|
\ForEach{\opti $\in$ \opt}{
|
||||||
\evalCur $\leftarrow$ \evalCur $+$ \evalSeq{$T, \opti, L$}/\#\opt\; \label{algo:lmdk-sel-opt-comparison}
|
\evalCur $\leftarrow$ \evalCur $+$ \evalSeq{$T, \opti, L$}/\#\opt\; \label{algo:lmdk-sel-opt-comparison}
|
||||||
@ -60,10 +57,10 @@ It finds the option that is the most \emph{similar} to the original (Lines~{\ref
|
|||||||
\diffCur $\leftarrow \left|\evalCur - \evalOrig\right|$\;
|
\diffCur $\leftarrow \left|\evalCur - \evalOrig\right|$\;
|
||||||
\If{\diffCur $<$ \diffMin}{
|
\If{\diffCur $<$ \diffMin}{
|
||||||
\diffMin $\leftarrow$ \diffCur\;
|
\diffMin $\leftarrow$ \diffCur\;
|
||||||
\optim $\leftarrow$ \opt\;
|
\opts $\leftarrow$ \opt\;
|
||||||
}
|
}
|
||||||
} \label{algo:lmdk-sel-opt-end}
|
} \label{algo:lmdk-sel-opt-end}
|
||||||
\Return{\optim}
|
\Return{\opts}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
Algorithm~\ref{algo:lmdk-sel-opt} guarantees to return the optimal set of dummy {\thethings} with regard to the original set $L$.
|
Algorithm~\ref{algo:lmdk-sel-opt} guarantees to return the optimal set of dummy {\thethings} with regard to the original set $L$.
|
||||||
@ -73,7 +70,7 @@ Next, we present a heuristic solution with improved time and space requirements.
|
|||||||
|
|
||||||
\paragraph{Heuristic}
|
\paragraph{Heuristic}
|
||||||
Algorithm~\ref{algo:lmdk-sel-heur} follows an incremental methodology.
|
Algorithm~\ref{algo:lmdk-sel-heur} follows an incremental methodology.
|
||||||
At each step it selects a new timestamp that corresponds to a regular ({non-\thething}) event from $T \setminus L$.
|
At each step it selects a new timestamp, that corresponds to a regular ({non-\thething}) event from $T \setminus L$, to create an option.
|
||||||
|
|
||||||
\begin{algorithm}
|
\begin{algorithm}
|
||||||
\caption{Heuristic dummy {\thething} set options selection}
|
\caption{Heuristic dummy {\thething} set options selection}
|
||||||
@ -82,14 +79,14 @@ At each step it selects a new timestamp that corresponds to a regular ({non-\the
|
|||||||
\DontPrintSemicolon
|
\DontPrintSemicolon
|
||||||
|
|
||||||
\KwData{$T, L$}
|
\KwData{$T, L$}
|
||||||
\KwResult{\optim}
|
\KwResult{\opts}
|
||||||
\BlankLine
|
\BlankLine
|
||||||
|
|
||||||
% Evaluate the original
|
% Evaluate the original
|
||||||
\evalOrig $\leftarrow$ \evalSeq{$T, \emptyset, L$}\;
|
\evalOrig $\leftarrow$ \evalSeq{$T, \emptyset, L$}\;
|
||||||
|
|
||||||
% Get all possible option combinations
|
% Get all possible option combinations
|
||||||
\optim $\leftarrow$ $[]$\;
|
\opts $\leftarrow$ $[]$\;
|
||||||
|
|
||||||
$L' \leftarrow L$\;
|
$L' \leftarrow L$\;
|
||||||
|
|
||||||
@ -110,45 +107,111 @@ At each step it selects a new timestamp that corresponds to a regular ({non-\the
|
|||||||
\If{\diffCur $<$ \diffMin}{
|
\If{\diffCur $<$ \diffMin}{
|
||||||
\diffMin $\leftarrow$ \diffCur\;
|
\diffMin $\leftarrow$ \diffCur\;
|
||||||
\optimi $\leftarrow$ \reg\;
|
\optimi $\leftarrow$ \reg\;
|
||||||
}\label{algo:lmdk-sel-heur-comparison-end}
|
}\label{algo:lmdk-sel-heur-cmp-end}
|
||||||
}
|
}
|
||||||
|
|
||||||
% Save new point to landmarks
|
% Save new point to landmarks
|
||||||
$L'$.add(\optimi)\;
|
$L'$.add(\optimi)\;
|
||||||
|
|
||||||
% Add new option
|
% Add new option
|
||||||
\optim.append($L' \setminus L$)\;
|
\opts.append($L' \setminus L$)\;
|
||||||
}\label{algo:lmdk-sel-heur-end}
|
}\label{algo:lmdk-sel-heur-end}
|
||||||
|
|
||||||
\Return{\optim}
|
\Return{\opts}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
Similar to Algorithm~\ref{algo:lmdk-sel-opt}, the selection is done based on a predefined metric (Lines~{\ref{algo:lmdk-sel-heur-comparison}-\ref{algo:lmdk-sel-heur-comparison-end}}).
|
Similar to Algorithm~\ref{algo:lmdk-sel-opt}, it selects new options based on a predefined metric (Lines~{\ref{algo:lmdk-sel-heur-comparison}-\ref{algo:lmdk-sel-heur-cmp-end}}).
|
||||||
This process (Lines~{\ref{algo:lmdk-sel-heur-while}-\ref{algo:lmdk-sel-heur-end}}) goes on until we select a set that is equal to the size of the series of events, i.e.,~$L' = T$.
|
This process (Lines~{\ref{algo:lmdk-sel-heur-while}-\ref{algo:lmdk-sel-heur-end}}) goes on until we select a set that is equal to the size of the series of events, i.e.,~$L' = T$.
|
||||||
|
|
||||||
In terms of complexity: given $n$ regular events it requires $\mathcal{O}(n^2)$ time and space.
|
In terms of complexity, given $n$ regular events it requires $\mathcal{O}(n^2)$ time and space.
|
||||||
Note that the reverse heuristic approach, i.e.,~starting with $T$ {\thethings} and removing until $L$, performs similarly to Algorithm~\ref{algo:lmdk-sel-heur}.
|
Note that the reverse heuristic approach, i.e.,~starting with $T$ {\thethings} and removing until $L$, performs similarly to Algorithm~\ref{algo:lmdk-sel-heur}.
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Partitioned}
|
||||||
|
We improve the complexity of Algorithm~\ref{algo:lmdk-sel-opt} by partitioning the {\thething} timestamp sequence $L$.
|
||||||
|
In Algorithm~\ref{algo:lmdk-sel-hist}, \getHist generates a histogram from $L$ with bins of size \h.
|
||||||
|
We find \h by using the Freedman--Diaconis rule, which is resilient to outliers and takes into account the data variability and data size~\cite{meshgi2015expanding}.
|
||||||
|
For every possible histogram version, the \getDiff function finds the difference between two histograms; for this operation we utilize the Euclidean distance~(see Section~\ref{subsec:sel-utl} for more details).
|
||||||
|
|
||||||
\mk{WIP: Histograms}
|
\begin{algorithm}
|
||||||
|
\caption{Partitioned dummy {\thething} set options selection}
|
||||||
|
\label{algo:lmdk-sel-hist}
|
||||||
|
|
||||||
|
\DontPrintSemicolon
|
||||||
|
|
||||||
|
\KwData{$T, L$}
|
||||||
|
\KwResult{\opts}
|
||||||
|
\BlankLine
|
||||||
|
|
||||||
|
\hist, \h $\leftarrow$ \getHist{$T, L$}\;
|
||||||
|
|
||||||
|
\histCur $\leftarrow$ \hist\;
|
||||||
|
|
||||||
|
\opts $\leftarrow$ $[]$\;
|
||||||
|
|
||||||
|
\While{sum(\histCur) $\neq$ len($T$)}{ \label{algo:lmdk-sel-hist-while}
|
||||||
|
% Track the minimum (best) evaluation
|
||||||
|
\diffMin $\leftarrow$ $\infty$\;
|
||||||
|
|
||||||
|
% The candidate option
|
||||||
|
\opt $\leftarrow$ \histCur\;
|
||||||
|
|
||||||
|
% Check every possibility
|
||||||
|
\ForEach{\hi $\in$ \histCur}{ \label{algo:lmdk-sel-hist-cmp-start}
|
||||||
|
|
||||||
|
% Can we add one more point?
|
||||||
|
\If{\hi $+$ $1$ $\leq$ \h}{
|
||||||
|
\histTmp $\leftarrow$ \histCur\;
|
||||||
|
\histTmp$[i]$ $\leftarrow$ \histTmp$[i]$ $+$ $1$\;
|
||||||
|
% Find difference from original
|
||||||
|
\diffCur $\leftarrow$ \getDiff{\hist, \histTmp}\;
|
||||||
|
|
||||||
|
% Remember if it is the best that you've seen
|
||||||
|
\If{\diffCur $<$ \diffMin}{ \label{algo:lmdk-sel-hist-cmp}
|
||||||
|
\diffMin $\leftarrow$ \diffCur\;
|
||||||
|
\opt $\leftarrow$ \histTmp\;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
} \label{algo:lmdk-sel-hist-cmp-end}
|
||||||
|
|
||||||
|
% Update current histogram
|
||||||
|
\histCur $\leftarrow$ \opt\;
|
||||||
|
% Add current best to options
|
||||||
|
\opts.append(\opt)\;
|
||||||
|
|
||||||
|
} \label{algo:lmdk-sel-hist-end}
|
||||||
|
|
||||||
|
\Return{\opts}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
Between Lines~{\ref{algo:lmdk-sel-hist-cmp-start}-\ref{algo:lmdk-sel-hist-cmp-end}} we check every possible histogram version by incrementing each bin by $1$ and comparing it to the original (Line~\ref{algo:lmdk-sel-hist-cmp}).
|
||||||
|
At the end of the process, we return \opts, which contains, for every possible size, the version of the histogram that is closest to the original \hist.
|
||||||
|
|
||||||
|
|
||||||
\subsubsection{Privacy-preserving option selection}
|
\subsubsection{Privacy-preserving option selection}
|
||||||
\label{subsec:lmdk-opt-sel}
|
\label{subsec:lmdk-opt-sel}
|
||||||
|
|
||||||
\mk{WIP}
|
The algorithms of Section~\ref{subsec:lmdk-set-opts} return a set of possible versions of the original {\thething} set $L$, each created by adding to it extra timestamps from the series of events at timestamps $T \supseteq L$.
|
||||||
|
In the next step of the process, we randomly select a set by utilizing the exponential mechanism (Section~\ref{subsec:prv-mech}).
|
||||||
|
Prior to selecting a set, the exponential mechanism evaluates each set using a score function.
|
||||||
|
|
||||||
|
One way to evaluate each set is by taking into account the temporal position of the events in the sequence.
|
||||||
% Nearby events
|
% Nearby events
|
||||||
Events that occur at recent timestamps are more likely to reveal sensitive information regarding the users involved~\cite{kellaris2014differentially}.
|
Events that occur at recent timestamps are more likely to reveal sensitive information regarding the users involved~\cite{kellaris2014differentially}.
|
||||||
Thus, taking into account more recent events with respect to {\thethings} can result in less privacy loss and better privacy protection overall.
|
Thus, taking into account more recent events with respect to {\thethings} can result in less privacy loss and better privacy protection overall.
|
||||||
This leads to worse data utility.
|
This leads to worse data utility.
|
||||||
|
|
||||||
% Depending on the {\thething} discovery technique
|
% Depending on the {\thething} discovery technique
|
||||||
The values of events near a {\thething} are usually similar to that of the latter.
|
The values of events near a {\thething} are usually similar to that of the latter.
|
||||||
Therefore, privacy-preserving mechanisms are likely to approximate their values based on the nearest {\thething} instead of investing extra privacy budget to perturb their actual values; thus, spending less privacy budget.
|
Therefore, privacy-preserving mechanisms are likely to approximate their values based on the nearest {\thething} instead of investing extra privacy budget to perturb their actual values; thus, spending less privacy budget.
|
||||||
Saving privacy budget for releasing perturbed versions of actual event values can bring about better data utility.
|
Saving privacy budget for releasing perturbed versions of actual event values can bring about better data utility.
|
||||||
|
|
||||||
% Distant events
|
% Distant events
|
||||||
However, indicating the existence of randomized/dummy {\thethings} nearby actual {\thethings} can increase the adversarial confidence regarding the location of the latter within a series of events.
|
However, indicating the existence of dummy {\thethings} nearby actual {\thethings} can increase the adversarial confidence regarding the location of the latter within a series of events.
|
||||||
Hence, choosing randomized/dummy {\thethings} far from the actual {\thethings} (and thus less relevant) can limit the final privacy loss.
|
Hence, choosing dummy {\thethings} far from the actual {\thethings} (and thus less relevant) can limit the final privacy loss.
|
||||||
|
|
||||||
|
Another approach for the score function is to consider the number of events in each set.
|
||||||
|
On the one hand, sets with more dummy {\thethings} may render actual {\thethings} more indistinguishable probabilistically.
|
||||||
|
That is due to the fact that it is harder for an adversary to pick an actual {\thething} when the ratio of actual {\thethings} to the size of the set gets lower.
|
||||||
|
On the other hand, more dummy {\thethings} lead to distributing the privacy budget to more events, and therefore investing less at each timestamp.
|
||||||
|
Thus, they provide a better level of privacy protection.
|
||||||
|
Loading…
Reference in New Issue
Block a user