problem: Reviewed lmdk-set-opts

This commit is contained in:
Manos Katsomallos 2021-10-12 11:00:50 +02:00
parent 323cea9f72
commit dd1f5beec8
3 changed files with 85 additions and 8 deletions

View File

@ -1761,6 +1761,15 @@
year = {2017}
}
@inproceedings{meshgi2015expanding,
title={Expanding histogram of colors with gridding to improve tracking accuracy},
author={Meshgi, Kourosh and Ishii, Shin},
booktitle={2015 14th IAPR International Conference on Machine Vision Applications (MVA)},
pages={475--479},
year={2015},
organization={IEEE}
}
@inproceedings{wang2017privacy,
title = {Privacy Preserving Anonymity for Periodical SRS Data Publishing},
author = {Wang, Jie-Teng and Lin, Wen-Yang},

View File

@ -39,10 +39,16 @@ In Example~\ref{ex:lmdk-risk}, we demonstrate the extreme case of the applicatio
\SetKwData{evalCur}{evalCur}
\SetKwData{evalOrig}{evalOrig}
\SetKwData{evalSum}{evalSum}
\SetKwData{h}{h}
\SetKwData{hi}{h$_i$}
\SetKwData{hist}{hist}
\SetKwData{histCur}{histCur}
\SetKwData{histTmp}{histTmp}
\SetKwData{metricCur}{metricCur}
\SetKwData{metricOrig}{metricOrig}
\SetKwData{opt}{opt}
\SetKwData{opti}{opt$_i$}
\SetKwData{opts}{opts}
\SetKwData{optim}{optim}
\SetKwData{optimi}{optim$_i$}
\SetKwData{opts}{opts}
@ -51,7 +57,10 @@ In Example~\ref{ex:lmdk-risk}, we demonstrate the extreme case of the applicatio
\SetKwFunction{calcMetric}{calcMetric}
\SetKwFunction{evalSeq}{evalSeq}
\SetKwFunction{getCombs}{getCombs}
\SetKwFunction{getDiff}{getDiff}
\SetKwFunction{getHist}{getHist}
\SetKwFunction{getOpts}{getOpts}
\SetKwFunction{getNorm}{getNorm}
\input{problem/theotherthing/contribution}
\input{problem/theotherthing/problem}

View File

@ -73,7 +73,7 @@ Next, we present a heuristic solution with improved time and space requirements.
\paragraph{Heuristic}
Algorithm~\ref{algo:lmdk-sel-heur} follows an incremental methodology.
At each step it selects a new timestamp that corresponds to a regular ({non-\thething}) event from $T \setminus L$.
At each step it selects a new timestamp, that corresponds to a regular ({non-\thething}) event from $T \setminus L$, to create an option.
\begin{algorithm}
\caption{Heuristic dummy {\thething} set options selection}
@ -89,7 +89,7 @@ At each step it selects a new timestamp that corresponds to a regular ({non-\the
\evalOrig $\leftarrow$ \evalSeq{$T, \emptyset, L$}\;
% Get all possible option combinations
\optim $\leftarrow$ $[]$\;
\opts $\leftarrow$ $[]$\;
$L' \leftarrow L$\;
@ -110,28 +110,87 @@ At each step it selects a new timestamp that corresponds to a regular ({non-\the
\If{\diffCur $<$ \diffMin}{
\diffMin $\leftarrow$ \diffCur\;
\optimi $\leftarrow$ \reg\;
}\label{algo:lmdk-sel-heur-comparison-end}
}\label{algo:lmdk-sel-heur-cmp-end}
}
% Save new point to landmarks
$L'$.add(\optimi)\;
% Add new option
\optim.append($L' \setminus L$)\;
\opts.append($L' \setminus L$)\;
}\label{algo:lmdk-sel-heur-end}
\Return{\optim}
\Return{\opts}
\end{algorithm}
Similar to Algorithm~\ref{algo:lmdk-sel-opt}, the selection is done based on a predefined metric (Lines~{\ref{algo:lmdk-sel-heur-comparison}-\ref{algo:lmdk-sel-heur-comparison-end}}).
Similar to Algorithm~\ref{algo:lmdk-sel-opt}, it selects new options based on a predefined metric (Lines~{\ref{algo:lmdk-sel-heur-comparison}--\ref{algo:lmdk-sel-heur-cmp-end}}).
This process (Lines~{\ref{algo:lmdk-sel-heur-while}--\ref{algo:lmdk-sel-heur-end}}) goes on until we select a set whose size is equal to that of the series of events, i.e.,~$L' = T$.
In terms of complexity: given $n$ regular events it requires $\mathcal{O}(n^2)$ time and space.
In terms of complexity, given $n$ regular events it requires $\mathcal{O}(n^2)$ time and space.
Note that the reverse heuristic approach, i.e.,~starting with $T$ {\thethings} and removing until $L$, performs similarly to Algorithm~\ref{algo:lmdk-sel-heur}.
\paragraph{Partitioned}
We improve the complexity of Algorithm~\ref{algo:lmdk-sel-opt} by partitioning the {\thething} timestamp sequence $L$.
In Algorithm~\ref{algo:lmdk-sel-hist}, \getHist generates a histogram from $L$ with bins of size \h.
We find \h by using the Freedman--Diaconis rule, which is resilient to outliers and takes into account the data variability and data size~\cite{meshgi2015expanding}.
For every possible histogram version, the \getDiff function finds the difference between two histograms; for this operation we utilize the Euclidean distance~(see Section~\ref{subsec:sel-utl} for more details).
\mk{WIP: Histograms}
\begin{algorithm}
\caption{Partitioned dummy {\thething} set options selection}
\label{algo:lmdk-sel-hist}
\DontPrintSemicolon
\KwData{$T, L$}
\KwResult{\opts}
\BlankLine
\hist, \h $\leftarrow$ \getHist{$T, L$}\;
\histCur $\leftarrow$ \hist\;
\opts $\leftarrow$ $[]$\;
\While{sum(\histCur) $\neq$ len($T$)}{ \label{algo:lmdk-sel-hist-while}
% Track the minimum (best) evaluation
\diffMin $\leftarrow$ $\infty$\;
% The candidate option
\opt $\leftarrow$ \histCur\;
% Check every possibility
\ForEach{\hi $\in$ \histCur}{ \label{algo:lmdk-sel-hist-cmp-start}
% Can we add one more point?
\If{\hi $+$ $1$ $\leq$ \h}{
\histTmp $\leftarrow$ \histCur\;
\histTmp$[i]$ $\leftarrow$ \histTmp$[i]$ $+$ $1$\;
% Find difference from original
\diffCur $\leftarrow$ \getDiff{\hist, \histTmp}\;
% Remember if it is the best that you've seen
\If{\diffCur $<$ \diffMin}{ \label{algo:lmdk-sel-hist-cmp}
\diffMin $\leftarrow$ \diffCur\;
\opt $\leftarrow$ \histTmp\;
}
}
} \label{algo:lmdk-sel-hist-cmp-end}
% Update current histogram
\histCur $\leftarrow$ \opt\;
% Add current best to options
\opts.append(\opt)\;
} \label{algo:lmdk-sel-hist-end}
\Return{\opts}
\end{algorithm}
In Lines~{\ref{algo:lmdk-sel-hist-cmp-start}--\ref{algo:lmdk-sel-hist-cmp-end}} we check every possible histogram version by incrementing each bin by $1$ and comparing it to the original (Line~\ref{algo:lmdk-sel-hist-cmp}).
At the end of the process, we return \opts, which contains all the versions of \hist that are closest to \hist for all possible sizes of \hist.
\subsubsection{Privacy-preserving option selection}