Skip to content

Commit a01f54d

Browse files
committed
updating number_of_records_epsilon to number_of_records_epsilon_proportion (in terms of the input privacy budget)
1 parent dc5e3df commit a01f54d

File tree

11 files changed

+553
-520
lines changed

11 files changed

+553
-520
lines changed

packages/cli/src/main.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,11 @@ enum Command {
189189
sigma_proportions: Option<Vec<f64>>,
190190

191191
#[structopt(
192-
long = "number-of-records-epsilon",
193-
help = "epsilon used to add noise to the protected number of records in the aggregated data (default is 0.1)",
192+
long = "number-of-records-epsilon-proportion",
193+
help = "proportion of epsilon used to add noise to the protected number of records in the aggregated data (default is 0.005)",
194194
requires = "dp"
195195
)]
196-
number_of_records_epsilon: Option<f64>,
196+
number_of_records_epsilon_proportion: Option<f64>,
197197

198198
#[structopt(
199199
long = "aggregates-json",
@@ -398,7 +398,7 @@ fn main() {
398398
noise_threshold_type,
399399
noise_threshold_values,
400400
sigma_proportions,
401-
number_of_records_epsilon,
401+
number_of_records_epsilon_proportion,
402402
aggregates_json,
403403
} => {
404404
let mut aggregator = Aggregator::new(data_block.clone());
@@ -429,7 +429,7 @@ fn main() {
429429
sensitivities_percentile.unwrap(),
430430
sensitivities_epsilon_proportion.unwrap(),
431431
sigma_proportions,
432-
number_of_records_epsilon,
432+
number_of_records_epsilon_proportion,
433433
),
434434
threshold,
435435
&mut progress_reporter,

packages/core/src/dp/dp_parameters.rs

+14-13
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
use pyo3::prelude::*;
33
use serde::{Deserialize, Serialize};
44

5-
/// Default epsilon used to add noise to the protected number of records in the aggregated data
6-
pub const DEFAULT_NUMBER_OF_RECORDS_EPSILON: f64 = 0.1;
5+
/// Default epsilon proportion used to add noise to the protected number of records
6+
/// in the aggregated data
7+
pub const DEFAULT_NUMBER_OF_RECORDS_EPSILON_PROPORTION: f64 = 0.005;
78

89
/// Parameters for aggregate generation with differential privacy
910
#[cfg_attr(feature = "pyo3", pyclass)]
@@ -26,9 +27,9 @@ pub struct DpParameters {
2627
/// (e.g. \[1.0, 2.0, 3.0\] means that `sigma_2 = 2.0 * sigma_1` and `sigma_3 = 3.0 * sigma_1`)
2728
/// - If `None` all the sigma values will be the same
2829
pub sigma_proportions: Option<Vec<f64>>,
29-
/// Epsilon used to add noise to the protected number of records in the aggregated data
30-
/// (default is 0.1)
31-
pub number_of_records_epsilon: Option<f64>,
30+
/// Proportion of epsilon used to add noise to the protected number of records in
31+
/// the aggregated data (default is 0.005)
32+
pub number_of_records_epsilon_proportion: Option<f64>,
3233
}
3334

3435
#[cfg_attr(feature = "pyo3", pymethods)]
@@ -49,23 +50,23 @@ impl DpParameters {
4950
/// controls how the budget is split across combination lengths
5051
/// (e.g. \[1.0, 2.0, 3.0\] means that `sigma_2 = 2.0 * sigma_1` and `sigma_3 = 3.0 * sigma_1`)
5152
/// - If `None` all the sigma values will be the same
52-
/// * `number_of_records_epsilon` - Epsilon used to add noise to the protected number of records in the aggregated data
53-
/// (default is 0.1)
53+
/// * `number_of_records_epsilon_proportion` - Proportion of epsilon used to add noise to the protected number of records
54+
/// in the aggregated data (default is 0.005)
5455
pub fn new(
5556
epsilon: f64,
5657
delta: f64,
5758
percentile_percentage: usize,
5859
percentile_epsilon_proportion: f64,
5960
sigma_proportions: Option<Vec<f64>>,
60-
number_of_records_epsilon: Option<f64>,
61+
number_of_records_epsilon_proportion: Option<f64>,
6162
) -> Self {
6263
DpParameters {
6364
epsilon,
6465
delta,
6566
percentile_percentage,
6667
percentile_epsilon_proportion,
6768
sigma_proportions,
68-
number_of_records_epsilon,
69+
number_of_records_epsilon_proportion,
6970
}
7071
}
7172

@@ -84,23 +85,23 @@ impl DpParameters {
8485
/// controls how the budget is split across combination lengths
8586
/// (e.g. \[1.0, 2.0, 3.0\] means that `sigma_2 = 2.0 * sigma_1` and `sigma_3 = 3.0 * sigma_1`)
8687
/// - If `None` all the sigma values will be the same
87-
/// * `number_of_records_epsilon` - Epsilon used to add noise to the protected number of records in the aggregated data
88-
/// (default is 0.1)
88+
/// * `number_of_records_epsilon_proportion` - Proportion of epsilon used to add noise to the protected number of records
89+
/// in the aggregated data (default is 0.005)
8990
pub fn new(
9091
epsilon: f64,
9192
delta: f64,
9293
percentile_percentage: usize,
9394
percentile_epsilon_proportion: f64,
9495
sigma_proportions: Option<Vec<f64>>,
95-
number_of_records_epsilon: Option<f64>,
96+
number_of_records_epsilon_proportion: Option<f64>,
9697
) -> Self {
9798
DpParameters {
9899
epsilon,
99100
delta,
100101
percentile_percentage,
101102
percentile_epsilon_proportion,
102103
sigma_proportions,
103-
number_of_records_epsilon,
104+
number_of_records_epsilon_proportion,
104105
}
105106
}
106107
}

packages/core/src/dp/noise_aggregator.rs

+39-16
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use super::{
22
CombinationsByRecord, DpParameters, DpPercentile, NoisyCountThreshold,
3-
DEFAULT_NUMBER_OF_RECORDS_EPSILON,
3+
DEFAULT_NUMBER_OF_RECORDS_EPSILON_PROPORTION,
44
};
55
use fnv::FnvHashSet;
66
use itertools::Itertools;
@@ -44,13 +44,14 @@ pub struct NoiseAggregator {
4444

4545
impl NoiseAggregator {
4646
#[inline]
47-
fn calc_percentile_epsilon_and_sigma_by_len(
47+
fn calc_percentile_epsilon_number_of_records_epsilon_and_sigma_by_len(
4848
reporting_length: usize,
4949
epsilon: f64,
5050
delta: f64,
5151
percentile_epsilon_proportion: f64,
52+
number_of_records_proportion: f64,
5253
sigma_proportions: &Option<Vec<f64>>,
53-
) -> (f64, Vec<f64>) {
54+
) -> (f64, f64, Vec<f64>) {
5455
let proportions = match sigma_proportions {
5556
Some(proportions) => proportions.clone(),
5657
None => {
@@ -60,37 +61,58 @@ impl NoiseAggregator {
6061
}
6162
};
6263

63-
assert!(
64-
reporting_length == proportions.len(),
65-
"sigma proportions array size should match the reporting length",
66-
);
67-
6864
info!(
69-
"calculating percentile epsilon and sigma by len: total epsilon = {}, delta = {}, percentile_epsilon_proportion = {}, sigma_proportions = {:?}",
65+
"calculating percentile epsilon, number of records epsilon and sigma by len: total epsilon = {}, delta = {}, percentile_epsilon_proportion = {}, number_of_records_proportion = {}, sigma_proportions = {:?}",
7066
epsilon,
7167
delta,
7268
percentile_epsilon_proportion,
69+
number_of_records_proportion,
7370
proportions
7471
);
7572

73+
assert!(
74+
reporting_length == proportions.len(),
75+
"sigma proportions array size should match the reporting length",
76+
);
77+
78+
assert!(
79+
percentile_epsilon_proportion < 1.0 && percentile_epsilon_proportion > 0.0,
80+
"percentile_epsilon_proportion must be > 0 and < 1"
81+
);
82+
83+
assert!(
84+
number_of_records_proportion < 1.0 && number_of_records_proportion > 0.0,
85+
"number_of_records_proportion must be > 0 and < 1"
86+
);
87+
88+
assert!(
89+
number_of_records_proportion + percentile_epsilon_proportion < 1.0,
90+
"(percentile_epsilon_proportion + number_of_records_proportion) must be > 0 and < 1"
91+
);
92+
7693
let t = reporting_length as f64;
7794
let rho = (epsilon + (2.0 / delta).ln()).sqrt() - (2.0 / delta).ln().sqrt();
7895
let k: f64 = proportions.iter().map(|p| 1.0 / (p * p)).sum();
7996
let percentile_epsilon = (2.0 * rho * percentile_epsilon_proportion / t).sqrt();
80-
let base_sigma = (k / (2.0 * rho * (1.0 - percentile_epsilon_proportion))).sqrt();
97+
let number_of_records_epsilon = (2.0 * rho * number_of_records_proportion).sqrt();
98+
let base_sigma = (k
99+
/ (2.0 * rho * (1.0 - percentile_epsilon_proportion - number_of_records_proportion)))
100+
.sqrt();
81101
let sigmas: Vec<f64> = proportions.iter().map(|p| p * base_sigma).collect();
82102
let lhs = ((t * percentile_epsilon * percentile_epsilon) / 2.0)
103+
+ ((number_of_records_epsilon * number_of_records_epsilon) / 2.0)
83104
+ (sigmas.iter().map(|s| 1.0 / (s * s)).sum::<f64>() / 2.0);
84105

85106
info!("percentile epsilon = {}", percentile_epsilon);
107+
info!("number of records epsilon = {}", number_of_records_epsilon);
86108
info!("calculated sigmas = {:?}", sigmas);
87109

88110
assert!(
89111
(lhs - rho).abs() <= DEFAULT_TOLERANCE,
90112
"something went wrong calculating DP sigmas"
91113
);
92114

93-
(percentile_epsilon, sigmas)
115+
(percentile_epsilon, number_of_records_epsilon, sigmas)
94116
}
95117

96118
#[inline]
@@ -427,12 +449,15 @@ impl NoiseAggregator {
427449
dp_parameters: &DpParameters,
428450
threshold: NoisyCountThreshold,
429451
) -> NoiseAggregator {
430-
let (percentile_epsilon, sigmas) =
431-
NoiseAggregator::calc_percentile_epsilon_and_sigma_by_len(
452+
let (percentile_epsilon, number_of_records_epsilon, sigmas) =
453+
NoiseAggregator::calc_percentile_epsilon_number_of_records_epsilon_and_sigma_by_len(
432454
reporting_length,
433455
dp_parameters.epsilon,
434456
dp_parameters.delta,
435457
dp_parameters.percentile_epsilon_proportion,
458+
dp_parameters
459+
.number_of_records_epsilon_proportion
460+
.unwrap_or(DEFAULT_NUMBER_OF_RECORDS_EPSILON_PROPORTION),
436461
&dp_parameters.sigma_proportions,
437462
);
438463

@@ -444,9 +469,7 @@ impl NoiseAggregator {
444469
delta: dp_parameters.delta,
445470
sigmas,
446471
threshold,
447-
number_of_records_epsilon: dp_parameters
448-
.number_of_records_epsilon
449-
.unwrap_or(DEFAULT_NUMBER_OF_RECORDS_EPSILON),
472+
number_of_records_epsilon,
450473
}
451474
}
452475

0 commit comments

Comments
 (0)