Skip to content

Commit 0f6de5a

Browse files
cijothomasTommyCpp
andauthored
Metrics Aggregation - Improve throughput by 10x (#1833)
Co-authored-by: Zhongyang Wu <[email protected]>
1 parent 6ee5579 commit 0f6de5a

File tree

3 files changed

+40
-28
lines changed

3 files changed

+40
-28
lines changed

opentelemetry-sdk/CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
- Removed `XrayIdGenerator`, which was marked deprecated since 0.21.3. Use
1414
[`opentelemetry-aws`](https://crates.io/crates/opentelemetry-aws), version
1515
0.10.0 or newer.
16+
- Performance Improvement - Counter/UpDownCounter instruments internally use
17+
`RwLock` instead of `Mutex` to reduce contention.
1618

1719
- **Breaking** [1726](https://github.com/open-telemetry/opentelemetry-rust/pull/1726)
1820
Update `LogProcessor::emit() method to take mutable reference to LogData. This is breaking

opentelemetry-sdk/src/metrics/internal/sum.rs

+37-27
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
use std::sync::atomic::{AtomicBool, Ordering};
22
use std::vec;
33
use std::{
4-
collections::{hash_map::Entry, HashMap},
5-
sync::Mutex,
4+
collections::HashMap,
5+
sync::{Mutex, RwLock},
66
time::SystemTime,
77
};
88

@@ -18,7 +18,7 @@ use super::{
1818

1919
/// The storage for sums.
2020
struct ValueMap<T: Number<T>> {
21-
values: Mutex<HashMap<AttributeSet, T>>,
21+
values: RwLock<HashMap<AttributeSet, T::AtomicTracker>>,
2222
has_no_value_attribute_value: AtomicBool,
2323
no_attribute_value: T::AtomicTracker,
2424
}
@@ -32,7 +32,7 @@ impl<T: Number<T>> Default for ValueMap<T> {
3232
impl<T: Number<T>> ValueMap<T> {
3333
fn new() -> Self {
3434
ValueMap {
35-
values: Mutex::new(HashMap::new()),
35+
values: RwLock::new(HashMap::new()),
3636
has_no_value_attribute_value: AtomicBool::new(false),
3737
no_attribute_value: T::new_atomic_tracker(),
3838
}
@@ -45,21 +45,31 @@ impl<T: Number<T>> ValueMap<T> {
4545
self.no_attribute_value.add(measurement);
4646
self.has_no_value_attribute_value
4747
.store(true, Ordering::Release);
48-
} else if let Ok(mut values) = self.values.lock() {
49-
let size = values.len();
50-
match values.entry(attrs) {
51-
Entry::Occupied(mut occupied_entry) => {
52-
let sum = occupied_entry.get_mut();
53-
*sum += measurement;
54-
}
55-
Entry::Vacant(vacant_entry) => {
56-
if is_under_cardinality_limit(size) {
57-
vacant_entry.insert(measurement);
58-
} else if let Some(val) = values.get_mut(&STREAM_OVERFLOW_ATTRIBUTE_SET) {
59-
*val += measurement;
48+
} else if let Ok(values) = self.values.read() {
49+
if let Some(value_to_update) = values.get(&attrs) {
50+
value_to_update.add(measurement);
51+
return;
52+
} else {
53+
drop(values);
54+
if let Ok(mut values) = self.values.write() {
55+
// Recheck after acquiring write lock, in case another
56+
// thread has added the value.
57+
if let Some(value_to_update) = values.get(&attrs) {
58+
value_to_update.add(measurement);
59+
return;
60+
} else if is_under_cardinality_limit(values.len()) {
61+
let new_value = T::new_atomic_tracker();
62+
new_value.add(measurement);
63+
values.insert(attrs, new_value);
64+
} else if let Some(overflow_value) =
65+
values.get_mut(&STREAM_OVERFLOW_ATTRIBUTE_SET)
66+
{
67+
overflow_value.add(measurement);
6068
return;
6169
} else {
62-
values.insert(STREAM_OVERFLOW_ATTRIBUTE_SET.clone(), measurement);
70+
let new_value = T::new_atomic_tracker();
71+
new_value.add(measurement);
72+
values.insert(STREAM_OVERFLOW_ATTRIBUTE_SET.clone(), new_value);
6373
global::handle_error(MetricsError::Other("Warning: Maximum data points for metric stream exceeded. Entry added to overflow. Subsequent overflows to same metric until next collect will not be logged.".into()));
6474
}
6575
}
@@ -114,7 +124,7 @@ impl<T: Number<T>> Sum<T> {
114124
s_data.is_monotonic = self.monotonic;
115125
s_data.data_points.clear();
116126

117-
let mut values = match self.value_map.values.lock() {
127+
let mut values = match self.value_map.values.write() {
118128
Ok(v) => v,
119129
Err(_) => return (0, None),
120130
};
@@ -149,7 +159,7 @@ impl<T: Number<T>> Sum<T> {
149159
.collect(),
150160
start_time: Some(prev_start),
151161
time: Some(t),
152-
value,
162+
value: value.get_value(),
153163
exemplars: vec![],
154164
});
155165
}
@@ -186,7 +196,7 @@ impl<T: Number<T>> Sum<T> {
186196
s_data.is_monotonic = self.monotonic;
187197
s_data.data_points.clear();
188198

189-
let values = match self.value_map.values.lock() {
199+
let values = match self.value_map.values.write() {
190200
Ok(v) => v,
191201
Err(_) => return (0, None),
192202
};
@@ -226,7 +236,7 @@ impl<T: Number<T>> Sum<T> {
226236
.collect(),
227237
start_time: Some(prev_start),
228238
time: Some(t),
229-
value: *value,
239+
value: value.get_value(),
230240
exemplars: vec![],
231241
});
232242
}
@@ -282,7 +292,7 @@ impl<T: Number<T>> PrecomputedSum<T> {
282292
s_data.temporality = Temporality::Delta;
283293
s_data.is_monotonic = self.monotonic;
284294

285-
let mut values = match self.value_map.values.lock() {
295+
let mut values = match self.value_map.values.write() {
286296
Ok(v) => v,
287297
Err(_) => return (0, None),
288298
};
@@ -315,9 +325,9 @@ impl<T: Number<T>> PrecomputedSum<T> {
315325

316326
let default = T::default();
317327
for (attrs, value) in values.drain() {
318-
let delta = value - *reported.get(&attrs).unwrap_or(&default);
328+
let delta = value.get_value() - *reported.get(&attrs).unwrap_or(&default);
319329
if delta != default {
320-
new_reported.insert(attrs.clone(), value);
330+
new_reported.insert(attrs.clone(), value.get_value());
321331
}
322332
s_data.data_points.push(DataPoint {
323333
attributes: attrs
@@ -367,7 +377,7 @@ impl<T: Number<T>> PrecomputedSum<T> {
367377
s_data.temporality = Temporality::Cumulative;
368378
s_data.is_monotonic = self.monotonic;
369379

370-
let values = match self.value_map.values.lock() {
380+
let values = match self.value_map.values.write() {
371381
Ok(v) => v,
372382
Err(_) => return (0, None),
373383
};
@@ -400,9 +410,9 @@ impl<T: Number<T>> PrecomputedSum<T> {
400410

401411
let default = T::default();
402412
for (attrs, value) in values.iter() {
403-
let delta = *value - *reported.get(attrs).unwrap_or(&default);
413+
let delta = value.get_value() - *reported.get(attrs).unwrap_or(&default);
404414
if delta != default {
405-
new_reported.insert(attrs.clone(), *value);
415+
new_reported.insert(attrs.clone(), value.get_value());
406416
}
407417
s_data.data_points.push(DataPoint {
408418
attributes: attrs

stress/src/metrics_counter.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
OS: Ubuntu 22.04.3 LTS (5.15.146.1-microsoft-standard-WSL2)
44
Hardware: AMD EPYC 7763 64-Core Processor - 2.44 GHz, 16vCPUs,
55
RAM: 64.0 GB
6-
3M /sec
6+
35 M /sec
77
*/
88

99
use lazy_static::lazy_static;

0 commit comments

Comments
 (0)