-
Notifications
You must be signed in to change notification settings - Fork 704
Add sampler API, use in SDK tracer #225
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
496320d
21dd081
9ac4516
5e0370a
bc8782d
9a8f3c1
4264f16
567a7da
e743fdc
d90a8bc
7e0330b
a0978b2
7b1fccb
9b935ad
8de632d
644cf9d
6f0a33d
0de147e
c3efeb0
b24a465
235d74f
3594e32
1a4f96b
d6127b0
0e370ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
# Copyright 2019, OpenTelemetry Authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import abc | ||
from typing import Dict, Mapping, Optional, Sequence | ||
|
||
# pylint: disable=unused-import | ||
from opentelemetry.trace import Link, SpanContext | ||
from opentelemetry.util.types import AttributeValue | ||
|
||
|
||
class Decision: | ||
"""A sampling decision as applied to a newly-created Span. | ||
|
||
Args: | ||
sampled: Whether the `Span` should be sampled. | ||
attributes: Attributes to add to the `Span`. | ||
""" | ||
|
||
def __repr__(self) -> str: | ||
return "{}({}, attributes={})".format( | ||
type(self).__name__, str(self.sampled), str(self.attributes) | ||
) | ||
|
||
def __init__( | ||
self, | ||
sampled: bool = False, | ||
attributes: Mapping[str, "AttributeValue"] = None, | ||
) -> None: | ||
self.sampled: bool | ||
self.attributes: Dict[str, "AttributeValue"] | ||
|
||
self.sampled = sampled | ||
if attributes is None: | ||
self.attributes = {} | ||
else: | ||
self.attributes = dict(attributes) | ||
|
||
|
||
class Sampler(abc.ABC): | ||
@abc.abstractmethod | ||
def should_sample( | ||
self, | ||
parent_context: Optional["SpanContext"], | ||
trace_id: int, | ||
span_id: int, | ||
name: str, | ||
links: Optional[Sequence["Link"]] = None, | ||
c24t marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) -> "Decision": | ||
pass | ||
|
||
|
||
class StaticSampler(Sampler): | ||
"""Sampler that always returns the same decision.""" | ||
|
||
def __init__(self, decision: "Decision"): | ||
self._decision = decision | ||
|
||
def should_sample( | ||
self, | ||
parent_context: Optional["SpanContext"], | ||
trace_id: int, | ||
span_id: int, | ||
name: str, | ||
links: Optional[Sequence["Link"]] = None, | ||
) -> "Decision": | ||
return self._decision | ||
|
||
|
||
class ProbabilitySampler(Sampler): | ||
def __init__(self, rate: float): | ||
self._rate = rate | ||
self._bound = self.get_bound_for_rate(self._rate) | ||
|
||
# The sampler checks the last 8 bytes of the trace ID to decide whether to | ||
# sample a given trace. | ||
CHECK_BYTES = 0xFFFFFFFFFFFFFFFF | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. According to the W3 spec, you should use the high ("left") 8 bytes instead:
But I wonder if we can find a more robust way to maybe randomly mix some bits here and there together. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I considered this but decided to stick to the OC behavior (or at least the intended behavior: this also fixes a rounding/OBO bug). FWIW checking the high bytes also seems more correct to me, but in practice -- if people are using short trace IDs in the wild -- sampling every request seems worse than sampling based on the non-random part. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you're right about this. I suggested it as a change to the spec in open-telemetry/opentelemetry-specification#331. Another fun benefit of checking bytes high-to-low is that the sampling decision should be mostly consistent between samplers that check different numbers of bytes. Unlike checking low-to-high where every incremental bit is effectively another coin toss. Ultimately we'll probably just put this number in the spec, but it's a neat property in any case. :D There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. FYI - keep an eye on https://github.com/w3c/trace-context/pull/344/files. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's revisit this once open-telemetry/opentelemetry-specification#331 and w3c/trace-context#344 are resolved. |
||
|
||
@classmethod | ||
def get_bound_for_rate(cls, rate: float) -> int: | ||
return round(rate * (cls.CHECK_BYTES + 1)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If rate is less than There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually it is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I used
and etc. I think if you pass in a sampling rate This doesn't exactly work because float precision is worse than
Oh that's interesting, so the trace ID space is return 1 + round(rate * cls.CHECK_BYTES) I think that'd give us the same behavior as above, but always sample trace ID Should we have special handling for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm also happy to leave this as-is and treat There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have any preference here as I cannot think of a scenario where people would need such low but non-zero sample rate (unless someone is instrumenting SETI@home). 😆 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Something I missed above: there are and |
||
|
||
@property | ||
def rate(self) -> float: | ||
return self._rate | ||
|
||
@rate.setter | ||
def rate(self, new_rate: float) -> None: | ||
self._rate = new_rate | ||
self._bound = self.get_bound_for_rate(self._rate) | ||
|
||
@property | ||
def bound(self) -> int: | ||
return self._bound | ||
|
||
def should_sample( | ||
self, | ||
parent_context: Optional["SpanContext"], | ||
trace_id: int, | ||
span_id: int, | ||
name: str, | ||
links: Optional[Sequence["Link"]] = None, | ||
) -> "Decision": | ||
if parent_context is not None: | ||
return Decision(parent_context.trace_options.recorded) | ||
|
||
return Decision(trace_id & self.CHECK_BYTES < self.bound, {}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could be a problem for systems that generate shorter There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you think we should check fewer bytes? Or make it configurable? This follows the OC behavior, I actually don't know why we only check the lower 8. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't dug into this. If we're at the same bar of OpenCensus, we probably should be okay. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If everyone follows the W3C spec here, we should be covered:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As long as they follow the SHOULDs, which they MAY not. :P
c24t marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
|
||
# Samplers that ignore the parent sampling decision and never/always sample. | ||
ALWAYS_OFF = StaticSampler(Decision(False)) | ||
ALWAYS_ON = StaticSampler(Decision(True)) | ||
|
||
# Samplers that respect the parent sampling decision, but otherwise | ||
# never/always sample. | ||
DEFAULT_OFF = ProbabilitySampler(0.0) | ||
DEFAULT_ON = ProbabilitySampler(1.0) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,223 @@ | ||
# Copyright 2019, OpenTelemetry Authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import unittest | ||
|
||
from opentelemetry import trace | ||
from opentelemetry.trace import sampling | ||
|
||
TO_DEFAULT = trace.TraceOptions(trace.TraceOptions.DEFAULT) | ||
TO_RECORDED = trace.TraceOptions(trace.TraceOptions.RECORDED) | ||
|
||
|
||
class TestSampler(unittest.TestCase): | ||
def test_always_on(self): | ||
no_record_always_on = sampling.ALWAYS_ON.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"unrecorded parent, sampling on", | ||
) | ||
self.assertTrue(no_record_always_on.sampled) | ||
self.assertEqual(no_record_always_on.attributes, {}) | ||
|
||
recorded_always_on = sampling.ALWAYS_ON.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"recorded parent, sampling on", | ||
) | ||
self.assertTrue(recorded_always_on.sampled) | ||
self.assertEqual(recorded_always_on.attributes, {}) | ||
|
||
def test_always_off(self): | ||
no_record_always_off = sampling.ALWAYS_OFF.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"unrecorded parent, sampling off", | ||
) | ||
self.assertFalse(no_record_always_off.sampled) | ||
self.assertEqual(no_record_always_off.attributes, {}) | ||
|
||
recorded_always_on = sampling.ALWAYS_OFF.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"recorded parent, sampling off", | ||
) | ||
self.assertFalse(recorded_always_on.sampled) | ||
self.assertEqual(recorded_always_on.attributes, {}) | ||
|
||
def test_default_on(self): | ||
no_record_default_on = sampling.DEFAULT_ON.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"unrecorded parent, sampling on", | ||
) | ||
self.assertFalse(no_record_default_on.sampled) | ||
self.assertEqual(no_record_default_on.attributes, {}) | ||
|
||
recorded_default_on = sampling.DEFAULT_ON.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"recorded parent, sampling on", | ||
) | ||
self.assertTrue(recorded_default_on.sampled) | ||
self.assertEqual(recorded_default_on.attributes, {}) | ||
|
||
def test_default_off(self): | ||
no_record_default_off = sampling.DEFAULT_OFF.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_DEFAULT | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"unrecorded parent, sampling off", | ||
) | ||
self.assertFalse(no_record_default_off.sampled) | ||
self.assertEqual(no_record_default_off.attributes, {}) | ||
|
||
recorded_default_off = sampling.DEFAULT_OFF.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEEF, 0xDEADBEF0, trace_options=TO_RECORDED | ||
), | ||
0xDEADBEF1, | ||
0xDEADBEF2, | ||
"recorded parent, sampling off", | ||
) | ||
self.assertTrue(recorded_default_off.sampled) | ||
self.assertEqual(recorded_default_off.attributes, {}) | ||
|
||
def test_probability_sampler(self): | ||
sampler = sampling.ProbabilitySampler(0.5) | ||
|
||
# Check that we sample based on the trace ID if the parent context is | ||
# null | ||
self.assertTrue( | ||
sampler.should_sample( | ||
None, 0x7FFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
self.assertFalse( | ||
sampler.should_sample( | ||
None, 0x8000000000000000, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
|
||
# Check that the sampling decision matches the parent context if given, | ||
# and that the sampler ignores the trace ID | ||
self.assertFalse( | ||
sampler.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEF0, 0xDEADBEF1, trace_options=TO_DEFAULT | ||
), | ||
0x8000000000000000, | ||
0xDEADBEEF, | ||
"span name", | ||
).sampled | ||
) | ||
self.assertTrue( | ||
sampler.should_sample( | ||
trace.SpanContext( | ||
0xDEADBEF0, 0xDEADBEF1, trace_options=TO_RECORDED | ||
), | ||
0x8000000000000001, | ||
0xDEADBEEF, | ||
"span name", | ||
).sampled | ||
) | ||
|
||
def test_probability_sampler_zero(self): | ||
default_off = sampling.ProbabilitySampler(0.0) | ||
self.assertFalse( | ||
default_off.should_sample( | ||
None, 0x0, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
|
||
def test_probability_sampler_one(self): | ||
default_off = sampling.ProbabilitySampler(1.0) | ||
self.assertTrue( | ||
default_off.should_sample( | ||
None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
|
||
def test_probability_sampler_limits(self): | ||
|
||
# Sample one of every 2^64 (= 5e-20) traces. This is the lowest | ||
# possible meaningful sampling rate, only traces with trace ID 0x0 | ||
# should get sampled. | ||
almost_always_off = sampling.ProbabilitySampler(2 ** -64) | ||
self.assertTrue( | ||
almost_always_off.should_sample( | ||
None, 0x0, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
self.assertFalse( | ||
almost_always_off.should_sample( | ||
None, 0x1, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
self.assertEqual( | ||
sampling.ProbabilitySampler.get_bound_for_rate(2 ** -64), 0x1 | ||
) | ||
|
||
# Sample every trace with (last 8 bytes of) trace ID less than | ||
# 0xffffffffffffffff. In principle this is the highest possible | ||
# sampling rate less than 1, but we can't actually express this rate as | ||
# a float! | ||
# | ||
# In practice, the highest possible sampling rate is: | ||
# | ||
# round(sys.float_info.epsilon * 2 ** 64) | ||
|
||
almost_always_on = sampling.ProbabilitySampler(1 - 2 ** -64) | ||
self.assertTrue( | ||
almost_always_on.should_sample( | ||
None, 0xFFFFFFFFFFFFFFFE, 0xDEADBEEF, "span name" | ||
).sampled | ||
) | ||
|
||
# These tests are logically consistent, but fail because of the float | ||
# precision issue above. Changing the sampler to check fewer bytes of | ||
# the trace ID will cause these to pass. | ||
|
||
# self.assertFalse( | ||
# almost_always_on.should_sample( | ||
# None, | ||
# 0xffffffffffffffff, | ||
# 0xdeadbeef, | ||
# "span name", | ||
# ).sampled | ||
# ) | ||
# self.assertEqual( | ||
# sampling.ProbabilitySampler.get_bound_for_rate(1 - 2 ** -64)), | ||
# 0xffffffffffffffff, | ||
# ) |
Uh oh!
There was an error while loading. Please reload this page.