chore(opentelemetry): support tag flattening [AIT-9352] (#8042)

mabdinur · web-flow · commit 7c75365cef4c · 2024-01-12T11:07:55.000-05:00
Ensures otel tag/attribute values of type list, tuple, set, or frozenset are flattened. This operation is already done by the Datadog UI when displaying tags. However, we should also do this in the tracer to better align with other languages. The flattening logic must follow this spec: ``` Array values must decay into as many keys as there are entries in the array. The keys must be mapped by concatenating the outer and inner key values together, recursively, separated by a dot. In pseudo-code: fn addArrayAttribute(key, array): for (subkey, value) in array: addScalarOrArrayAttribute(key + "." + subkey, value) Example: Given attributes {"key": [[1, 2], ["3", "4"]]}, we will have {"key.0.0": 1, "key.0.1": 2, "key.1.0": "3", "key.1.1": "4"}. ``` ## Checklist - [x] Change(s) are motivated and described in the PR description. - [x] Testing strategy is described if automated tests are not included in the PR. - [x] Risk is outlined (performance impact, potential for breakage, maintainability, etc). - [x] Change is maintainable (easy to change, telemetry, documentation). - [x] [Library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) are followed. If no release note is required, add label `changelog/no-changelog`. - [x] Documentation is included (in-code, generated user docs, [public corp docs](https://github.com/DataDog/documentation/)). - [x] Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [ ] Title is accurate. - [ ] No unnecessary changes are introduced. - [ ] Description motivates each change. - [ ] Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes unless absolutely necessary. - [ ] Testing strategy adequately addresses listed risk(s). - [ ] Change is maintainable (easy to change, telemetry, documentation). - [ ] Release note makes sense to a user of the library. 
- [ ] Reviewer has explicitly acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment. - [ ] Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) - [ ] If this PR touches code that signs or publishes builds or packages, or handles credentials of any kind, I've requested a review from `@DataDog/security-design-and-guidance`. - [ ] This PR doesn't touch any of that.
diff --git a/ddtrace/internal/utils/formats.py b/ddtrace/internal/utils/formats.py
@@ -19,6 +19,7 @@
 
 T = TypeVar("T")
 
+
 log = logging.getLogger(__name__)
 
 
@@ -157,3 +158,46 @@ def stringify_cache_args(args, value_max_len=VALUE_MAX_LEN, cmd_max_len=CMD_MAX_
             break
 
     return " ".join(out)
+
+
+def is_sequence(obj):
+    # type: (Any) -> bool
+    """Return True if ``obj`` is a list, tuple, set or frozenset.
+
+    Strings, bytes and mappings are not matched, so callers such as
+    ``flatten_key_value`` treat them as scalar values.
+    """
+    try:
+        return isinstance(obj, (list, tuple, set, frozenset))
+    except TypeError:
+        # Checking the type of Generic Subclasses raises a TypeError
+        return False
+
+
+def flatten_key_value(root_key, value):
+    # type: (str, Any) -> Dict[str, Any]
+    """Flatten a possibly nested list/tuple/set into dotted ``key.index`` pairs.
+
+    Each element is stored under ``"{root_key}.{index}"`` and nested
+    containers are expanded recursively, e.g. ``("k", [[1, 2], ["3"]])`` gives
+    ``{"k.0.0": 1, "k.0.1": 2, "k.1.0": "3"}``. Any other value is returned
+    unchanged as ``{root_key: value}``; an empty container yields ``{}``.
+    Sets are enumerated in sorted order when their elements are comparable.
+    """
+    if not is_sequence(value):
+        return {root_key: value}
+
+    if isinstance(value, (set, frozenset)):
+        # Sets are unordered, so enumerating one directly would tie each index
+        # to hash order, which can differ between processes for str elements.
+        try:
+            value = sorted(value)
+        except TypeError:
+            # Elements are not mutually comparable: keep the iteration order.
+            pass
+
+    flattened = {}
+    for i, item in enumerate(value):
+        # The recursive call also covers scalars by returning {key: item}.
+        flattened.update(flatten_key_value(f"{root_key}.{i}", item))
+    return flattened
diff --git a/ddtrace/opentelemetry/_span.py b/ddtrace/opentelemetry/_span.py
@@ -13,10 +13,11 @@
 from ddtrace.internal import core
 from ddtrace.internal.compat import time_ns
 from ddtrace.internal.logger import get_logger
+from ddtrace.internal.utils.formats import flatten_key_value
+from ddtrace.internal.utils.formats import is_sequence
 
 
 if TYPE_CHECKING:
-    from typing import Callable  # noqa:F401
     from typing import Mapping  # noqa:F401
     from typing import Optional  # noqa:F401
     from typing import Union  # noqa:F401
@@ -160,6 +161,10 @@ def set_attribute(self, key, value):
             _ddmap(self._ddspan, ddattribute, value)
             return
 
+        if is_sequence(value):
+            for k, v in flatten_key_value(key, value).items():
+                self._ddspan.set_tag(k, v)
+            return
         self._ddspan.set_tag(key, value)
 
     def add_event(self, name, attributes=None, timestamp=None):
diff --git a/ddtrace/tracing/_span_link.py b/ddtrace/tracing/_span_link.py
@@ -28,6 +28,8 @@
 
 import attr
 
+from ddtrace.internal.utils.formats import flatten_key_value
+
 
 def _id_not_zero(self, attribute, value):
     if not value > 0:
@@ -84,7 +86,13 @@ def to_dict(self):
             "span_id": "{:016x}".format(self.span_id),
         }
         if self.attributes:
-            d["attributes"] = {k: str(v) for k, v in self.attributes.items()}
+            d["attributes"] = {}
+            for k, v in self.attributes.items():
+                # flatten all values with the type list, tuple and set
+                for k1, v1 in flatten_key_value(k, v).items():
+                    # convert all values to string
+                    d["attributes"][k1] = str(v1)
+
         if self._dropped_attributes > 0:
             d["dropped_attributes_count"] = self._dropped_attributes
         if self.tracestate:
diff --git a/tests/opentelemetry/test_span.py b/tests/opentelemetry/test_span.py
@@ -15,7 +15,7 @@
 from tests.utils import flaky
 
 
-@pytest.mark.snapshot(wait_for_num_traces=2)
+@pytest.mark.snapshot(wait_for_num_traces=3)
 def test_otel_span_attributes(oteltracer):
     with oteltracer.start_span("otel-string-tags") as span1:
         span1.set_attribute("service.name", "moons-service-str")
@@ -32,6 +32,13 @@ def test_otel_span_attributes(oteltracer):
         span2.set_attributes({"tag1": 1, "tag2": 2, "tag3": 3.1415})
         span2.end()
 
+    with oteltracer.start_span("otel-list-tags") as span:
+        span.set_attribute("moon1", [1, 2, 3])
+        span.set_attribute("moon", [True, 2, ["hello", 4, ["5", "6asda"]]])
+        span.set_attribute("sunk", (1, 2, 3))
+        span.set_attribute("teardrop68", {1, 2, 3})
+        span.set_attribute("gamer421", frozenset({1, 2, 3}))
+
     # Attributes should not be set on a closed span
     for span in [span1, span2]:
         span.set_attribute("should_not_be_set", "attributes can not be added after a span is ended")
diff --git a/tests/snapshots/tests.opentelemetry.test_span.test_otel_span_attributes.json b/tests/snapshots/tests.opentelemetry.test_span.test_otel_span_attributes.json
@@ -57,6 +57,49 @@
       "tag2": 2,
       "tag3": 3.1415
     },
-    "duration": 118958,
-    "start": 1700080043693385092
+    "duration": 68000,
+    "start": 1704737749273699000
+  }],
+[
+  {
+    "name": "internal",
+    "service": "",
+    "resource": "otel-list-tags",
+    "trace_id": 2,
+    "span_id": 1,
+    "parent_id": 0,
+    "type": "",
+    "error": 0,
+    "meta": {
+      "_dd.p.dm": "-0",
+      "_dd.p.tid": "659c3bd500000000",
+      "language": "python",
+      "moon.0": "True",
+      "moon.2.0": "hello",
+      "moon.2.2.0": "5",
+      "moon.2.2.1": "6asda",
+      "runtime-id": "4b2f5598b7cb468fab14c8b0b52221bc"
+    },
+    "metrics": {
+      "_dd.top_level": 1,
+      "_dd.tracer_kr": 1.0,
+      "_sampling_priority_v1": 1,
+      "gamer421.0": 1,
+      "gamer421.1": 2,
+      "gamer421.2": 3,
+      "moon.1": 2,
+      "moon.2.1": 4,
+      "moon1.0": 1,
+      "moon1.1": 2,
+      "moon1.2": 3,
+      "process_id": 47933,
+      "sunk.0": 1,
+      "sunk.1": 2,
+      "sunk.2": 3,
+      "teardrop68.0": 1,
+      "teardrop68.1": 2,
+      "teardrop68.2": 3
+    },
+    "duration": 242000,
+    "start": 1704737749273824000
   }]]
diff --git a/tests/tracer/test_encoders.py b/tests/tracer/test_encoders.py
@@ -442,6 +442,7 @@ def test_span_link_v04_encoding():
                     "link.kind": "link_kind",
                     "someval": 1,
                     "drop_me": "bye",
+                    "key_other": [True, 2, ["hello", 4, {"5"}]],
                 },
             )
         ],
@@ -470,6 +471,11 @@ def test_span_link_v04_encoding():
                 b"link.name": b"link_name",
                 b"link.kind": b"link_kind",
                 b"someval": b"1",
+                b"key_other.0": b"True",
+                b"key_other.1": b"2",
+                b"key_other.2.0": b"hello",
+                b"key_other.2.1": b"4",
+                b"key_other.2.2.0": b"5",
             },
             b"dropped_attributes_count": 1,
             b"tracestate": b"congo=t61rcWkgMzE",
@@ -491,7 +497,13 @@ def test_span_link_v05_encoding():
                 span_id=(2**64) - 1,
                 tracestate="congo=t61rcWkgMzE",
                 flags=0,
-                attributes={"moon": "ears", "link.name": "link_name", "link.kind": "link_kind", "drop_me": "bye"},
+                attributes={
+                    "moon": "ears",
+                    "link.name": "link_name",
+                    "link.kind": "link_kind",
+                    "drop_me": "bye",
+                    "key2": [True, 2, ["hello", 4, {"5"}]],
+                },
             )
         ],
     )
@@ -513,7 +525,8 @@ def test_span_link_v05_encoding():
     assert (
         encoded_span_meta[b"_dd.span_links"] == b'[{"trace_id": "7fffffffffffffffffffffffffffffff", '
         b'"span_id": "ffffffffffffffff", "attributes": {"moon": "ears", "link.name": "link_name", "link.kind": '
-        b'"link_kind"}, "dropped_attributes_count": 1, "tracestate": "congo=t61rcWkgMzE", "flags": 0}]'
+        b'"link_kind", "key2.0": "True", "key2.1": "2", "key2.2.0": "hello", "key2.2.1": "4", "key2.2.2.0": "5"}, '
+        b'"dropped_attributes_count": 1, "tracestate": "congo=t61rcWkgMzE", "flags": 0}]'
     )
 
 
diff --git a/tests/tracer/test_span.py b/tests/tracer/test_span.py
@@ -367,7 +367,12 @@ def test_span_links(self):
         s2.context._meta["tracestate"] = "congo=t61rcWkgMzE"
         s2.context.sampling_priority = 1
 
-        link_attributes = {"link.name": "s1_to_s2", "link.kind": "scheduled_by", "key1": "value2"}
+        link_attributes = {
+            "link.name": "s1_to_s2",
+            "link.kind": "scheduled_by",
+            "key1": "value2",
+            "key2": [True, 2, ["hello", 4, ["5", "6asda"]]],
+        }
         s1.link_span(s2.context, link_attributes)
 
         assert s1._links == [
diff --git a/tests/tracer/test_utils.py b/tests/tracer/test_utils.py
@@ -16,6 +16,8 @@
 from ddtrace.internal.utils.cache import cachedmethod
 from ddtrace.internal.utils.cache import callonce
 from ddtrace.internal.utils.formats import asbool
+from ddtrace.internal.utils.formats import flatten_key_value
+from ddtrace.internal.utils.formats import is_sequence
 from ddtrace.internal.utils.formats import parse_tags_str
 from ddtrace.internal.utils.http import w3c_get_dd_list_member
 from ddtrace.internal.utils.importlib import func_name
@@ -96,6 +98,51 @@ def test_parse_env_tags(tag_str, expected_tags, error_calls):
             assert log.error.call_count == 0, log.error.call_args_list
 
 
+@pytest.mark.parametrize(
+    "key,value,expected",
+    [
+        ("a", "1", {"a": "1"}),
+        # Values that are not list/tuple/set/frozenset are returned untouched
+        ("a", b"01", {"a": b"01"}),
+        ("a", {"b": ["0"]}, {"a": {"b": ["0"]}}),
+        # Empty containers contribute no keys
+        ("a", [], {}),
+        ("a", ["0", []], {"a.0": "0"}),
+        ("a", set("0"), {"a.0": "0"}),
+        ("a", frozenset("0"), {"a.0": "0"}),
+        ("a", ["0", "1", "2", "3"], {"a.0": "0", "a.1": "1", "a.2": "2", "a.3": "3"}),
+        ("a", ("0", "1", "2", "3"), {"a.0": "0", "a.1": "1", "a.2": "2", "a.3": "3"}),
+        (
+            "a",
+            ["0", {"1"}, ("2",), ["3", "4", ["5"]]],
+            {"a.0": "0", "a.1.0": "1", "a.2.0": "2", "a.3.0": "3", "a.3.1": "4", "a.3.2.0": "5"},
+        ),
+    ],
+)
+def test_flatten_key_value_pairs(key, value, expected):
+    """flatten_key_value expands nested containers into dotted index keys."""
+    assert flatten_key_value(key, value) == expected
+
+
+@pytest.mark.parametrize(
+    "value,expected",
+    [
+        (("0", "1"), True),
+        (["0", "1"], True),
+        ({"0", "1"}, True),
+        (frozenset(["0", "1"]), True),
+        ([], True),
+        ("123", False),
+        (b"123", False),
+        ({"a": "1"}, False),
+        (None, False),
+    ],
+)
+def test_is_sequence(value, expected):
+    """is_sequence only accepts list, tuple, set and frozenset instances."""
+    assert is_sequence(value) == expected
+
+
 def test_no_states():
     watch = time.StopWatch()
     with pytest.raises(RuntimeError):