Skip to content

Fix UnknownRemoteOperation for Database instrumentation. #50

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
a41b5a2
Fix UnknownRemoteOperation for Database instrumentation.
zzhlogin Feb 8, 2024
68d867b
Apply lint.
zzhlogin Feb 9, 2024
c71ca7a
Merge branch 'main' into db_remote_operator
zzhlogin Feb 12, 2024
8e3eb41
Address comments.
zzhlogin Feb 13, 2024
8ac72da
Add dependencies.
zzhlogin Feb 13, 2024
884e37e
change tox.ini
zzhlogin Feb 13, 2024
011e439
fix.
zzhlogin Feb 13, 2024
7603bc0
fix.
zzhlogin Feb 13, 2024
18b9a4a
Apply lint.
zzhlogin Feb 13, 2024
226dfb8
Apply lint.
zzhlogin Feb 13, 2024
b6a9dcf
Add test.py.
zzhlogin Feb 13, 2024
bf8c20a
Merge branch 'main' into db_remote_operator
zzhlogin Feb 13, 2024
3da862b
Address comments.
zzhlogin Feb 13, 2024
4e022fa
fix.
zzhlogin Feb 13, 2024
9cec7f3
skip json.
zzhlogin Feb 13, 2024
81232e0
skip json.
zzhlogin Feb 13, 2024
b2c4efb
skip json.
zzhlogin Feb 13, 2024
958acd8
skip json.
zzhlogin Feb 13, 2024
d8ff4ab
skip json.
zzhlogin Feb 13, 2024
084eeb9
skip json.
zzhlogin Feb 13, 2024
f686ee5
skip json.
zzhlogin Feb 13, 2024
fd25bfe
skip json.
zzhlogin Feb 13, 2024
6dfa0a8
skip json.
zzhlogin Feb 13, 2024
8835871
skip json.
zzhlogin Feb 14, 2024
355514d
Code cleanup.
zzhlogin Feb 14, 2024
805eb1f
Merge branch 'main' into db_remote_operator
zzhlogin Feb 14, 2024
ed275fa
Apply lint.
zzhlogin Feb 14, 2024
bee2cf3
Address comments.
zzhlogin Feb 14, 2024
3676b48
Move cpdespell skip.
zzhlogin Feb 14, 2024
beadb89
Reformat json.
zzhlogin Feb 14, 2024
0fba6a0
small typo.
zzhlogin Feb 14, 2024
96dcb51
Address comments.
zzhlogin Feb 14, 2024
aec2eea
Apply lint.
zzhlogin Feb 14, 2024
b605736
Merge branch 'main' into db_remote_operator
zzhlogin Feb 14, 2024
b5e01f0
Ignore.
zzhlogin Feb 14, 2024
dc9154a
pylint disable.
zzhlogin Feb 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .codespellrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[codespell]
# skipping auto generated folders
skip = ./.tox,./.mypy_cache,./target,*/LICENSE,./venv
skip = ./.tox,./.mypy_cache,./target,*/LICENSE,./venv,*/sql_dialect_keywords.json
ignore-words-list = ot
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
import re
from logging import DEBUG, Logger, getLogger
from typing import Optional
from typing import Match, Optional
from urllib.parse import ParseResult, urlparse

from amazon.opentelemetry.distro._aws_attribute_keys import (
Expand All @@ -14,6 +15,8 @@
)
from amazon.opentelemetry.distro._aws_span_processing_util import (
LOCAL_ROOT,
MAX_KEYWORD_LENGTH,
SQL_KEYWORD_PATTERN,
UNKNOWN_OPERATION,
UNKNOWN_REMOTE_OPERATION,
UNKNOWN_REMOTE_SERVICE,
Expand All @@ -38,6 +41,7 @@
# Pertinent OTEL attribute keys
_SERVICE_NAME: str = ResourceAttributes.SERVICE_NAME
_DB_OPERATION: str = SpanAttributes.DB_OPERATION
_DB_STATEMENT: str = SpanAttributes.DB_STATEMENT
_DB_SYSTEM: str = SpanAttributes.DB_SYSTEM
_FAAS_INVOKED_NAME: str = SpanAttributes.FAAS_INVOKED_NAME
_FAAS_TRIGGER: str = SpanAttributes.FAAS_TRIGGER
Expand Down Expand Up @@ -189,9 +193,12 @@ def _set_remote_service_and_operation(span: ReadableSpan, attributes: BoundedAtt
elif is_key_present(span, _RPC_SERVICE) or is_key_present(span, _RPC_METHOD):
remote_service = _normalize_service_name(span, _get_remote_service(span, _RPC_SERVICE))
remote_operation = _get_remote_operation(span, _RPC_METHOD)
elif is_key_present(span, _DB_SYSTEM) or is_key_present(span, _DB_OPERATION):
elif is_key_present(span, _DB_SYSTEM) or is_key_present(span, _DB_OPERATION) or is_key_present(span, _DB_STATEMENT):
remote_service = _get_remote_service(span, _DB_SYSTEM)
remote_operation = _get_remote_operation(span, _DB_OPERATION)
if is_key_present(span, _DB_OPERATION):
remote_operation = _get_remote_operation(span, _DB_OPERATION)
else:
remote_operation = _get_db_statement_remote_operation(span, _DB_STATEMENT)
elif is_key_present(span, _FAAS_INVOKED_NAME) or is_key_present(span, _FAAS_TRIGGER):
remote_service = _get_remote_service(span, _FAAS_INVOKED_NAME)
remote_operation = _get_remote_operation(span, _FAAS_TRIGGER)
Expand Down Expand Up @@ -232,6 +239,28 @@ def _get_remote_operation(span: ReadableSpan, remote_operation_key: str) -> str:
return remote_operation


def _get_db_statement_remote_operation(span: ReadableSpan, statement_key: str) -> str:
    """
    Derive a remote operation from the span's database statement, best effort.

    Used when the span carries no db.operation attribute. The statement's leading
    whitespace is dropped, the first MAX_KEYWORD_LENGTH characters are kept, and
    that prefix (upper-cased) is matched against SQL_KEYWORD_PATTERN.

    :param span: span whose attributes are inspected.
    :param statement_key: attribute key under which the statement is stored.
    :return: the matched keyword (upper-cased, since matching runs on the
        upper-cased prefix), or UNKNOWN_REMOTE_OPERATION when the attribute is
        missing, is not a string, or does not start with a known keyword.
    """
    statement = span.attributes.get(statement_key)

    # .get() yields None for an absent key, and an attribute value is not guaranteed
    # to be a string; in both cases there is nothing to parse, so fall back instead
    # of raising from this best-effort path.
    if not isinstance(statement, str):
        return UNKNOWN_REMOTE_OPERATION

    # Remove all whitespace and newline characters from the beginning of the statement
    # and keep only the first MAX_KEYWORD_LENGTH characters.
    candidate: str = statement.lstrip()[:MAX_KEYWORD_LENGTH]
    match: Optional[Match[str]] = re.match(SQL_KEYWORD_PATTERN, candidate.upper())
    return match.group(0) if match else UNKNOWN_REMOTE_OPERATION


def _normalize_service_name(span: ReadableSpan, service_name: str) -> str:
"""
TODO: Determine if problems in Java instrumentation are relevant here. Do we need normalization? If so, probably we
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
"""Utility module designed to support shared logic across AWS Span Processors."""
import json
import os
from typing import Dict, List

from amazon.opentelemetry.distro._aws_attribute_keys import AWS_CONSUMER_PARENT_SPAN_KIND, AWS_LOCAL_OPERATION
from opentelemetry.sdk.trace import InstrumentationScope, ReadableSpan
from opentelemetry.semconv.trace import MessagingOperationValues, SpanAttributes
Expand All @@ -18,6 +22,23 @@
_SQS_RECEIVE_MESSAGE_SPAN_NAME: str = "Sqs.ReceiveMessage"
_AWS_SDK_INSTRUMENTATION_SCOPE_PREFIX: str = "io.opentelemetry.aws-sdk-"

# Max keyword length supported by parsing into remote_operation from DB_STATEMENT.
# The statement is truncated to this many characters before it is matched against
# the SQL keyword pattern.
MAX_KEYWORD_LENGTH: int = 27


def _get_dialect_keywords() -> List[str]:
    """
    Load the SQL dialect keywords from configuration/sql_dialect_keywords.json.

    The path is resolved relative to the directory containing this module.
    Only meant to be invoked by SQL_KEYWORD_PATTERN and unit tests.

    :return: the value stored under the "keywords" key of the JSON document.
    :raises OSError: if the JSON file cannot be opened.
    :raises json.JSONDecodeError: if the file is not valid JSON.
    :raises KeyError: if the document has no "keywords" entry.
    """
    current_dir: str = os.path.dirname(__file__)
    file_path: str = os.path.join(current_dir, "configuration/sql_dialect_keywords.json")
    with open(file_path, "r", encoding="utf-8") as json_file:
        # The document maps a name to a list of keyword strings.
        keywords_data: Dict[str, List[str]] = json.load(json_file)
    return keywords_data["keywords"]


# Regular expression matching any known SQL dialect keyword at the very start of a
# string: the keywords are OR-ed together inside a non-capturing group and must end
# on a word boundary.
SQL_KEYWORD_PATTERN = rf"^(?:{'|'.join(_get_dialect_keywords())})\b"


def get_ingress_operation(__, span: ReadableSpan) -> str:
"""
Expand Down
Loading