forked from open-telemetry/opentelemetry-python-contrib
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
250 lines (202 loc) · 7.35 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import annotations
from os import environ
from re import IGNORECASE as RE_IGNORECASE
from re import compile as re_compile
from re import search
from typing import Callable, Iterable, Optional
from urllib.parse import urlparse, urlunparse
from opentelemetry.semconv.trace import SpanAttributes
# Names of the environment variables understood by the HTTP utilities.
# Each constant simply holds its own name as a string.

# NOTE(review): presumably lists header-name patterns whose values must be
# redacted -- confirm against the instrumentations that read it.
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS = (
    "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS"
)

# NOTE(review): presumably lists request headers to capture on server
# spans -- confirm against the instrumentations that read it.
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST = (
    "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST"
)

# NOTE(review): presumably lists response headers to capture on server
# spans -- confirm against the instrumentations that read it.
OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE = (
    "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE"
)

# When this variable is set to any non-empty value, ``sanitize_method``
# returns every HTTP method name unchanged instead of mapping unknown ones
# to "_OTHER".
OTEL_PYTHON_INSTRUMENTATION_HTTP_CAPTURE_ALL_METHODS = (
    "OTEL_PYTHON_INSTRUMENTATION_HTTP_CAPTURE_ALL_METHODS"
)
# Recommended metric attributes.
#
# Attribute keys kept for the active-requests metric.
_active_requests_count_attrs = {
    SpanAttributes.HTTP_METHOD,
    SpanAttributes.HTTP_HOST,
    SpanAttributes.HTTP_SCHEME,
    SpanAttributes.HTTP_FLAVOR,
    SpanAttributes.HTTP_SERVER_NAME,
}

# Attribute keys kept for the duration metric: everything the
# active-requests metric keeps, plus the status code and the local
# host name / port.
_duration_attrs = _active_requests_count_attrs | {
    SpanAttributes.HTTP_STATUS_CODE,
    SpanAttributes.NET_HOST_NAME,
    SpanAttributes.NET_HOST_PORT,
}
class ExcludeList:
    """Exclude certain paths (given as a list of regexes) from tracing.

    The patterns are combined into a single alternation and matched with
    ``search`` semantics, i.e. a pattern may match anywhere in the URL
    unless it is anchored explicitly.
    """

    def __init__(self, excluded_urls: Iterable[str]):
        """Compile ``excluded_urls`` into one regular expression.

        Args:
            excluded_urls: Regex patterns; a URL matching any of them is
                considered excluded. ``None`` or an empty iterable
                excludes nothing.
        """
        # Materialise the iterable so generators can be passed, and drop
        # blank patterns: an empty alternative (e.g. from a trailing comma
        # in a comma-separated setting) would match *every* URL.
        self._excluded_urls = [
            pattern for pattern in (excluded_urls or ()) if pattern
        ]
        self._regex = (
            re_compile("|".join(self._excluded_urls))
            if self._excluded_urls
            else None
        )

    def url_disabled(self, url: str) -> bool:
        """Return ``True`` when ``url`` matches any exclusion pattern."""
        return self._regex is not None and self._regex.search(url) is not None
class SanitizeValue:
    """Sanitize (remove sensitive data from) certain headers.

    Headers whose name matches any of the configured regexes (matched
    case-insensitively with ``search`` semantics) have their value replaced
    by ``"[REDACTED]"``.
    """

    def __init__(self, sanitized_fields: Iterable[str]):
        """Compile ``sanitized_fields`` into one case-insensitive regex.

        Args:
            sanitized_fields: Regex patterns for header names whose values
                must be redacted. ``None`` or an empty iterable redacts
                nothing.
        """
        # Materialise the iterable so generators can be passed, and drop
        # blank patterns: an empty alternative would match every header
        # name and redact everything.
        self._sanitized_fields = [
            field for field in (sanitized_fields or ()) if field
        ]
        self._regex = (
            re_compile("|".join(self._sanitized_fields), RE_IGNORECASE)
            if self._sanitized_fields
            else None
        )

    def sanitize_header_value(self, header: str, value: str) -> str:
        """Return ``"[REDACTED]"`` if ``header`` is sensitive, else ``value``."""
        if self._regex is not None and self._regex.search(header):
            return "[REDACTED]"
        return value

    def sanitize_header_values(
        self,
        headers: dict[str, str],
        header_regexes: list[str],
        normalize_function: Callable[[str], str],
    ) -> dict[str, list[str]]:
        """Collect the selected headers, sanitized, under normalized keys.

        Args:
            headers: Mapping of header name to header value.
            header_regexes: Patterns selecting which headers to collect.
                Each pattern is anchored with ``^``/``$`` and matched
                case-insensitively against the header name.
            normalize_function: Maps a lower-cased header name to the key
                used in the result.

        Returns:
            A mapping from normalized key to a one-element list holding the
            (possibly redacted) header value. Headers with a falsy value
            are skipped. Empty when ``header_regexes`` is empty.
        """
        values: dict[str, list[str]] = {}
        if not header_regexes:
            return values

        header_regexes_compiled = re_compile(
            "|".join("^" + i + "$" for i in header_regexes),
            RE_IGNORECASE,
        )
        for header_name in filter(
            header_regexes_compiled.match, headers.keys()
        ):
            header_value = headers.get(header_name)
            if not header_value:
                continue
            key = normalize_function(header_name.lower())
            values[key] = [
                self.sanitize_header_value(
                    header=header_name, value=header_value
                )
            ]
        return values
# Template for Python-specific OpenTelemetry environment variable names.
_root = r"OTEL_PYTHON_{}"


def get_traced_request_attrs(instrumentation: str) -> list[str]:
    """Return the request attributes configured for ``instrumentation``.

    Reads the comma-separated ``OTEL_PYTHON_<instrumentation>_TRACED_REQUEST_ATTRS``
    environment variable.

    Args:
        instrumentation: Name inserted into the environment variable name.

    Returns:
        The attribute names with surrounding whitespace stripped and blank
        entries dropped. Always a list; empty when the variable is unset or
        contains no names.
    """
    raw_value = environ.get(
        _root.format(f"{instrumentation}_TRACED_REQUEST_ATTRS"), ""
    )
    stripped = (entry.strip() for entry in raw_value.split(","))
    return [entry for entry in stripped if entry]
def get_excluded_urls(instrumentation: str) -> ExcludeList:
    """Build the ExcludeList configured for ``instrumentation``.

    The instrumentation-specific ``OTEL_PYTHON_<instrumentation>_EXCLUDED_URLS``
    variable wins whenever it is set; otherwise the generic
    ``OTEL_PYTHON_EXCLUDED_URLS`` variable is used, and an empty string if
    neither is set.
    """
    generic_value = environ.get(_root.format("EXCLUDED_URLS"), "")
    specific_name = _root.format(f"{instrumentation}_EXCLUDED_URLS")
    return parse_excluded_urls(environ.get(specific_name, generic_value))
def parse_excluded_urls(excluded_urls: str) -> ExcludeList:
    """Wrap a comma-separated string of URL patterns in an ExcludeList.

    Each entry has its surrounding whitespace stripped. A falsy input
    yields an ExcludeList built from an empty list.
    """
    if not excluded_urls:
        return ExcludeList([])
    return ExcludeList([entry.strip() for entry in excluded_urls.split(",")])
def remove_url_credentials(url: str) -> str:
    """Strip the ``user:password@`` part from ``url`` if it is a valid URL.

    Args:
        url: The URL string to clean.

    Returns:
        The URL rebuilt without credentials when it has both a scheme and a
        network location; otherwise ``url`` unchanged (this includes input
        that ``urlparse`` rejects with ``ValueError``).
    """
    try:
        parsed = urlparse(url)
    except ValueError:  # an unparsable url was passed
        return url

    # Only absolute URLs (scheme and netloc present) can carry credentials.
    if not (parsed.scheme and parsed.netloc):
        return url

    # Everything up to the last "@" in the netloc is userinfo; keep the rest.
    _, _, host = parsed.netloc.rpartition("@")
    return urlunparse(parsed._replace(netloc=host))
def normalise_request_header_name(header: str) -> str:
    """Return the span attribute key for a request header.

    The header name is lower-cased and dashes become underscores, then the
    result is prefixed with ``http.request.header.``.
    """
    normalised = header.replace("-", "_").lower()
    return "http.request.header." + normalised
def normalise_response_header_name(header: str) -> str:
    """Return the span attribute key for a response header.

    The header name is lower-cased and dashes become underscores, then the
    result is prefixed with ``http.response.header.``.
    """
    normalised = header.replace("-", "_").lower()
    return "http.response.header." + normalised
def sanitize_method(method: Optional[str]) -> Optional[str]:
    """Normalise an HTTP method name.

    Returns ``None`` for ``None``. Otherwise the method is upper-cased and
    returned as-is when it is one of the well-known methods, or when the
    ``OTEL_PYTHON_INSTRUMENTATION_HTTP_CAPTURE_ALL_METHODS`` environment
    variable is set to a non-empty value. Any other method becomes
    ``"_OTHER"``.
    """
    if method is None:
        return None

    upper_method = method.upper()
    capture_all = environ.get(
        OTEL_PYTHON_INSTRUMENTATION_HTTP_CAPTURE_ALL_METHODS
    )
    # Based on https://www.rfc-editor.org/rfc/rfc7231#section-4.1 and https://www.rfc-editor.org/rfc/rfc5789#section-2.
    known_methods = (
        "GET",
        "HEAD",
        "POST",
        "PUT",
        "DELETE",
        "CONNECT",
        "OPTIONS",
        "TRACE",
        "PATCH",
    )
    if capture_all or upper_method in known_methods:
        return upper_method
    return "_OTHER"
def get_custom_headers(env_var: str) -> list[str]:
    """Read a comma-separated list of header names/patterns from ``env_var``.

    Args:
        env_var: Name of the environment variable to read.

    Returns:
        The entries with surrounding whitespace stripped. Blank entries
        (e.g. from a trailing or doubled comma) are dropped so they cannot
        end up as empty regex alternatives downstream. Empty list when the
        variable is unset or empty.
    """
    raw_value = environ.get(env_var, "")
    stripped = (entry.strip() for entry in raw_value.split(","))
    return [entry for entry in stripped if entry]
def _parse_active_request_count_attrs(req_attrs):
    """Return the entries of ``req_attrs`` kept for the active-requests metric.

    Only keys that are also present in ``_active_requests_count_attrs`` are
    copied into the returned dict.
    """
    matching = _active_requests_count_attrs.intersection(req_attrs.keys())
    return {name: req_attrs[name] for name in matching}
def _parse_duration_attrs(req_attrs):
    """Return the entries of ``req_attrs`` kept for the duration metric.

    Only keys that are also present in ``_duration_attrs`` are copied into
    the returned dict.
    """
    matching = _duration_attrs.intersection(req_attrs.keys())
    return {name: req_attrs[name] for name in matching}
def _parse_url_query(url: str):
parsed_url = urlparse(url)
path = parsed_url.path
query_params = parsed_url.query
return path, query_params