Skip to content

Commit 6861e94

Browse files
authored
Merge pull request #3 from jsburckhardt/dasithw/otel-collector
Dasithw/otel collector
2 parents f1e72bf + b572bd9 commit 6861e94

8 files changed

+360
-3
lines changed

.env.example

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
APP__ENVIRONMENT=development
2+
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 # OpenTelemetry Collector otlp endpoint (exposed from docker-compose)
3+
4+
OPENAI_API_BASE=
5+
OPENAI_API_KEY=
6+
OPENAI_DEPLOYMENT_ID=

.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ coverage.xml
4242

4343
# Environments
4444
.env
45-
.env.*
4645

4746
# VS Code
4847
.vscode/

Makefile

+5-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: frontend backend both
1+
.PHONY: frontend backend both start-telemetry
22

33
frontend:
44
cd frontend && streamlit run app.py
@@ -8,3 +8,7 @@ backend:
88

99
both:
1010
make -j2 frontend backend
11+
12+
start-telemetry:
13+
-docker compose down
14+
docker compose up -d

app/main.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,37 @@
1+
from contextlib import asynccontextmanager
2+
import os
13
from fastapi.middleware.cors import CORSMiddleware
4+
from services.observability import DEVELOPMENT_MODE, PRODUCTION_MODE, get_logger, initialize_observability, instrument_application
25
from routers.translate import translate_router
36
from routers.feedback import feedback_router
47
from fastapi import FastAPI
58
from dotenv import load_dotenv
69
load_dotenv()
710

11+
# This method controls the lifecycle of the FastAPI app and is used to setup things post process fork
12+
# https://fastapi.tiangolo.com/advanced/events/#use-case
13+
@asynccontextmanager
14+
async def lifespan(app: FastAPI):
15+
app_environment = os.getenv("APP__ENVIRONMENT", "Unspecified")
16+
# do the initialize logic here
817

9-
app = FastAPI()
18+
if app_environment.lower() == "development":
19+
initialize_observability(DEVELOPMENT_MODE, service_name="AI Translator API", environment=app_environment)
20+
else:
21+
initialize_observability(PRODUCTION_MODE, service_name="AI Translator API", environment=app_environment)
22+
23+
logger = get_logger()
24+
logger.info("Starting API server...")
25+
yield
26+
logger.info("Stopping API server...")
27+
28+
29+
app = FastAPI(
30+
title="AI Translate",
31+
description="This is a simple API that translates text from one language to another",
32+
version="0.1.0",
33+
lifespan=lifespan,
34+
)
1035

1136
# middleware for frontend
1237
app.add_middleware(
@@ -25,3 +50,5 @@
2550
@app.get("/")
2651
def read_root():
2752
return {"message": "Welcome to the ai translate!"}
53+
54+
instrument_application(app)

app/services/observability.py

+232
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import logging
2+
import os
3+
import re
4+
from logging import Logger
5+
from typing import Dict, List, Literal
6+
7+
from fastapi import FastAPI
8+
9+
# OpenTelemetry
10+
from opentelemetry import metrics, trace
11+
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
12+
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
13+
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
14+
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
15+
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
16+
from opentelemetry.instrumentation.requests import RequestsInstrumentor
17+
from opentelemetry.instrumentation.openai import OpenAIInstrumentor
18+
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
19+
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
20+
from opentelemetry.sdk.metrics import Meter, MeterProvider
21+
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
22+
from opentelemetry.sdk.resources import Resource
23+
from opentelemetry.sdk.trace import Span, Tracer, TracerProvider
24+
from opentelemetry.sdk.trace.export import BatchSpanProcessor
25+
from opentelemetry.sdk.trace.sampling import ParentBasedTraceIdRatio
26+
27+
SENSITIVE_DATA_SPAN_NAME = "sensitive_data_logged"
28+
SENSITIVE_DATA_INDICATOR_ATTRIBUTE_NAME = "contains_sensitive_data"
29+
30+
DEVELOPMENT_MODE = Literal["DEVELOPMENT"]
31+
PRODUCTION_MODE = Literal["PRODUCTION"]
32+
33+
ACTIVE_SERVICE_NAME = "translator_service"
34+
35+
_has_already_init = False
36+
run_mode: Literal["DEVELOPMENT", "PRODUCTION"] = DEVELOPMENT_MODE
37+
38+
_main_tracer: Tracer = None
39+
_main_logger: Logger = logging.getLogger()
40+
_main_meter: Meter = None
41+
log_level = (os.getenv("OTEL_LOG_LEVEL") or "INFO").upper()
42+
43+
# https://opentelemetry-python-contrib.readthedocs.io/en/latest/instrumentation/logging/logging.html
44+
logging.basicConfig(
45+
level=log_level,
46+
format="%(asctime)s - %(name)s[%(process)d] - %(levelname)s - %(message)s",
47+
)
48+
49+
50+
def ensure_initialized():
51+
if not _has_already_init:
52+
raise Exception(
53+
"Observability module has not been initialized. Please call initialize_observability()."
54+
)
55+
56+
57+
def is_development() -> bool:
58+
return run_mode == DEVELOPMENT_MODE
59+
60+
61+
def get_tracer():
62+
global _main_tracer
63+
ensure_initialized()
64+
return _main_tracer
65+
66+
67+
def get_logger(name: str | None = None) -> logging.Logger:
68+
logger = logging.getLogger(name)
69+
return logger
70+
71+
72+
def get_meter():
73+
global _main_meter
74+
ensure_initialized()
75+
return _main_meter
76+
77+
78+
def initialize_observability(
79+
mode: Literal["DEVELOPMENT", "PRODUCTION"], service_name: str = "ai.translator", environment: str = "Unspecified"
80+
):
81+
"""Initializes the observability once for the lifetime of the application/process"""
82+
global \
83+
_has_already_init, \
84+
run_mode, \
85+
_main_tracer, \
86+
_main_logger, \
87+
_main_meter, \
88+
ACTIVE_SERVICE_NAME
89+
90+
if _has_already_init:
91+
_main_logger.warning("Attempt made to initialize observability more than once")
92+
return
93+
94+
_has_already_init = True
95+
96+
run_mode = mode
97+
_main_logger.info(f"Initializing the observability with mode: {mode}")
98+
# See this for all the config options using environment variables: https://opentelemetry.io/docs/specs/otel/protocol/exporter/
99+
opentelemetry_exporter_otlp_endpoint = os.getenv("OTEL_EXPORTER_OTLP_ENDPOINT")
100+
101+
if opentelemetry_exporter_otlp_endpoint:
102+
_main_logger.info("🚀 Configuring OTLP telemetry")
103+
service_name = os.getenv(
104+
"OTEL_SERVICE_NAME", service_name
105+
) # https://opentelemetry.io/docs/languages/sdk-configuration/general/#otel_service_name
106+
sample_ratio = float(
107+
os.getenv("OTEL_TRACES_SAMPLER_ARG", "1.0")
108+
) # https://opentelemetry-python.readthedocs.io/en/latest/sdk/trace.sampling.html
109+
110+
# setup the instrumentors
111+
resource = Resource.create(
112+
attributes={
113+
"service.name": service_name, # https://opentelemetry.io/docs/specs/semconv/resource/#service
114+
"service.namespace": "ai.translator",
115+
"deployment.environment.name": environment, # https://opentelemetry.io/docs/specs/semconv/resource/deployment-environment/
116+
"process.pid": str(
117+
os.getpid()
118+
), # https://opentelemetry.io/docs/specs/semconv/attributes-registry/process/
119+
}
120+
)
121+
122+
ACTIVE_SERVICE_NAME = service_name
123+
124+
# tracing
125+
trace.set_tracer_provider(
126+
TracerProvider(
127+
resource=resource, sampler=ParentBasedTraceIdRatio(sample_ratio)
128+
)
129+
)
130+
span_processor = BatchSpanProcessor(OTLPSpanExporter())
131+
trace.get_tracer_provider().add_span_processor(span_processor)
132+
_main_tracer = trace.get_tracer_provider().get_tracer(service_name)
133+
134+
# metrics
135+
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
136+
meter_provider = MeterProvider(
137+
resource=resource, metric_readers=[metric_reader]
138+
)
139+
metrics.set_meter_provider(meter_provider)
140+
_main_meter = metrics.get_meter(service_name)
141+
142+
# logging
143+
logger_provider = LoggerProvider(resource=resource)
144+
batch_log_record_processor = BatchLogRecordProcessor(OTLPLogExporter())
145+
logger_provider.add_log_record_processor(batch_log_record_processor)
146+
147+
handler = LoggingHandler(level=log_level, logger_provider=logger_provider)
148+
# Attach OTLP handler to root logger
149+
logging.getLogger().addHandler(handler)
150+
else:
151+
_main_logger.info(
152+
"🚀 OTLP telemetry exporter not configured (set OTEL_EXPORTER_OTLP_ENDPOINT)"
153+
)
154+
_main_tracer = trace.get_tracer("default")
155+
_main_meter = metrics.get_meter("default")
156+
157+
_main_logger = get_logger()
158+
_main_logger.info("Observability initialization complete")
159+
160+
161+
def mark_span_as_sensitive(span: Span):
162+
span.set_attribute(SENSITIVE_DATA_INDICATOR_ATTRIBUTE_NAME, "true")
163+
164+
165+
def add_sensitive_event(span: Span, event: str, attributes: dict[str, str]):
166+
if not attributes:
167+
attributes = {}
168+
169+
attributes[SENSITIVE_DATA_INDICATOR_ATTRIBUTE_NAME] = "true"
170+
span.add_event(name=event, attributes=attributes)
171+
172+
173+
def log_sensitive_data(
174+
message: str,
175+
attributes: str | Dict | int | List = None,
176+
print_to_console: bool = False,
177+
span_name: str | None = None,
178+
) -> None:
179+
if is_development() and print_to_console:
180+
_main_logger.info(f"{message} - attributes={attributes}")
181+
182+
if not span_name:
183+
span_name = SENSITIVE_DATA_SPAN_NAME
184+
185+
with get_tracer().start_as_current_span(span_name) as span:
186+
if not attributes:
187+
attributes = {}
188+
if isinstance(attributes, dict):
189+
span.set_attributes({k: str(v) for k, v in attributes.items()})
190+
if attributes:
191+
span.set_attribute("event.attributes", str(attributes))
192+
193+
span.set_attribute("message", message)
194+
span.set_attribute(SENSITIVE_DATA_INDICATOR_ATTRIBUTE_NAME, "true")
195+
196+
197+
def convert_to_metric_name(input_string: str) -> str:
198+
"""
199+
Converts a string into a metric name compatible with OpenTelemetry.
200+
# https://opentelemetry.io/docs/specs/otel/metrics/api/#instrument-name-syntax
201+
202+
Args:
203+
input_string (str): The input string to be converted.
204+
205+
Returns:
206+
str: The converted metric name.
207+
"""
208+
209+
# Remove leading and trailing whitespace
210+
input_string = input_string.strip()
211+
# Add leading alpha character
212+
if not re.match(r"^[a-zA-Z]", input_string):
213+
input_string = "A" + input_string
214+
# Replace spaces with underscores
215+
input_string = input_string.replace(" ", "_")
216+
# Remove special characters and non-alphanumeric characters
217+
input_string = re.sub(r"[^a-zA-Z0-9_]", "", input_string)
218+
# Limit the length to 100 characters
219+
input_string = input_string[:100]
220+
221+
return input_string
222+
223+
224+
def instrument_application(app: FastAPI):
225+
_main_logger.info("Setting up OpenTelemetry instrumentation...")
226+
RequestsInstrumentor().instrument()
227+
HTTPXClientInstrumentor().instrument()
228+
OpenAIInstrumentor().instrument()
229+
FastAPIInstrumentor.instrument_app(
230+
app,
231+
http_capture_headers_server_request=[".*"]
232+
)

docker-compose.yaml

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
services:
2+
lgtm:
3+
image: grafana/otel-lgtm
4+
container_name: otel-lgtm
5+
ports:
6+
- "3000:3000" # LGTM UI
7+
8+
otel-collector:
9+
image: otel/opentelemetry-collector-contrib:0.112.0
10+
container_name: otel-collector
11+
command: ["--config=/etc/otel-collector-config.yaml"]
12+
volumes:
13+
- ${HOST_PROJECT_PATH}/otel-collector-config.yaml:/etc/otel-collector-config.yaml
14+
ports:
15+
- "13133:13133" # health_check extension
16+
- "4317:40317" # OTLP gRPC receiver
17+
- "4318:40318" # OTLP http receiver
18+
depends_on:
19+
- lgtm

otel-collector-config.yaml

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
receivers:
2+
otlp:
3+
protocols:
4+
grpc:
5+
endpoint: 0.0.0.0:40317
6+
http:
7+
endpoint: 0.0.0.0:40318
8+
9+
exporters:
10+
debug:
11+
verbosity: normal
12+
otlp:
13+
endpoint: "http://lgtm:4317"
14+
tls:
15+
insecure: true
16+
17+
processors:
18+
batch:
19+
transform/redact_special: # This processor will redact any spans and span events with the given regex patterns
20+
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/transformprocessor
21+
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl#grammar
22+
error_mode: ignore
23+
trace_statements:
24+
- context: span # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/contexts/ottlspan
25+
statements:
26+
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/ottlfuncs#replace_all_patterns
27+
# Redact TFN (Tax File Number) from the attributes
28+
- replace_all_patterns(attributes, "value", "\\b\\d{3}\\s?\\d{3}\\s?\\d{3}\\b", "{redacted}")
29+
- context: spanevent # https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/contexts/ottlspanevent
30+
statements:
31+
# https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/ottlfuncs#replace_pattern
32+
# Redact TFN (Tax File Number) from the name
33+
- replace_pattern(name, "\\b\\d{3}\\s?\\d{3}\\s?\\d{3}\\b", "{redacted}")
34+
# Redact TFN (Tax File Number) from the attributes
35+
- replace_all_patterns(attributes, "value", "\\b\\d{3}\\s?\\d{3}\\s?\\d{3}\\b", "{redacted}")
36+
log_statements:
37+
- context: log
38+
statements:
39+
- replace_pattern(body.string, "\\b\\d{3}\\s?\\d{3}\\s?\\d{3}\\b", "{redacted}")
40+
- replace_all_patterns(attributes, "value", "\\b\\d{3}\\s?\\d{3}\\s?\\d{3}\\b", "{redacted}")
41+
42+
extensions:
43+
health_check:
44+
pprof:
45+
zpages:
46+
47+
service:
48+
extensions: [pprof, zpages, health_check]
49+
pipelines:
50+
traces:
51+
receivers: [otlp]
52+
processors: [transform/redact_special, batch]
53+
exporters: [debug, otlp]
54+
metrics:
55+
receivers: [otlp]
56+
processors: [batch]
57+
exporters: [debug, otlp]
58+
logs:
59+
receivers: [otlp]
60+
processors: [transform/redact_special, batch]
61+
exporters: [debug, otlp]

0 commit comments

Comments
 (0)