Skip to content

Commit 1131541

Browse files
authored
feat: support extra parameter in Logger messages (#257)
* improv: refactor formatter to ease changes * feat: support for extra keys in log messages * docs: add extra param, temp toc fix
1 parent 157a7bc commit 1131541

File tree

3 files changed

+211
-44
lines changed

3 files changed

+211
-44
lines changed
+108-35
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
11
import json
22
import logging
33
import os
4+
from typing import Dict, Iterable, Optional, Union
5+
6+
STD_LOGGING_KEYS = (
7+
"name",
8+
"msg",
9+
"args",
10+
"levelname",
11+
"levelno",
12+
"pathname",
13+
"filename",
14+
"module",
15+
"exc_info",
16+
"exc_text",
17+
"stack_info",
18+
"lineno",
19+
"funcName",
20+
"created",
21+
"msecs",
22+
"relativeCreated",
23+
"thread",
24+
"threadName",
25+
"processName",
26+
"process",
27+
"asctime",
28+
)
429

530

631
class JsonFormatter(logging.Formatter):
@@ -30,12 +55,12 @@ def __init__(self, **kwargs):
3055
# Set the default unserializable function, by default values will be cast as str.
3156
self.default_json_formatter = kwargs.pop("json_default", str)
3257
# Set the insertion order for the log messages
33-
self.format_dict = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
58+
self.log_format = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
3459
self.reserved_keys = ["timestamp", "level", "location"]
3560
# Set the date format used by `asctime`
3661
super(JsonFormatter, self).__init__(datefmt=kwargs.pop("datefmt", None))
3762

38-
self.format_dict.update(self._build_root_keys(**kwargs))
63+
self.log_format.update(self._build_root_keys(**kwargs))
3964

4065
@staticmethod
4166
def _build_root_keys(**kwargs):
@@ -52,47 +77,95 @@ def _get_latest_trace_id():
5277
return xray_trace_id.split(";")[0].replace("Root=", "") if xray_trace_id else None
5378

5479
def update_formatter(self, **kwargs):
55-
self.format_dict.update(kwargs)
80+
self.log_format.update(kwargs)
5681

57-
def format(self, record): # noqa: A003
58-
record_dict = record.__dict__.copy()
59-
record_dict["asctime"] = self.formatTime(record, self.datefmt)
82+
@staticmethod
83+
def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]:
84+
"""Extract message from log record and attempt to JSON decode it
85+
86+
Parameters
87+
----------
88+
log_record : logging.LogRecord
89+
Log record to extract message from
90+
91+
Returns
92+
-------
93+
message: Union[Dict, str, bool, Iterable]
94+
Extracted message
95+
"""
96+
if isinstance(log_record.msg, dict):
97+
return log_record.msg
6098

61-
log_dict = {}
99+
message: str = log_record.getMessage()
62100

63-
for key, value in self.format_dict.items():
64-
if value and key in self.reserved_keys:
65-
# converts default logging expr to its record value
66-
# e.g. '%(asctime)s' to '2020-04-24 09:35:40,698'
67-
log_dict[key] = value % record_dict
68-
else:
69-
log_dict[key] = value
101+
# Attempt to decode non-str messages e.g. msg = '{"x": "y"}'
102+
try:
103+
message = json.loads(log_record.msg)
104+
except (json.decoder.JSONDecodeError, TypeError, ValueError):
105+
pass
106+
107+
return message
108+
109+
def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str]:
110+
"""Format traceback information, if available
111+
112+
Parameters
113+
----------
114+
log_record : logging.LogRecord
115+
Log record to extract message from
116+
117+
Returns
118+
-------
119+
log_record: Optional[str]
120+
Log record with constant traceback info
121+
"""
122+
if log_record.exc_info:
123+
return self.formatException(log_record.exc_info)
124+
125+
return None
70126

71-
if isinstance(record_dict["msg"], dict):
72-
log_dict["message"] = record_dict["msg"]
73-
else:
74-
log_dict["message"] = record.getMessage()
127+
def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict:
128+
"""Extract and parse custom and reserved log keys
75129
76-
# Attempt to decode the message as JSON, if so, merge it with the
77-
# overall message for clarity.
78-
try:
79-
log_dict["message"] = json.loads(log_dict["message"])
80-
except (json.decoder.JSONDecodeError, TypeError, ValueError):
81-
pass
130+
Parameters
131+
----------
132+
log_record : logging.LogRecord
133+
Log record to extract keys from
82134
83-
if record.exc_info and not record.exc_text:
84-
# Cache the traceback text to avoid converting it multiple times
85-
# (it's constant anyway)
86-
# from logging.Formatter:format
87-
record.exc_text = self.formatException(record.exc_info)
135+
Returns
136+
-------
137+
formatted_log: Dict
138+
Structured log as dictionary
139+
"""
140+
record_dict = log_record.__dict__.copy() # has extra kwargs we are after
141+
record_dict["asctime"] = self.formatTime(log_record, self.datefmt)
88142

89-
if record.exc_text:
90-
log_dict["exception"] = record.exc_text
143+
formatted_log = {}
91144

92-
# fetch latest X-Ray Trace ID, if any
93-
log_dict.update({"xray_trace_id": self._get_latest_trace_id()})
145+
# We have to iterate over a default or existing log structure
146+
# then replace any logging expression for reserved keys e.g. '%(level)s' to 'INFO'
147+
# and lastly add or replace incoming keys (those added within the constructor or .structure_logs method)
148+
for key, value in self.log_format.items():
149+
if value and key in self.reserved_keys:
150+
formatted_log[key] = value % record_dict
151+
else:
152+
formatted_log[key] = value
153+
154+
# pick up extra keys when logging a new message e.g. log.info("my message", extra={"additional_key": "value"})
155+
# these messages will be added to the root of the final structure not within `message` key
156+
for key, value in record_dict.items():
157+
if key not in STD_LOGGING_KEYS:
158+
formatted_log[key] = value
159+
160+
return formatted_log
161+
162+
def format(self, record): # noqa: A003
163+
formatted_log = self._extract_log_keys(log_record=record)
164+
formatted_log["message"] = self._extract_log_message(log_record=record)
165+
formatted_log["exception"] = self._extract_log_exception(log_record=record)
166+
formatted_log.update({"xray_trace_id": self._get_latest_trace_id()}) # fetch latest Trace ID, if any
94167

95168
# Filter out top level key with values that are None
96-
log_dict = {k: v for k, v in log_dict.items() if v is not None}
169+
formatted_log = {k: v for k, v in formatted_log.items() if v is not None}
97170

98-
return json.dumps(log_dict, default=self.default_json_formatter)
171+
return json.dumps(formatted_log, default=self.default_json_formatter)

docs/content/core/logger.mdx

+84-9
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,13 @@ def handler(event, context):
150150

151151
## Appending additional keys
152152

153+
You can append additional keys using either mechanism:
154+
155+
* Persist new keys across all future log messages via `structure_logs`
156+
* Add additional keys on a per log message basis via `extra` parameter
157+
158+
### structure_logs
159+
153160
You can append your own keys to your existing Logger via `structure_logs` with **append** param.
154161

155162
```python:title=collect.py
@@ -179,18 +186,41 @@ This example will add `order_id` if its value is not empty, and in subsequent in
179186
"level": "INFO",
180187
"location": "collect.handler:1",
181188
"service": "payment",
182-
"lambda_function_name": "test",
183-
"lambda_function_memory_size": 128,
184-
"lambda_function_arn": "arn:aws:lambda:eu-west-1:12345678910:function:test",
185-
"lambda_request_id": "52fdfc07-2182-154f-163f-5f0f9a621d72",
186-
"cold_start": true,
187189
"sampling_rate": 0.0,
188190
"order_id": "order_id_value", // highlight-line
189191
"message": "Collecting payment"
190192
}
191193
```
192194
</details>
193195

196+
### extra parameter
197+
198+
Extra parameter is available for all log levels, as implemented in the standard logging library. It accepts any dictionary, and it'll be added as part of the root structure of the logs.
199+
200+
```python:title=extra_parameter.py
201+
logger = Logger(service="payment")
202+
203+
fields = { "request_id": "1123" }
204+
205+
logger.info("Hello", extra=fields) # highlight-line
206+
```
207+
208+
<details>
209+
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>
210+
211+
```json:title=cloudwatch_logs.json
212+
{
213+
"timestamp": "2021-01-12 14:08:12,357",
214+
"level": "INFO",
215+
"location": "collect.handler:1",
216+
"service": "payment",
217+
"sampling_rate": 0.0,
218+
"request_id": "1123", // highlight-line
219+
    "message": "Hello"
220+
}
221+
```
222+
</details>
223+
194224
## Reusing Logger across your code
195225

196226
Logger supports inheritance via `child` parameter. This allows you to create multiple Loggers across your code base, and propagate changes such as new keys to all Loggers.
@@ -291,13 +321,13 @@ def handler(event, context):
291321

292322
If you're migrating from other Loggers, there are few key points to be aware of: **Service parameter**, **Inheriting Loggers**, **Overriding Log records**, and **Logging exceptions**.
293323

294-
### The service parameter
324+
### service parameter
295325

296326
Service is what defines what the function is responsible for, or part of (e.g payment service), and the name of the Logger.
297327

298328
For Logger, the `service` is the logging key customers can use to search log operations for one or more functions - For example, **search for all errors, or messages like X, where service is payment**.
299329

300-
### Inheriting Loggers
330+
### inheriting Loggers
301331

302332
> Python Logging hierarchy happens via the dot notation: `service`, `service.child`, `service.child_2`.
303333
@@ -325,7 +355,7 @@ In this case, Logger will register a Logger named `payment`, and a Logger named
325355

326356
This can be fixed by either ensuring both has the `service` value as `payment`, or simply use the environment variable `POWERTOOLS_SERVICE_NAME` to ensure service value will be the same across all Loggers when not explicitly set.
327357

328-
### Overriding Log records
358+
### overriding Log records
329359

330360
You might want to continue to use the same date formatting style, or override `location` to display the `package.function_name:line_number` as you previously had.
331361

@@ -355,7 +385,7 @@ logger = Logger(stream=stdout, log_record_order=["level","location","message","t
355385

356386
Some keys cannot be suppressed in the Log records: `sampling_rate` is part of the specification and cannot be suppressed; `xray_trace_id` is suppressed automatically if X-Ray is not enabled in the Lambda function, and added automatically if it is.
357387

358-
### Logging exceptions
388+
### logging exceptions
359389

360390
When logging exceptions, Logger will add a new key named `exception`, and will serialize the full traceback as a string.
361391

@@ -434,3 +464,48 @@ def handler(event: Dict, context: LambdaContext) -> List:
434464

435465
return response.get("Buckets", [])
436466
```
467+
468+
**What's the difference between `structure_logs` and `extra`?**
469+
470+
Keys added with `structure_logs` will persist across multiple log messages while keys added via `extra` will only be available in a given log message operation.
471+
472+
**Example - Persisting payment_id not request_id**
473+
474+
```python
475+
from aws_lambda_powertools import Logger
476+
477+
logger = Logger(service="payment")
478+
logger.structure_logs(append=True, payment_id="123456789")
479+
480+
try:
481+
booking_id = book_flight()
482+
logger.info("Flight booked successfully", extra={ "booking_id": booking_id})
483+
except BookingReservationError:
484+
...
485+
486+
logger.info("goodbye")
487+
```
488+
489+
Note that `payment_id` remains in both log messages while `booking_id` is only available in the first message.
490+
491+
```json
492+
{
493+
"level": "INFO",
494+
"location": "<module>:5",
495+
"message": "Flight booked successfully",
496+
"timestamp": "2021-01-12 14:09:10,859",
497+
"service": "payment",
498+
"sampling_rate": 0.0,
499+
"payment_id": "123456789",
500+
"booking_id": "75edbad0-0857-4fc9-b547-6180e2f7959b"
501+
},
502+
{
503+
"level": "INFO",
504+
"location": "<module>:6",
505+
"message": "goodbye",
506+
"timestamp": "2021-01-12 14:09:10,860",
507+
"service": "payment",
508+
"sampling_rate": 0.0,
509+
"payment_id": "123456789"
510+
}
511+
```

tests/functional/test_logger.py

+19
Original file line numberDiff line numberDiff line change
@@ -381,3 +381,22 @@ def test_logger_do_not_log_twice_when_root_logger_is_setup(stdout, service_name)
381381
# since child's log records propagated to root logger should be rejected
382382
logs = list(stdout.getvalue().strip().split("\n"))
383383
assert len(logs) == 2
384+
385+
386+
def test_logger_extra_kwargs(stdout, service_name):
387+
# GIVEN Logger is initialized
388+
logger = Logger(service=service_name, stream=stdout)
389+
390+
# WHEN `request_id` is an extra field in a log message to the existing structured log
391+
fields = {"request_id": "blah"}
392+
393+
logger.info("with extra fields", extra=fields)
394+
logger.info("without extra fields")
395+
396+
extra_fields_log, no_extra_fields_log = capture_multiple_logging_statements_output(stdout)
397+
398+
# THEN first log should have request_id field in the root structure
399+
assert "request_id" in extra_fields_log
400+
401+
# THEN second log should not have request_id in the root structure
402+
assert "request_id" not in no_extra_fields_log

0 commit comments

Comments
 (0)