Skip to content

Commit 9c2d1c8

Browse files
nordic-krch authored and carlescufi committed
scripts: logging: dictionary: Fix database generation
I noticed that not all strings were put into the database. That is because the algorithm was searching for any null-terminated byte arrays in the section (e.g. rodata) and then attempting to decode each of them to a string. However, a section may contain other static const variables, and if a string is preceded by a variable that has non-printable bytes and no zero at the end, the algorithm was picking a candidate that started with non-printable characters followed by the string. Such a candidate was discarded because it was not a valid string. The algorithm is changed to treat as a string candidate any sequence that consists of printable bytes followed by 0. Signed-off-by: Krzysztof Chruściński <[email protected]>
1 parent 12e8de7 commit 9c2d1c8

File tree

1 file changed

+49
-76
lines changed

1 file changed

+49
-76
lines changed

scripts/logging/dictionary/database_gen.py

Lines changed: 49 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -378,97 +378,70 @@ def extract_string_variables(elf):
378378

379379
return strings
380380

381-
382381
def try_decode_string(str_maybe):
383382
"""Check if it is a printable string"""
384383
for encoding in STR_ENCODINGS:
385384
try:
386-
decoded_str = str_maybe.decode(encoding)
387-
388-
# Check if string is printable according to Python
389-
# since the parser (written in Python) will need to
390-
# print the string.
391-
#
392-
# Note that '\r' and '\n' are not included in
393-
# string.printable so they need to be checked separately.
394-
printable = True
395-
for one_char in decoded_str:
396-
if (one_char not in string.printable
397-
and one_char not in ACCEPTABLE_ESCAPE_CHARS):
398-
printable = False
399-
break
400-
401-
if printable:
402-
return decoded_str
385+
return str_maybe.decode(encoding)
403386
except UnicodeDecodeError:
404387
pass
405388

406389
return None
407390

391+
def is_printable(b):
392+
# Check if string is printable according to Python
393+
# since the parser (written in Python) will need to
394+
# print the string.
395+
#
396+
# Note that '\r' and '\n' are not included in
397+
# string.printable so they need to be checked separately.
398+
return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)
408399

409400
def extract_strings_in_one_section(section, str_mappings):
410401
"""Extract NULL-terminated strings in one ELF section"""
411-
bindata = section['data']
412-
413-
if len(bindata) < 2:
414-
# Can't have a NULL-terminated string with fewer than 2 bytes.
415-
return str_mappings
416-
402+
data = section['data']
417403
idx = 0
418-
419-
# If first byte is not NULL, it may be a string.
420-
if bindata[0] == 0:
421-
start = None
422-
else:
423-
start = 0
424-
425-
while idx < len(bindata):
426-
if start is None:
427-
if bindata[idx] == 0:
428-
# Skip NULL bytes to find next string
429-
idx += 1
430-
else:
431-
# Beginning of possible string
404+
start = None
405+
for x in data:
406+
if is_printable(chr(x)):
407+
# Printable character, potential part of string
408+
if start is None:
409+
# Beginning of potential string
432410
start = idx
433-
idx += 1
434-
else:
435-
if bindata[idx] != 0:
436-
# Skipping till next NULL byte for possible string
437-
idx += 1
438-
else:
439-
# End of possible string
440-
end = idx
441-
442-
if start != end:
443-
str_maybe = bindata[start:end]
444-
decoded_str = try_decode_string(str_maybe)
445-
446-
# Only store readable string
447-
if decoded_str is not None:
448-
addr = section['start'] + start
449-
450-
if addr not in str_mappings:
451-
str_mappings[addr] = decoded_str
452-
453-
# Decoded string may contain un-printable characters
454-
# (e.g. extended ASC-II characters) or control
455-
# characters (e.g. '\r' or '\n'), so simply print
456-
# the byte string instead.
457-
logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
458-
addr, str_maybe)
459-
460-
# GCC-based toolchain will reuse the NULL character
461-
# for empty strings. There is no way to know which
462-
# one is being reused, so just treat all NULL character
463-
# at the end of legitimate strings as empty strings.
464-
null_addr = section['start'] + end
465-
str_mappings[null_addr] = ''
466-
467-
logger.debug('Found null string via extraction at ' + PTR_FMT,
468-
null_addr)
469-
411+
elif x == 0:
412+
# End of possible string
413+
if start is not None:
414+
# Found potential string
415+
str_maybe = data[start : idx]
416+
decoded_str = try_decode_string(str_maybe)
417+
418+
if decoded_str is not None:
419+
addr = section['start'] + start
420+
421+
if addr not in str_mappings:
422+
str_mappings[addr] = decoded_str
423+
424+
# Decoded string may contain un-printable characters
425+
# (e.g. extended ASC-II characters) or control
426+
# characters (e.g. '\r' or '\n'), so simply print
427+
# the byte string instead.
428+
logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
429+
addr, str_maybe)
430+
431+
# GCC-based toolchain will reuse the NULL character
432+
# for empty strings. There is no way to know which
433+
# one is being reused, so just treat all NULL character
434+
# at the end of legitimate strings as empty strings.
435+
null_addr = section['start'] + idx
436+
str_mappings[null_addr] = ''
437+
438+
logger.debug('Found null string via extraction at ' + PTR_FMT,
439+
null_addr)
470440
start = None
471-
idx += 1
441+
else:
442+
# Non-printable byte, remove start location
443+
start = None
444+
idx += 1
472445

473446
return str_mappings
474447

0 commit comments

Comments
 (0)