scripts: logging: dictionary: Fix database generation

nordic-krch · carlescufi · commit 9c2d1c8e9915 · 2023-06-19T15:03:07.000+02:00
I noticed that not all strings were put into the database. That is because
the algorithm was searching for any null-terminated byte arrays in the
section (e.g. rodata) and then attempting to decode them as strings.
However, a section may contain other static const variables, and if a
string is preceded by a variable that has non-printable bytes and no
zero at the end, then the algorithm was picking a candidate that started
with non-printable characters followed by the string. Such a candidate
was discarded because it was not a valid string.

The algorithm is changed to treat as a string candidate any sequence that
contains printable bytes followed by 0.

Signed-off-by: Krzysztof Chruściński <krzysztof.chruscinski@nordicsemi.no>
diff --git a/scripts/logging/dictionary/database_gen.py b/scripts/logging/dictionary/database_gen.py
@@ -378,97 +378,70 @@ def extract_string_variables(elf):
 
     return strings
 
-
 def try_decode_string(str_maybe):
     """Check if it is a printable string"""
     for encoding in STR_ENCODINGS:
         try:
-            decoded_str = str_maybe.decode(encoding)
-
-            # Check if string is printable according to Python
-            # since the parser (written in Python) will need to
-            # print the string.
-            #
-            # Note that '\r' and '\n' are not included in
-            # string.printable so they need to be checked separately.
-            printable = True
-            for one_char in decoded_str:
-                if (one_char not in string.printable
-                    and one_char not in ACCEPTABLE_ESCAPE_CHARS):
-                    printable = False
-                    break
-
-            if printable:
-                return decoded_str
+            return str_maybe.decode(encoding)
         except UnicodeDecodeError:
             pass
 
     return None
 
+def is_printable(b):
+    # Check if string is printable according to Python
+    # since the parser (written in Python) will need to
+    # print the string.
+    #
+    # Note that '\r' and '\n' are not included in
+    # string.printable so they need to be checked separately.
+    return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)
 
 def extract_strings_in_one_section(section, str_mappings):
     """Extract NULL-terminated strings in one ELF section"""
-    bindata = section['data']
-
-    if len(bindata) < 2:
-        # Can't have a NULL-terminated string with fewer than 2 bytes.
-        return str_mappings
-
+    data = section['data']
     idx = 0
-
-    # If first byte is not NULL, it may be a string.
-    if bindata[0] == 0:
-        start = None
-    else:
-        start = 0
-
-    while idx < len(bindata):
-        if start is None:
-            if bindata[idx] == 0:
-                # Skip NULL bytes to find next string
-                idx += 1
-            else:
-                # Beginning of possible string
+    start = None
+    for x in data:
+        if is_printable(chr(x)):
+            # Printable character, potential part of string
+            if start is None:
+                # Beginning of potential string
                 start = idx
-                idx += 1
-        else:
-            if bindata[idx] != 0:
-                # Skipping till next NULL byte for possible string
-                idx += 1
-            else:
-                # End of possible string
-                end = idx
-
-                if start != end:
-                    str_maybe = bindata[start:end]
-                    decoded_str = try_decode_string(str_maybe)
-
-                    # Only store readable string
-                    if decoded_str is not None:
-                        addr = section['start'] + start
-
-                        if addr not in str_mappings:
-                            str_mappings[addr] = decoded_str
-
-                            # Decoded string may contain un-printable characters
-                            # (e.g. extended ASC-II characters) or control
-                            # characters (e.g. '\r' or '\n'), so simply print
-                            # the byte string instead.
-                            logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
-                                         addr, str_maybe)
-
-                            # GCC-based toolchain will reuse the NULL character
-                            # for empty strings. There is no way to know which
-                            # one is being reused, so just treat all NULL character
-                            # at the end of legitimate strings as empty strings.
-                            null_addr = section['start'] + end
-                            str_mappings[null_addr] = ''
-
-                            logger.debug('Found null string via extraction at ' + PTR_FMT,
-                                         null_addr)
-
+        elif x == 0:
+            # End of possible string
+            if start is not None:
+                # Found potential string
+                str_maybe = data[start : idx]
+                decoded_str = try_decode_string(str_maybe)
+
+                if decoded_str is not None:
+                    addr = section['start'] + start
+
+                    if addr not in str_mappings:
+                        str_mappings[addr] = decoded_str
+
+                        # Decoded string may contain un-printable characters
+                        # (e.g. extended ASC-II characters) or control
+                        # characters (e.g. '\r' or '\n'), so simply print
+                        # the byte string instead.
+                        logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
+                                     addr, str_maybe)
+
+                        # GCC-based toolchain will reuse the NULL character
+                        # for empty strings. There is no way to know which
+                        # one is being reused, so just treat all NULL character
+                        # at the end of legitimate strings as empty strings.
+                        null_addr = section['start'] + idx
+                        str_mappings[null_addr] = ''
+
+                        logger.debug('Found null string via extraction at ' + PTR_FMT,
+                                     null_addr)
                 start = None
-                idx += 1
+        else:
+            # Non-printable byte, remove start location
+            start = None
+        idx += 1
 
     return str_mappings