@@ -378,97 +378,70 @@ def extract_string_variables(elf):
378
378
379
379
return strings
380
380
381
-
382
381
def try_decode_string(str_maybe):
    """Decode a byte string with the first encoding that accepts it.

    The encodings listed in STR_ENCODINGS are tried in order. This
    function only decodes; it does not check whether the resulting
    characters are printable.

    Args:
        str_maybe: Object exposing ``decode(encoding)`` (e.g. a slice of
            section data) that may or may not hold text.

    Returns:
        The decoded string from the first encoding that does not raise
        UnicodeDecodeError, or None if every encoding fails.
    """
    for encoding in STR_ENCODINGS:
        try:
            return str_maybe.decode(encoding)
        except UnicodeDecodeError:
            # Not valid in this encoding, try the next one.
            continue

    return None
407
390
391
def is_printable(b):
    """Tell whether a single character can be printed by Python.

    The parser (written in Python) will need to print extracted
    strings, so only characters Python considers printable are
    accepted.

    Note that string.printable already contains the ASCII whitespace
    characters (including '\\r' and '\\n'). ACCEPTABLE_ESCAPE_CHARS is
    consulted in addition, so any further characters listed there are
    accepted as well.

    Args:
        b: A one-character string. Membership in string.printable is a
            substring test, so longer (or empty) strings give results
            that are not meaningful.

    Returns:
        True if the character is in string.printable or in
        ACCEPTABLE_ESCAPE_CHARS, False otherwise.
    """
    return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)
408
399
409
400
def extract_strings_in_one_section(section, str_mappings):
    """Extract NULL-terminated strings in one ELF section

    Scans the section data for runs of printable characters that are
    immediately followed by a NULL byte and records each one, keyed by
    its address, in str_mappings.

    Args:
        section: Mapping with a 'data' entry (iterating it must yield
            integer byte values, and it must support slicing) and a
            'start' entry holding the address of the first byte.
        str_mappings: Dictionary of address -> string, updated in place.

    Returns:
        The same str_mappings object that was passed in.
    """
    data = section['data']

    # Offset of the first byte of the current candidate string, or None
    # when not inside a run of printable characters.
    start = None

    for idx, x in enumerate(data):
        if is_printable(chr(x)):
            # Printable character, potential part of string
            if start is None:
                # Beginning of potential string
                start = idx
            continue

        if x == 0 and start is not None:
            # NULL right after a printable run: found potential string
            str_maybe = data[start:idx]
            decoded_str = try_decode_string(str_maybe)

            if decoded_str is not None:
                addr = section['start'] + start

                # Do not overwrite a string already known at this address
                if addr not in str_mappings:
                    str_mappings[addr] = decoded_str

                    # Decoded string may contain un-printable characters
                    # (e.g. extended ASCII characters) or control
                    # characters (e.g. '\r' or '\n'), so simply print
                    # the byte string instead.
                    logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
                                 addr, str_maybe)

                # GCC-based toolchain will reuse the NULL character
                # for empty strings. There is no way to know which
                # one is being reused, so just treat all NULL character
                # at the end of legitimate strings as empty strings.
                null_addr = section['start'] + idx
                str_mappings[null_addr] = ''

                logger.debug('Found null string via extraction at ' + PTR_FMT,
                             null_addr)

        # Either a NULL terminator or a non-printable byte: any
        # candidate string in progress ends here.
        start = None

    return str_mappings
474
447
0 commit comments