Skip to content

Commit a68e4c8

Browse files
committed
Add initial core code
1 parent ebb7951 commit a68e4c8

File tree

7 files changed

+408
-0
lines changed

7 files changed

+408
-0
lines changed

Diff for: README.md

+27
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,29 @@
11
# python-string-extractor
2+
23
Extracts strings relevant to control flow from Python code
4+
5+
This package extracts strings (including prefixes, suffixes and fragments)
6+
from Python code that seem (potentially) relevant to control flow. Having
7+
a list of strings that are potentially relevant to the execution path can
8+
help with scriptless testing or automated test case generation.
9+
10+
For example, in the following code the strings "foo" and "bar" are relevant to
11+
the control flow, so it might be useful to include them in test input for
12+
the software.
13+
14+
```
15+
if var == "foo":
16+
doSomething()
17+
elif var == "bar":
18+
doSomethingElse()
19+
```
20+
21+
## Usage
22+
23+
The string extractor needs to be provided with an execution trace of Python
24+
software. This trace can be obtained using a trace function, or a modified
25+
version of coverage measurement software.
26+
27+
The string extractor has a cache, which is enabled by default and can
28+
optionally be persisted to a file. This helps with efficiently handling loops
29+
in code, as well as overlapping execution traces.

Diff for: setup.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from setuptools import setup
2+
3+
# Package metadata for the string_extractor distribution. The package has no
# runtime dependencies beyond the standard library.
setup(
    name='string_extractor',
    version='0.0.1',
    description='Extracts strings relevant to control flow from python code',
    author="Sietse Snel",
    author_email="[email protected]",
    packages=['string_extractor'],
    install_requires=[],
)

Diff for: string_extractor/__init__.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env python3
2+
3+
import ast
4+
import linecache
5+
import re
6+
7+
from string_extractor.string_collector import InterestingStringCollector
8+
9+
class StringExtractor:
    """Turns single lines of an execution trace into parsable statements and
    extracts the strings in them that look relevant to control flow."""

    # Upper bound on the offset (in lines) up to which following lines are
    # joined onto the traced line while looking for a complete statement.
    maxMultilineLength = 20

    # Clauses that never carry a condition of their own. These are reserved
    # keywords, so a word boundary is enough to recognise them.
    _ignoredClausePattern = re.compile(r"^(?:try|except|finally|else)\b")
    _elifPattern = re.compile(r"^elif[\(\s]")
    # Compound statements whose header is parsed with a dummy block attached.
    _compoundPattern = re.compile(r"^(?:for|if|while)[\(\s]")

    def _preprocessLine(self, filename, lineNumber):
        """ Retrieves a line of code for parsing, checks whether it can be parsed
        and performs any transformations needed for dealing with control structures
        or multiline statements.

        Result is a 2-tuple, with the first element being a status code, and the second
        element being the preprocessed statement to be parsed (empty unless the
        status is OK).

        Status codes:
        OK     : line can be parsed
        IGNORE : the parser should ignore this line, because the preprocessor
                 doesn't support this particular type of statement (in this
                 context): Jinja templates (*.j2) and try / except / finally /
                 else clauses.
        ERROR  : the preprocessor was unable to transform the line into a
                 parsable form. This could happen for long multiline statements
                 (length more than maxMultilineLength), or for language
                 constructs that aren't recognized by the preprocessor.
        """
        # Templates are not Python code; decide this before touching the file.
        if filename.endswith(".j2"):
            return ("IGNORE", "")

        numberOfLines = self._getNumberOfLines(filename)
        line = linecache.getline(filename, lineNumber).strip()

        # Ignore irrelevant control flow statements
        if self._ignoredClausePattern.match(line):
            return ("IGNORE", "")

        # Normalize elif statement to if statement.
        if self._elifPattern.match(line):
            line = line[2:]

        # Check whether we are dealing with a relevant compound statement
        compoundStatement = self._compoundPattern.match(line) is not None

        statement = ""
        # "+ 1" so that the traced line itself is examined even when it is the
        # last line of the file.
        for offset in range(numberOfLines - lineNumber + 1):
            if offset == 0:
                thisLine = line
            else:
                thisLine = linecache.getline(filename, lineNumber + offset).strip()

            statement = " ".join([statement, thisLine]).strip()

            if statement.endswith("\\"):
                # Explicit line continuation. Keep iterating over lines,
                # even if maximum statement length has been exceeded.
                statement = statement[:-1]
                continue

            if compoundStatement and statement.endswith(":"):
                # It seems we have a complete control structure statement.
                # Add a dummy block.
                statementWithDummyBlock = statement + "\n pass"
                if self._isParsable(statementWithDummyBlock):
                    return ("OK", statementWithDummyBlock)
            elif self._isParsable(statement):
                # Either a complete simple statement, or a compound statement
                # that carries its body on the same line.
                return ("OK", statement)

            if offset >= self.maxMultilineLength:
                break

        return ("ERROR", "")

    def _isParsable(self, statement):
        """Determines whether a statement is parsable."""
        try:
            ast.parse(statement)
        except (SyntaxError, ValueError):
            # ValueError: older interpreters reject null bytes this way.
            return False
        return True

    def _getNumberOfLines(self, filename):
        """Returns the number of lines in the file (0 for an empty file)."""
        with open(filename, 'r') as file:
            return sum(1 for _ in file)

    def getInterestingStrings(self, statement):
        """Parses a (preprocessed) statement and returns the strings that seem
        relevant to control flow, as a list of (kind, string) tuples produced
        by InterestingStringCollector.getCollectedStrings().

        Raises SyntaxError if the statement cannot be parsed.
        """
        tree = ast.parse(statement)
        collector = InterestingStringCollector()
        collector.visit(tree)
        return collector.getCollectedStrings()
104+

Diff for: string_extractor/string_collector.py

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
""" Internal class for extracting relevant strings from
2+
an ast parse tree, as a visitor."""
3+
4+
import ast
5+
6+
class InterestingStringCollector(ast.NodeVisitor):
    """AST visitor that gathers string literals which appear to influence
    control flow.

    Collected categories:
    FULL     : literal compared with == / !=, or member of a literal
               list / tuple / set tested with in / not in
    FRAGMENT : literal on the left of in / not in, or argument of
               .find() / .index()
    PREFIX   : argument of .startswith()
    SUFFIX   : argument of .endswith()
    """

    def __init__(self):
        self.suffixes = set()
        self.prefixes = set()
        self.fragments = set()
        self.fullStrings = set()

    def getCollectedStrings(self):
        """Returns the collected strings as a list of (kind, string) tuples,
        grouped by kind (SUFFIX, PREFIX, FRAGMENT, FULL) and sorted within
        each kind so that the result is deterministic."""
        return ([("SUFFIX", s) for s in sorted(self.suffixes)] +
                [("PREFIX", s) for s in sorted(self.prefixes)] +
                [("FRAGMENT", s) for s in sorted(self.fragments)] +
                [("FULL", s) for s in sorted(self.fullStrings)])

    @staticmethod
    def _stringValue(node):
        """Returns the text of a string literal node, or None if the node is
        not a string literal."""
        if isinstance(node, ast.Constant):
            return node.value if isinstance(node.value, str) else None
        # Interpreters older than 3.8 parse string literals as ast.Str nodes.
        # Compare by name to avoid touching the deprecated ast.Str attribute.
        if type(node).__name__ == "Str":
            return node.s
        return None

    def visit_Compare(self, node):
        """Collects literals from ==, !=, in and not in comparisons. Every
        link of a chained comparison (a == b == c) is examined."""
        operands = [node.left] + list(node.comparators)
        for operator, left, right in zip(node.ops, operands, operands[1:]):
            leftString = self._stringValue(left)
            rightString = self._stringValue(right)

            if isinstance(operator, (ast.Eq, ast.NotEq)):
                # Only interesting when exactly one side is a literal;
                # comparing two literals does not depend on any input.
                if leftString is not None and rightString is None:
                    self.fullStrings.add(leftString)
                elif leftString is None and rightString is not None:
                    self.fullStrings.add(rightString)
            elif isinstance(operator, (ast.In, ast.NotIn)):
                if leftString is not None:
                    self.fragments.add(leftString)
                elif isinstance(right, (ast.List, ast.Tuple, ast.Set)):
                    for element in right.elts:
                        elementString = self._stringValue(element)
                        if elementString is not None:
                            self.fullStrings.add(elementString)

        # Keep descending: operands may contain further comparisons or calls.
        self.generic_visit(node)

    def visit_Call(self, node):
        """Collects the literal first argument of .startswith(), .endswith(),
        .index() and .find() method calls."""
        # Plain function calls (ast.Name) have no method name to inspect.
        methodName = node.func.attr if isinstance(node.func, ast.Attribute) else None
        argument = self._stringValue(node.args[0]) if node.args else None

        if argument is not None:
            if methodName == "startswith":
                self.prefixes.add(argument)
            elif methodName == "endswith":
                self.suffixes.add(argument)
            elif methodName in ("index", "find"):
                self.fragments.add(argument)

        # Keep descending: arguments and the callee may contain further
        # comparisons or calls.
        self.generic_visit(node)

Diff for: tests/stringprocessor-testdata.py

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env python3
2+
3+
4+
# Line 5: single-line statement
5+
print("Hello")
6+
7+
# Line 8: two line statement with implicit continuation
8+
print(
9+
"Hello")
10+
11+
# Line 12: three line statement with implicit continuation
12+
print(
13+
"Hello"
14+
)
15+
16+
# Line 17: two line statement with explicit continuation
17+
print\
18+
("Hello")
19+
20+
# Line 21: three line statement with explicit continuation
21+
print\
22+
(\
23+
"Hello")
24+
25+
# Line 26: if statement / line 28 elif statement / line 30 else statement
26+
if foo == "bar":
27+
print("Hello")
28+
elif foo == "baz":
29+
print("Bye")
30+
else:
31+
print("Hello")
32+
33+
# Line 34: while loop
34+
while foo == "bat":
35+
print("Foo is bat")
36+
37+
# Line 38: for loop
38+
for a in b:
39+
print("Foo bar")
40+
41+
# Line 42,44,46: try / except / finally
42+
try:
43+
print("Foo")
44+
except Exception as e:
45+
print("Bar")
46+
finally:
47+
print("Baz")

Diff for: tests/testfile_jinja.j2

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
foo

0 commit comments

Comments
 (0)