diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index 93280c6732ad..12bcd5af8e64 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -214,3 +214,4 @@ Python built-in support libtaxii, TAXII utility library libxml2, XML processing library lxml, XML processing library + xmltodict, XML processing library diff --git a/python/PoCs/README.md b/python/PoCs/README.md new file mode 100644 index 000000000000..20eeb5dbd78d --- /dev/null +++ b/python/PoCs/README.md @@ -0,0 +1 @@ +A place to collect proof of concept for how certain vulnerabilities work. diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/PoCs/XmlParsing/PoC.py similarity index 93% rename from python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py rename to python/PoCs/XmlParsing/PoC.py index adcace1aa0a6..a4de65084aed 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py +++ b/python/PoCs/XmlParsing/PoC.py @@ -70,6 +70,14 @@ bar """ +exfiltrate_through_dtd_retrieval = f""" + %xxe; ]> +""" + +predefined_entity_xml = """ +< +""" + # ============================================================================== # other setup @@ -95,6 +103,22 @@ def test_xxe(): hit_xxe = True return "ok" +@app.route("/exfiltrate-through.dtd") +def exfiltrate_through_dtd(): + return f""" +"> +%eval; +%exfiltrate; + """ + +exfiltrated_data = None +@app.route("/exfiltrate-data") +def exfiltrate_data(): + from flask import request + global exfiltrated_data + exfiltrated_data = request.args["data"] + return "ok" + def run_app(): app.run(host=HOST, port=PORT) @@ -346,7 +370,7 @@ def test_local_xxe_enabled_by_default(): parser = lxml.etree.XMLParser() root = lxml.etree.fromstring(local_xxe, parser=parser) assert root.tag == "test" - assert root.text == "SECRET_FLAG\n", root.text + assert root.text == "SECRET_FLAG", root.text @staticmethod def test_local_xxe_disabled(): @@ -361,11 +385,7 @@ def test_remote_xxe_disabled_by_default(): hit_xxe = False parser = lxml.etree.XMLParser() - try: - root = lxml.etree.fromstring(remote_xxe, parser=parser) - assert False - except lxml.etree.XMLSyntaxError as e: - assert "Failure to process entity remote_xxe" in str(e) + root = lxml.etree.fromstring(remote_xxe, parser=parser) assert hit_xxe == False @staticmethod @@ -416,6 +436,23 @@ def test_dtd_manually_enabled(): pass assert hit_dtd == False + @staticmethod + def test_exfiltrate_through_dtd(): + # note that this only works when the data to exfiltrate does not contain a newline :| + global exfiltrated_data + exfiltrated_data = None + parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) + with pytest.raises(lxml.etree.XMLSyntaxError): + lxml.etree.fromstring(exfiltrate_through_dtd_retrieval, parser=parser) + + assert exfiltrated_data == "SECRET_FLAG" + + @staticmethod + def test_predefined_entity(): + parser = lxml.etree.XMLParser(resolve_entities=False) + root = lxml.etree.fromstring(predefined_entity_xml, parser=parser) + assert root.tag == "test" + assert root.text == "<" # ============================================================================== diff --git a/python/PoCs/XmlParsing/flag b/python/PoCs/XmlParsing/flag new file mode 100644 index 000000000000..b8bd68387749 --- /dev/null +++ b/python/PoCs/XmlParsing/flag @@ -0,0 +1 @@ +SECRET_FLAG \ No newline at end of file diff --git a/python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md b/python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md new file mode 100644 index 000000000000..7857e6f9ca6d --- /dev/null +++ b/python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112). diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index dc461861aceb..4fadc953c3b1 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -498,6 +498,65 @@ module XML { abstract string getName(); } } + + /** + * A kind of XML vulnerability. + * + * See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries + * + * See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing. + */ + class XmlParsingVulnerabilityKind extends string { + XmlParsingVulnerabilityKind() { this in ["XML bomb", "XXE", "DTD retrieval"] } + + /** + * Holds for XML bomb vulnerability kind, such as 'Billion Laughs' and 'Quadratic + * Blowup'. + * + * While a parser could technically be vulnerable to one and not the other, from our + * point of view the interesting part is that it IS vulnerable to these types of + * attacks, and not so much which one specifically works. In practice I haven't seen + * a parser that is vulnerable to one and not the other. + */ + predicate isXmlBomb() { this = "XML bomb" } + + /** Holds for XXE vulnerability kind. */ + predicate isXxe() { this = "XXE" } + + /** Holds for DTD retrieval vulnerability kind. */ + predicate isDtdRetrieval() { this = "DTD retrieval" } + } + + /** + * A data-flow node that parses XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XmlParsing` instead. + */ + class XmlParsing extends Decoding instanceof XmlParsing::Range { + /** + * Holds if this XML parsing is vulnerable to `kind`. + */ + predicate vulnerableTo(XmlParsingVulnerabilityKind kind) { super.vulnerableTo(kind) } + } + + /** Provides classes for modeling XML parsing APIs. */ + module XmlParsing { + /** + * A data-flow node that parses XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XmlParsing` instead. + */ + abstract class Range extends Decoding::Range { + /** + * Holds if this XML parsing is vulnerable to `kind`. + */ + abstract predicate vulnerableTo(XmlParsingVulnerabilityKind kind); + + override string getFormat() { result = "XML" } + } + } } /** Provides classes for modeling LDAP-related APIs. */ diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index b94b8aee5d96..4812628d262f 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll @@ -52,3 +52,4 @@ private import semmle.python.frameworks.Ujson private import semmle.python.frameworks.Urllib3 private import semmle.python.frameworks.Yaml private import semmle.python.frameworks.Yarl +private import semmle.python.frameworks.Xmltodict diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 9259668a5c84..70e46a6d3b05 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -19,6 +19,9 @@ private import semmle.python.ApiGraphs * - https://lxml.de/tutorial.html */ private module Lxml { + // --------------------------------------------------------------------------- + // XPath + // --------------------------------------------------------------------------- /** * A class constructor compiling an XPath expression. * @@ -57,13 +60,25 @@ private module Lxml { */ class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode { XPathCall() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]) - .getReturn() - .getMember("xpath") - .getACall() + exists(API::Node parseResult | + parseResult = + API::moduleImport("lxml") + .getMember("etree") + .getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]) + .getReturn() + or + // TODO: lxml.etree.parseid()[0] will contain the root element from parsing + // but we don't really have a way to model that nicely. + parseResult = + API::moduleImport("lxml") + .getMember("etree") + .getMember("XMLParser") + .getReturn() + .getMember("close") + .getReturn() + | + this = parseResult.getMember("xpath").getACall() + ) } override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] } @@ -85,4 +100,235 @@ private module Lxml { override string getName() { result = "lxml.etree" } } + + // --------------------------------------------------------------------------- + // Parsing + // --------------------------------------------------------------------------- + /** + * Provides models for `lxml.etree` parsers. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + module XmlParser { + /** + * A source of instances of `lxml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XmlParser::instance()` to get references to instances of `lxml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { + /** Holds if this instance is vulnerable to `kind`. */ + abstract predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind); + } + + /** + * A call to `lxml.etree.XMLParser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + private class LxmlParser extends InstanceSource, API::CallNode { + LxmlParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() + } + + // NOTE: it's not possible to change settings of a parser after constructing it + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + kind.isXxe() and + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = any(True t) + ) + or + kind.isXmlBomb() and + this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) and + not this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = + any(False t) + or + kind.isDtdRetrieval() and + this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and + this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t) + } + } + + /** + * A call to `lxml.etree.get_default_parser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode { + LxmlDefaultParser() { + this = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. + kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. + } + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { + t.start() and + result = origin + or + exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + DataFlow::Node instance(InstanceSource origin) { + instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) + } + + /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ + DataFlow::Node instanceVulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) + } + + /** + * A call to the `feed` method of an `lxml` parser. + */ + private class LxmlParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { + LxmlParserFeedCall() { this.calls(instance(_), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + this.calls(instanceVulnerableTo(kind), "feed") + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } + } + } + + /** + * A call to either of: + * - `lxml.etree.fromstring` + * - `lxml.etree.fromstringlist` + * - `lxml.etree.XML` + * - `lxml.etree.XMLID` + * - `lxml.etree.parse` + * - `lxml.etree.parseid` + * + * See + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid + */ + private class LxmlParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + string functionName; + + LxmlParsing() { + functionName in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"] and + this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML / XMLID + this.getArgByName("text"), + // fromstringlist + this.getArgByName("strings"), + // parse / parseid + this.getArgByName("source"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + this.getParserArg() = XmlParser::instanceVulnerableTo(kind) + or + kind.isXxe() and + not exists(this.getParserArg()) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // Note: for `parseid`/XMLID the result of the call is a tuple with `(root, dict)`, so + // maybe we should not just say that the entire tuple is the decoding output... my + // gut feeling is that THIS instance doesn't matter too much, but that it would be + // nice to be able to do this in general. (this is a problem for both `lxml.etree` + // and `xml.etree`) + result = this + } + } + + /** + * A call to `lxml.etree.ElementTree.parse` or `lxml.etree.ElementTree.parseid`, which + * takes either a filename or a file-like object as argument. To capture the filename + * for path-injection, we have this subclass. + * + * See + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid + */ + private class FileAccessFromLxmlParsing extends LxmlParsing, FileSystemAccess::Range { + FileAccessFromLxmlParsing() { + functionName in ["parse", "parseid"] + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } + + /** + * A call to `lxml.etree.iterparse` + * + * See + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse + */ + private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range, + FileSystemAccess::Range { + LxmlIterparseCall() { + this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O + kind.isXxe() + or + kind.isXmlBomb() and + this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) + or + kind.isDtdRetrieval() and + this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and + this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 514a9d9e5eb3..ef60841acd66 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2890,70 +2890,6 @@ private module StdlibPrivate { override string getKind() { result = Escaping::getRegexKind() } } - // --------------------------------------------------------------------------- - // xml.etree.ElementTree - // --------------------------------------------------------------------------- - /** - * An instance of `xml.etree.ElementTree.ElementTree`. - * - * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree - */ - private API::Node elementTreeInstance() { - //parse to a tree - result = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("parse") - .getReturn() - or - // construct a tree without parsing - result = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("ElementTree") - .getReturn() - } - - /** - * An instance of `xml.etree.ElementTree.Element`. - * - * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element - */ - private API::Node elementInstance() { - // parse or go to the root of a tree - result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn() - or - // parse directly to an element - result = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember(["fromstring", "fromstringlist", "XML"]) - .getReturn() - } - - /** - * A call to a find method on a tree or an element will execute an XPath expression. - */ - private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode { - string methodName; - - ElementTreeFindCall() { - methodName in ["find", "findall", "findtext"] and - ( - this = elementTreeInstance().getMember(methodName).getACall() - or - this = elementInstance().getMember(methodName).getACall() - ) - } - - override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] } - - override string getName() { result = "xml.etree" } - } - // --------------------------------------------------------------------------- // urllib // --------------------------------------------------------------------------- @@ -3171,6 +3107,547 @@ private module StdlibPrivate { result in [this.getArg(0), this.getArgByName("path")] } } + + // --------------------------------------------------------------------------- + // io + // --------------------------------------------------------------------------- + /** + * Provides models for the `io.StringIO`/`io.BytesIO` classes + * + * See https://docs.python.org/3.10/library/io.html#io.StringIO. + */ + module StringIO { + /** Gets a reference to the `io.StringIO` class. */ + private API::Node classRef() { + result = API::moduleImport("io").getMember(["StringIO", "BytesIO"]) + } + + /** + * A source of instances of `io.StringIO`/`io.BytesIO`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `StringIO::instance()` to get references to instances of `io.StringIO`. + */ + abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource { } + + /** A direct instantiation of `io.StringIO`/`io.BytesIO`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { this = classRef().getACall() } + + DataFlow::Node getInitialValue() { + result = this.getArg(0) + or + // `initial_value` for StringIO, `initial_bytes` for BytesIO + result = this.getArgByName(["initial_value", "initial_bytes"]) + } + } + + /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Extra taint propagation for `io.StringIO`/`io.BytesIO`. + */ + private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + nodeTo.(ClassInstantiation).getInitialValue() = nodeFrom + } + } + } + + // --------------------------------------------------------------------------- + // xml.etree.ElementTree + // --------------------------------------------------------------------------- + /** + * An instance of `xml.etree.ElementTree.ElementTree`. + * + * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree + */ + private API::Node elementTreeInstance() { + //parse to a tree + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("parse") + .getReturn() + or + // construct a tree without parsing + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("ElementTree") + .getReturn() + } + + /** + * An instance of `xml.etree.ElementTree.Element`. + * + * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element + */ + private API::Node elementInstance() { + // parse or go to the root of a tree + result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn() + or + // parse directly to an element + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["fromstring", "fromstringlist", "XML"]) + .getReturn() + or + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getReturn() + .getMember("close") + .getReturn() + } + + /** + * A call to a find method on a tree or an element will execute an XPath expression. + */ + private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode { + string methodName; + + ElementTreeFindCall() { + methodName in ["find", "findall", "findtext"] and + ( + this = elementTreeInstance().getMember(methodName).getACall() + or + this = elementInstance().getMember(methodName).getACall() + ) + } + + override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] } + + override string getName() { result = "xml.etree" } + } + + /** + * Provides models for `xml.etree` parsers + * + * See + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser + */ + module XmlParser { + /** + * A source of instances of `xml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XmlParser::instance()` to get references to instances of `xml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `xml.etree` parsers. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["XMLParser", "XMLPullParser"]) + .getACall() + } + } + + /** Gets a reference to an `xml.etree` parser instance. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an `xml.etree` parser instance. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XmlEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { + XmlEtreeParserFeedCall() { this.calls(instance(), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { kind.isXmlBomb() } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } + } + } + + /** + * A call to either of: + * - `xml.etree.ElementTree.fromstring` + * - `xml.etree.ElementTree.fromstringlist` + * - `xml.etree.ElementTree.XML` + * - `xml.etree.ElementTree.XMLID` + * - `xml.etree.ElementTree.parse` + * - `xml.etree.ElementTree.iterparse` + * - `parse` method on an `xml.etree.ElementTree.ElementTree` instance + * + * See + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstring + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstringlist + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XML + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLID + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + */ + private class XmlEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + XmlEtreeParsing() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) + .getACall() + or + this = elementTreeInstance().getMember("parse").getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML / XMLID + this.getArgByName("text"), + // fromstringlist + this.getArgByName("sequence"), + // parse / iterparse + this.getArgByName("source"), + ] + } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + // note: it does not matter what `xml.etree` parser you are using, you cannot + // change the security features anyway :| + kind.isXmlBomb() + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // Note: for `XMLID` the result of the call is a tuple with `(root, dict)`, so + // maybe we should not just say that the entire tuple is the decoding output... my + // gut feeling is that THIS instance doesn't matter too much, but that it would be + // nice to be able to do this in general. (this is a problem for both `lxml.etree` + // and `xml.etree`) + result = this + } + } + + /** + * A call to `xml.etree.ElementTree.parse` or `xml.etree.ElementTree.iterparse`, which + * takes either a filename or a file-like object as argument. To capture the filename + * for path-injection, we have this subclass. + * + * See + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + */ + private class FileAccessFromXmlEtreeParsing extends XmlEtreeParsing, FileSystemAccess::Range { + FileAccessFromXmlEtreeParsing() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["parse", "iterparse"]) + .getACall() + or + this = elementTreeInstance().getMember("parse").getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } + + // --------------------------------------------------------------------------- + // xml.sax + // --------------------------------------------------------------------------- + /** + * A call to the `setFeature` method on a XML sax parser. + * + * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature + */ + private class SaxParserSetFeatureCall extends API::CallNode, DataFlow::MethodCallNode { + SaxParserSetFeatureCall() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("setFeature") + .getACall() + } + + // The keyword argument names does not match documentation. I checked (with Python + // 3.9.5) that the names used here actually works. + API::Node getFeatureArg() { result = this.getParameter(0, "name") } + + API::Node getStateArg() { result = this.getParameter(1, "state") } + } + + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ + private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { + t.start() and + exists(SaxParserSetFeatureCall call | + call.getFeatureArg().getARhs() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = true and + result = call.getObject() + ) + or + exists(DataFlow::TypeTracker t2 | + t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) + ) and + // take account of that we can set the feature to False, which makes the parser safe again + not exists(SaxParserSetFeatureCall call | + call.getObject() = result and + call.getFeatureArg().getARhs() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = false + ) + } + + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ + DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { + result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) + } + + /** + * A call to the `parse` method on a SAX XML parser. + * + * See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse + */ + private class XmlSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range, + FileSystemAccess::Range { + XmlSaxInstanceParsing() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("parse") + .getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + // always vuln to these + kind.isXmlBomb() + or + // can be vuln to other things if features has been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } + + override DataFlow::Node getAPathArgument() { + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + result = this.getAnInput() + } + } + + /** + * A call to either `parse` or `parseString` from `xml.sax` module. + * + * See: + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString + */ + private class XmlSaxParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + XmlSaxParsing() { + this = + API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // parse + this.getArgByName("source"), + ] + } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + // always vuln to these + kind.isXmlBomb() + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } + } + + /** + * A call to `xml.sax.parse`, which takes either a filename or a file-like object as + * argument. To capture the filename for path-injection, we have this subclass. + * + * See + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + */ + private class FileAccessFromXmlSaxParsing extends XmlSaxParsing, FileSystemAccess::Range { + FileAccessFromXmlSaxParsing() { + this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } + + // --------------------------------------------------------------------------- + // xml.dom.* + // --------------------------------------------------------------------------- + /** + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. + * + * Both of these modules are based on SAX parsers. + * + * See + * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse + * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse + */ + private class XmlDomParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + XmlDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember(["parse", "parseString"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + or + kind.isXmlBomb() + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } + + /** + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or + * `xml.dom.pulldom`, which takes either a filename or a file-like object as argument. + * To capture the filename for path-injection, we have this subclass. + * + * See + * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse + * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse + */ + private class FileAccessFromXmlDomParsing extends XmlDomParsing, FileSystemAccess::Range { + FileAccessFromXmlDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember("parse") + .getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll new file mode 100644 index 000000000000..f63fec7afe4c --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll @@ -0,0 +1,39 @@ +/** + * Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package. + * + * See + * - https://pypi.org/project/xmltodict/ + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import semmle.python.ApiGraphs + +/** + * Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package + * + * See + * - https://pypi.org/project/xmltodict/ + */ +private module Xmltodict { + /** + * A call to `xmltodict.parse`. + */ + private class XMLtoDictParsing extends API::CallNode, XML::XmlParsing::Range { + XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } + + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("xml_input")] + } + + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { + kind.isXmlBomb() and + this.getKeywordParameter("disable_entities").getAValueReachingRhs().asExpr() = any(False f) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } +} diff --git a/python/ql/lib/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/XmlBombCustomizations.qll new file mode 100644 index 000000000000..a2fe1b8ecb27 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -0,0 +1,49 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * "XML bomb" + * vulnerabilities, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources + +/** + * Provides default sources, sinks and sanitizers for detecting "XML bomb" + * vulnerabilities, as well as extension points for adding your own. + */ +module XmlBomb { + /** + * A data flow source for XML-bomb vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for XML-bomb vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for XML-bomb vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** A source of remote user input, considered as a flow source for XML bomb vulnerabilities. */ + class RemoteFlowSourceAsSource extends Source { + RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } + } + + /** + * A call to an XML parser that is vulnerable to XML bombs. + */ + class XmlParsingVulnerableToXmlBomb extends Sink { + XmlParsingVulnerableToXmlBomb() { + exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | + kind.isXmlBomb() and + parsing.vulnerableTo(kind) and + this = parsing.getAnInput() + ) + } + } +} diff --git a/python/ql/lib/semmle/python/security/dataflow/XmlBombQuery.qll b/python/ql/lib/semmle/python/security/dataflow/XmlBombQuery.qll new file mode 100644 index 000000000000..d0c0b85d84f1 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/XmlBombQuery.qll @@ -0,0 +1,28 @@ +/** + * Provides a taint-tracking configuration for detecting "XML bomb" vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `Configuration` is needed, otherwise + * `XmlBombCustomizations` should be imported instead. + */ + +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import XmlBombCustomizations::XmlBomb + +/** + * A taint-tracking configuration for detecting "XML bomb" vulnerabilities. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "XmlBomb" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof Sanitizer + } +} diff --git a/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll new file mode 100644 index 000000000000..1d1ad087f849 --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll @@ -0,0 +1,49 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * "XML External Entity (XXE)" + * vulnerabilities, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources + +/** + * Provides default sources, sinks and sanitizers for detecting "XML External Entity (XXE)" + * vulnerabilities, as well as extension points for adding your own. + */ +module Xxe { + /** + * A data flow source for XXE vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for XXE vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for XXE vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** A source of remote user input, considered as a flow source for XXE vulnerabilities. */ + class RemoteFlowSourceAsSource extends Source { + RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } + } + + /** + * A call to an XML parser that is vulnerable to XXE. + */ + class XmlParsingVulnerableToXxe extends Sink { + XmlParsingVulnerableToXxe() { + exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | + kind.isXxe() and + parsing.vulnerableTo(kind) and + this = parsing.getAnInput() + ) + } + } +} diff --git a/python/ql/lib/semmle/python/security/dataflow/XxeQuery.qll b/python/ql/lib/semmle/python/security/dataflow/XxeQuery.qll new file mode 100644 index 000000000000..dd2409f2a3ce --- /dev/null +++ b/python/ql/lib/semmle/python/security/dataflow/XxeQuery.qll @@ -0,0 +1,28 @@ +/** + * Provides a taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `Configuration` is needed, otherwise + * `XxeCustomizations` should be imported instead. + */ + +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import XxeCustomizations::Xxe + +/** + * A taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "Xxe" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof Sanitizer + } +} diff --git a/python/ql/src/Security/CWE-611/Xxe.qhelp b/python/ql/src/Security/CWE-611/Xxe.qhelp new file mode 100644 index 000000000000..19bbc955fd68 --- /dev/null +++ b/python/ql/src/Security/CWE-611/Xxe.qhelp @@ -0,0 +1,74 @@ + + + + +

+Parsing untrusted XML files with a weakly configured XML parser may lead to an +XML External Entity (XXE) attack. This type of attack uses external entity references +to access arbitrary files on a system, carry out denial-of-service (DoS) attacks, or server-side +request forgery. Even when the result of parsing is not returned to the user, DoS attacks are still possible +and out-of-band data retrieval techniques may allow attackers to steal sensitive data. +

+
+ + +

+The easiest way to prevent XXE attacks is to disable external entity handling when +parsing untrusted data. How this is done depends on the library being used. Note that some +libraries, such as recent versions of the XML libraries in the standard library of Python 3, +disable entity expansion by default, +so unless you have explicitly enabled entity expansion, no further action needs to be taken. +

+ +

+We recommend using the defusedxml +PyPI package, which has been created to prevent XML attacks (both XXE and XML bombs). +

+
+ + +

+The following example uses the lxml XML parser to parse a string +xml_src. That string is from an untrusted source, so this code is +vulnerable to an XXE attack, since the +default parser from lxml.etree allows local external entities to be resolved. +

+ + +

+To guard against XXE attacks with the lxml library, you should create a +parser with resolve_entities set to false. This means that no +entity expansion is undertaken, althuogh standard predefined entities such as +&gt;, for writing > inside the text of an XML element, +are still allowed. +

+ +
+ + +
  • +OWASP: +XML External Entity (XXE) Processing. +
  • +
  • +Timothy Morgen: +XML Schema, DTD, and Entity Attacks. +
  • +
  • +Timur Yunusov, Alexey Osipov: +XML Out-Of-Band Data Retrieval. +
  • +
  • +Python 3 standard library: +XML Vulnerabilities. +
  • +
  • +Python 2 standard library: +XML Vulnerabilities. +
  • +
  • +PortSwigger: +XML external entity (XXE) injection. +
  • +
    +
    diff --git a/python/ql/src/Security/CWE-611/Xxe.ql b/python/ql/src/Security/CWE-611/Xxe.ql new file mode 100644 index 000000000000..5cc6da254677 --- /dev/null +++ b/python/ql/src/Security/CWE-611/Xxe.ql @@ -0,0 +1,23 @@ +/** + * @name XML external entity expansion + * @description Parsing user input as an XML document with external + * entity expansion is vulnerable to XXE attacks. + * @kind path-problem + * @problem.severity error + * @security-severity 9.1 + * @precision high + * @id py/xxe + * @tags security + * external/cwe/cwe-611 + * external/cwe/cwe-827 + */ + +import python +import semmle.python.security.dataflow.XxeQuery +import DataFlow::PathGraph + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, + "A $@ is parsed as XML without guarding against external entity expansion.", source.getNode(), + "user-provided value" diff --git a/python/ql/src/Security/CWE-611/examples/XxeBad.py b/python/ql/src/Security/CWE-611/examples/XxeBad.py new file mode 100644 index 000000000000..4b2121ab4a64 --- /dev/null +++ b/python/ql/src/Security/CWE-611/examples/XxeBad.py @@ -0,0 +1,10 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + doc = lxml.etree.fromstring(xml_src) + return lxml.etree.tostring(doc) diff --git a/python/ql/src/Security/CWE-611/examples/XxeGood.py b/python/ql/src/Security/CWE-611/examples/XxeGood.py new file mode 100644 index 000000000000..20844032fa39 --- /dev/null +++ b/python/ql/src/Security/CWE-611/examples/XxeGood.py @@ -0,0 +1,11 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + parser = lxml.etree.XMLParser(resolve_entities=False) + doc = lxml.etree.fromstring(xml_src, parser=parser) + return lxml.etree.tostring(doc) diff --git a/python/ql/src/Security/CWE-776/XmlBomb.qhelp b/python/ql/src/Security/CWE-776/XmlBomb.qhelp new file mode 100644 index 000000000000..8841f98ab27e --- /dev/null +++ b/python/ql/src/Security/CWE-776/XmlBomb.qhelp @@ -0,0 +1,74 @@ + + + + +

    +Parsing untrusted XML files with a weakly configured XML parser may be vulnerable to +denial-of-service (DoS) attacks exploiting uncontrolled internal entity expansion. +

    +

    +In XML, so-called internal entities are a mechanism for introducing an abbreviation +for a piece of text or part of a document. When a parser that has been configured +to expand entities encounters a reference to an internal entity, it replaces the entity +by the data it represents. The replacement text may itself contain other entity references, +which are expanded recursively. This means that entity expansion can increase document size +dramatically. +

    +

    +If untrusted XML is parsed with entity expansion enabled, a malicious attacker could +submit a document that contains very deeply nested entity definitions, causing the parser +to take a very long time or use large amounts of memory. This is sometimes called an +XML bomb attack. +

    +
    + + +

    +The safest way to prevent XML bomb attacks is to disable entity expansion when parsing untrusted +data. Whether this can be done depends on the library being used. Note that some libraries, such as +lxml, have measures enabled by default to prevent such DoS XML attacks, so +unless you have explicitly set huge_tree to True, no further action is needed. +

    + +

    +We recommend using the defusedxml +PyPI package, which has been created to prevent XML attacks (both XXE and XML bombs). +

    +
    + + +

    +The following example uses the xml.etree XML parser provided by the Python standard library to +parse a string xml_src. That string is from an untrusted source, so this code is +vulnerable to a DoS attack, since the xml.etree XML parser expands internal entities by default: +

    + + +

    +It is not possible to guard against internal entity expansion with +xml.etree, so to guard against these attacks, the following example uses +the defusedxml +PyPI package instead, which is not exposed to such internal entity expansion attacks. +

    + +
    + + +
  • +Wikipedia: +Billion Laughs. +
  • +
  • +Bryan Sullivan: +Security Briefs - XML Denial of Service Attacks and Defenses. +
  • +
  • +Python 3 standard library: +XML Vulnerabilities. +
  • +
  • +Python 2 standard library: +XML Vulnerabilities. +
  • +
    +
    diff --git a/python/ql/src/Security/CWE-776/XmlBomb.ql b/python/ql/src/Security/CWE-776/XmlBomb.ql new file mode 100644 index 000000000000..54d483db17ea --- /dev/null +++ b/python/ql/src/Security/CWE-776/XmlBomb.ql @@ -0,0 +1,23 @@ +/** + * @name XML internal entity expansion + * @description Parsing user input as an XML document with arbitrary internal + * entity expansion is vulnerable to denial-of-service attacks. + * @kind path-problem + * @problem.severity warning + * @security-severity 7.5 + * @precision high + * @id py/xml-bomb + * @tags security + * external/cwe/cwe-776 + * external/cwe/cwe-400 + */ + +import python +import semmle.python.security.dataflow.XmlBombQuery +import DataFlow::PathGraph + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, + "A $@ is parsed as XML without guarding against uncontrolled entity expansion.", source.getNode(), + "user-provided value" diff --git a/python/ql/src/Security/CWE-776/examples/XmlBombBad.py b/python/ql/src/Security/CWE-776/examples/XmlBombBad.py new file mode 100644 index 000000000000..d52054d94929 --- /dev/null +++ b/python/ql/src/Security/CWE-776/examples/XmlBombBad.py @@ -0,0 +1,10 @@ +from flask import Flask, request +import xml.etree.ElementTree as ET + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + doc = ET.fromstring(xml_src) + return ET.tostring(doc) diff --git a/python/ql/src/Security/CWE-776/examples/XmlBombGood.py b/python/ql/src/Security/CWE-776/examples/XmlBombGood.py new file mode 100644 index 000000000000..5e4261e35da9 --- /dev/null +++ b/python/ql/src/Security/CWE-776/examples/XmlBombGood.py @@ -0,0 +1,10 @@ +from flask import Flask, request +import defusedxml.ElementTree as ET + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + doc = ET.fromstring(xml_src) + return ET.tostring(doc) diff --git a/python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md b/python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md new file mode 100644 index 000000000000..bd867091aea3 --- /dev/null +++ b/python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md @@ -0,0 +1,5 @@ +--- +category: newQuery +--- +* "XML external entity expansion" (`py/xxe`). Results will appear by default. This query was based on [an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112). +* "XML internal entity expansion" (`py/xml-bomb`). Results will appear by default. This query was based on [an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112). diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index cda0633690c5..e31fdc88629f 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -10,16 +10,10 @@ */ private import python -private import experimental.semmle.python.Concepts +private import semmle.python.Concepts private import semmle.python.ApiGraphs -from DataFlow::CallCfgNode call, string kinds +from DataFlow::CallCfgNode call where - call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and - kinds = - strictconcat(ExperimentalXML::XMLVulnerabilityKind kind | - kind.isBillionLaughs() or kind.isQuadraticBlowup() - | - kind, ", " - ) -select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "." + call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() +select call, "SimpleXMLRPCServer is vulnerable to XML bombs" diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.xml b/python/ql/src/experimental/Security/CWE-611/XXE.xml deleted file mode 100644 index ddd196f2f137..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XXE.xml +++ /dev/null @@ -1,4 +0,0 @@ - -]> -&xxe; \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py deleted file mode 100644 index 0e9eec933d7d..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py +++ /dev/null @@ -1,25 +0,0 @@ -from flask import request, Flask -import lxml.etree -import xml.etree.ElementTree - -app = Flask(__name__) - -# BAD -@app.route("/bad") -def bad(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) - - return parsed_xml.text - -# GOOD -@app.route("/good") -def good(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False) - parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) - - return parsed_xml.text \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp deleted file mode 100644 index 6da1bf1d3063..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp +++ /dev/null @@ -1,48 +0,0 @@ - - - - -

    -Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE), -Billion Laughs, Quadratic Blowup and DTD retrieval. -This type of attack uses external entity references to access arbitrary files on a system, carry out denial of -service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band -data retrieval techniques may allow attackers to steal sensitive data. Denial of services can also be carried out -in this situation. -

    -
    - - -

    -Use defusedxml, a Python package aimed -to prevent any potentially malicious operation. -

    -
    - - -

    -The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser) -that is not safely configured on untrusted data, and is therefore inherently unsafe. -

    - -

    -Providing an input (xml_content) like the following XML content against /bad, the request response would contain the contents of -/etc/passwd. -

    - -
    - - -
  • Python 3 XML Vulnerabilities.
  • -
  • Python 2 XML Vulnerabilities.
  • -
  • Python XML Parsing.
  • -
  • OWASP vulnerability description: XML External Entity (XXE) Processing.
  • -
  • OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
  • -
  • Paper by Timothy Morgen: XML Schema, DTD, and Entity Attacks
  • -
  • Out-of-band data retrieval: Timur Yunusov & Alexey Osipov, Black hat EU 2013: XML Out-Of-Band Data Retrieval.
  • -
  • Denial of service attack (Billion laughs): Billion Laughs.
  • -
    - -
    diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql deleted file mode 100644 index 922ca346b173..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql +++ /dev/null @@ -1,31 +0,0 @@ -/** - * @name XML Entity injection - * @description User input should not be parsed allowing the injection of entities. - * @kind path-problem - * @problem.severity error - * @id py/xml-entity-injection - * @tags security - * external/cwe/cwe-611 - * external/cwe/cwe-776 - * external/cwe/cwe-827 - */ - -// determine precision above -import python -import experimental.semmle.python.security.dataflow.XmlEntityInjection -import DataFlow::PathGraph - -from - XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, - DataFlow::PathNode sink, string kinds -where - config.hasFlowPath(source, sink) and - kinds = - strictconcat(string kind | - kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() - | - kind, ", " - ) -select sink.getNode(), source, sink, - "$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(), - "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 0c6bca7733be..1c08609c547a 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -81,74 +81,6 @@ class LogOutput extends DataFlow::Node { DataFlow::Node getAnInput() { result = range.getAnInput() } } -/** - * Since there is both XML module in normal and experimental Concepts, - * we have to rename the experimental module as this. - */ -module ExperimentalXML { - /** - * A kind of XML vulnerability. - * - * See https://pypi.org/project/defusedxml/#python-xml-libraries - */ - class XMLVulnerabilityKind extends string { - XMLVulnerabilityKind() { - this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] - } - - /** Holds for Billion Laughs vulnerability kind. */ - predicate isBillionLaughs() { this = "Billion Laughs" } - - /** Holds for Quadratic Blowup vulnerability kind. */ - predicate isQuadraticBlowup() { this = "Quadratic Blowup" } - - /** Holds for XXE vulnerability kind. */ - predicate isXxe() { this = "XXE" } - - /** Holds for DTD retrieval vulnerability kind. */ - predicate isDtdRetrieval() { this = "DTD retrieval" } - } - - /** - * A data-flow node that parses XML. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. - */ - class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Holds if this XML parsing is vulnerable to `kind`. - */ - predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } - } - - /** Provides classes for modeling XML parsing APIs. */ - module XMLParsing { - /** - * A data-flow node that parses XML. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the content to parse. - */ - abstract DataFlow::Node getAnInput(); - - /** - * Holds if this XML parsing is vulnerable to `kind`. - */ - abstract predicate vulnerableTo(XMLVulnerabilityKind kind); - } - } -} - /** Provides classes for modeling LDAP query execution-related APIs. */ module LdapQuery { /** diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index 45003faada62..f0b120ceacce 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -3,7 +3,6 @@ */ private import experimental.semmle.python.frameworks.Stdlib -private import experimental.semmle.python.frameworks.Xml private import experimental.semmle.python.frameworks.Flask private import experimental.semmle.python.frameworks.Django private import experimental.semmle.python.frameworks.Werkzeug diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll deleted file mode 100644 index eb1899fdd18f..000000000000 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ /dev/null @@ -1,442 +0,0 @@ -/** - * Provides class and predicates to track external data that - * may represent malicious XML objects. - */ - -private import python -private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts -private import semmle.python.ApiGraphs - -module XML = ExperimentalXML; - -private module XmlEtree { - /** - * Provides models for `xml.etree` parsers - * - * See - * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser - * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser - */ - module XMLParser { - /** - * A source of instances of `xml.etree` parsers, extend this class to model new instances. - * - * This can include instantiations of the class, return values from function - * calls, or a special parameter that will be set when functions are called by an external - * library. - * - * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. - */ - abstract class InstanceSource extends DataFlow::LocalSourceNode { } - - /** A direct instantiation of `xml.etree` parsers. */ - private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { - ClassInstantiation() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLParser") - .getACall() - or - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLPullParser") - .getACall() - } - } - - /** Gets a reference to an `xml.etree` parser instance. */ - private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { - t.start() and - result instanceof InstanceSource - or - exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) - } - - /** Gets a reference to an `xml.etree` parser instance. */ - DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } - - /** - * A call to the `feed` method of an `xml.etree` parser. - */ - private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } - } - } - - /** - * A call to either of: - * - `xml.etree.ElementTree.fromstring` - * - `xml.etree.ElementTree.fromstringlist` - * - `xml.etree.ElementTree.XML` - * - `xml.etree.ElementTree.XMLID` - * - `xml.etree.ElementTree.parse` - * - `xml.etree.ElementTree.iterparse` - */ - private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLEtreeParsing() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // fromstring / XML / XMLID - this.getArgByName("text"), - // fromstringlist - this.getArgByName("sequence"), - // parse / iterparse - this.getArgByName("source"), - ] - } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - // note: it does not matter what `xml.etree` parser you are using, you cannot - // change the security features anyway :| - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } - } -} - -private module SaxBasedParsing { - /** - * A call to the `setFeature` method on a XML sax parser. - * - * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature - */ - class SaxParserSetFeatureCall extends API::CallNode, DataFlow::MethodCallNode { - SaxParserSetFeatureCall() { - this = - API::moduleImport("xml") - .getMember("sax") - .getMember("make_parser") - .getReturn() - .getMember("setFeature") - .getACall() - } - - // The keyword argument names does not match documentation. I checked (with Python - // 3.9.5) that the names used here actually works. - API::Node getFeatureArg() { result = this.getParameter(0, "name") } - - API::Node getStateArg() { result = this.getParameter(1, "state") } - } - - /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. - * - * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - */ - private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { - t.start() and - exists(SaxParserSetFeatureCall call | - call.getFeatureArg().getARhs() = - API::moduleImport("xml") - .getMember("sax") - .getMember("handler") - .getMember("feature_external_ges") - .getAUse() and - call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = true and - result = call.getObject() - ) - or - exists(DataFlow::TypeTracker t2 | - t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) - ) and - // take account of that we can set the feature to False, which makes the parser safe again - not exists(SaxParserSetFeatureCall call | - call.getObject() = result and - call.getFeatureArg().getARhs() = - API::moduleImport("xml") - .getMember("sax") - .getMember("handler") - .getMember("feature_external_ges") - .getAUse() and - call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = false - ) - } - - /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. - * - * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - */ - DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { - result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) - } - - /** - * A call to the `parse` method on a SAX XML parser. - */ - private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLSaxInstanceParsing() { - this = - API::moduleImport("xml") - .getMember("sax") - .getMember("make_parser") - .getReturn() - .getMember("parse") - .getACall() - } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - // always vuln to these - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - // can be vuln to other things if features has been turned on - this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - } - } - - /** - * A call to either `parse` or `parseString` from `xml.sax` module. - * - * See: - * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse - * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString - */ - private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLSaxParsing() { - this = - API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // parseString - this.getArgByName("string"), - // parse - this.getArgByName("source"), - ] - } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - // always vuln to these - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - // can be vuln to other things if features has been turned on - this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - } - } - - /** - * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. - * - * Both of these modules are based on SAX parsers. - */ - private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLDomParsing() { - this = - API::moduleImport("xml") - .getMember("dom") - .getMember(["minidom", "pulldom"]) - .getMember(["parse", "parseString"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // parseString - this.getArgByName("string"), - // minidom.parse - this.getArgByName("file"), - // pulldom.parse - this.getArgByName("stream_or_string"), - ] - } - - DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - } - } -} - -private module Lxml { - /** - * Provides models for `lxml.etree` parsers. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - */ - module XMLParser { - /** - * A source of instances of `lxml.etree` parsers, extend this class to model new instances. - * - * This can include instantiations of the class, return values from function - * calls, or a special parameter that will be set when functions are called by an external - * library. - * - * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. - */ - abstract class InstanceSource extends DataFlow::LocalSourceNode { - /** Holds if this instance is vulnerable to `kind`. */ - abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind); - } - - /** - * A call to `lxml.etree.XMLParser`. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - */ - private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { - LXMLParser() { - this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() - } - - // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - kind.isXxe() and - ( - // resolve_entities has default True - not exists(this.getArgByName("resolve_entities")) - or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) - ) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and - not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) - or - kind.isDtdRetrieval() and - this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and - this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) - } - } - - /** - * A call to `lxml.etree.get_default_parser`. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser - */ - private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { - LXMLDefaultParser() { - this = - API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() - } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - // as highlighted by - // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - // by default XXE is allow. so as long as the default parser has not been - // overridden, the result is also vuln to XXE. - kind.isXxe() - // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. - } - } - - /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ - private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { - t.start() and - result = origin - or - exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) - } - - /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ - DataFlow::Node instance(InstanceSource origin) { - instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) - } - - /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ - DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { - exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) - } - - /** - * A call to the `feed` method of an `lxml` parser. - */ - private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - LXMLParserFeedCall() { this.calls(instance(_), "feed") } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - this.calls(instanceVulnerableTo(kind), "feed") - } - } - } - - /** - * A call to either of: - * - `lxml.etree.fromstring` - * - `lxml.etree.fromstringlist` - * - `lxml.etree.XML` - * - `lxml.etree.parse` - * - `lxml.etree.parseid` - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring - */ - private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - LXMLParsing() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // fromstring / XML - this.getArgByName("text"), - // fromstringlist - this.getArgByName("strings"), - // parse / parseid - this.getArgByName("source"), - ] - } - - DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - this.getParserArg() = XMLParser::instanceVulnerableTo(kind) - or - kind.isXxe() and - not exists(this.getParserArg()) - } - } -} - -private module Xmltodict { - /** - * A call to `xmltodict.parse`. - */ - private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } - - override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("xml_input")] - } - - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) - } - } -} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll deleted file mode 100644 index 35220e153d12..000000000000 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll +++ /dev/null @@ -1,28 +0,0 @@ -import python -import experimental.semmle.python.Concepts -import semmle.python.dataflow.new.DataFlow -import semmle.python.dataflow.new.TaintTracking -import semmle.python.dataflow.new.RemoteFlowSources -import semmle.python.dataflow.new.BarrierGuards - -module XmlEntityInjection { - import XmlEntityInjectionCustomizations::XmlEntityInjection - - class XmlEntityInjectionConfiguration extends TaintTracking::Configuration { - XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" } - - override predicate isSource(DataFlow::Node source) { - source instanceof RemoteFlowSourceAsSource - } - - override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } - - override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { - guard instanceof SanitizerGuard - } - - override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - any(AdditionalTaintStep s).step(nodeFrom, nodeTo) - } - } -} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll deleted file mode 100644 index e420c738a978..000000000000 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Provides default sources, sinks and sanitizers for detecting - * "ldap injection" - * vulnerabilities, as well as extension points for adding your own. - */ - -private import python -private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts -private import semmle.python.dataflow.new.RemoteFlowSources -private import semmle.python.dataflow.new.BarrierGuards -private import semmle.python.ApiGraphs - -/** - * Provides default sources, sinks and sanitizers for detecting "xml injection" - * vulnerabilities, as well as extension points for adding your own. - */ -module XmlEntityInjection { - /** - * A data flow source for "xml injection" vulnerabilities. - */ - abstract class Source extends DataFlow::Node { } - - /** - * A data flow sink for "xml injection" vulnerabilities. - */ - abstract class Sink extends DataFlow::Node { - /** Gets the kind of XML injection that this sink is vulnerable to. */ - abstract string getVulnerableKind(); - } - - /** - * A sanitizer guard for "xml injection" vulnerabilities. - */ - abstract class SanitizerGuard extends DataFlow::BarrierGuard { } - - /** - * A unit class for adding additional taint steps. - * - * Extend this class to add additional taint steps that should apply to `XmlEntityInjection` - * taint configuration. - */ - class AdditionalTaintStep extends Unit { - /** - * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint - * step for `XmlEntityInjection` configuration. - */ - abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); - } - - /** - * An input to a direct XML parsing function, considered as a flow sink. - * - * See `XML::XMLParsing`. - */ - class XMLParsingInputAsSink extends Sink { - ExperimentalXML::XMLParsing xmlParsing; - - XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } - - override string getVulnerableKind() { xmlParsing.vulnerableTo(result) } - } - - /** - * A source of remote user input, considered as a flow source. - */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } - - /** - * A comparison with a constant string, considered as a sanitizer-guard. - */ - class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } - - /** - * A taint step for `io`'s `StringIO` and `BytesIO` methods. - */ - class IoAdditionalTaintStep extends AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(DataFlow::CallCfgNode ioCalls | - ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and - nodeFrom = ioCalls.getArg(0) and - nodeTo = ioCalls - ) - } - } -} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql deleted file mode 100644 index 81bc391d0e55..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql +++ /dev/null @@ -1,33 +0,0 @@ -import python -import experimental.semmle.python.Concepts -import experimental.semmle.python.frameworks.Xml -import semmle.python.dataflow.new.DataFlow -import TestUtilities.InlineExpectationsTest -private import semmle.python.dataflow.new.internal.PrintNode - -class XmlParsingTest extends InlineExpectationsTest { - XmlParsingTest() { this = "XmlParsingTest" } - - override string getARelevantTag() { result in ["input", "vuln"] } - - override predicate hasActualResult(Location location, string element, string tag, string value) { - exists(location.getFile().getRelativePath()) and - exists(XML::XMLParsing parsing | - exists(DataFlow::Node input | - input = parsing.getAnInput() and - location = input.getLocation() and - element = input.toString() and - value = prettyNodeForInlineTest(input) and - tag = "input" - ) - or - exists(XML::XMLVulnerabilityKind kind | - parsing.vulnerableTo(kind) and - location = parsing.getLocation() and - element = parsing.toString() and - value = "'" + kind + "'" and - tag = "vuln" - ) - ) - } -} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py deleted file mode 100644 index 22930a58af37..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ /dev/null @@ -1,54 +0,0 @@ -from io import StringIO -import lxml.etree - -x = "some xml" - -# different parsing methods -lxml.etree.fromstring(x) # $ input=x vuln='XXE' -lxml.etree.fromstring(text=x) # $ input=x vuln='XXE' - -lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' -lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE' - -lxml.etree.XML(x) # $ input=x vuln='XXE' -lxml.etree.XML(text=x) # $ input=x vuln='XXE' - -lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE' -lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' - -lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE' -lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' - -# With default parsers (nothing changed) -parser = lxml.etree.XMLParser() -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' - -parser = lxml.etree.get_default_parser() -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' - -# manual use of feed method -parser = lxml.etree.XMLParser() -parser.feed(x) # $ input=x vuln='XXE' -parser.feed(data=x) # $ input=x vuln='XXE' -parser.close() - -# XXE-safe -parser = lxml.etree.XMLParser(resolve_entities=False) -lxml.etree.fromstring(x, parser) # $ input=x -lxml.etree.fromstring(x, parser=parser) # $ input=x - -# XXE-vuln -parser = lxml.etree.XMLParser(resolve_entities=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' - -# Billion laughs vuln (also XXE) -parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' - -# Safe for both Billion laughs and XXE -parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x - -# DTD retrival vuln (also XXE) -parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag deleted file mode 100644 index 45c9436ee9f2..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag +++ /dev/null @@ -1 +0,0 @@ -SECRET_FLAG diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted deleted file mode 100644 index b1925ade1d3a..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted +++ /dev/null @@ -1 +0,0 @@ -just FYI diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py deleted file mode 100644 index 7dce29fc7b9d..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py +++ /dev/null @@ -1,31 +0,0 @@ -from io import StringIO -import xml.dom.minidom -import xml.dom.pulldom -import xml.sax - -x = "some xml" - -# minidom -xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - - -# pulldom -xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - - -# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) -parser = xml.sax.make_parser() -parser.setFeature(xml.sax.handler.feature_external_ges, True) -xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' -xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' - -xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' -xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py deleted file mode 100644 index df126e458e2d..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ /dev/null @@ -1,45 +0,0 @@ -from io import StringIO -import xml.etree.ElementTree - -x = "some xml" - -# Parsing in different ways -xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - - -# With parsers (no options available to disable/enable security features) -parser = xml.etree.ElementTree.XMLParser() -xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - -# manual use of feed method -parser = xml.etree.ElementTree.XMLParser() -parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.close() - -# manual use of feed method on XMLPullParser -parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.close() - -# note: it's technically possible to use the thing wrapper func `fromstring` with an -# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it -# seems very unlikely that anyone would do this, so we have intentionally not added any -# tests for this. diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py deleted file mode 100644 index 158e62ffae6b..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ /dev/null @@ -1,64 +0,0 @@ -from io import StringIO -import xml.sax - -x = "some xml" - -class MainHandler(xml.sax.ContentHandler): - def __init__(self): - self._result = [] - - def characters(self, data): - self._result.append(data) - -xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' - -parser = xml.sax.make_parser() -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -# You can make it vuln to both XXE and DTD retrieval by setting this flag -# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges -parser = xml.sax.make_parser() -parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' - -parser = xml.sax.make_parser() -parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -# Forward Type Tracking test -def func(cond): - parser = xml.sax.make_parser() - if cond: - parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' - else: - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -# make it vuln, then making it safe -# a bit of an edge-case, but is nice to be able to handle. -parser = xml.sax.make_parser() -parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' - -def check_conditional_assignment(cond): - parser = xml.sax.make_parser() - if cond: - parser.setFeature(xml.sax.handler.feature_external_ges, True) - else: - parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' - -def check_conditional_assignment2(cond): - parser = xml.sax.make_parser() - if cond: - flag_value = True - else: - flag_value = False - parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py deleted file mode 100644 index 473e51c9fe66..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py +++ /dev/null @@ -1,8 +0,0 @@ -import xmltodict - -x = "some xml" - -xmltodict.parse(x) # $ input=x -xmltodict.parse(xml_input=x) # $ input=x - -xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index 8f9435f633fe..7b8649b7abbc 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -539,3 +539,20 @@ class HttpClientRequestTest extends InlineExpectationsTest { ) } } + +class XmlParsingTest extends InlineExpectationsTest { + XmlParsingTest() { this = "XmlParsingTest" } + + override string getARelevantTag() { result = "xmlVuln" } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | + parsing.vulnerableTo(kind) and + location = parsing.getLocation() and + element = parsing.toString() and + value = "'" + kind + "'" and + tag = "xmlVuln" + ) + } +} diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected new file mode 100644 index 000000000000..5f848fb56bb0 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected @@ -0,0 +1 @@ +| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to XML bombs | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.qlref similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref rename to python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.qlref diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/xmlrpc_server.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py rename to python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/xmlrpc_server.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected deleted file mode 100644 index 4a08d61c47af..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected +++ /dev/null @@ -1 +0,0 @@ -| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref deleted file mode 100644 index 36a7c8845fb7..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/Security/CWE-611/XmlEntityInjection.ql diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py new file mode 100644 index 000000000000..63cdc79b4c1d --- /dev/null +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -0,0 +1,67 @@ +from io import StringIO +import lxml.etree + +x = "some xml" + +# different parsing methods +lxml.etree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +lxml.etree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..) +lxml.etree.fromstringlist(strings=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..) + +lxml.etree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) +lxml.etree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) + +lxml.etree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) +lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) + +xml_file = 'xml_file' +lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file +lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file + +lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file +lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file + +lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file + +# With default parsers (nothing changed) +parser = lxml.etree.XMLParser() +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +parser = lxml.etree.get_default_parser() +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +# manual use of feed method +parser = lxml.etree.XMLParser() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' +parser.close() # $ decodeOutput=parser.close() + +# XXE-safe +parser = lxml.etree.XMLParser(resolve_entities=False) +lxml.etree.fromstring(x, parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) + +# XXE-vuln +parser = lxml.etree.XMLParser(resolve_entities=True) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +# Billion laughs vuln (also XXE) +parser = lxml.etree.XMLParser(huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +# Safe for both Billion laughs and XXE +parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) + +# DTD retrival vuln (also XXE) +parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of +# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O + +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file diff --git a/python/ql/test/library-tests/frameworks/lxml/test.py b/python/ql/test/library-tests/frameworks/lxml/test.py deleted file mode 100644 index e8ce583503a3..000000000000 --- a/python/ql/test/library-tests/frameworks/lxml/test.py +++ /dev/null @@ -1,21 +0,0 @@ -from lxml import etree -from io import StringIO - -def test_parse(): - tree = etree.parse(StringIO('')) - r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar' - -def test_XPath_class(): - root = etree.XML("TEXT") - find_text = etree.XPath("path") # $ constructedXPath="path" - text = find_text(root)[0] - -def test_ETXpath_class(): - root = etree.XML("TEXT") - find_text = etree.ETXPath("path") # $ constructedXPath="path" - text = find_text(root)[0] - -def test_XPathEvaluator_class(): - root = etree.XML("TEXT") - search_root = etree.XPathEvaluator(root) - text = search_root("path")[0] # $ getXPath="path" diff --git a/python/ql/test/library-tests/frameworks/lxml/xpath.py b/python/ql/test/library-tests/frameworks/lxml/xpath.py new file mode 100644 index 000000000000..f67c8dae17c8 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/lxml/xpath.py @@ -0,0 +1,21 @@ +from lxml import etree +from io import StringIO + +def test_parse(): + tree = etree.parse(StringIO('')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' getAPathArgument=StringIO(..) + r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar' + +def test_XPath_class(): + root = etree.XML("TEXT") # $ decodeFormat=XML decodeInput="TEXT" decodeOutput=etree.XML(..) xmlVuln='XXE' + find_text = etree.XPath("path") # $ constructedXPath="path" + text = find_text(root)[0] + +def test_ETXpath_class(): + root = etree.XML("TEXT") # $ decodeFormat=XML decodeInput="TEXT" decodeOutput=etree.XML(..) xmlVuln='XXE' + find_text = etree.ETXPath("path") # $ constructedXPath="path" + text = find_text(root)[0] + +def test_XPathEvaluator_class(): + root = etree.XML("TEXT") # $ decodeFormat=XML decodeInput="TEXT" decodeOutput=etree.XML(..) xmlVuln='XXE' + search_root = etree.XPathEvaluator(root) + text = search_root("path")[0] # $ getXPath="path" diff --git a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py index 98bdaefac27b..bf7dd08185b7 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py +++ b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py @@ -2,7 +2,7 @@ ns = {'dc': 'http://purl.org/dc/elements/1.1/'} import xml.etree.ElementTree as ET -tree = ET.parse('country_data.xml') +tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='XML bomb' getAPathArgument='country_data.xml' root = tree.getroot() root.find(match, namespaces=ns) # $ getXPath=match @@ -10,8 +10,13 @@ root.findtext(match, default=None, namespaces=ns) # $ getXPath=match tree = ET.ElementTree() -tree.parse("index.xhtml") +tree.parse("index.xhtml") # $ decodeFormat=XML decodeInput="index.xhtml" decodeOutput=tree.parse(..) xmlVuln='XML bomb' getAPathArgument="index.xhtml" tree.find(match, namespaces=ns) # $ getXPath=match tree.findall(match, namespaces=ns) # $ getXPath=match tree.findtext(match, default=None, namespaces=ns) # $ getXPath=match + +parser = ET.XMLParser() +parser.feed("bar") # $ decodeFormat=XML decodeInput="bar" xmlVuln='XML bomb' +tree = parser.close() # $ decodeOutput=parser.close() +tree.find(match, namespaces=ns) # $ getXPath=match diff --git a/python/ql/test/library-tests/frameworks/stdlib/io_test.py b/python/ql/test/library-tests/frameworks/stdlib/io_test.py new file mode 100644 index 000000000000..98d60445e1c4 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/stdlib/io_test.py @@ -0,0 +1,47 @@ +from io import StringIO, BytesIO + +TAINTED_STRING = "TS" +TAINTED_BYTES = b"TB" + +def ensure_tainted(*args): + print("ensure_tainted") + for arg in args: + print("", repr(arg)) + + +def test_stringio(): + ts = TAINTED_STRING + + x = StringIO() + x.write(ts) + x.seek(0) + + ensure_tainted( + StringIO(ts), # $ tainted + StringIO(initial_value=ts), # $ tainted + x, # $ tainted + + x.read(), # $ tainted + StringIO(ts).read(), # $ tainted + ) + + +def test_bytesio(): + tb = TAINTED_BYTES + + x = BytesIO() + x.write(tb) + x.seek(0) + + ensure_tainted( + BytesIO(tb), # $ tainted + BytesIO(initial_bytes=tb), # $ tainted + x, # $ tainted + + x.read(), # $ tainted + BytesIO(tb).read(), # $ tainted + ) + + +test_stringio() +test_bytesio() diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py new file mode 100644 index 000000000000..8d511c517334 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py @@ -0,0 +1,31 @@ +from io import StringIO +import xml.dom.minidom +import xml.dom.pulldom +import xml.sax + +x = "some xml" + +# minidom +xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) + +xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parseString(..) +xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parseString(..) + + +# pulldom +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) + +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parseString(..) +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parseString(..) + + +# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) + +xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py new file mode 100644 index 000000000000..441f9adc87a1 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py @@ -0,0 +1,49 @@ +from io import StringIO +import xml.etree.ElementTree + +x = "some xml" + +# Parsing in different ways +xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..) +xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..) + +xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstringlist(..) +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstringlist(..) + +xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XML(..) +xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XML(..) + +xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XMLID(..) +xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XMLID(..) + +xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) + +xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) + +tree = xml.etree.ElementTree.ElementTree() +tree.parse("file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='XML bomb' decodeOutput=tree.parse(..) getAPathArgument="file.xml" +tree.parse(source="file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='XML bomb' decodeOutput=tree.parse(..) getAPathArgument="file.xml" + + +# With parsers (no options available to disable/enable security features) +parser = xml.etree.ElementTree.XMLParser() +xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..) + +# manual use of feed method +parser = xml.etree.ElementTree.XMLParser() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +parser.close() # $ decodeOutput=parser.close() + +# manual use of feed method on XMLPullParser +parser = xml.etree.ElementTree.XMLPullParser() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +parser.close() # $ decodeOutput=parser.close() + +# note: it's technically possible to use the thing wrapper func `fromstring` with an +# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it +# seems very unlikely that anyone would do this, so we have intentionally not added any +# tests for this. diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py new file mode 100644 index 000000000000..6199fd76cc10 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py @@ -0,0 +1,64 @@ +from io import StringIO +import xml.sax + +x = "some xml" + +class MainHandler(xml.sax.ContentHandler): + def __init__(self): + self._result = [] + + def characters(self, data): + self._result.append(data) + +xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) +xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) + +xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' + +parser = xml.sax.make_parser() +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) +parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) + +# You can make it vuln to both XXE and DTD retrieval by setting this flag +# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) + +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) + +# Forward Type Tracking test +def func(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) + else: + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) + +# make it vuln, then making it safe +# a bit of an edge-case, but is nice to be able to handle. +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) + +def check_conditional_assignment(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + else: + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) + +def check_conditional_assignment2(cond): + parser = xml.sax.make_parser() + if cond: + flag_value = True + else: + flag_value = False + parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected b/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.expected similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected rename to python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.expected diff --git a/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql new file mode 100644 index 000000000000..b557a0bccb69 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/library-tests/frameworks/xmltodict/test.py b/python/ql/test/library-tests/frameworks/xmltodict/test.py new file mode 100644 index 000000000000..ef236f7796c7 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/xmltodict/test.py @@ -0,0 +1,8 @@ +import xmltodict + +x = "some xml" + +xmltodict.parse(x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) +xmltodict.parse(xml_input=x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) + +xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xmltodict.parse(..) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected similarity index 58% rename from python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected rename to python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected index 25594b4ddaaf..004369d79cfd 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected @@ -2,9 +2,6 @@ edges | test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | | test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | | test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | -| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute | -| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript | -| test.py:13:19:13:45 | ControlFlowNode for Subscript | test.py:15:34:15:44 | ControlFlowNode for xml_content | | test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | | test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | | test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | @@ -13,15 +10,11 @@ nodes | test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | | test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | | test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | | test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | | test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | subpaths #select -| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against external entity expansion. | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against external entity expansion. | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref new file mode 100644 index 000000000000..62a3f7f22d97 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref @@ -0,0 +1 @@ +Security/CWE-611/Xxe.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py b/python/ql/test/query-tests/Security/CWE-611-Xxe/test.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/test.py rename to python/ql/test/query-tests/Security/CWE-611-Xxe/test.py diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected new file mode 100644 index 000000000000..15c439d07611 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected @@ -0,0 +1,12 @@ +edges +| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | +nodes +| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +subpaths +#select +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against uncontrolled entity expansion. | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref new file mode 100644 index 000000000000..c983b357446f --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref @@ -0,0 +1 @@ +Security/CWE-776/XmlBomb.ql diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/test.py b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/test.py new file mode 100644 index 000000000000..d9181c4cf346 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/test.py @@ -0,0 +1,30 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.route("/vuln-handler") +def vuln_handler(): + xml_content = request.args['xml_content'] + return lxml.etree.fromstring(xml_content).text + +@app.route("/safe-handler") +def safe_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/super-vuln-handler") +def super_vuln_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser( + # allows XXE + resolve_entities=True, + # allows remote XXE + no_network=False, + # together with `no_network=False`, allows DTD-retrival + load_dtd=True, + # allows DoS attacks + huge_tree=True, + ) + return lxml.etree.fromstring(xml_content, parser=parser).text