From 65907c97620e2824ffbd008db54a98d9ee1a1060 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 24 Mar 2022 14:13:12 +0100 Subject: [PATCH 01/51] Python: Copy Xxe/XmlBomb queries from JS After internal discussion, these will replace the `XmlEntityInjection` query, so we can have separate severities on DoS and the other (more serious) attacks. Note: These clearly don't work, since they are verbatim copies of the JS code, but I split it into multiple commits to clearly highlight what changes were made. --- .../Security/NEW/CWE-611/Xxe.qhelp | 57 ++++++++++++++++++ .../experimental/Security/NEW/CWE-611/Xxe.ql | 23 +++++++ .../Security/NEW/CWE-611/examples/Xxe.js | 7 +++ .../Security/NEW/CWE-611/examples/XxeGood.js | 7 +++ .../Security/NEW/CWE-776/XmlBomb.qhelp | 60 +++++++++++++++++++ .../Security/NEW/CWE-776/XmlBomb.ql | 23 +++++++ .../Security/NEW/CWE-776/examples/XmlBomb.js | 10 ++++ .../NEW/CWE-776/examples/XmlBombGood.js | 10 ++++ .../dataflow/XmlBombCustomizations.qll | 49 +++++++++++++++ .../python/security/dataflow/XmlBombQuery.qll | 27 +++++++++ .../security/dataflow/XxeCustomizations.qll | 52 ++++++++++++++++ .../python/security/dataflow/XxeQuery.qll | 27 +++++++++ 12 files changed, 352 insertions(+) create mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp create mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql create mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp create mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql create mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js create mode 100644 
python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll create mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll create mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll create mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp new file mode 100644 index 000000000000..1e859eb121fa --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp @@ -0,0 +1,57 @@ + + + + +

+Parsing untrusted XML files with a weakly configured XML parser may lead to an +XML External Entity (XXE) attack. This type of attack uses external entity references +to access arbitrary files on a system, carry out denial-of-service (DoS) attacks, or perform server-side +request forgery. Even when the result of parsing is not returned to the user, DoS attacks are still possible +and out-of-band data retrieval techniques may allow attackers to steal sensitive data. +

+
+ + +

+The easiest way to prevent XXE attacks is to disable external entity handling when +parsing untrusted data. How this is done depends on the library being used. Note that some +libraries, such as recent versions of libxml, disable entity expansion by default, +so unless you have explicitly enabled entity expansion, no further action needs to be taken. +

+
+ + +

+The following example uses the libxml XML parser to parse a string xmlSrc. +If that string is from an untrusted source, this code may be vulnerable to an XXE attack, since +the parser is invoked with the noent option set to true: +

+ + +

+To guard against XXE attacks, the noent option should be omitted or set to +false. This means that no entity expansion is undertaken at all, not even for standard +internal entities such as &amp; or &gt;. If desired, these +entities can be expanded in a separate step using utility functions provided by libraries such +as underscore, +lodash or +he. +

+ +
+ + +
  • +OWASP: +XML External Entity (XXE) Processing. +
  • +
  • +Timothy Morgan: +XML Schema, DTD, and Entity Attacks. +
  • +
  • +Timur Yunusov, Alexey Osipov: +XML Out-Of-Band Data Retrieval. +
  • +
    +
    diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql new file mode 100644 index 000000000000..01e518b6df7b --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql @@ -0,0 +1,23 @@ +/** + * @name XML external entity expansion + * @description Parsing user input as an XML document with external + * entity expansion is vulnerable to XXE attacks. + * @kind path-problem + * @problem.severity error + * @security-severity 9.1 + * @precision high + * @id js/xxe + * @tags security + * external/cwe/cwe-611 + * external/cwe/cwe-827 + */ + +import javascript +import semmle.javascript.security.dataflow.XxeQuery +import DataFlow::PathGraph + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, + "A $@ is parsed as XML without guarding against external entity expansion.", source.getNode(), + "user-provided value" diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js b/python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js new file mode 100644 index 000000000000..99fa02cc42f6 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js @@ -0,0 +1,7 @@ +const app = require("express")(), + libxml = require("libxmljs"); + +app.post("upload", (req, res) => { + let xmlSrc = req.body, + doc = libxml.parseXml(xmlSrc, { noent: true }); +}); diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js new file mode 100644 index 000000000000..8317dcac98f9 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js @@ -0,0 +1,7 @@ +const app = require("express")(), + libxml = require("libxmljs"); + +app.post("upload", (req, res) => { + let xmlSrc = req.body, + doc = libxml.parseXml(xmlSrc); +}); diff --git 
a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp new file mode 100644 index 000000000000..c0714b3f96f9 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp @@ -0,0 +1,60 @@ + + + + +

    +Parsing untrusted XML files with a weakly configured XML parser may be vulnerable to +denial-of-service (DoS) attacks exploiting uncontrolled internal entity expansion. +

    +

    +In XML, so-called internal entities are a mechanism for introducing an abbreviation +for a piece of text or part of a document. When a parser that has been configured +to expand entities encounters a reference to an internal entity, it replaces the entity +by the data it represents. The replacement text may itself contain other entity references, +which are expanded recursively. This means that entity expansion can increase document size +dramatically. +

    +

    +If untrusted XML is parsed with entity expansion enabled, a malicious attacker could +submit a document that contains very deeply nested entity definitions, causing the parser +to take a very long time or use large amounts of memory. This is sometimes called an +XML bomb attack. +

    +
    + + +

    +The safest way to prevent XML bomb attacks is to disable entity expansion when parsing untrusted +data. How this is done depends on the library being used. Note that some libraries, such as +recent versions of libxmljs (though not its SAX parser API), disable entity expansion +by default, so unless you have explicitly enabled entity expansion, no further action is needed. +

    +
    + + +

    +The following example uses the XML parser provided by the node-expat package to +parse a string xmlSrc. If that string is from an untrusted source, this code may be +vulnerable to a DoS attack, since node-expat expands internal entities by default: +

    + + +

    +At the time of writing, node-expat does not provide a way of controlling entity +expansion, but the example could be rewritten to use the sax package instead, +which only expands standard entities such as &amp;: +

    + +
    + + +
  • +Wikipedia: +Billion Laughs. +
  • +
  • +Bryan Sullivan: +Security Briefs - XML Denial of Service Attacks and Defenses. +
  • +
    +
    diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql new file mode 100644 index 000000000000..c340eee68cce --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql @@ -0,0 +1,23 @@ +/** + * @name XML internal entity expansion + * @description Parsing user input as an XML document with arbitrary internal + * entity expansion is vulnerable to denial-of-service attacks. + * @kind path-problem + * @problem.severity warning + * @security-severity 7.5 + * @precision high + * @id js/xml-bomb + * @tags security + * external/cwe/cwe-776 + * external/cwe/cwe-400 + */ + +import javascript +import semmle.javascript.security.dataflow.XmlBombQuery +import DataFlow::PathGraph + +from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink +where cfg.hasFlowPath(source, sink) +select sink.getNode(), source, sink, + "A $@ is parsed as XML without guarding against uncontrolled entity expansion.", source.getNode(), + "user-provided value" diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js new file mode 100644 index 000000000000..f72902a53041 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js @@ -0,0 +1,10 @@ +const app = require("express")(), + expat = require("node-expat"); + +app.post("upload", (req, res) => { + let xmlSrc = req.body, + parser = new expat.Parser(); + parser.on("startElement", handleStart); + parser.on("text", handleText); + parser.write(xmlSrc); +}); diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js new file mode 100644 index 000000000000..a8c5bc97e631 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js @@ -0,0 +1,10 @@ +const app = require("express")(), + sax = 
require("sax"); + +app.post("upload", (req, res) => { + let xmlSrc = req.body, + parser = sax.parser(true); + parser.onopentag = handleStart; + parser.ontext = handleText; + parser.write(xmlSrc); +}); diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll new file mode 100644 index 000000000000..1d159b057ad0 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -0,0 +1,49 @@ +/** + * Provides default sources, sinks and sanitizers for reasoning about + * XML-bomb vulnerabilities, as well as extension points for adding + * your own. + */ + +import javascript +import semmle.javascript.security.dataflow.DOM + +module XmlBomb { + /** + * A data flow source for XML-bomb vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for XML-bomb vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for XML-bomb vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** A source of remote user input, considered as a flow source for XML bomb vulnerabilities. */ + class RemoteFlowSourceAsSource extends Source { + RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } + } + + /** + * An access to `document.location`, considered as a flow source for XML bomb vulnerabilities. + */ + class LocationAsSource extends Source, DataFlow::ValueNode { + LocationAsSource() { isLocation(astNode) } + } + + /** + * A call to an XML parser that performs internal entity expansion, viewed + * as a data flow sink for XML-bomb vulnerabilities. 
+ */ + class XmlParsingWithEntityResolution extends Sink, DataFlow::ValueNode { + XmlParsingWithEntityResolution() { + exists(XML::ParserInvocation parse | astNode = parse.getSourceArgument() | + parse.resolvesEntities(XML::InternalEntity()) + ) + } + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll new file mode 100644 index 000000000000..951b927f86ef --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll @@ -0,0 +1,27 @@ +/** + * Provides a taint tracking configuration for reasoning about + * XML-bomb vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `XmlBomb::Configuration` is needed, otherwise + * `XmlBombCustomizations` should be imported instead. + */ + +import javascript +import XmlBombCustomizations::XmlBomb + +/** + * A taint-tracking configuration for reasoning about XML-bomb vulnerabilities. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "XmlBomb" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof Sanitizer + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll new file mode 100644 index 000000000000..4e7bb5e730c9 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -0,0 +1,52 @@ +/** + * Provides default sources, sinks and sanitizers for reasoning about + * XML External Entity (XXE) vulnerabilities, as well as extension + * points for adding your own. 
+ */ + +import javascript +import semmle.javascript.security.dataflow.DOM + +module Xxe { + /** + * A data flow source for XXE vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for XXE vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { } + + /** + * A sanitizer for XXE vulnerabilities. + */ + abstract class Sanitizer extends DataFlow::Node { } + + /** A source of remote user input, considered as a flow source for XXE vulnerabilities. */ + class RemoteFlowSourceAsSource extends Source { + RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } + } + + /** + * An access to `document.location`, considered as a flow source for XXE vulnerabilities. + */ + class LocationAsSource extends Source, DataFlow::ValueNode { + LocationAsSource() { isLocation(astNode) } + } + + /** + * A call to an XML parser that performs external entity expansion, viewed + * as a data flow sink for XXE vulnerabilities. + */ + class XmlParsingWithExternalEntityResolution extends Sink, DataFlow::ValueNode { + XmlParsingWithExternalEntityResolution() { + exists(XML::ParserInvocation parse | astNode = parse.getSourceArgument() | + parse.resolvesEntities(XML::ExternalEntity(_)) + or + parse.resolvesEntities(XML::ParameterEntity(true)) and + parse.resolvesEntities(XML::InternalEntity()) + ) + } + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll new file mode 100644 index 000000000000..82d3fb4f6cc1 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll @@ -0,0 +1,27 @@ +/** + * Provides a taint tracking configuration for reasoning about XML + * External Entity (XXE) vulnerabilities. + * + * Note, for performance reasons: only import this file if + * `Xxe::Configuration` is needed, otherwise `XxeCustomizations` + * should be imported instead. 
+ */ + +import javascript +import XxeCustomizations::Xxe + +/** + * A taint-tracking configuration for reasoning about XXE vulnerabilities. + */ +class Configuration extends TaintTracking::Configuration { + Configuration() { this = "Xxe" } + + override predicate isSource(DataFlow::Node source) { source instanceof Source } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizer(DataFlow::Node node) { + super.isSanitizer(node) or + node instanceof Sanitizer + } +} From e45f9d69ccb44a2109518f3c8334e21f5c193a43 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 24 Mar 2022 14:15:54 +0100 Subject: [PATCH 02/51] Python: Adjust Xxe/XmlBomb for Python I changed a few QLdocs so they fit the style we have used in Python... although I surely do regret having introduced a new style for how these QLDocs look :D --- .../experimental/Security/NEW/CWE-611/Xxe.ql | 6 ++-- .../Security/NEW/CWE-776/XmlBomb.ql | 6 ++-- .../dataflow/XmlBombCustomizations.qll | 31 +++++++++-------- .../python/security/dataflow/XmlBombQuery.qll | 11 +++--- .../security/dataflow/XxeCustomizations.qll | 34 +++++++++---------- .../python/security/dataflow/XxeQuery.qll | 13 +++---- 6 files changed, 51 insertions(+), 50 deletions(-) diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql index 01e518b6df7b..f706ea6e909c 100644 --- a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql +++ b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql @@ -6,14 +6,14 @@ * @problem.severity error * @security-severity 9.1 * @precision high - * @id js/xxe + * @id py/xxe * @tags security * external/cwe/cwe-611 * external/cwe/cwe-827 */ -import javascript -import semmle.javascript.security.dataflow.XxeQuery +import python +import experimental.semmle.python.security.dataflow.XxeQuery import DataFlow::PathGraph from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink 
diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql index c340eee68cce..2a1ea5916c44 100644 --- a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql +++ b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql @@ -6,14 +6,14 @@ * @problem.severity warning * @security-severity 7.5 * @precision high - * @id js/xml-bomb + * @id py/xml-bomb * @tags security * external/cwe/cwe-776 * external/cwe/cwe-400 */ -import javascript -import semmle.javascript.security.dataflow.XmlBombQuery +import python +import experimental.semmle.python.security.dataflow.XmlBombQuery import DataFlow::PathGraph from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index 1d159b057ad0..66a16a4494af 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -1,12 +1,18 @@ /** - * Provides default sources, sinks and sanitizers for reasoning about - * XML-bomb vulnerabilities, as well as extension points for adding - * your own. + * Provides default sources, sinks and sanitizers for detecting + * "XML bomb" + * vulnerabilities, as well as extension points for adding your own. */ -import javascript -import semmle.javascript.security.dataflow.DOM +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources +/** + * Provides default sources, sinks and sanitizers for detecting "XML bomb" + * vulnerabilities, as well as extension points for adding your own. + */ module XmlBomb { /** * A data flow source for XML-bomb vulnerabilities. 
@@ -28,21 +34,16 @@ module XmlBomb { RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } } - /** - * An access to `document.location`, considered as a flow source for XML bomb vulnerabilities. - */ - class LocationAsSource extends Source, DataFlow::ValueNode { - LocationAsSource() { isLocation(astNode) } - } - /** * A call to an XML parser that performs internal entity expansion, viewed * as a data flow sink for XML-bomb vulnerabilities. */ - class XmlParsingWithEntityResolution extends Sink, DataFlow::ValueNode { + class XmlParsingWithEntityResolution extends Sink { XmlParsingWithEntityResolution() { - exists(XML::ParserInvocation parse | astNode = parse.getSourceArgument() | - parse.resolvesEntities(XML::InternalEntity()) + exists(ExperimentalXML::XMLParsing parsing, ExperimentalXML::XMLVulnerabilityKind kind | + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + parsing.vulnerableTo(kind) and + this = parsing.getAnInput() ) } } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll index 951b927f86ef..d0c0b85d84f1 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll @@ -1,17 +1,18 @@ /** - * Provides a taint tracking configuration for reasoning about - * XML-bomb vulnerabilities. + * Provides a taint-tracking configuration for detecting "XML bomb" vulnerabilities. * * Note, for performance reasons: only import this file if - * `XmlBomb::Configuration` is needed, otherwise + * `Configuration` is needed, otherwise * `XmlBombCustomizations` should be imported instead. */ -import javascript +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking import XmlBombCustomizations::XmlBomb /** - * A taint-tracking configuration for reasoning about XML-bomb vulnerabilities. 
+ * A taint-tracking configuration for detecting "XML bomb" vulnerabilities. */ class Configuration extends TaintTracking::Configuration { Configuration() { this = "XmlBomb" } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index 4e7bb5e730c9..b2992dd335f1 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -1,12 +1,18 @@ /** - * Provides default sources, sinks and sanitizers for reasoning about - * XML External Entity (XXE) vulnerabilities, as well as extension - * points for adding your own. + * Provides default sources, sinks and sanitizers for detecting + * "XML External Entity (XXE)" + * vulnerabilities, as well as extension points for adding your own. */ -import javascript -import semmle.javascript.security.dataflow.DOM +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources +/** + * Provides default sources, sinks and sanitizers for detecting "XML External Entity (XXE)" + * vulnerabilities, as well as extension points for adding your own. + */ module Xxe { /** * A data flow source for XXE vulnerabilities. @@ -28,24 +34,16 @@ module Xxe { RemoteFlowSourceAsSource() { this instanceof RemoteFlowSource } } - /** - * An access to `document.location`, considered as a flow source for XXE vulnerabilities. - */ - class LocationAsSource extends Source, DataFlow::ValueNode { - LocationAsSource() { isLocation(astNode) } - } - /** * A call to an XML parser that performs external entity expansion, viewed * as a data flow sink for XXE vulnerabilities. 
*/ - class XmlParsingWithExternalEntityResolution extends Sink, DataFlow::ValueNode { + class XmlParsingWithExternalEntityResolution extends Sink { XmlParsingWithExternalEntityResolution() { - exists(XML::ParserInvocation parse | astNode = parse.getSourceArgument() | - parse.resolvesEntities(XML::ExternalEntity(_)) - or - parse.resolvesEntities(XML::ParameterEntity(true)) and - parse.resolvesEntities(XML::InternalEntity()) + exists(ExperimentalXML::XMLParsing parsing, ExperimentalXML::XMLVulnerabilityKind kind | + kind.isXxe() and + parsing.vulnerableTo(kind) and + this = parsing.getAnInput() ) } } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll index 82d3fb4f6cc1..dd2409f2a3ce 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll @@ -1,17 +1,18 @@ /** - * Provides a taint tracking configuration for reasoning about XML - * External Entity (XXE) vulnerabilities. + * Provides a taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities. * * Note, for performance reasons: only import this file if - * `Xxe::Configuration` is needed, otherwise `XxeCustomizations` - * should be imported instead. + * `Configuration` is needed, otherwise + * `XxeCustomizations` should be imported instead. */ -import javascript +import python +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking import XxeCustomizations::Xxe /** - * A taint-tracking configuration for reasoning about XXE vulnerabilities. + * A taint-tracking configuration for detecting "XML External Entity (XXE)" vulnerabilities. 
*/ class Configuration extends TaintTracking::Configuration { Configuration() { this = "Xxe" } From 91795b857756a4912e6a280e4e53f65f4fbaf76a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 24 Mar 2022 14:16:38 +0100 Subject: [PATCH 03/51] Python: Add simple test of Xxe/XmlBomb Note that most of the testing happens in the framework specific tests, with an inline-expectation test --- .../Security/CWE-611-Xxe/Xxe.expected | 20 +++++++++++++ .../Security/CWE-611-Xxe/Xxe.qlref | 1 + .../query-tests/Security/CWE-611-Xxe/test.py | 30 +++++++++++++++++++ .../Security/CWE-776-XmlBomb/XmlBomb.expected | 12 ++++++++ .../Security/CWE-776-XmlBomb/XmlBomb.qlref | 1 + .../Security/CWE-776-XmlBomb/test.py | 30 +++++++++++++++++++ 6 files changed, 94 insertions(+) create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.expected create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/test.py create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref create mode 100644 python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/test.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.expected b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.expected new file mode 100644 index 000000000000..004369d79cfd --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.expected @@ -0,0 +1,20 @@ +edges +| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | +| 
test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | +nodes +| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +subpaths +#select +| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against external entity expansion. | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against external entity expansion. 
| test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref new file mode 100644 index 000000000000..f8a07d7d2ee3 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref @@ -0,0 +1 @@ +experimental/Security/NEW/CWE-611/Xxe.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/test.py b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/test.py new file mode 100644 index 000000000000..d9181c4cf346 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/test.py @@ -0,0 +1,30 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.route("/vuln-handler") +def vuln_handler(): + xml_content = request.args['xml_content'] + return lxml.etree.fromstring(xml_content).text + +@app.route("/safe-handler") +def safe_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/super-vuln-handler") +def super_vuln_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser( + # allows XXE + resolve_entities=True, + # allows remote XXE + no_network=False, + # together with `no_network=False`, allows DTD-retrival + load_dtd=True, + # allows DoS attacks + huge_tree=True, + ) + return lxml.etree.fromstring(xml_content, parser=parser).text diff --git a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected new file mode 100644 index 000000000000..15c439d07611 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected @@ -0,0 +1,12 @@ +edges +| test.py:19:19:19:25 | ControlFlowNode for 
request | test.py:19:19:19:30 | ControlFlowNode for Attribute | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | +nodes +| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +subpaths +#select +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | A $@ is parsed as XML without guarding against uncontrolled entity expansion. | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref new file mode 100644 index 000000000000..5eadbb1f26f9 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref @@ -0,0 +1 @@ +experimental/Security/NEW/CWE-776/XmlBomb.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/test.py b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/test.py new file mode 100644 index 000000000000..d9181c4cf346 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/test.py @@ -0,0 +1,30 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.route("/vuln-handler") +def vuln_handler(): + xml_content = request.args['xml_content'] + return lxml.etree.fromstring(xml_content).text + 
+@app.route("/safe-handler") +def safe_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/super-vuln-handler") +def super_vuln_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser( + # allows XXE + resolve_entities=True, + # allows remote XXE + no_network=False, + # together with `no_network=False`, allows DTD-retrival + load_dtd=True, + # allows DoS attacks + huge_tree=True, + ) + return lxml.etree.fromstring(xml_content, parser=parser).text From a1d88e39a77f4c16ca0e292ca5e6311828745b2e Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 24 Mar 2022 15:36:20 +0100 Subject: [PATCH 04/51] Python: Adjust XXE PoC for newer lxml versions Which doesn't raise that syntax error (at least not on my laptop) --- .../experimental/library-tests/frameworks/XML/poc/PoC.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py index adcace1aa0a6..77d6c0326830 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py @@ -361,11 +361,7 @@ def test_remote_xxe_disabled_by_default(): hit_xxe = False parser = lxml.etree.XMLParser() - try: - root = lxml.etree.fromstring(remote_xxe, parser=parser) - assert False - except lxml.etree.XMLSyntaxError as e: - assert "Failure to process entity remote_xxe" in str(e) + root = lxml.etree.fromstring(remote_xxe, parser=parser) assert hit_xxe == False @staticmethod From 57b97804283545dbe986c019660ae5171ba8e7ed Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 24 Mar 2022 15:37:14 +0100 Subject: [PATCH 05/51] Python: XXE: Add example of exfiltrating data through dtd-retrival --- 
.../library-tests/frameworks/XML/poc/PoC.py | 32 ++++++++++++++++++- .../library-tests/frameworks/XML/poc/flag | 2 +- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py index 77d6c0326830..b4cb2faf3044 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py @@ -70,6 +70,10 @@ bar """ +exfiltrate_through_dtd_retrieval = f""" + %xxe; ]> +""" + # ============================================================================== # other setup @@ -95,6 +99,22 @@ def test_xxe(): hit_xxe = True return "ok" +@app.route("/exfiltrate-through.dtd") +def exfiltrate_through_dtd(): + return f""" +"> +%eval; +%exfiltrate; + """ + +exfiltrated_data = None +@app.route("/exfiltrate-data") +def exfiltrate_data(): + from flask import request + global exfiltrated_data + exfiltrated_data = request.args["data"] + return "ok" + def run_app(): app.run(host=HOST, port=PORT) @@ -346,7 +366,7 @@ def test_local_xxe_enabled_by_default(): parser = lxml.etree.XMLParser() root = lxml.etree.fromstring(local_xxe, parser=parser) assert root.tag == "test" - assert root.text == "SECRET_FLAG\n", root.text + assert root.text == "SECRET_FLAG", root.text @staticmethod def test_local_xxe_disabled(): @@ -412,6 +432,16 @@ def test_dtd_manually_enabled(): pass assert hit_dtd == False + @staticmethod + def test_exfiltrate_through_dtd(): + # note that this only works when the data to exfiltrate does not contain a newline :| + global exfiltrated_data + exfiltrated_data = None + parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) + with pytest.raises(lxml.etree.XMLSyntaxError): + lxml.etree.fromstring(exfiltrate_through_dtd_retrieval, parser=parser) + + assert exfiltrated_data == "SECRET_FLAG" # 
============================================================================== diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag index 45c9436ee9f2..b8bd68387749 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag @@ -1 +1 @@ -SECRET_FLAG +SECRET_FLAG \ No newline at end of file From 769f5691d08dd8288e4eb6432e163b0a53c8ac21 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 17:18:06 +0200 Subject: [PATCH 06/51] Python: Add taint for `StringIO` and `BytesIO` --- .../2022-03-29-add-taint-for-StringIO.md | 4 ++ .../lib/semmle/python/frameworks/Stdlib.qll | 58 +++++++++++++++++++ .../frameworks/stdlib/io_test.py | 47 +++++++++++++++ 3 files changed, 109 insertions(+) create mode 100644 python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md create mode 100644 python/ql/test/library-tests/frameworks/stdlib/io_test.py diff --git a/python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md b/python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md new file mode 100644 index 000000000000..7857e6f9ca6d --- /dev/null +++ b/python/ql/lib/change-notes/2022-03-29-add-taint-for-StringIO.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112). 
diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 22dce5427ae6..234a8802f0f4 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3116,6 +3116,64 @@ private module StdlibPrivate { result in [this.getArg(0), this.getArgByName("path")] } } + + // --------------------------------------------------------------------------- + // io + // --------------------------------------------------------------------------- + /** + * Provides models for the `io.StringIO`/`io.BytesIO` classes + * + * See https://docs.python.org/3.10/library/io.html#io.StringIO. + */ + module StringIO { + /** Gets a reference to the `io.StringIO` class. */ + private API::Node classRef() { + result = API::moduleImport("io").getMember(["StringIO", "BytesIO"]) + } + + /** + * A source of instances of `io.StringIO`/`io.BytesIO`, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `StringIO::instance()` to get references to instances of `io.StringIO`. + */ + abstract class InstanceSource extends Stdlib::FileLikeObject::InstanceSource { } + + /** A direct instantiation of `io.StringIO`/`io.BytesIO`. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { this = classRef().getACall() } + + DataFlow::Node getInitialValue() { + result = this.getArg(0) + or + // `initial_value` for StringIO, `initial_bytes` for BytesIO + result = this.getArgByName(["initial_value", "initial_bytes"]) + } + } + + /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. 
*/ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an instance of `io.StringIO`/`io.BytesIO`. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * Extra taint propagation for `io.StringIO`/`io.BytesIO`. + */ + private class AdditionalTaintStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + nodeTo.(ClassInstantiation).getInitialValue() = nodeFrom + } + } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/io_test.py b/python/ql/test/library-tests/frameworks/stdlib/io_test.py new file mode 100644 index 000000000000..98d60445e1c4 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/stdlib/io_test.py @@ -0,0 +1,47 @@ +from io import StringIO, BytesIO + +TAINTED_STRING = "TS" +TAINTED_BYTES = b"TB" + +def ensure_tainted(*args): + print("ensure_tainted") + for arg in args: + print("", repr(arg)) + + +def test_stringio(): + ts = TAINTED_STRING + + x = StringIO() + x.write(ts) + x.seek(0) + + ensure_tainted( + StringIO(ts), # $ tainted + StringIO(initial_value=ts), # $ tainted + x, # $ tainted + + x.read(), # $ tainted + StringIO(ts).read(), # $ tainted + ) + + +def test_bytesio(): + tb = TAINTED_BYTES + + x = BytesIO() + x.write(tb) + x.seek(0) + + ensure_tainted( + BytesIO(tb), # $ tainted + BytesIO(initial_bytes=tb), # $ tainted + x, # $ tainted + + x.read(), # $ tainted + BytesIO(tb).read(), # $ tainted + ) + + +test_stringio() +test_bytesio() From c3653378671f7e8c39c20993f16f64224945bf97 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 11:20:38 +0200 Subject: [PATCH 07/51] Python: Delete `XmlEntityInjection.ql` Kept the test of 
SimpleXmlRpcServer, and kept the qhelp so it can be used to write the new qhelp files --- .../src/experimental/Security/CWE-611/XXE.xml | 4 - .../Security/CWE-611/XmlEntityInjection.py | 25 ------ .../Security/CWE-611/XmlEntityInjection.ql | 31 ------- .../{CWE-611 => NEW}/XmlEntityInjection.qhelp | 0 .../security/dataflow/XmlEntityInjection.qll | 28 ------ .../XmlEntityInjectionCustomizations.qll | 86 ------------------- .../SimpleXmlRpcServer.expected | 0 .../SimpleXmlRpcServer.qlref | 0 .../xmlrpc_server.py | 0 .../CWE-611/XmlEntityInjection.expected | 27 ------ .../Security/CWE-611/XmlEntityInjection.qlref | 1 - .../query-tests/Security/CWE-611/test.py | 30 ------- 12 files changed, 232 deletions(-) delete mode 100644 python/ql/src/experimental/Security/CWE-611/XXE.xml delete mode 100644 python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py delete mode 100644 python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql rename python/ql/src/experimental/Security/{CWE-611 => NEW}/XmlEntityInjection.qhelp (100%) delete mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll delete mode 100644 python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll rename python/ql/test/experimental/query-tests/Security/{CWE-611 => CWE-611-SimpleXmlRpcServer}/SimpleXmlRpcServer.expected (100%) rename python/ql/test/experimental/query-tests/Security/{CWE-611 => CWE-611-SimpleXmlRpcServer}/SimpleXmlRpcServer.qlref (100%) rename python/ql/test/experimental/query-tests/Security/{CWE-611 => CWE-611-SimpleXmlRpcServer}/xmlrpc_server.py (100%) delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611/test.py diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.xml 
b/python/ql/src/experimental/Security/CWE-611/XXE.xml deleted file mode 100644 index ddd196f2f137..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XXE.xml +++ /dev/null @@ -1,4 +0,0 @@ - -]> -&xxe; \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py deleted file mode 100644 index 0e9eec933d7d..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py +++ /dev/null @@ -1,25 +0,0 @@ -from flask import request, Flask -import lxml.etree -import xml.etree.ElementTree - -app = Flask(__name__) - -# BAD -@app.route("/bad") -def bad(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser() - parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) - - return parsed_xml.text - -# GOOD -@app.route("/good") -def good(): - xml_content = request.args['xml_content'] - - parser = lxml.etree.XMLParser(resolve_entities=False) - parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) - - return parsed_xml.text \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql deleted file mode 100644 index 922ca346b173..000000000000 --- a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql +++ /dev/null @@ -1,31 +0,0 @@ -/** - * @name XML Entity injection - * @description User input should not be parsed allowing the injection of entities. 
- * @kind path-problem - * @problem.severity error - * @id py/xml-entity-injection - * @tags security - * external/cwe/cwe-611 - * external/cwe/cwe-776 - * external/cwe/cwe-827 - */ - -// determine precision above -import python -import experimental.semmle.python.security.dataflow.XmlEntityInjection -import DataFlow::PathGraph - -from - XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, - DataFlow::PathNode sink, string kinds -where - config.hasFlowPath(source, sink) and - kinds = - strictconcat(string kind | - kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() - | - kind, ", " - ) -select sink.getNode(), source, sink, - "$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(), - "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp b/python/ql/src/experimental/Security/NEW/XmlEntityInjection.qhelp similarity index 100% rename from python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp rename to python/ql/src/experimental/Security/NEW/XmlEntityInjection.qhelp diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll deleted file mode 100644 index 35220e153d12..000000000000 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll +++ /dev/null @@ -1,28 +0,0 @@ -import python -import experimental.semmle.python.Concepts -import semmle.python.dataflow.new.DataFlow -import semmle.python.dataflow.new.TaintTracking -import semmle.python.dataflow.new.RemoteFlowSources -import semmle.python.dataflow.new.BarrierGuards - -module XmlEntityInjection { - import XmlEntityInjectionCustomizations::XmlEntityInjection - - class XmlEntityInjectionConfiguration extends TaintTracking::Configuration { - XmlEntityInjectionConfiguration() { this = 
"XmlEntityInjectionConfiguration" } - - override predicate isSource(DataFlow::Node source) { - source instanceof RemoteFlowSourceAsSource - } - - override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } - - override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { - guard instanceof SanitizerGuard - } - - override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - any(AdditionalTaintStep s).step(nodeFrom, nodeTo) - } - } -} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll deleted file mode 100644 index e420c738a978..000000000000 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Provides default sources, sinks and sanitizers for detecting - * "ldap injection" - * vulnerabilities, as well as extension points for adding your own. - */ - -private import python -private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts -private import semmle.python.dataflow.new.RemoteFlowSources -private import semmle.python.dataflow.new.BarrierGuards -private import semmle.python.ApiGraphs - -/** - * Provides default sources, sinks and sanitizers for detecting "xml injection" - * vulnerabilities, as well as extension points for adding your own. - */ -module XmlEntityInjection { - /** - * A data flow source for "xml injection" vulnerabilities. - */ - abstract class Source extends DataFlow::Node { } - - /** - * A data flow sink for "xml injection" vulnerabilities. - */ - abstract class Sink extends DataFlow::Node { - /** Gets the kind of XML injection that this sink is vulnerable to. */ - abstract string getVulnerableKind(); - } - - /** - * A sanitizer guard for "xml injection" vulnerabilities. 
- */ - abstract class SanitizerGuard extends DataFlow::BarrierGuard { } - - /** - * A unit class for adding additional taint steps. - * - * Extend this class to add additional taint steps that should apply to `XmlEntityInjection` - * taint configuration. - */ - class AdditionalTaintStep extends Unit { - /** - * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint - * step for `XmlEntityInjection` configuration. - */ - abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); - } - - /** - * An input to a direct XML parsing function, considered as a flow sink. - * - * See `XML::XMLParsing`. - */ - class XMLParsingInputAsSink extends Sink { - ExperimentalXML::XMLParsing xmlParsing; - - XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } - - override string getVulnerableKind() { xmlParsing.vulnerableTo(result) } - } - - /** - * A source of remote user input, considered as a flow source. - */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } - - /** - * A comparison with a constant string, considered as a sanitizer-guard. - */ - class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } - - /** - * A taint step for `io`'s `StringIO` and `BytesIO` methods. 
- */ - class IoAdditionalTaintStep extends AdditionalTaintStep { - override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { - exists(DataFlow::CallCfgNode ioCalls | - ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and - nodeFrom = ioCalls.getArg(0) and - nodeTo = ioCalls - ) - } - } -} diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected rename to python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.qlref similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref rename to python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.qlref diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/xmlrpc_server.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py rename to python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/xmlrpc_server.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected deleted file mode 100644 index 25594b4ddaaf..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected +++ /dev/null @@ -1,27 +0,0 @@ -edges -| test.py:8:19:8:25 | 
ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | -| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | -| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | -| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute | -| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript | -| test.py:13:19:13:45 | ControlFlowNode for Subscript | test.py:15:34:15:44 | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | -| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | -| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | -nodes -| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | -| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | -| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | 
-| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | -subpaths -#select -| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | -| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref deleted file mode 100644 index 36a7c8845fb7..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/Security/CWE-611/XmlEntityInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py deleted file mode 100644 index d9181c4cf346..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py +++ /dev/null @@ -1,30 +0,0 @@ -from flask import Flask, request -import lxml.etree - -app = Flask(__name__) - -@app.route("/vuln-handler") -def vuln_handler(): - xml_content = request.args['xml_content'] - return lxml.etree.fromstring(xml_content).text - -@app.route("/safe-handler") -def safe_handler(): - 
xml_content = request.args['xml_content'] - parser = lxml.etree.XMLParser(resolve_entities=False) - return lxml.etree.fromstring(xml_content, parser=parser).text - -@app.route("/super-vuln-handler") -def super_vuln_handler(): - xml_content = request.args['xml_content'] - parser = lxml.etree.XMLParser( - # allows XXE - resolve_entities=True, - # allows remote XXE - no_network=False, - # together with `no_network=False`, allows DTD-retrival - load_dtd=True, - # allows DoS attacks - huge_tree=True, - ) - return lxml.etree.fromstring(xml_content, parser=parser).text From b00766b054d1b58a06dce48bd631a5b0eaacb7b7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 13:51:00 +0200 Subject: [PATCH 08/51] Python: Adjust XXE qhelp and remove the old copy, we don't need it anymore :) --- .../Security/NEW/CWE-611/Xxe.qhelp | 39 ++++++++++----- .../Security/NEW/CWE-611/examples/Xxe.js | 7 --- .../Security/NEW/CWE-611/examples/XxeBad.py | 10 ++++ .../Security/NEW/CWE-611/examples/XxeGood.js | 7 --- .../Security/NEW/CWE-611/examples/XxeGood.py | 11 +++++ .../Security/NEW/XmlEntityInjection.qhelp | 48 ------------------- .../library-tests/frameworks/XML/poc/PoC.py | 11 +++++ 7 files changed, 58 insertions(+), 75 deletions(-) delete mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeBad.py delete mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.py delete mode 100644 python/ql/src/experimental/Security/NEW/XmlEntityInjection.qhelp diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp index 1e859eb121fa..7254e292309f 100644 --- a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp +++ b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp @@ -15,29 +15,34 @@ 
and out-of-band data retrieval techniques may allow attackers to steal sensitive

    The easiest way to prevent XXE attacks is to disable external entity handling when parsing untrusted data. How this is done depends on the library being used. Note that some -libraries, such as recent versions of libxml, disable entity expansion by default, +libraries, such as recent versions of the XML libraries in the standard library of Python 3, +disable entity expansion by default, so unless you have explicitly enabled entity expansion, no further action needs to be taken.

    + +

    +We recommend using the defusedxml +PyPI package, which has been created to prevent XML attacks (both XXE and XML bombs). +

    -The following example uses the libxml XML parser to parse a string xmlSrc. -If that string is from an untrusted source, this code may be vulnerable to an XXE attack, since -the parser is invoked with the noent option set to true: +The following example uses the lxml XML parser to parse a string +xml_src. That string is from an untrusted source, so this code is +vulnerable to an XXE attack, since the +default parser from lxml.etree allows local external entities to be resolved.

    - +

    -To guard against XXE attacks, the noent option should be omitted or set to -false. This means that no entity expansion is undertaken at all, not even for standard -internal entities such as & or >. If desired, these -entities can be expanded in a separate step using utility functions provided by libraries such -as underscore, -lodash or -he. +To guard against XXE attacks with the lxml library, you should create a +parser with resolve_entities set to false. This means that no +entity expansion is undertaken, although standard predefined entities such as +>, for writing > inside the text of an XML element, +are still allowed.

    - +
    @@ -53,5 +58,13 @@ Timothy Morgen: Timur Yunusov, Alexey Osipov: XML Out-Of-Band Data Retrieval. +
  • +Python 3 standard library: +XML Vulnerabilities. +
  • +
  • +Python 2 standard library: +XML Vulnerabilities. +
  • diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js b/python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js deleted file mode 100644 index 99fa02cc42f6..000000000000 --- a/python/ql/src/experimental/Security/NEW/CWE-611/examples/Xxe.js +++ /dev/null @@ -1,7 +0,0 @@ -const app = require("express")(), - libxml = require("libxmljs"); - -app.post("upload", (req, res) => { - let xmlSrc = req.body, - doc = libxml.parseXml(xmlSrc, { noent: true }); -}); diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeBad.py b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeBad.py new file mode 100644 index 000000000000..4b2121ab4a64 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeBad.py @@ -0,0 +1,10 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + doc = lxml.etree.fromstring(xml_src) + return lxml.etree.tostring(doc) diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js deleted file mode 100644 index 8317dcac98f9..000000000000 --- a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.js +++ /dev/null @@ -1,7 +0,0 @@ -const app = require("express")(), - libxml = require("libxmljs"); - -app.post("upload", (req, res) => { - let xmlSrc = req.body, - doc = libxml.parseXml(xmlSrc); -}); diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.py b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.py new file mode 100644 index 000000000000..20844032fa39 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.py @@ -0,0 +1,11 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + parser = 
lxml.etree.XMLParser(resolve_entities=False) + doc = lxml.etree.fromstring(xml_src, parser=parser) + return lxml.etree.tostring(doc) diff --git a/python/ql/src/experimental/Security/NEW/XmlEntityInjection.qhelp b/python/ql/src/experimental/Security/NEW/XmlEntityInjection.qhelp deleted file mode 100644 index 6da1bf1d3063..000000000000 --- a/python/ql/src/experimental/Security/NEW/XmlEntityInjection.qhelp +++ /dev/null @@ -1,48 +0,0 @@ - - - - -

    -Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE), -Billion Laughs, Quadratic Blowup and DTD retrieval. -This type of attack uses external entity references to access arbitrary files on a system, carry out denial of -service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band -data retrieval techniques may allow attackers to steal sensitive data. Denial of services can also be carried out -in this situation. -

    -
    - - -

    -Use defusedxml, a Python package aimed -to prevent any potentially malicious operation. -

    -
    - - -

    -The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser) -that is not safely configured on untrusted data, and is therefore inherently unsafe. -

    - -

    -Providing an input (xml_content) like the following XML content against /bad, the request response would contain the contents of -/etc/passwd. -

    - -
    - - -
  • Python 3 XML Vulnerabilities.
  • -
  • Python 2 XML Vulnerabilities.
  • -
  • Python XML Parsing.
  • -
  • OWASP vulnerability description: XML External Entity (XXE) Processing.
  • -
  • OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
  • -
  • Paper by Timothy Morgen: XML Schema, DTD, and Entity Attacks
  • -
  • Out-of-band data retrieval: Timur Yunusov & Alexey Osipov, Black hat EU 2013: XML Out-Of-Band Data Retrieval.
  • -
  • Denial of service attack (Billion laughs): Billion Laughs.
  • -
    - -
    diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py index b4cb2faf3044..a4de65084aed 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py @@ -74,6 +74,10 @@ %xxe; ]> """ +predefined_entity_xml = """ +<test>&lt;</test> +""" + # ============================================================================== # other setup @@ -443,6 +447,13 @@ def test_exfiltrate_through_dtd(): assert exfiltrated_data == "SECRET_FLAG" + @staticmethod + def test_predefined_entity(): + parser = lxml.etree.XMLParser(resolve_entities=False) + root = lxml.etree.fromstring(predefined_entity_xml, parser=parser) + assert root.tag == "test" + assert root.text == "<" + # ============================================================================== import xmltodict From 56b9c891d85636c543bf9529dd7b191908248be7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 15:30:04 +0200 Subject: [PATCH 09/51] Python: Adjust `XmlBomb.qhelp` from JS --- .../Security/NEW/CWE-776/XmlBomb.qhelp | 36 +++++++++++++------ .../Security/NEW/CWE-776/examples/XmlBomb.js | 10 ------ .../NEW/CWE-776/examples/XmlBombBad.py | 10 ++++++ .../NEW/CWE-776/examples/XmlBombGood.js | 10 ------ .../NEW/CWE-776/examples/XmlBombGood.py | 10 ++++++ 5 files changed, 45 insertions(+), 31 deletions(-) delete mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombBad.py delete mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js create mode 100644 python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.py diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp index c0714b3f96f9..f20dd526fdd0 100644
--- a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp +++ b/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp @@ -25,26 +25,32 @@ to take a very long time or use large amounts of memory. This is sometimes calle

    The safest way to prevent XML bomb attacks is to disable entity expansion when parsing untrusted -data. How this is done depends on the library being used. Note that some libraries, such as -recent versions of libxmljs (though not its SAX parser API), disable entity expansion -by default, so unless you have explicitly enabled entity expansion, no further action is needed. +data. Whether this can be done depends on the library being used. Note that some libraries, such as +lxml, have measures enabled by default to prevent such DoS XML attacks, so +unless you have explicitly set huge_tree to True, no further action is needed. +

    + +

    +We recommend using the defusedxml +PyPI package, which has been created to prevent XML attacks (both XXE and XML bombs).

    -The following example uses the XML parser provided by the node-expat package to -parse a string xmlSrc. If that string is from an untrusted source, this code may be -vulnerable to a DoS attack, since node-expat expands internal entities by default: +The following example uses the xml.etree XML parser provided by the Python standard library to +parse a string xml_src. That string is from an untrusted source, so this code is +vulnerable to a DoS attack, since the xml.etree XML parser expands internal entities by default:

    - +

    -At the time of writing, node-expat does not provide a way of controlling entity -expansion, but the example could be rewritten to use the sax package instead, -which only expands standard entities such as &amp;: +It is not possible to guard against internal entity expansion with +xml.etree, so to guard against these attacks, the following example uses +the defusedxml +PyPI package instead, which is not exposed to such internal entity expansion attacks.

    - +
    @@ -56,5 +62,13 @@ Wikipedia: Bryan Sullivan: Security Briefs - XML Denial of Service Attacks and Defenses. +
  • +Python 3 standard library: +XML Vulnerabilities. +
  • +
  • +Python 2 standard library: +XML Vulnerabilities. +
  • diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js deleted file mode 100644 index f72902a53041..000000000000 --- a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBomb.js +++ /dev/null @@ -1,10 +0,0 @@ -const app = require("express")(), - expat = require("node-expat"); - -app.post("upload", (req, res) => { - let xmlSrc = req.body, - parser = new expat.Parser(); - parser.on("startElement", handleStart); - parser.on("text", handleText); - parser.write(xmlSrc); -}); diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombBad.py b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombBad.py new file mode 100644 index 000000000000..d52054d94929 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombBad.py @@ -0,0 +1,10 @@ +from flask import Flask, request +import xml.etree.ElementTree as ET + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + doc = ET.fromstring(xml_src) + return ET.tostring(doc) diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js deleted file mode 100644 index a8c5bc97e631..000000000000 --- a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.js +++ /dev/null @@ -1,10 +0,0 @@ -const app = require("express")(), - sax = require("sax"); - -app.post("upload", (req, res) => { - let xmlSrc = req.body, - parser = sax.parser(true); - parser.onopentag = handleStart; - parser.ontext = handleText; - parser.write(xmlSrc); -}); diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.py b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.py new file mode 100644 index 000000000000..5e4261e35da9 --- /dev/null +++ b/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.py 
@@ -0,0 +1,10 @@ +from flask import Flask, request +import defusedxml.ElementTree as ET + +app = Flask(__name__) + +@app.post("/upload") +def upload(): + xml_src = request.get_data() + doc = ET.fromstring(xml_src) + return ET.tostring(doc) From 9caf4be21be7370a0317ae44205dfb35a5169073 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 15:33:57 +0200 Subject: [PATCH 10/51] Python: Add PortSwigger link to `Xxe.qhelp` I found this resource quite good myself at least :) --- python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp index 7254e292309f..19bbc955fd68 100644 --- a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp +++ b/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp @@ -66,5 +66,9 @@ Python 3 standard library: Python 2 standard library: XML Vulnerabilities. +
  • +PortSwigger: +XML external entity (XXE) injection. +
  • From e005a5c0ab7409ebb6fb0002cdc7ab11a1b54bd1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 15:50:24 +0200 Subject: [PATCH 11/51] Python: Promote `XMLParsing` concept --- python/ql/lib/semmle/python/Concepts.qll | 62 +++++++++++++++++ .../experimental/semmle/python/Concepts.qll | 68 ------------------- .../semmle/python/frameworks/Xml.qll | 4 +- .../dataflow/XmlBombCustomizations.qll | 5 +- .../security/dataflow/XxeCustomizations.qll | 5 +- .../XML/ExperimentalXmlConceptsTests.ql | 2 +- 6 files changed, 70 insertions(+), 76 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index a768f29795c6..3d83ec100a57 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -550,6 +550,68 @@ module XML { abstract string getName(); } } + + /** + * A kind of XML vulnerability. + * + * See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries + */ + class XMLVulnerabilityKind extends string { + XMLVulnerabilityKind() { + this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] + } + + /** Holds for Billion Laughs vulnerability kind. */ + predicate isBillionLaughs() { this = "Billion Laughs" } + + /** Holds for Quadratic Blowup vulnerability kind. */ + predicate isQuadraticBlowup() { this = "Quadratic Blowup" } + + /** Holds for XXE vulnerability kind. */ + predicate isXxe() { this = "XXE" } + + /** Holds for DTD retrieval vulnerability kind. */ + predicate isDtdRetrieval() { this = "DTD retrieval" } + } + + /** + * A data-flow node that parses XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ + class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { + /** + * Gets the argument containing the content to parse. 
+ */ + DataFlow::Node getAnInput() { result = super.getAnInput() } + + /** + * Holds if this XML parsing is vulnerable to `kind`. + */ + predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } + } + + /** Provides classes for modeling XML parsing APIs. */ + module XMLParsing { + /** + * A data-flow node that parses XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the content to parse. + */ + abstract DataFlow::Node getAnInput(); + + /** + * Holds if this XML parsing is vulnerable to `kind`. + */ + abstract predicate vulnerableTo(XMLVulnerabilityKind kind); + } + } } /** Provides classes for modeling LDAP-related APIs. */ diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index 6fdba4d3627f..09b44d95e89a 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -14,74 +14,6 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks -/** - * Since there is both XML module in normal and experimental Concepts, - * we have to rename the experimental module as this. - */ -module ExperimentalXML { - /** - * A kind of XML vulnerability. - * - * See https://pypi.org/project/defusedxml/#python-xml-libraries - */ - class XMLVulnerabilityKind extends string { - XMLVulnerabilityKind() { - this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] - } - - /** Holds for Billion Laughs vulnerability kind. */ - predicate isBillionLaughs() { this = "Billion Laughs" } - - /** Holds for Quadratic Blowup vulnerability kind. */ - predicate isQuadraticBlowup() { this = "Quadratic Blowup" } - - /** Holds for XXE vulnerability kind. 
*/ - predicate isXxe() { this = "XXE" } - - /** Holds for DTD retrieval vulnerability kind. */ - predicate isDtdRetrieval() { this = "DTD retrieval" } - } - - /** - * A data-flow node that parses XML. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. - */ - class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - - /** - * Holds if this XML parsing is vulnerable to `kind`. - */ - predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } - } - - /** Provides classes for modeling XML parsing APIs. */ - module XMLParsing { - /** - * A data-flow node that parses XML. - * - * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. - */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the content to parse. - */ - abstract DataFlow::Node getAnInput(); - - /** - * Holds if this XML parsing is vulnerable to `kind`. - */ - abstract predicate vulnerableTo(XMLVulnerabilityKind kind); - } - } -} - /** Provides classes for modeling LDAP query execution-related APIs. 
*/ module LdapQuery { /** diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index a2f36f66f2e3..87aa236804dd 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -5,11 +5,9 @@ private import python private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts +private import semmle.python.Concepts private import semmle.python.ApiGraphs -module XML = ExperimentalXML; - private module XmlEtree { /** * Provides models for `xml.etree` parsers diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index 66a16a4494af..a4cbfe61821e 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -6,7 +6,8 @@ private import python private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts +private import semmle.python.Concepts +import experimental.semmle.python.frameworks.Xml // needed until modeling have been promoted private import semmle.python.dataflow.new.RemoteFlowSources /** @@ -40,7 +41,7 @@ module XmlBomb { */ class XmlParsingWithEntityResolution extends Sink { XmlParsingWithEntityResolution() { - exists(ExperimentalXML::XMLParsing parsing, ExperimentalXML::XMLVulnerabilityKind kind | + exists(XML::XMLParsing parsing, XML::XMLVulnerabilityKind kind | (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index 
b2992dd335f1..c118e1b2ff97 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -6,7 +6,8 @@ private import python private import semmle.python.dataflow.new.DataFlow -private import experimental.semmle.python.Concepts +private import semmle.python.Concepts +import experimental.semmle.python.frameworks.Xml // needed until modeling have been promoted private import semmle.python.dataflow.new.RemoteFlowSources /** @@ -40,7 +41,7 @@ module Xxe { */ class XmlParsingWithExternalEntityResolution extends Sink { XmlParsingWithExternalEntityResolution() { - exists(ExperimentalXML::XMLParsing parsing, ExperimentalXML::XMLVulnerabilityKind kind | + exists(XML::XMLParsing parsing, XML::XMLVulnerabilityKind kind | kind.isXxe() and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql index 81bc391d0e55..679dbc3456c8 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -1,5 +1,5 @@ import python -import experimental.semmle.python.Concepts +import semmle.python.Concepts import experimental.semmle.python.frameworks.Xml import semmle.python.dataflow.new.DataFlow import TestUtilities.InlineExpectationsTest From e45288e812a0cd0f87cb909768b60847ec5aa997 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 15:51:27 +0200 Subject: [PATCH 12/51] Python: => `XMLParsingVulnerabilityKind` Since there are other XML vulnerabilities that are not about parsing, this is more correct. 
--- python/ql/lib/semmle/python/Concepts.qll | 8 +++---- .../Security/CWE-611/SimpleXmlRpcServer.ql | 2 +- .../semmle/python/frameworks/Xml.qll | 24 +++++++++---------- .../dataflow/XmlBombCustomizations.qll | 2 +- .../security/dataflow/XxeCustomizations.qll | 2 +- .../XML/ExperimentalXmlConceptsTests.ql | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 3d83ec100a57..c430594d05bb 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -556,8 +556,8 @@ module XML { * * See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries */ - class XMLVulnerabilityKind extends string { - XMLVulnerabilityKind() { + class XMLParsingVulnerabilityKind extends string { + XMLParsingVulnerabilityKind() { this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] } @@ -589,7 +589,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. */ - predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } + predicate vulnerableTo(XMLParsingVulnerabilityKind kind) { super.vulnerableTo(kind) } } /** Provides classes for modeling XML parsing APIs. */ @@ -609,7 +609,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. 
*/ - abstract predicate vulnerableTo(XMLVulnerabilityKind kind); + abstract predicate vulnerableTo(XMLParsingVulnerabilityKind kind); } } } diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index cda0633690c5..3d2a736ed496 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -17,7 +17,7 @@ from DataFlow::CallCfgNode call, string kinds where call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and kinds = - strictconcat(ExperimentalXML::XMLVulnerabilityKind kind | + strictconcat(ExperimentalXML::XMLParsingVulnerabilityKind kind | kind.isBillionLaughs() or kind.isQuadraticBlowup() | kind, ", " diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 87aa236804dd..4987e24bce4f 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -66,7 +66,7 @@ private module XmlEtree { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { kind.isBillionLaughs() or kind.isQuadraticBlowup() } } @@ -103,7 +103,7 @@ private module XmlEtree { ] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { // note: it does not matter what `xml.etree` parser you are using, you cannot // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() @@ -218,7 +218,7 @@ private module SaxBasedParsing { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } 
- override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -251,7 +251,7 @@ private module SaxBasedParsing { ] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -290,7 +290,7 @@ private module SaxBasedParsing { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) or @@ -317,7 +317,7 @@ private module Lxml { */ abstract class InstanceSource extends DataFlow::LocalSourceNode { /** Holds if this instance is vulnerable to `kind`. */ - abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind); + abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind); } /** @@ -331,7 +331,7 @@ private module Lxml { } // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { kind.isXxe() and ( // resolve_entities has default True @@ -361,7 +361,7 @@ private module Lxml { API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { // as highlighted by // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser // by default XXE is allow. 
so as long as the default parser has not been @@ -385,7 +385,7 @@ private module Lxml { } /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ - DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { + DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) { exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) } @@ -397,7 +397,7 @@ private module Lxml { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { this.calls(instanceVulnerableTo(kind), "feed") } } @@ -436,7 +436,7 @@ private module Lxml { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { this.getParserArg() = XMLParser::instanceVulnerableTo(kind) or kind.isXxe() and @@ -456,7 +456,7 @@ private module Xmltodict { result in [this.getArg(0), this.getArgByName("xml_input")] } - override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index a4cbfe61821e..c5e69c1e0e33 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -41,7 +41,7 @@ module XmlBomb { */ class XmlParsingWithEntityResolution extends Sink { 
XmlParsingWithEntityResolution() { - exists(XML::XMLParsing parsing, XML::XMLVulnerabilityKind kind | + exists(XML::XMLParsing parsing, XML::XMLParsingVulnerabilityKind kind | (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index c118e1b2ff97..27d011625a6d 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -41,7 +41,7 @@ module Xxe { */ class XmlParsingWithExternalEntityResolution extends Sink { XmlParsingWithExternalEntityResolution() { - exists(XML::XMLParsing parsing, XML::XMLVulnerabilityKind kind | + exists(XML::XMLParsing parsing, XML::XMLParsingVulnerabilityKind kind | kind.isXxe() and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql index 679dbc3456c8..98237b447ea0 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -21,7 +21,7 @@ class XmlParsingTest extends InlineExpectationsTest { tag = "input" ) or - exists(XML::XMLVulnerabilityKind kind | + exists(XML::XMLParsingVulnerabilityKind kind | parsing.vulnerableTo(kind) and location = parsing.getLocation() and element = parsing.toString() and From 35ccba2ec10b2610969ac790d8ca8fa76a282ad9 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 15:57:00 +0200 Subject: [PATCH 13/51] Python: Promote `XMLParsing` concept test --- ...tsTests.expected => ConceptsTest.expected} | 0 
.../frameworks/XML/ConceptsTest.ql | 3 ++ .../XML/ExperimentalXmlConceptsTests.ql | 33 --------------- .../frameworks/XML/lxml_etree.py | 40 +++++++++---------- .../library-tests/frameworks/XML/xml_dom.py | 24 +++++------ .../library-tests/frameworks/XML/xml_etree.py | 34 ++++++++-------- .../library-tests/frameworks/XML/xml_sax.py | 26 ++++++------ .../library-tests/frameworks/XML/xmltodict.py | 6 +-- .../test/experimental/meta/ConceptsTest.qll | 27 +++++++++++++ 9 files changed, 95 insertions(+), 98 deletions(-) rename python/ql/test/experimental/library-tests/frameworks/XML/{ExperimentalXmlConceptsTests.expected => ConceptsTest.expected} (100%) create mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql delete mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected b/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.expected similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected rename to python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.expected diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql new file mode 100644 index 000000000000..95728bd6dc89 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql @@ -0,0 +1,3 @@ +import python +import experimental.meta.ConceptsTest +import experimental.semmle.python.frameworks.Xml // needed until modeling have been promoted diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql deleted file mode 100644 index 98237b447ea0..000000000000 --- 
a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql +++ /dev/null @@ -1,33 +0,0 @@ -import python -import semmle.python.Concepts -import experimental.semmle.python.frameworks.Xml -import semmle.python.dataflow.new.DataFlow -import TestUtilities.InlineExpectationsTest -private import semmle.python.dataflow.new.internal.PrintNode - -class XmlParsingTest extends InlineExpectationsTest { - XmlParsingTest() { this = "XmlParsingTest" } - - override string getARelevantTag() { result in ["input", "vuln"] } - - override predicate hasActualResult(Location location, string element, string tag, string value) { - exists(location.getFile().getRelativePath()) and - exists(XML::XMLParsing parsing | - exists(DataFlow::Node input | - input = parsing.getAnInput() and - location = input.getLocation() and - element = input.toString() and - value = prettyNodeForInlineTest(input) and - tag = "input" - ) - or - exists(XML::XMLParsingVulnerabilityKind kind | - parsing.vulnerableTo(kind) and - location = parsing.getLocation() and - element = parsing.toString() and - value = "'" + kind + "'" and - tag = "vuln" - ) - ) - } -} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index 22930a58af37..ee8f3fc69c14 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -4,51 +4,51 @@ x = "some xml" # different parsing methods -lxml.etree.fromstring(x) # $ input=x vuln='XXE' -lxml.etree.fromstring(text=x) # $ input=x vuln='XXE' +lxml.etree.fromstring(x) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(text=x) # $ xmlInput=x xmlVuln='XXE' -lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' -lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE' +lxml.etree.fromstringlist([x]) # $ xmlInput=List xmlVuln='XXE' 
+lxml.etree.fromstringlist(strings=[x]) # $ xmlInput=List xmlVuln='XXE' -lxml.etree.XML(x) # $ input=x vuln='XXE' -lxml.etree.XML(text=x) # $ input=x vuln='XXE' +lxml.etree.XML(x) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.XML(text=x) # $ xmlInput=x xmlVuln='XXE' -lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE' -lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' +lxml.etree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' -lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE' -lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parseid(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' +lxml.etree.parseid(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' # With default parsers (nothing changed) parser = lxml.etree.XMLParser() -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' parser = lxml.etree.get_default_parser() -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' # manual use of feed method parser = lxml.etree.XMLParser() -parser.feed(x) # $ input=x vuln='XXE' -parser.feed(data=x) # $ input=x vuln='XXE' +parser.feed(x) # $ xmlInput=x xmlVuln='XXE' +parser.feed(data=x) # $ xmlInput=x xmlVuln='XXE' parser.close() # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) -lxml.etree.fromstring(x, parser) # $ input=x -lxml.etree.fromstring(x, parser=parser) # $ input=x +lxml.etree.fromstring(x, parser) # $ xmlInput=x +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x # XXE-vuln parser = lxml.etree.XMLParser(resolve_entities=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' # Billion laughs vuln (also XXE) parser = 
lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ input=x +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) -lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py index 7dce29fc7b9d..b86770b8d6cd 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -6,26 +6,26 @@ x = "some xml" # minidom -xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.minidom.parse(file=StringIO(x)) # $ xmlInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.minidom.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # pulldom -xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' -xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +xml.dom.minidom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' -xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' -xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index df126e458e2d..c5d141a3715c 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -4,39 +4,39 @@ x = "some xml" # Parsing in different ways -xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' 
-xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist([x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XML(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.XML(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(StringIO(x)) # $ xmlInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() -xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # manual use of feed method parser = xml.etree.ElementTree.XMLParser() -parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' parser.close() # manual use of feed method on XMLPullParser parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' parser.close() # note: it's technically possible to use the thing wrapper func `fromstring` with an diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index 158e62ffae6b..c0e5923c5c08 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -10,41 +10,41 @@ def __init__(self): def characters(self, data): self._result.append(data) -xml.sax.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' -xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' parser = xml.sax.make_parser() -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' -parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # Forward Type Tracking test def func(cond): parser = xml.sax.make_parser() if cond: parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' else: - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # make it vuln, then making it safe # a bit of an edge-case, but is nice to be able to handle. parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' def check_conditional_assignment(cond): parser = xml.sax.make_parser() @@ -52,7 +52,7 @@ def check_conditional_assignment(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) else: parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' def check_conditional_assignment2(cond): parser = xml.sax.make_parser() @@ -61,4 +61,4 @@ def check_conditional_assignment2(cond): else: flag_value = False parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) - parser.parse(StringIO(x)) # $ input=StringIO(..) 
vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py index 473e51c9fe66..27d04862f83e 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -2,7 +2,7 @@ x = "some xml" -xmltodict.parse(x) # $ input=x -xmltodict.parse(xml_input=x) # $ input=x +xmltodict.parse(x) # $ xmlInput=x +xmltodict.parse(xml_input=x) # $ xmlInput=x -xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xmltodict.parse(x, disable_entities=False) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index 8f9435f633fe..e9f713569639 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -539,3 +539,30 @@ class HttpClientRequestTest extends InlineExpectationsTest { ) } } + +class XmlParsingTest extends InlineExpectationsTest { + XmlParsingTest() { this = "XmlParsingTest" } + + override string getARelevantTag() { result in ["xmlInput", "xmlVuln"] } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(XML::XMLParsing parsing | + exists(DataFlow::Node input | + input = parsing.getAnInput() and + location = input.getLocation() and + element = input.toString() and + value = prettyNodeForInlineTest(input) and + tag = "xmlInput" + ) + or + exists(XML::XMLParsingVulnerabilityKind kind | + parsing.vulnerableTo(kind) and + location = 
parsing.getLocation() and + element = parsing.toString() and + value = "'" + kind + "'" and + tag = "xmlVuln" + ) + ) + } +} From 1ea4bcc59f4ccbfe02e454ae3223a8fa34ac33e3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 29 Mar 2022 16:48:30 +0200 Subject: [PATCH 14/51] Python: Make `XMLParsing` a `Decoding` subclass --- python/ql/lib/semmle/python/Concepts.qll | 16 ++---- .../semmle/python/frameworks/Xml.qll | 52 +++++++++++++++++++ .../frameworks/XML/lxml_etree.py | 42 +++++++-------- .../library-tests/frameworks/XML/xml_dom.py | 24 ++++----- .../library-tests/frameworks/XML/xml_etree.py | 38 +++++++------- .../library-tests/frameworks/XML/xml_sax.py | 26 +++++----- .../library-tests/frameworks/XML/xmltodict.py | 6 +-- .../test/experimental/meta/ConceptsTest.qll | 8 --- 8 files changed, 124 insertions(+), 88 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index c430594d05bb..b553c8d927da 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -580,12 +580,7 @@ module XML { * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. */ - class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { - /** - * Gets the argument containing the content to parse. - */ - DataFlow::Node getAnInput() { result = super.getAnInput() } - + class XMLParsing extends Decoding instanceof XMLParsing::Range { /** * Holds if this XML parsing is vulnerable to `kind`. */ @@ -600,16 +595,13 @@ module XML { * Extend this class to model new APIs. If you want to refine existing API models, * extend `XMLParsing` instead. */ - abstract class Range extends DataFlow::Node { - /** - * Gets the argument containing the content to parse. - */ - abstract DataFlow::Node getAnInput(); - + abstract class Range extends Decoding::Range { /** * Holds if this XML parsing is vulnerable to `kind`. 
*/ abstract predicate vulnerableTo(XMLParsingVulnerabilityKind kind); + + override string getFormat() { result = "XML" } } } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 4987e24bce4f..c072295c4610 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -69,6 +69,15 @@ private module XmlEtree { override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { kind.isBillionLaughs() or kind.isQuadraticBlowup() } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } } } @@ -108,6 +117,10 @@ private module XmlEtree { // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } @@ -226,6 +239,15 @@ private module SaxBasedParsing { this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } } /** @@ -259,6 +281,15 @@ private module SaxBasedParsing { this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... 
but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } } /** @@ -296,6 +327,10 @@ private module SaxBasedParsing { or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } @@ -400,6 +435,15 @@ private module Lxml { override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { this.calls(instanceVulnerableTo(kind), "feed") } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } } } @@ -442,6 +486,10 @@ private module Lxml { kind.isXxe() and not exists(this.getParserArg()) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } @@ -460,5 +508,9 @@ private module Xmltodict { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } } } diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py index ee8f3fc69c14..f1dbd5390ada 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -4,51 +4,51 @@ x = "some xml" # different parsing methods -lxml.etree.fromstring(x) # $ xmlInput=x xmlVuln='XXE' -lxml.etree.fromstring(text=x) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) 
+lxml.etree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) -lxml.etree.fromstringlist([x]) # $ xmlInput=List xmlVuln='XXE' -lxml.etree.fromstringlist(strings=[x]) # $ xmlInput=List xmlVuln='XXE' +lxml.etree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..) +lxml.etree.fromstringlist(strings=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XXE' decodeOutput=lxml.etree.fromstringlist(..) -lxml.etree.XML(x) # $ xmlInput=x xmlVuln='XXE' -lxml.etree.XML(text=x) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) +lxml.etree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) -lxml.etree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' -lxml.etree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' +lxml.etree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) +lxml.etree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) -lxml.etree.parseid(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' -lxml.etree.parseid(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='XXE' +lxml.etree.parseid(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) +lxml.etree.parseid(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) # With default parsers (nothing changed) parser = lxml.etree.XMLParser() -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) 
parser = lxml.etree.get_default_parser() -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # manual use of feed method parser = lxml.etree.XMLParser() -parser.feed(x) # $ xmlInput=x xmlVuln='XXE' -parser.feed(data=x) # $ xmlInput=x xmlVuln='XXE' -parser.close() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' +parser.close() # $ decodeOutput=parser.close() # XXE-safe parser = lxml.etree.XMLParser(resolve_entities=False) -lxml.etree.fromstring(x, parser) # $ xmlInput=x -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x +lxml.etree.fromstring(x, parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) # XXE-vuln parser = lxml.etree.XMLParser(resolve_entities=True) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Billion laughs vuln (also XXE) parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x decodeOutput=lxml.etree.fromstring(..) 
# DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) -lxml.etree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py index b86770b8d6cd..c6152c75807e 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -6,26 +6,26 @@ x = "some xml" # minidom -xml.dom.minidom.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.minidom.parse(file=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) +xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) -xml.dom.minidom.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.minidom.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) +xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) # pulldom -xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) -xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) # These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -xml.dom.minidom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' -xml.dom.minidom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) 
+xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) -xml.dom.pulldom.parse(StringIO(x), parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' -xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py index c5d141a3715c..0ed750ba8c78 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -4,40 +4,40 @@ x = "some xml" # Parsing in different ways -xml.etree.ElementTree.fromstring(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.fromstring(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) +xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) 
-xml.etree.ElementTree.fromstringlist([x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ xmlInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..) +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..) -xml.etree.ElementTree.XML(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.XML(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..) +xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..) -xml.etree.ElementTree.XMLID(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.XMLID(text=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) +xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) -xml.etree.ElementTree.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) +xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) -xml.etree.ElementTree.iterparse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() -xml.etree.ElementTree.fromstring(x, parser=parser) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) 
# manual use of feed method parser = xml.etree.ElementTree.XMLParser() -parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.close() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.close() # $ decodeOutput=parser.close() # manual use of feed method on XMLPullParser parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.feed(data=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.close() +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.close() # $ decodeOutput=parser.close() # note: it's technically possible to use the thing wrapper func `fromstring` with an # `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py index c0e5923c5c08..8dbe9d4ae99e 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -10,41 +10,41 @@ def __init__(self): def characters(self, data): self._result.append(data) -xml.sax.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parseString(x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parseString(string=x) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' parser = xml.sax.make_parser() -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.parse(source=StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # Forward Type Tracking test def func(cond): parser = xml.sax.make_parser() if cond: parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' else: - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' # make it vuln, then making it safe # a bit of an edge-case, but is nice to be able to handle. parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' def check_conditional_assignment(cond): parser = xml.sax.make_parser() @@ -52,7 +52,7 @@ def check_conditional_assignment(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) else: parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' def check_conditional_assignment2(cond): parser = xml.sax.make_parser() @@ -61,4 +61,4 @@ def check_conditional_assignment2(cond): else: flag_value = False parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) - parser.parse(StringIO(x)) # $ xmlInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py index 27d04862f83e..01dc2f3c4843 100644 --- a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -2,7 +2,7 @@ x = "some xml" -xmltodict.parse(x) # $ xmlInput=x -xmltodict.parse(xml_input=x) # $ xmlInput=x +xmltodict.parse(x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) +xmltodict.parse(xml_input=x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) -xmltodict.parse(x, disable_entities=False) # $ xmlInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xmltodict.parse(..) 
diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index e9f713569639..24cbbab2d448 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -548,14 +548,6 @@ class XmlParsingTest extends InlineExpectationsTest { override predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and exists(XML::XMLParsing parsing | - exists(DataFlow::Node input | - input = parsing.getAnInput() and - location = input.getLocation() and - element = input.toString() and - value = prettyNodeForInlineTest(input) and - tag = "xmlInput" - ) - or exists(XML::XMLParsingVulnerabilityKind kind | parsing.vulnerableTo(kind) and location = parsing.getLocation() and From c4473c5f6506e6dcb8e6736f7d3ddd0acea022d4 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 10:08:02 +0200 Subject: [PATCH 15/51] Python: Rename lxml XPath tests --- .../ql/test/library-tests/frameworks/lxml/{test.py => xpath.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/ql/test/library-tests/frameworks/lxml/{test.py => xpath.py} (100%) diff --git a/python/ql/test/library-tests/frameworks/lxml/test.py b/python/ql/test/library-tests/frameworks/lxml/xpath.py similarity index 100% rename from python/ql/test/library-tests/frameworks/lxml/test.py rename to python/ql/test/library-tests/frameworks/lxml/xpath.py From 3040adfd9bdc26a0c54ef04453a1c8b2420bb4c5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 10:08:26 +0200 Subject: [PATCH 16/51] Python: Handle `XMLParser().close()` for XPath --- .../ql/lib/semmle/python/frameworks/Lxml.qll | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 9259668a5c84..ab29f33e7cf7 100644 --- 
a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -57,13 +57,25 @@ private module Lxml { */ class XPathCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode { XPathCall() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]) - .getReturn() - .getMember("xpath") - .getACall() + exists(API::Node parseResult | + parseResult = + API::moduleImport("lxml") + .getMember("etree") + .getMember(["parse", "fromstring", "fromstringlist", "HTML", "XML"]) + .getReturn() + or + // TODO: lxml.etree.parseid()[0] will contain the root element from parsing + // but we don't really have a way to model that nicely. + parseResult = + API::moduleImport("lxml") + .getMember("etree") + .getMember("XMLParser") + .getReturn() + .getMember("close") + .getReturn() + | + this = parseResult.getMember("xpath").getACall() + ) } override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("_path")] } From 80b5cde3a2d3123029630450e41475f89253938c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 10:19:08 +0200 Subject: [PATCH 17/51] Python: Promote lxml parsing modeling --- .../ql/lib/semmle/python/frameworks/Lxml.qll | 163 ++++++++++++++++++ .../semmle/python/frameworks/Xml.qll | 159 ----------------- .../frameworks/lxml/parsing.py} | 0 .../library-tests/frameworks/lxml/xpath.py | 8 +- 4 files changed, 167 insertions(+), 163 deletions(-) rename python/ql/test/{experimental/library-tests/frameworks/XML/lxml_etree.py => library-tests/frameworks/lxml/parsing.py} (100%) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index ab29f33e7cf7..de89345a7d6d 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -19,6 +19,9 @@ private import semmle.python.ApiGraphs * - https://lxml.de/tutorial.html */ 
private module Lxml { + // --------------------------------------------------------------------------- + // XPath + // --------------------------------------------------------------------------- /** * A class constructor compiling an XPath expression. * @@ -97,4 +100,164 @@ private module Lxml { override string getName() { result = "lxml.etree" } } + + // --------------------------------------------------------------------------- + // Parsing + // --------------------------------------------------------------------------- + /** + * Provides models for `lxml.etree` parsers. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + module XMLParser { + /** + * A source of instances of `lxml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { + /** Holds if this instance is vulnerable to `kind`. */ + abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind); + } + + /** + * A call to `lxml.etree.XMLParser`. 
+ * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() + } + + // NOTE: it's not possible to change settings of a parser after constructing it + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + kind.isXxe() and + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) + ) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and + not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + } + } + + /** + * A call to `lxml.etree.get_default_parser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLDefaultParser() { + this = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. + kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. 
+ } + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { + t.start() and + result = origin + or + exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + DataFlow::Node instance(InstanceSource origin) { + instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) + } + + /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ + DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) + } + + /** + * A call to the `feed` method of an `lxml` parser. + */ + private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + LXMLParserFeedCall() { this.calls(instance(_), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + this.calls(instanceVulnerableTo(kind), "feed") + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } + } + } + + /** + * A call to either of: + * - `lxml.etree.fromstring` + * - `lxml.etree.fromstringlist` + * - `lxml.etree.XML` + * - `lxml.etree.parse` + * - `lxml.etree.parseid` + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring + */ + private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + LXMLParsing() { + this = + API::moduleImport("lxml") + .getMember("etree") + .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) + 
.getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML + this.getArgByName("text"), + // fromstringlist + this.getArgByName("strings"), + // parse / parseid + this.getArgByName("source"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + this.getParserArg() = XMLParser::instanceVulnerableTo(kind) + or + kind.isXxe() and + not exists(this.getParserArg()) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } } diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index c072295c4610..b31151eed1a0 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -334,165 +334,6 @@ private module SaxBasedParsing { } } -private module Lxml { - /** - * Provides models for `lxml.etree` parsers. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - */ - module XMLParser { - /** - * A source of instances of `lxml.etree` parsers, extend this class to model new instances. - * - * This can include instantiations of the class, return values from function - * calls, or a special parameter that will be set when functions are called by an external - * library. - * - * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. - */ - abstract class InstanceSource extends DataFlow::LocalSourceNode { - /** Holds if this instance is vulnerable to `kind`. */ - abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind); - } - - /** - * A call to `lxml.etree.XMLParser`. 
- * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - */ - private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { - LXMLParser() { - this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() - } - - // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - kind.isXxe() and - ( - // resolve_entities has default True - not exists(this.getArgByName("resolve_entities")) - or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) - ) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and - not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) - or - kind.isDtdRetrieval() and - this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and - this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) - } - } - - /** - * A call to `lxml.etree.get_default_parser`. - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser - */ - private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { - LXMLDefaultParser() { - this = - API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() - } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - // as highlighted by - // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser - // by default XXE is allow. so as long as the default parser has not been - // overridden, the result is also vuln to XXE. - kind.isXxe() - // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. 
- } - } - - /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ - private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { - t.start() and - result = origin - or - exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) - } - - /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ - DataFlow::Node instance(InstanceSource origin) { - instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) - } - - /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ - DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) - } - - /** - * A call to the `feed` method of an `lxml` parser. - */ - private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - LXMLParserFeedCall() { this.calls(instance(_), "feed") } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - this.calls(instanceVulnerableTo(kind), "feed") - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { - exists(DataFlow::Node objRef | - DataFlow::localFlow(this.getObject(), objRef) and - result.(DataFlow::MethodCallNode).calls(objRef, "close") - ) - } - } - } - - /** - * A call to either of: - * - `lxml.etree.fromstring` - * - `lxml.etree.fromstringlist` - * - `lxml.etree.XML` - * - `lxml.etree.parse` - * - `lxml.etree.parseid` - * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring - */ - private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - LXMLParsing() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) - 
.getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // fromstring / XML - this.getArgByName("text"), - // fromstringlist - this.getArgByName("strings"), - // parse / parseid - this.getArgByName("source"), - ] - } - - DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - this.getParserArg() = XMLParser::instanceVulnerableTo(kind) - or - kind.isXxe() and - not exists(this.getParserArg()) - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { result = this } - } -} - private module Xmltodict { /** * A call to `xmltodict.parse`. diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py rename to python/ql/test/library-tests/frameworks/lxml/parsing.py diff --git a/python/ql/test/library-tests/frameworks/lxml/xpath.py b/python/ql/test/library-tests/frameworks/lxml/xpath.py index e8ce583503a3..9cf3a0883bd6 100644 --- a/python/ql/test/library-tests/frameworks/lxml/xpath.py +++ b/python/ql/test/library-tests/frameworks/lxml/xpath.py @@ -2,20 +2,20 @@ from io import StringIO def test_parse(): - tree = etree.parse(StringIO('')) + tree = etree.parse(StringIO('')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar' def test_XPath_class(): - root = etree.XML("TEXT") + root = etree.XML("TEXT") # $ decodeFormat=XML decodeInput="TEXT" decodeOutput=etree.XML(..) xmlVuln='XXE' find_text = etree.XPath("path") # $ constructedXPath="path" text = find_text(root)[0] def test_ETXpath_class(): - root = etree.XML("TEXT") + root = etree.XML("TEXT") # $ decodeFormat=XML decodeInput="TEXT" decodeOutput=etree.XML(..) 
xmlVuln='XXE' find_text = etree.ETXPath("path") # $ constructedXPath="path" text = find_text(root)[0] def test_XPathEvaluator_class(): - root = etree.XML("TEXT") + root = etree.XML("TEXT") # $ decodeFormat=XML decodeInput="TEXT" decodeOutput=etree.XML(..) xmlVuln='XXE' search_root = etree.XPathEvaluator(root) text = search_root("path")[0] # $ getXPath="path" From 7f5f7679f8f9f14db7fac551bfb6071c08c41767 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 10:28:34 +0200 Subject: [PATCH 18/51] Python: Promote `xmltodict` modeling --- docs/codeql/support/reusables/frameworks.rst | 1 + python/ql/lib/semmle/python/Frameworks.qll | 1 + .../semmle/python/frameworks/Xmltodict.qll | 39 +++++++++++++++++++ .../semmle/python/frameworks/Xml.qll | 22 ----------- .../xmltodict/ConceptsTest.expected | 0 .../frameworks/xmltodict/ConceptsTest.ql | 2 + .../frameworks/xmltodict/test.py} | 0 7 files changed, 43 insertions(+), 22 deletions(-) create mode 100644 python/ql/lib/semmle/python/frameworks/Xmltodict.qll create mode 100644 python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.expected create mode 100644 python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql rename python/ql/test/{experimental/library-tests/frameworks/XML/xmltodict.py => library-tests/frameworks/xmltodict/test.py} (100%) diff --git a/docs/codeql/support/reusables/frameworks.rst b/docs/codeql/support/reusables/frameworks.rst index 93280c6732ad..12bcd5af8e64 100644 --- a/docs/codeql/support/reusables/frameworks.rst +++ b/docs/codeql/support/reusables/frameworks.rst @@ -214,3 +214,4 @@ Python built-in support libtaxii, TAXII utility library libxml2, XML processing library lxml, XML processing library + xmltodict, XML processing library diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index b94b8aee5d96..4812628d262f 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll 
@@ -52,3 +52,4 @@ private import semmle.python.frameworks.Ujson private import semmle.python.frameworks.Urllib3 private import semmle.python.frameworks.Yaml private import semmle.python.frameworks.Yarl +private import semmle.python.frameworks.Xmltodict diff --git a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll new file mode 100644 index 000000000000..bb65607251fa --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll @@ -0,0 +1,39 @@ +/** + * Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package. + * + * See + * - https://pypi.org/project/xmltodict/ + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import semmle.python.Concepts +private import semmle.python.ApiGraphs + +/** + * Provides classes modeling security-relevant aspects of the `xmltodict` PyPI package + * + * See + * - https://pypi.org/project/xmltodict/ + */ +private module Xmltodict { + /** + * A call to `xmltodict.parse`. 
+ */ + private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } + + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("xml_input")] + } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } +} diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index b31151eed1a0..c98370ba85ab 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -333,25 +333,3 @@ private module SaxBasedParsing { override DataFlow::Node getOutput() { result = this } } } - -private module Xmltodict { - /** - * A call to `xmltodict.parse`. 
- */ - private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } - - override DataFlow::Node getAnInput() { - result in [this.getArg(0), this.getArgByName("xml_input")] - } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { result = this } - } -} diff --git a/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.expected b/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql b/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql new file mode 100644 index 000000000000..b557a0bccb69 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/xmltodict/ConceptsTest.ql @@ -0,0 +1,2 @@ +import python +import experimental.meta.ConceptsTest diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/library-tests/frameworks/xmltodict/test.py similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py rename to python/ql/test/library-tests/frameworks/xmltodict/test.py From 64aa503cc3b6374744efbaa2d6f4c322d03a3faa Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 10:42:05 +0200 Subject: [PATCH 19/51] Python: Promote `xml.etree` modeling --- .../lib/semmle/python/frameworks/Stdlib.qll | 117 +++++++++++++++++ .../semmle/python/frameworks/Xml.qll | 118 +----------------- .../frameworks/stdlib/XPathExecution.py | 2 +- .../frameworks/stdlib}/xml_etree.py | 0 4 files changed, 119 insertions(+), 118 deletions(-) rename 
python/ql/test/{experimental/library-tests/frameworks/XML => library-tests/frameworks/stdlib}/xml_etree.py (100%) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 234a8802f0f4..263cdfcd0b35 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3174,6 +3174,123 @@ private module StdlibPrivate { } } } + + // --------------------------------------------------------------------------- + // xml.etree + // --------------------------------------------------------------------------- + /** + * Provides models for `xml.etree` parsers + * + * See + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser + */ + module XMLParser { + /** + * A source of instances of `xml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `xml.etree` parsers. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getACall() + or + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLPullParser") + .getACall() + } + } + + /** Gets a reference to an `xml.etree` parser instance. 
*/ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an `xml.etree` parser instance. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + exists(DataFlow::Node objRef | + DataFlow::localFlow(this.getObject(), objRef) and + result.(DataFlow::MethodCallNode).calls(objRef, "close") + ) + } + } + } + + /** + * A call to either of: + * - `xml.etree.ElementTree.fromstring` + * - `xml.etree.ElementTree.fromstringlist` + * - `xml.etree.ElementTree.XML` + * - `xml.etree.ElementTree.XMLID` + * - `xml.etree.ElementTree.parse` + * - `xml.etree.ElementTree.iterparse` + */ + private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLEtreeParsing() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML / XMLID + this.getArgByName("text"), + // fromstringlist + this.getArgByName("sequence"), + // parse / iterparse + this.getArgByName("source"), + ] + } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + // note: it does not 
matter what `xml.etree` parser you are using, you cannot + // change the security features anyway :| + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index c98370ba85ab..88def863824b 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -8,129 +8,13 @@ private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs -private module XmlEtree { - /** - * Provides models for `xml.etree` parsers - * - * See - * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser - * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser - */ - module XMLParser { - /** - * A source of instances of `xml.etree` parsers, extend this class to model new instances. - * - * This can include instantiations of the class, return values from function - * calls, or a special parameter that will be set when functions are called by an external - * library. - * - * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. - */ - abstract class InstanceSource extends DataFlow::LocalSourceNode { } - - /** A direct instantiation of `xml.etree` parsers. 
*/ - private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { - ClassInstantiation() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLParser") - .getACall() - or - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLPullParser") - .getACall() - } - } - - /** Gets a reference to an `xml.etree` parser instance. */ - private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { - t.start() and - result instanceof InstanceSource - or - exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) - } - - /** Gets a reference to an `xml.etree` parser instance. */ - DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } - - /** - * A call to the `feed` method of an `xml.etree` parser. - */ - private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { - exists(DataFlow::Node objRef | - DataFlow::localFlow(this.getObject(), objRef) and - result.(DataFlow::MethodCallNode).calls(objRef, "close") - ) - } - } - } - - /** - * A call to either of: - * - `xml.etree.ElementTree.fromstring` - * - `xml.etree.ElementTree.fromstringlist` - * - `xml.etree.ElementTree.XML` - * - `xml.etree.ElementTree.XMLID` - * - `xml.etree.ElementTree.parse` - * - `xml.etree.ElementTree.iterparse` - */ - private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLEtreeParsing() { - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - 
.getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // fromstring / XML / XMLID - this.getArgByName("text"), - // fromstringlist - this.getArgByName("sequence"), - // parse / iterparse - this.getArgByName("source"), - ] - } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - // note: it does not matter what `xml.etree` parser you are using, you cannot - // change the security features anyway :| - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { result = this } - } -} - private module SaxBasedParsing { /** * A call to the `setFeature` method on a XML sax parser. * * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature */ - class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { + private class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { SaxParserSetFeatureCall() { this = API::moduleImport("xml") diff --git a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py index 98bdaefac27b..d39b0e04888a 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py +++ b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py @@ -2,7 +2,7 @@ ns = {'dc': 'http://purl.org/dc/elements/1.1/'} import xml.etree.ElementTree as ET -tree = ET.parse('country_data.xml') +tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' root = tree.getroot() root.find(match, namespaces=ns) # $ getXPath=match diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py rename to python/ql/test/library-tests/frameworks/stdlib/xml_etree.py From a315aa84b2bdfad3cd3196336bbc1bc6fc658415 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 11:13:12 +0200 Subject: [PATCH 20/51] Python: Add some links in QLDocs --- python/ql/lib/semmle/python/frameworks/Lxml.qll | 7 ++++++- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index de89345a7d6d..e1052efbf999 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -224,7 +224,12 @@ private module Lxml { * - `lxml.etree.parse` * - `lxml.etree.parseid` * - * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring + * See + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid */ private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { LXMLParsing() { diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 263cdfcd0b35..6c8de0648522 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ 
b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3258,6 +3258,14 @@ private module StdlibPrivate { * - `xml.etree.ElementTree.XMLID` * - `xml.etree.ElementTree.parse` * - `xml.etree.ElementTree.iterparse` + * + * See + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstring + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstringlist + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XML + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLID + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse */ private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLEtreeParsing() { From 6774085e7af76b7faa952d2b23cbc9232a57273d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 11:19:25 +0200 Subject: [PATCH 21/51] Python: Add note about parseid/XMLID --- python/ql/lib/semmle/python/frameworks/Lxml.qll | 9 ++++++++- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index e1052efbf999..e090b9dbf053 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -263,6 +263,13 @@ private module Lxml { override predicate mayExecuteInput() { none() } - override DataFlow::Node getOutput() { result = this } + override DataFlow::Node getOutput() { + // Note: for `parseid` the result of the call is a tuple with `(root, dict)`, so + // maybe we should not just say that the entire tuple is the decoding output... 
my + // gut feeling is that THIS instance doesn't matter too much, but that it would be + // nice to be able to do this in general. (this is a problem for both `lxml.etree` + // and `xml.etree`) + result = this + } } } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 6c8de0648522..77ec1b5f9da6 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3297,7 +3297,14 @@ private module StdlibPrivate { override predicate mayExecuteInput() { none() } - override DataFlow::Node getOutput() { result = this } + override DataFlow::Node getOutput() { + // Note: for `XMLID` the result of the call is a tuple with `(root, dict)`, so + // maybe we should not just say that the entire tuple is the decoding output... my + // gut feeling is that THIS instance doesn't matter too much, but that it would be + // nice to be able to do this in general. (this is a problem for both `lxml.etree` + // and `xml.etree`) + result = this + } } } From 12cbdcde284e4e8fbce7a02ae0f65cedeee7e4eb Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 11:21:24 +0200 Subject: [PATCH 22/51] Python: Model `lxml.etree.XMLID` --- python/ql/lib/semmle/python/frameworks/Lxml.qll | 8 +++++--- python/ql/test/library-tests/frameworks/lxml/parsing.py | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index e090b9dbf053..60cc850fd349 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -221,6 +221,7 @@ private module Lxml { * - `lxml.etree.fromstring` * - `lxml.etree.fromstringlist` * - `lxml.etree.XML` + * - `lxml.etree.XMLID` * - `lxml.etree.parse` * - `lxml.etree.parseid` * @@ -228,6 +229,7 @@ private module Lxml { * - 
https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstringlist * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XML + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.XMLID * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid */ @@ -236,14 +238,14 @@ private module Lxml { this = API::moduleImport("lxml") .getMember("etree") - .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) + .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"]) .getACall() } override DataFlow::Node getAnInput() { result in [ this.getArg(0), - // fromstring / XML + // fromstring / XML / XMLID this.getArgByName("text"), // fromstringlist this.getArgByName("strings"), @@ -264,7 +266,7 @@ private module Lxml { override predicate mayExecuteInput() { none() } override DataFlow::Node getOutput() { - // Note: for `parseid` the result of the call is a tuple with `(root, dict)`, so + // Note: for `parseid`/XMLID the result of the call is a tuple with `(root, dict)`, so // maybe we should not just say that the entire tuple is the decoding output... my // gut feeling is that THIS instance doesn't matter too much, but that it would be // nice to be able to do this in general. (this is a problem for both `lxml.etree` diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index f1dbd5390ada..e69a68a6ad2c 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -13,6 +13,9 @@ lxml.etree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) 
lxml.etree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XML(..) +lxml.etree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) +lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) + lxml.etree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) lxml.etree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) From 386ff5361415f17c248285300de71ca735e92f7a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 11:32:22 +0200 Subject: [PATCH 23/51] Python: Model `lxml.iterparse` --- .../ql/lib/semmle/python/frameworks/Lxml.qll | 30 +++++++++++++++++++ .../library-tests/frameworks/lxml/parsing.py | 18 ++++++++--- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 60cc850fd349..821fc6bac801 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -274,4 +274,34 @@ private module Lxml { result = this } } + + /** + * A call to `lxml.etree.iterparse` + * + * See + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse + */ + private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + LXMLIterparseCall() { + this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O + kind.isXxe() + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + 
this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } } diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index e69a68a6ad2c..5abd626caf46 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -16,11 +16,15 @@ lxml.etree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) -lxml.etree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) -lxml.etree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) +xml_file = 'xml_file' +lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) +lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) -lxml.etree.parseid(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) -lxml.etree.parseid(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) +lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) +lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) 
+ +lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) +lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) # With default parsers (nothing changed) parser = lxml.etree.XMLParser() @@ -55,3 +59,9 @@ # DTD retrival vuln (also XXE) parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) + +# iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of +# the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O + +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) +lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) 
From 543454eff234ac2d403b932cb82b38309dea8002 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 11:47:29 +0200 Subject: [PATCH 24/51] Python: Model file access from XML parsing --- .../ql/lib/semmle/python/frameworks/Lxml.qll | 29 ++++++++++++++++++- .../lib/semmle/python/frameworks/Stdlib.qll | 29 +++++++++++++++++++ .../library-tests/frameworks/lxml/parsing.py | 16 +++++----- .../library-tests/frameworks/lxml/xpath.py | 2 +- .../frameworks/stdlib/XPathExecution.py | 2 +- .../frameworks/stdlib/xml_etree.py | 8 ++--- 6 files changed, 71 insertions(+), 15 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 821fc6bac801..a3825a70db0f 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -275,13 +275,38 @@ private module Lxml { } } + /** + * A call to `lxml.etree.ElementTree.parse` or `lxml.etree.ElementTree.parseid`, which + * takes either a filename or a file-like object as argument. To capture the filename + * for path-injection, we have this subclass. + * + * See + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse + * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid + */ + private class FileAccessFromLXMLParsing extends LXMLParsing, FileSystemAccess::Range { + FileAccessFromLXMLParsing() { + this = API::moduleImport("lxml").getMember("etree").getMember(["parse", "parseid"]).getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. 
+ } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } + /** * A call to `lxml.etree.iterparse` * * See * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse */ - private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range, + FileSystemAccess::Range { LXMLIterparseCall() { this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() } @@ -303,5 +328,7 @@ private module Lxml { override predicate mayExecuteInput() { none() } override DataFlow::Node getOutput() { result = this } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } } } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 77ec1b5f9da6..3afbf71f4954 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3306,6 +3306,35 @@ private module StdlibPrivate { result = this } } + + /** + * A call to `xml.etree.ElementTree.parse` or `xml.etree.ElementTree.iterparse`, which + * takes either a filename or a file-like object as argument. To capture the filename + * for path-injection, we have this subclass. + * + * See + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse + * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse + */ + private class FileAccessFromXMLEtreeParsing extends XMLEtreeParsing, FileSystemAccess::Range { + FileAccessFromXMLEtreeParsing() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["parse", "iterparse"]) + .getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). 
+ // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index 5abd626caf46..ca68c99a90ea 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -17,14 +17,14 @@ lxml.etree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XXE' decodeOutput=lxml.etree.XMLID(..) xml_file = 'xml_file' -lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) -lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) +lxml.etree.parse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file +lxml.etree.parse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parse(..) getAPathArgument=xml_file -lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) -lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) +lxml.etree.parseid(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) getAPathArgument=xml_file +lxml.etree.parseid(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.parseid(..) 
getAPathArgument=xml_file -lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) -lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) +lxml.etree.iterparse(xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(source=xml_file) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file # With default parsers (nothing changed) parser = lxml.etree.XMLParser() @@ -63,5 +63,5 @@ # iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of # the normal XMLParser arguments. Specifically, it doesn't allow disabling XXE :O -lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) -lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) 
getAPathArgument=xml_file diff --git a/python/ql/test/library-tests/frameworks/lxml/xpath.py b/python/ql/test/library-tests/frameworks/lxml/xpath.py index 9cf3a0883bd6..f67c8dae17c8 100644 --- a/python/ql/test/library-tests/frameworks/lxml/xpath.py +++ b/python/ql/test/library-tests/frameworks/lxml/xpath.py @@ -2,7 +2,7 @@ from io import StringIO def test_parse(): - tree = etree.parse(StringIO('')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' + tree = etree.parse(StringIO('')) # $ decodeFormat=XML decodeInput=StringIO(..) decodeOutput=etree.parse(..) xmlVuln='XXE' getAPathArgument=StringIO(..) r = tree.xpath('/foo/bar') # $ getXPath='/foo/bar' def test_XPath_class(): diff --git a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py index d39b0e04888a..b501e2d4ccb3 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py +++ b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py @@ -2,7 +2,7 @@ ns = {'dc': 'http://purl.org/dc/elements/1.1/'} import xml.etree.ElementTree as ET -tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument='country_data.xml' root = tree.getroot() root.find(match, namespaces=ns) # $ getXPath=match diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py index 0ed750ba8c78..684aaaa4a9cd 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py @@ -16,11 +16,11 @@ xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) -xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) -xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) +xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) -xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) -xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) +xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) # With parsers (no options available to disable/enable security features) From db43d043c4cdd59c65424f20fdcdc0a7d79a632c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 11:54:08 +0200 Subject: [PATCH 25/51] Python: Add test showing misalignment of xml.etree modeling --- .../test/library-tests/frameworks/stdlib/XPathExecution.py | 5 +++++ python/ql/test/library-tests/frameworks/stdlib/xml_etree.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py index b501e2d4ccb3..37043d7049c6 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py +++ b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py @@ -15,3 +15,8 @@ tree.find(match, namespaces=ns) # $ getXPath=match tree.findall(match, namespaces=ns) # $ getXPath=match tree.findtext(match, default=None, namespaces=ns) # $ getXPath=match + +parser = ET.XMLParser() +parser.feed("bar") # $ decodeFormat=XML decodeInput="bar" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +tree = parser.close() # $ decodeOutput=parser.close() +tree.find(match, namespaces=ns) # $ MISSING: getXPath=match diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py index 684aaaa4a9cd..da04cedbdfcc 100644 --- 
a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py @@ -22,6 +22,10 @@ xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) +tree = xml.etree.ElementTree.ElementTree() +tree.parse("file.xml") # $ MISSING: decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" +tree.parse(source="file.xml") # $ MISSING: decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" + # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() From 70b3eecdd506fcb2e17f3eb027e7b05073c257df Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 17:13:11 +0200 Subject: [PATCH 26/51] Python: Merge `xml.etree.ElementTree` models I forgot about the existing ones when I promoted it --- .../lib/semmle/python/frameworks/Stdlib.qll | 127 +++++++++--------- 1 file changed, 62 insertions(+), 65 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 3afbf71f4954..85cf61cdbafe 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -2835,70 +2835,6 @@ private module StdlibPrivate { override string getKind() { result = Escaping::getRegexKind() } } - // --------------------------------------------------------------------------- - // 
xml.etree.ElementTree - // --------------------------------------------------------------------------- - /** - * An instance of `xml.etree.ElementTree.ElementTree`. - * - * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree - */ - private API::Node elementTreeInstance() { - //parse to a tree - result = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("parse") - .getReturn() - or - // construct a tree without parsing - result = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("ElementTree") - .getReturn() - } - - /** - * An instance of `xml.etree.ElementTree.Element`. - * - * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element - */ - private API::Node elementInstance() { - // parse or go to the root of a tree - result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn() - or - // parse directly to an element - result = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember(["fromstring", "fromstringlist", "XML"]) - .getReturn() - } - - /** - * A call to a find method on a tree or an element will execute an XPath expression. 
- */ - private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode { - string methodName; - - ElementTreeFindCall() { - methodName in ["find", "findall", "findtext"] and - ( - this = elementTreeInstance().getMember(methodName).getACall() - or - this = elementInstance().getMember(methodName).getACall() - ) - } - - override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] } - - override string getName() { result = "xml.etree" } - } - // --------------------------------------------------------------------------- // urllib // --------------------------------------------------------------------------- @@ -3176,8 +3112,69 @@ private module StdlibPrivate { } // --------------------------------------------------------------------------- - // xml.etree + // xml.etree.ElementTree // --------------------------------------------------------------------------- + /** + * An instance of `xml.etree.ElementTree.ElementTree`. + * + * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.ElementTree + */ + private API::Node elementTreeInstance() { + //parse to a tree + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("parse") + .getReturn() + or + // construct a tree without parsing + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("ElementTree") + .getReturn() + } + + /** + * An instance of `xml.etree.ElementTree.Element`. 
+ * + * See https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.Element + */ + private API::Node elementInstance() { + // parse or go to the root of a tree + result = elementTreeInstance().getMember(["parse", "getroot"]).getReturn() + or + // parse directly to an element + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["fromstring", "fromstringlist", "XML"]) + .getReturn() + } + + /** + * A call to a find method on a tree or an element will execute an XPath expression. + */ + private class ElementTreeFindCall extends XML::XPathExecution::Range, DataFlow::CallCfgNode { + string methodName; + + ElementTreeFindCall() { + methodName in ["find", "findall", "findtext"] and + ( + this = elementTreeInstance().getMember(methodName).getACall() + or + this = elementInstance().getMember(methodName).getACall() + ) + } + + override DataFlow::Node getXPath() { result in [this.getArg(0), this.getArgByName("match")] } + + override string getName() { result = "xml.etree" } + } + /** * Provides models for `xml.etree` parsers * From 05bb0ef97688627eacd4b6ed247b84a707385ed5 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 17:24:16 +0200 Subject: [PATCH 27/51] Python: Align `xml.etree.ElementTree` modeling I didn't find a good way to actually share the stuff, so we kinda just have 2 things that look very similar :| --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 14 ++++++++++++++ .../frameworks/stdlib/XPathExecution.py | 4 ++-- .../library-tests/frameworks/stdlib/xml_etree.py | 4 ++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 85cf61cdbafe..1118133d215e 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3153,6 +3153,15 @@ private module StdlibPrivate { 
.getMember("ElementTree") .getMember(["fromstring", "fromstringlist", "XML"]) .getReturn() + or + result = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getReturn() + .getMember("close") + .getReturn() } /** @@ -3255,6 +3264,7 @@ private module StdlibPrivate { * - `xml.etree.ElementTree.XMLID` * - `xml.etree.ElementTree.parse` * - `xml.etree.ElementTree.iterparse` + * - `parse` method on an `xml.etree.ElementTree.ElementTree` instance * * See * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.fromstring @@ -3272,6 +3282,8 @@ private module StdlibPrivate { .getMember("ElementTree") .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) .getACall() + or + this = elementTreeInstance().getMember("parse").getACall() } override DataFlow::Node getAnInput() { @@ -3321,6 +3333,8 @@ private module StdlibPrivate { .getMember("ElementTree") .getMember(["parse", "iterparse"]) .getACall() + or + this = elementTreeInstance().getMember("parse").getACall() // I considered whether we should try to reduce FPs from people passing file-like // objects, which will not be a file system access (and couldn't cause a // path-injection). diff --git a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py index 37043d7049c6..5faff5ed8689 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py +++ b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py @@ -10,7 +10,7 @@ root.findtext(match, default=None, namespaces=ns) # $ getXPath=match tree = ET.ElementTree() -tree.parse("index.xhtml") +tree.parse("index.xhtml") # $ decodeFormat=XML decodeInput="index.xhtml" decodeOutput=tree.parse(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument="index.xhtml" tree.find(match, namespaces=ns) # $ getXPath=match tree.findall(match, namespaces=ns) # $ getXPath=match @@ -19,4 +19,4 @@ parser = ET.XMLParser() parser.feed("bar") # $ decodeFormat=XML decodeInput="bar" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' tree = parser.close() # $ decodeOutput=parser.close() -tree.find(match, namespaces=ns) # $ MISSING: getXPath=match +tree.find(match, namespaces=ns) # $ getXPath=match diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py index da04cedbdfcc..00f3b964b182 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py @@ -23,8 +23,8 @@ xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) tree = xml.etree.ElementTree.ElementTree() -tree.parse("file.xml") # $ MISSING: decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" -tree.parse(source="file.xml") # $ MISSING: decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" +tree.parse("file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" +tree.parse(source="file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) 
getAPathArgument="file.xml" # With parsers (no options available to disable/enable security features) From e11269715dc55e3509625489267601b736c324f1 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 17:44:00 +0200 Subject: [PATCH 28/51] Python: Promote `xml.sax` and `xml.dom.*` modeling --- .../lib/semmle/python/frameworks/Stdlib.qll | 214 ++++++++++++++++++ .../semmle/python/frameworks/Xml.qll | 210 ----------------- .../frameworks/stdlib}/xml_dom.py | 0 .../frameworks/stdlib}/xml_sax.py | 0 4 files changed, 214 insertions(+), 210 deletions(-) rename python/ql/test/{experimental/library-tests/frameworks/XML => library-tests/frameworks/stdlib}/xml_dom.py (100%) rename python/ql/test/{experimental/library-tests/frameworks/XML => library-tests/frameworks/stdlib}/xml_sax.py (100%) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 1118133d215e..418f3475c1ea 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3346,6 +3346,220 @@ private module StdlibPrivate { override DataFlow::Node getAPathArgument() { result = this.getAnInput() } } + + // --------------------------------------------------------------------------- + // xml.sax + // --------------------------------------------------------------------------- + /** + * A call to the `setFeature` method on a XML sax parser. + * + * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature + */ + private class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { + SaxParserSetFeatureCall() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("setFeature") + .getACall() + } + + // The keyword argument names does not match documentation. I checked (with Python + // 3.9.5) that the names used here actually works. 
+ DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] } + + DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] } + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. */ + private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker( + DataFlow::TypeBackTracker t, DataFlow::Node arg + ) { + t.start() and + arg = any(SaxParserSetFeatureCall c).getStateArg() and + result = arg.getALocalSource() + or + exists(DataFlow::TypeBackTracker t2 | + result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t) + ) + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. */ + DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) { + result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) + } + + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ + private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { + t.start() and + exists(SaxParserSetFeatureCall call | + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = true and + result = call.getObject() + ) + or + exists(DataFlow::TypeTracker t2 | + t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) + ) and + // take account of that we can set the feature to False, which makes the parser safe again + not exists(SaxParserSetFeatureCall call | + call.getObject() = result and + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + 
saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = false + ) + } + + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ + DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { + result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) + } + + /** + * A call to the `parse` method on a SAX XML parser. + */ + private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxInstanceParsing() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("parse") + .getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } + } + + /** + * A call to either `parse` or `parseString` from `xml.sax` module. 
+ * + * See: + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString + */ + private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxParsing() { + this = + API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // parse + this.getArgByName("source"), + ] + } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { + // note: the output of parsing with SAX is that the content handler gets the + // data... but we don't currently model this (it's not trivial to do, and won't + // really give us any value, at least not as of right now). + none() + } + } + + // --------------------------------------------------------------------------- + // xml.dom.* + // --------------------------------------------------------------------------- + /** + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. + * + * Both of these modules are based on SAX parsers. 
+ */ + private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember(["parse", "parseString"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + } + + override predicate mayExecuteInput() { none() } + + override DataFlow::Node getOutput() { result = this } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll index 88def863824b..344a19a01091 100644 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -7,213 +7,3 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts private import semmle.python.ApiGraphs - -private module SaxBasedParsing { - /** - * A call to the `setFeature` method on a XML sax parser. 
- * - * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature - */ - private class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { - SaxParserSetFeatureCall() { - this = - API::moduleImport("xml") - .getMember("sax") - .getMember("make_parser") - .getReturn() - .getMember("setFeature") - .getACall() - } - - // The keyword argument names does not match documentation. I checked (with Python - // 3.9.5) that the names used here actually works. - DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] } - - DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] } - } - - /** Gets a back-reference to the `setFeature` state argument `arg`. */ - private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker( - DataFlow::TypeBackTracker t, DataFlow::Node arg - ) { - t.start() and - arg = any(SaxParserSetFeatureCall c).getStateArg() and - result = arg.getALocalSource() - or - exists(DataFlow::TypeBackTracker t2 | - result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t) - ) - } - - /** Gets a back-reference to the `setFeature` state argument `arg`. */ - DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) { - result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) - } - - /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. 
- * - * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - */ - private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { - t.start() and - exists(SaxParserSetFeatureCall call | - call.getFeatureArg() = - API::moduleImport("xml") - .getMember("sax") - .getMember("handler") - .getMember("feature_external_ges") - .getAUse() and - saxParserSetFeatureStateArgBacktracker(call.getStateArg()) - .asExpr() - .(BooleanLiteral) - .booleanValue() = true and - result = call.getObject() - ) - or - exists(DataFlow::TypeTracker t2 | - t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) - ) and - // take account of that we can set the feature to False, which makes the parser safe again - not exists(SaxParserSetFeatureCall call | - call.getObject() = result and - call.getFeatureArg() = - API::moduleImport("xml") - .getMember("sax") - .getMember("handler") - .getMember("feature_external_ges") - .getAUse() and - saxParserSetFeatureStateArgBacktracker(call.getStateArg()) - .asExpr() - .(BooleanLiteral) - .booleanValue() = false - ) - } - - /** - * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. - * - * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges - */ - DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { - result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) - } - - /** - * A call to the `parse` method on a SAX XML parser. 
- */ - private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLSaxInstanceParsing() { - this = - API::moduleImport("xml") - .getMember("sax") - .getMember("make_parser") - .getReturn() - .getMember("parse") - .getACall() - } - - override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - // always vuln to these - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - // can be vuln to other things if features has been turned on - this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { - // note: the output of parsing with SAX is that the content handler gets the - // data... but we don't currently model this (it's not trivial to do, and won't - // really give us any value, at least not as of right now). - none() - } - } - - /** - * A call to either `parse` or `parseString` from `xml.sax` module. 
- * - * See: - * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse - * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString - */ - private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { - XMLSaxParsing() { - this = - API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // parseString - this.getArgByName("string"), - // parse - this.getArgByName("source"), - ] - } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - // always vuln to these - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - // can be vuln to other things if features has been turned on - this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { - // note: the output of parsing with SAX is that the content handler gets the - // data... but we don't currently model this (it's not trivial to do, and won't - // really give us any value, at least not as of right now). - none() - } - } - - /** - * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. - * - * Both of these modules are based on SAX parsers. 
- */ - private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { - XMLDomParsing() { - this = - API::moduleImport("xml") - .getMember("dom") - .getMember(["minidom", "pulldom"]) - .getMember(["parse", "parseString"]) - .getACall() - } - - override DataFlow::Node getAnInput() { - result in [ - this.getArg(0), - // parseString - this.getArgByName("string"), - // minidom.parse - this.getArgByName("file"), - // pulldom.parse - this.getArgByName("stream_or_string"), - ] - } - - DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { - this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) - or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - } - - override predicate mayExecuteInput() { none() } - - override DataFlow::Node getOutput() { result = this } - } -} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py rename to python/ql/test/library-tests/frameworks/stdlib/xml_dom.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py rename to python/ql/test/library-tests/frameworks/stdlib/xml_sax.py From 1d7cec60ae09489618b7e561845b5a361c274583 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 17:50:23 +0200 Subject: [PATCH 29/51] Python: `xml.sax.parse` is not a method call And it's not possible to provide a parser argument either --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git 
a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 418f3475c1ea..5659c7c8e910 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3482,7 +3482,7 @@ private module StdlibPrivate { * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString */ - private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLSaxParsing() { this = API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() @@ -3501,10 +3501,6 @@ private module StdlibPrivate { override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) - or - // can be vuln to other things if features has been turned on - this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and - (kind.isXxe() or kind.isDtdRetrieval()) } override predicate mayExecuteInput() { none() } From b4c0065aeb160839129d25cc3ee1818564670d21 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 18:03:35 +0200 Subject: [PATCH 30/51] Python: Extend FileSystemAccess for `xml.sax` and `xml.dom.*` parsing --- .../lib/semmle/python/frameworks/Stdlib.qll | 72 ++++++++++++++++++- .../frameworks/stdlib/xml_dom.py | 16 ++--- .../frameworks/stdlib/xml_sax.py | 22 +++--- 3 files changed, 90 insertions(+), 20 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 5659c7c8e910..38fe32a3b3c2 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3442,8 +3442,11 @@ private module StdlibPrivate { /** * A call to the `parse` method on a SAX XML parser. 
+ * + * See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse */ - private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range, + FileSystemAccess::Range { XMLSaxInstanceParsing() { this = API::moduleImport("xml") @@ -3473,6 +3476,17 @@ private module StdlibPrivate { // really give us any value, at least not as of right now). none() } + + override DataFlow::Node getAPathArgument() { + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). + // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + result = this.getAnInput() + } } /** @@ -3513,6 +3527,29 @@ private module StdlibPrivate { } } + /** + * A call to `xml.sax.parse`, which takes either a filename or a file-like object as + * argument. To capture the filename for path-injection, we have this subclass. + * + * See + * - https://docs.python.org/3/library/xml.sax.html#xml.sax.parse + * - https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse + */ + private class FileAccessFromXMLSaxParsing extends XMLSaxParsing, FileSystemAccess::Range { + FileAccessFromXMLSaxParsing() { + this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). 
+ // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } + // --------------------------------------------------------------------------- // xml.dom.* // --------------------------------------------------------------------------- @@ -3520,6 +3557,10 @@ private module StdlibPrivate { * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. * * Both of these modules are based on SAX parsers. + * + * See + * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse + * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse */ private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { XMLDomParsing() { @@ -3556,6 +3597,35 @@ private module StdlibPrivate { override DataFlow::Node getOutput() { result = this } } + + /** + * A call to the `parse` function from `xml.dom.minidom` or + * `xml.dom.pulldom`, which takes either a filename or a file-like object as argument. + * To capture the filename for path-injection, we have this subclass. + * + * See + * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse + * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse + */ + private class FileAccessFromXMLDomParsing extends XMLDomParsing, FileSystemAccess::Range { + FileAccessFromXMLDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember("parse") + .getACall() + // I considered whether we should try to reduce FPs from people passing file-like + // objects, which will not be a file system access (and couldn't cause a + // path-injection). 
+ // + // I suppose that once we have proper flow-summary support for file-like objects, + // we can make the XXE/XML-bomb sinks allow an access-path, while the + // path-injection sink wouldn't, and then we will not end up with such FPs. + } + + override DataFlow::Node getAPathArgument() { result = this.getAnInput() } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py index c6152c75807e..b3a1ab7f9309 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py @@ -6,16 +6,16 @@ x = "some xml" # minidom -xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) -xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) +xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) # pulldom -xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) -xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) @@ -24,8 +24,8 @@ # These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) -xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) +xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) -xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) -xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) +xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py index 8dbe9d4ae99e..c08034907a4d 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py @@ -10,41 +10,41 @@ def __init__(self): def characters(self, data): self._result.append(data) -xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) +xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' parser = xml.sax.make_parser() -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) +parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) # Forward Type Tracking test def func(cond): parser = xml.sax.make_parser() if cond: parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) else: - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) # make it vuln, then making it safe # a bit of an edge-case, but is nice to be able to handle. parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) def check_conditional_assignment(cond): parser = xml.sax.make_parser() @@ -52,7 +52,7 @@ def check_conditional_assignment(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) else: parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) def check_conditional_assignment2(cond): parser = xml.sax.make_parser() @@ -61,4 +61,4 @@ def check_conditional_assignment2(cond): else: flag_value = False parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) From 673220b231fdfcd225f4d29cbc76be67f156b21c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 18:18:35 +0200 Subject: [PATCH 31/51] Python: Minor cleanup of `XmlParsingTest` --- .../ql/test/experimental/meta/ConceptsTest.qll | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index 24cbbab2d448..cd90d716dd4b 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -543,18 +543,16 @@ class HttpClientRequestTest extends InlineExpectationsTest { class XmlParsingTest extends InlineExpectationsTest { XmlParsingTest() { this = "XmlParsingTest" } - override string getARelevantTag() { result in ["xmlInput", "xmlVuln"] } + override string getARelevantTag() { result in ["xmlVuln"] } override predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and - exists(XML::XMLParsing parsing | - exists(XML::XMLParsingVulnerabilityKind 
kind | - parsing.vulnerableTo(kind) and - location = parsing.getLocation() and - element = parsing.toString() and - value = "'" + kind + "'" and - tag = "xmlVuln" - ) + exists(XML::XMLParsing parsing, XML::XMLParsingVulnerabilityKind kind | + parsing.vulnerableTo(kind) and + location = parsing.getLocation() and + element = parsing.toString() and + value = "'" + kind + "'" and + tag = "xmlVuln" ) } } From 5083023aa80238c58811aa7e56df6dddf4e6b33a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 18:37:47 +0200 Subject: [PATCH 32/51] Python: Move XML parsing PoC Since the folder where it used to live is now empty otherwise :O --- python/PoCs/README.md | 1 + .../library-tests/frameworks/XML/poc => PoCs/XmlParsing}/PoC.py | 0 .../library-tests/frameworks/XML/poc => PoCs/XmlParsing}/flag | 0 python/ql/lib/semmle/python/Concepts.qll | 2 ++ .../library-tests/frameworks/XML/poc/this-dir-is-not-extracted | 1 - 5 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 python/PoCs/README.md rename python/{ql/test/experimental/library-tests/frameworks/XML/poc => PoCs/XmlParsing}/PoC.py (100%) rename python/{ql/test/experimental/library-tests/frameworks/XML/poc => PoCs/XmlParsing}/flag (100%) delete mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted diff --git a/python/PoCs/README.md b/python/PoCs/README.md new file mode 100644 index 000000000000..20eeb5dbd78d --- /dev/null +++ b/python/PoCs/README.md @@ -0,0 +1 @@ +A place to collect proof of concept for how certain vulnerabilities work. 
diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/PoCs/XmlParsing/PoC.py similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py rename to python/PoCs/XmlParsing/PoC.py diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag b/python/PoCs/XmlParsing/flag similarity index 100% rename from python/ql/test/experimental/library-tests/frameworks/XML/poc/flag rename to python/PoCs/XmlParsing/flag diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index b553c8d927da..b1727e4829d9 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -555,6 +555,8 @@ module XML { * A kind of XML vulnerability. * * See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries + * + * See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing. */ class XMLParsingVulnerabilityKind extends string { XMLParsingVulnerabilityKind() { diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted deleted file mode 100644 index b1925ade1d3a..000000000000 --- a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted +++ /dev/null @@ -1 +0,0 @@ -just FYI From b8d3c5e96fbfc0b5770591d699b94695f3d15a26 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 18:40:26 +0200 Subject: [PATCH 33/51] Python: Remove last bits of experimental XML modeling --- python/ql/src/experimental/semmle/python/Frameworks.qll | 1 - .../ql/src/experimental/semmle/python/frameworks/Xml.qll | 9 --------- .../python/security/dataflow/XmlBombCustomizations.qll | 1 - .../python/security/dataflow/XxeCustomizations.qll | 1 - .../library-tests/frameworks/XML/ConceptsTest.expected | 0 
.../library-tests/frameworks/XML/ConceptsTest.ql | 3 --- 6 files changed, 15 deletions(-) delete mode 100644 python/ql/src/experimental/semmle/python/frameworks/Xml.qll delete mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.expected delete mode 100644 python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index edbed61c41c4..81b2c1bee23d 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -3,7 +3,6 @@ */ private import experimental.semmle.python.frameworks.Stdlib -private import experimental.semmle.python.frameworks.Xml private import experimental.semmle.python.frameworks.Flask private import experimental.semmle.python.frameworks.Django private import experimental.semmle.python.frameworks.Werkzeug diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll deleted file mode 100644 index 344a19a01091..000000000000 --- a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll +++ /dev/null @@ -1,9 +0,0 @@ -/** - * Provides class and predicates to track external data that - * may represent malicious XML objects. 
- */ - -private import python -private import semmle.python.dataflow.new.DataFlow -private import semmle.python.Concepts -private import semmle.python.ApiGraphs diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index c5e69c1e0e33..d6f2e0791f99 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -7,7 +7,6 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts -import experimental.semmle.python.frameworks.Xml // needed until modeling have been promoted private import semmle.python.dataflow.new.RemoteFlowSources /** diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index 27d011625a6d..a4473285b8db 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -7,7 +7,6 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.Concepts -import experimental.semmle.python.frameworks.Xml // needed until modeling have been promoted private import semmle.python.dataflow.new.RemoteFlowSources /** diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.expected b/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.expected deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql deleted file mode 100644 index 95728bd6dc89..000000000000 --- 
a/python/ql/test/experimental/library-tests/frameworks/XML/ConceptsTest.ql +++ /dev/null @@ -1,3 +0,0 @@ -import python -import experimental.meta.ConceptsTest -import experimental.semmle.python.frameworks.Xml // needed until modeling have been promoted From 4abab2206618b950509b45ed516b8a9c11f7732d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 18:47:50 +0200 Subject: [PATCH 34/51] Python: Promote XXE and XML-bomb queries Need to write a change-note as well, but will do that tomorrow --- .../{experimental/Security/NEW => Security}/CWE-611/Xxe.qhelp | 0 .../src/{experimental/Security/NEW => Security}/CWE-611/Xxe.ql | 0 .../Security/NEW => Security}/CWE-611/examples/XxeBad.py | 0 .../Security/NEW => Security}/CWE-611/examples/XxeGood.py | 0 .../Security/NEW => Security}/CWE-776/XmlBomb.qhelp | 0 .../{experimental/Security/NEW => Security}/CWE-776/XmlBomb.ql | 0 .../Security/NEW => Security}/CWE-776/examples/XmlBombBad.py | 0 .../Security/NEW => Security}/CWE-776/examples/XmlBombGood.py | 0 .../test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref | 1 - .../query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref | 1 - .../query-tests/Security/CWE-611-Xxe/Xxe.expected | 0 python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref | 1 + .../{experimental => }/query-tests/Security/CWE-611-Xxe/test.py | 0 .../query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected | 0 .../ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref | 1 + .../query-tests/Security/CWE-776-XmlBomb/test.py | 0 16 files changed, 2 insertions(+), 2 deletions(-) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-611/Xxe.qhelp (100%) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-611/Xxe.ql (100%) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-611/examples/XxeBad.py (100%) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-611/examples/XxeGood.py (100%) rename python/ql/src/{experimental/Security/NEW => 
Security}/CWE-776/XmlBomb.qhelp (100%) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-776/XmlBomb.ql (100%) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-776/examples/XmlBombBad.py (100%) rename python/ql/src/{experimental/Security/NEW => Security}/CWE-776/examples/XmlBombGood.py (100%) delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref delete mode 100644 python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref rename python/ql/test/{experimental => }/query-tests/Security/CWE-611-Xxe/Xxe.expected (100%) create mode 100644 python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref rename python/ql/test/{experimental => }/query-tests/Security/CWE-611-Xxe/test.py (100%) rename python/ql/test/{experimental => }/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected (100%) create mode 100644 python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref rename python/ql/test/{experimental => }/query-tests/Security/CWE-776-XmlBomb/test.py (100%) diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp b/python/ql/src/Security/CWE-611/Xxe.qhelp similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-611/Xxe.qhelp rename to python/ql/src/Security/CWE-611/Xxe.qhelp diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql b/python/ql/src/Security/CWE-611/Xxe.ql similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-611/Xxe.ql rename to python/ql/src/Security/CWE-611/Xxe.ql diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeBad.py b/python/ql/src/Security/CWE-611/examples/XxeBad.py similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeBad.py rename to python/ql/src/Security/CWE-611/examples/XxeBad.py diff --git a/python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.py b/python/ql/src/Security/CWE-611/examples/XxeGood.py similarity index 
100% rename from python/ql/src/experimental/Security/NEW/CWE-611/examples/XxeGood.py rename to python/ql/src/Security/CWE-611/examples/XxeGood.py diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp b/python/ql/src/Security/CWE-776/XmlBomb.qhelp similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.qhelp rename to python/ql/src/Security/CWE-776/XmlBomb.qhelp diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql b/python/ql/src/Security/CWE-776/XmlBomb.ql similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-776/XmlBomb.ql rename to python/ql/src/Security/CWE-776/XmlBomb.ql diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombBad.py b/python/ql/src/Security/CWE-776/examples/XmlBombBad.py similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombBad.py rename to python/ql/src/Security/CWE-776/examples/XmlBombBad.py diff --git a/python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.py b/python/ql/src/Security/CWE-776/examples/XmlBombGood.py similarity index 100% rename from python/ql/src/experimental/Security/NEW/CWE-776/examples/XmlBombGood.py rename to python/ql/src/Security/CWE-776/examples/XmlBombGood.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref deleted file mode 100644 index f8a07d7d2ee3..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.qlref +++ /dev/null @@ -1 +0,0 @@ -experimental/Security/NEW/CWE-611/Xxe.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref b/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref deleted file mode 100644 index 5eadbb1f26f9..000000000000 --- a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref +++ /dev/null @@ -1 
+0,0 @@ -experimental/Security/NEW/CWE-776/XmlBomb.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.expected b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/Xxe.expected rename to python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.expected diff --git a/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref new file mode 100644 index 000000000000..62a3f7f22d97 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-611-Xxe/Xxe.qlref @@ -0,0 +1 @@ +Security/CWE-611/Xxe.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/test.py b/python/ql/test/query-tests/Security/CWE-611-Xxe/test.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-611-Xxe/test.py rename to python/ql/test/query-tests/Security/CWE-611-Xxe/test.py diff --git a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected rename to python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.expected diff --git a/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref new file mode 100644 index 000000000000..c983b357446f --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/XmlBomb.qlref @@ -0,0 +1 @@ +Security/CWE-776/XmlBomb.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/test.py b/python/ql/test/query-tests/Security/CWE-776-XmlBomb/test.py similarity index 100% rename from python/ql/test/experimental/query-tests/Security/CWE-776-XmlBomb/test.py rename to 
python/ql/test/query-tests/Security/CWE-776-XmlBomb/test.py From d2b03bb4809b1156d1d0799ca739da4265c68ba7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 31 Mar 2022 20:37:28 +0200 Subject: [PATCH 35/51] Python: Fix `SimpleXmlRpcServer.ql` --- .../src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index 3d2a736ed496..53ff6eeedb80 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -10,14 +10,14 @@ */ private import python -private import experimental.semmle.python.Concepts +private import semmle.python.Concepts private import semmle.python.ApiGraphs from DataFlow::CallCfgNode call, string kinds where call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and kinds = - strictconcat(ExperimentalXML::XMLParsingVulnerabilityKind kind | + strictconcat(XML::XMLParsingVulnerabilityKind kind | kind.isBillionLaughs() or kind.isQuadraticBlowup() | kind, ", " From ab59d5c786893d71dc044107af0517e56b460171 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 5 Apr 2022 11:06:22 +0200 Subject: [PATCH 36/51] Python: Rename to `XmlParsing` To follow our style guide --- python/ql/lib/semmle/python/Concepts.qll | 8 ++++---- python/ql/lib/semmle/python/frameworks/Lxml.qll | 6 +++--- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 10 +++++----- python/ql/lib/semmle/python/frameworks/Xmltodict.qll | 2 +- .../python/security/dataflow/XmlBombCustomizations.qll | 2 +- .../python/security/dataflow/XxeCustomizations.qll | 2 +- python/ql/test/experimental/meta/ConceptsTest.qll | 2 +- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll 
b/python/ql/lib/semmle/python/Concepts.qll index b1727e4829d9..b0a5e1766a2f 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -580,9 +580,9 @@ module XML { * A data-flow node that parses XML. * * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. + * extend `XmlParsing` instead. */ - class XMLParsing extends Decoding instanceof XMLParsing::Range { + class XmlParsing extends Decoding instanceof XmlParsing::Range { /** * Holds if this XML parsing is vulnerable to `kind`. */ @@ -590,12 +590,12 @@ module XML { } /** Provides classes for modeling XML parsing APIs. */ - module XMLParsing { + module XmlParsing { /** * A data-flow node that parses XML. * * Extend this class to model new APIs. If you want to refine existing API models, - * extend `XMLParsing` instead. + * extend `XmlParsing` instead. */ abstract class Range extends Decoding::Range { /** diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index a3825a70db0f..05dfd388dace 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -196,7 +196,7 @@ private module Lxml { /** * A call to the `feed` method of an `lxml` parser. 
*/ - private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { LXMLParserFeedCall() { this.calls(instance(_), "feed") } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } @@ -233,7 +233,7 @@ private module Lxml { * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid */ - private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + private class LXMLParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { LXMLParsing() { this = API::moduleImport("lxml") @@ -305,7 +305,7 @@ private module Lxml { * See * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse */ - private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XMLParsing::Range, + private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XmlParsing::Range, FileSystemAccess::Range { LXMLIterparseCall() { this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 38fe32a3b3c2..e45e8e3a8794 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3236,7 +3236,7 @@ private module StdlibPrivate { /** * A call to the `feed` method of an `xml.etree` parser. 
*/ - private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } @@ -3274,7 +3274,7 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse */ - private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { XMLEtreeParsing() { this = API::moduleImport("xml") @@ -3445,7 +3445,7 @@ private module StdlibPrivate { * * See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse */ - private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range, + private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range, FileSystemAccess::Range { XMLSaxInstanceParsing() { this = @@ -3496,7 +3496,7 @@ private module StdlibPrivate { * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString */ - private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { XMLSaxParsing() { this = API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() @@ -3562,7 +3562,7 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse */ - private class XMLDomParsing extends DataFlow::CallCfgNode, 
XML::XMLParsing::Range { + private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { XMLDomParsing() { this = API::moduleImport("xml") diff --git a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll index bb65607251fa..84b0b0fe03f0 100644 --- a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll +++ b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll @@ -20,7 +20,7 @@ private module Xmltodict { /** * A call to `xmltodict.parse`. */ - private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } override DataFlow::Node getAnInput() { diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index d6f2e0791f99..5da602173a12 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -40,7 +40,7 @@ module XmlBomb { */ class XmlParsingWithEntityResolution extends Sink { XmlParsingWithEntityResolution() { - exists(XML::XMLParsing parsing, XML::XMLParsingVulnerabilityKind kind | + exists(XML::XmlParsing parsing, XML::XMLParsingVulnerabilityKind kind | (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index a4473285b8db..355b3aeefc97 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ 
b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -40,7 +40,7 @@ module Xxe { */ class XmlParsingWithExternalEntityResolution extends Sink { XmlParsingWithExternalEntityResolution() { - exists(XML::XMLParsing parsing, XML::XMLParsingVulnerabilityKind kind | + exists(XML::XmlParsing parsing, XML::XMLParsingVulnerabilityKind kind | kind.isXxe() and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index cd90d716dd4b..24c3c270413a 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -547,7 +547,7 @@ class XmlParsingTest extends InlineExpectationsTest { override predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and - exists(XML::XMLParsing parsing, XML::XMLParsingVulnerabilityKind kind | + exists(XML::XmlParsing parsing, XML::XMLParsingVulnerabilityKind kind | parsing.vulnerableTo(kind) and location = parsing.getLocation() and element = parsing.toString() and From 1f285b8983c15e31b08886aa4080f4fad3c8b42b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 5 Apr 2022 11:07:12 +0200 Subject: [PATCH 37/51] Python: Rename to `XmlParsingVulnerabilityKind` To keep up with style guide --- python/ql/lib/semmle/python/Concepts.qll | 8 ++++---- python/ql/lib/semmle/python/frameworks/Lxml.qll | 14 +++++++------- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 10 +++++----- .../ql/lib/semmle/python/frameworks/Xmltodict.qll | 2 +- .../Security/CWE-611/SimpleXmlRpcServer.ql | 2 +- .../security/dataflow/XmlBombCustomizations.qll | 2 +- .../python/security/dataflow/XxeCustomizations.qll | 2 +- python/ql/test/experimental/meta/ConceptsTest.qll | 2 +- 8 files changed, 21 insertions(+), 21 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll 
b/python/ql/lib/semmle/python/Concepts.qll index b0a5e1766a2f..091ce31a1571 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -558,8 +558,8 @@ module XML { * * See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing. */ - class XMLParsingVulnerabilityKind extends string { - XMLParsingVulnerabilityKind() { + class XmlParsingVulnerabilityKind extends string { + XmlParsingVulnerabilityKind() { this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] } @@ -586,7 +586,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. */ - predicate vulnerableTo(XMLParsingVulnerabilityKind kind) { super.vulnerableTo(kind) } + predicate vulnerableTo(XmlParsingVulnerabilityKind kind) { super.vulnerableTo(kind) } } /** Provides classes for modeling XML parsing APIs. */ @@ -601,7 +601,7 @@ module XML { /** * Holds if this XML parsing is vulnerable to `kind`. */ - abstract predicate vulnerableTo(XMLParsingVulnerabilityKind kind); + abstract predicate vulnerableTo(XmlParsingVulnerabilityKind kind); override string getFormat() { result = "XML" } } diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 05dfd388dace..6d310563ade7 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -121,7 +121,7 @@ private module Lxml { */ abstract class InstanceSource extends DataFlow::LocalSourceNode { /** Holds if this instance is vulnerable to `kind`. 
*/ - abstract predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind); + abstract predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind); } /** @@ -135,7 +135,7 @@ private module Lxml { } // NOTE: it's not possible to change settings of a parser after constructing it - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { kind.isXxe() and ( // resolve_entities has default True @@ -165,7 +165,7 @@ private module Lxml { API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // as highlighted by // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser // by default XXE is allow. so as long as the default parser has not been @@ -189,7 +189,7 @@ private module Lxml { } /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. 
*/ - DataFlow::Node instanceVulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + DataFlow::Node instanceVulnerableTo(XML::XmlParsingVulnerabilityKind kind) { exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) } @@ -201,7 +201,7 @@ private module Lxml { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { this.calls(instanceVulnerableTo(kind), "feed") } @@ -256,7 +256,7 @@ private module Lxml { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { this.getParserArg() = XMLParser::instanceVulnerableTo(kind) or kind.isXxe() and @@ -313,7 +313,7 @@ private module Lxml { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O kind.isXxe() or diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index e45e8e3a8794..91ba7bc75b5a 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3241,7 +3241,7 @@ private module StdlibPrivate { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { kind.isBillionLaughs() or kind.isQuadraticBlowup() } @@ -3298,7 +3298,7 @@ private 
module StdlibPrivate { ] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // note: it does not matter what `xml.etree` parser you are using, you cannot // change the security features anyway :| kind.isBillionLaughs() or kind.isQuadraticBlowup() @@ -3459,7 +3459,7 @@ private module StdlibPrivate { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) or @@ -3512,7 +3512,7 @@ private module StdlibPrivate { ] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // always vuln to these (kind.isBillionLaughs() or kind.isQuadraticBlowup()) } @@ -3586,7 +3586,7 @@ private module StdlibPrivate { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) or diff --git a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll index 84b0b0fe03f0..db2c443d8e92 100644 --- a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll +++ b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll @@ -27,7 +27,7 @@ private module Xmltodict { result in [this.getArg(0), this.getArgByName("xml_input")] } - override predicate vulnerableTo(XML::XMLParsingVulnerabilityKind kind) { + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { (kind.isBillionLaughs() or 
kind.isQuadraticBlowup()) and this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) } diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index 53ff6eeedb80..e638c13853fe 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -17,7 +17,7 @@ from DataFlow::CallCfgNode call, string kinds where call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and kinds = - strictconcat(XML::XMLParsingVulnerabilityKind kind | + strictconcat(XML::XmlParsingVulnerabilityKind kind | kind.isBillionLaughs() or kind.isQuadraticBlowup() | kind, ", " diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index 5da602173a12..05f6fc57a345 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -40,7 +40,7 @@ module XmlBomb { */ class XmlParsingWithEntityResolution extends Sink { XmlParsingWithEntityResolution() { - exists(XML::XmlParsing parsing, XML::XMLParsingVulnerabilityKind kind | + exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index 355b3aeefc97..0fc139ec4f32 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -40,7 +40,7 
@@ module Xxe { */ class XmlParsingWithExternalEntityResolution extends Sink { XmlParsingWithExternalEntityResolution() { - exists(XML::XmlParsing parsing, XML::XMLParsingVulnerabilityKind kind | + exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | kind.isXxe() and parsing.vulnerableTo(kind) and this = parsing.getAnInput() diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index 24c3c270413a..73bcf8b4aa98 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -547,7 +547,7 @@ class XmlParsingTest extends InlineExpectationsTest { override predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and - exists(XML::XmlParsing parsing, XML::XMLParsingVulnerabilityKind kind | + exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | parsing.vulnerableTo(kind) and location = parsing.getLocation() and element = parsing.toString() and From a7dab53ed2df129e7bdab97cd04f73b9b133574b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 5 Apr 2022 11:46:49 +0200 Subject: [PATCH 38/51] Python: Add change-note --- python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md diff --git a/python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md b/python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md new file mode 100644 index 000000000000..bd867091aea3 --- /dev/null +++ b/python/ql/src/change-notes/2022-04-05-add-xxe-and-xmlbomb.md @@ -0,0 +1,5 @@ +--- +category: newQuery +--- +* "XML external entity expansion" (`py/xxe`). Results will appear by default. This query was based on [an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112). 
+* "XML internal entity expansion" (`py/xml-bomb`). Results will appear by default. This query was based on [an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112). From b7f56dd17e982ddace861a561ba851e8e7cf7e5c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 5 Apr 2022 12:31:09 +0200 Subject: [PATCH 39/51] Python: Rewrite concepts to use `extends ... instanceof ...` This caused compilation time for `ConceptsTest.ql` to go from 1m24s to 7s --- python/ql/lib/semmle/python/Concepts.qll | 241 ++++++++--------------- 1 file changed, 77 insertions(+), 164 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 091ce31a1571..eec0cd0d1a0d 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -17,13 +17,9 @@ private import semmle.python.Frameworks * Extend this class to refine existing API models. If you want to model new APIs, * extend `SystemCommandExecution::Range` instead. */ -class SystemCommandExecution extends DataFlow::Node { - SystemCommandExecution::Range range; - - SystemCommandExecution() { this = range } - +class SystemCommandExecution extends DataFlow::Node instanceof SystemCommandExecution::Range { /** Gets the argument that specifies the command to be executed. */ - DataFlow::Node getCommand() { result = range.getCommand() } + DataFlow::Node getCommand() { result = super.getCommand() } } /** Provides a class for modeling new system-command execution APIs. */ @@ -48,13 +44,9 @@ module SystemCommandExecution { * Extend this class to refine existing API models. If you want to model new APIs, * extend `FileSystemAccess::Range` instead. 
*/ -class FileSystemAccess extends DataFlow::Node { - FileSystemAccess::Range range; - - FileSystemAccess() { this = range } - +class FileSystemAccess extends DataFlow::Node instanceof FileSystemAccess::Range { /** Gets an argument to this file system access that is interpreted as a path. */ - DataFlow::Node getAPathArgument() { result = range.getAPathArgument() } + DataFlow::Node getAPathArgument() { result = super.getAPathArgument() } } /** Provides a class for modeling new file system access APIs. */ @@ -78,14 +70,12 @@ module FileSystemAccess { * Extend this class to refine existing API models. If you want to model new APIs, * extend `FileSystemWriteAccess::Range` instead. */ -class FileSystemWriteAccess extends FileSystemAccess { - override FileSystemWriteAccess::Range range; - +class FileSystemWriteAccess extends FileSystemAccess instanceof FileSystemWriteAccess::Range { /** * Gets a node that represents data to be written to the file system (possibly with * some transformation happening before it is written, like JSON encoding). */ - DataFlow::Node getADataNode() { result = range.getADataNode() } + DataFlow::Node getADataNode() { result = super.getADataNode() } } /** Provides a class for modeling new file system writes. */ @@ -111,13 +101,9 @@ module Path { * A data-flow node that performs path normalization. This is often needed in order * to safely access paths. */ - class PathNormalization extends DataFlow::Node { - PathNormalization::Range range; - - PathNormalization() { this = range } - + class PathNormalization extends DataFlow::Node instanceof PathNormalization::Range { /** Gets an argument to this path normalization that is interpreted as a path. */ - DataFlow::Node getPathArg() { result = range.getPathArg() } + DataFlow::Node getPathArg() { result = super.getPathArg() } } /** Provides a class for modeling new path normalization APIs. */ @@ -133,12 +119,10 @@ module Path { } /** A data-flow node that checks that a path is safe to access. 
*/ - class SafeAccessCheck extends DataFlow::BarrierGuard { - SafeAccessCheck::Range range; - - SafeAccessCheck() { this = range } - - override predicate checks(ControlFlowNode node, boolean branch) { range.checks(node, branch) } + class SafeAccessCheck extends DataFlow::BarrierGuard instanceof SafeAccessCheck::Range { + override predicate checks(ControlFlowNode node, boolean branch) { + SafeAccessCheck::Range.super.checks(node, branch) + } } /** Provides a class for modeling new path safety checks. */ @@ -160,22 +144,18 @@ module Path { * Extend this class to refine existing API models. If you want to model new APIs, * extend `Decoding::Range` instead. */ -class Decoding extends DataFlow::Node { - Decoding::Range range; - - Decoding() { this = range } - +class Decoding extends DataFlow::Node instanceof Decoding::Range { /** Holds if this call may execute code embedded in its input. */ - predicate mayExecuteInput() { range.mayExecuteInput() } + predicate mayExecuteInput() { super.mayExecuteInput() } /** Gets an input that is decoded by this function. */ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } /** Gets the output that contains the decoded data produced by this function. */ - DataFlow::Node getOutput() { result = range.getOutput() } + DataFlow::Node getOutput() { result = super.getOutput() } /** Gets an identifier for the format this function decodes from, such as "JSON". */ - string getFormat() { result = range.getFormat() } + string getFormat() { result = super.getFormat() } } /** Provides a class for modeling new decoding mechanisms. */ @@ -226,19 +206,15 @@ private class DecodingAdditionalTaintStep extends TaintTracking::AdditionalTaint * Extend this class to refine existing API models. If you want to model new APIs, * extend `Encoding::Range` instead. 
*/ -class Encoding extends DataFlow::Node { - Encoding::Range range; - - Encoding() { this = range } - +class Encoding extends DataFlow::Node instanceof Encoding::Range { /** Gets an input that is encoded by this function. */ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } /** Gets the output that contains the encoded data produced by this function. */ - DataFlow::Node getOutput() { result = range.getOutput() } + DataFlow::Node getOutput() { result = super.getOutput() } /** Gets an identifier for the format this function decodes from, such as "JSON". */ - string getFormat() { result = range.getFormat() } + string getFormat() { result = super.getFormat() } } /** Provides a class for modeling new encoding mechanisms. */ @@ -280,13 +256,9 @@ private class EncodingAdditionalTaintStep extends TaintTracking::AdditionalTaint * Extend this class to refine existing API models. If you want to model new APIs, * extend `Logging::Range` instead. */ -class Logging extends DataFlow::Node { - Logging::Range range; - - Logging() { this = range } - +class Logging extends DataFlow::Node instanceof Logging::Range { /** Gets an input that is logged. */ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } } /** Provides a class for modeling new logging mechanisms. */ @@ -309,13 +281,9 @@ module Logging { * Extend this class to refine existing API models. If you want to model new APIs, * extend `CodeExecution::Range` instead. */ -class CodeExecution extends DataFlow::Node { - CodeExecution::Range range; - - CodeExecution() { this = range } - +class CodeExecution extends DataFlow::Node instanceof CodeExecution::Range { /** Gets the argument that specifies the code to be executed. 
*/ - DataFlow::Node getCode() { result = range.getCode() } + DataFlow::Node getCode() { result = super.getCode() } } /** Provides a class for modeling new dynamic code execution APIs. */ @@ -343,13 +311,9 @@ module CodeExecution { * Extend this class to refine existing API models. If you want to model new APIs, * extend `SqlConstruction::Range` instead. */ -class SqlConstruction extends DataFlow::Node { - SqlConstruction::Range range; - - SqlConstruction() { this = range } - +class SqlConstruction extends DataFlow::Node instanceof SqlConstruction::Range { /** Gets the argument that specifies the SQL statements to be constructed. */ - DataFlow::Node getSql() { result = range.getSql() } + DataFlow::Node getSql() { result = super.getSql() } } /** Provides a class for modeling new SQL execution APIs. */ @@ -380,13 +344,9 @@ module SqlConstruction { * Extend this class to refine existing API models. If you want to model new APIs, * extend `SqlExecution::Range` instead. */ -class SqlExecution extends DataFlow::Node { - SqlExecution::Range range; - - SqlExecution() { this = range } - +class SqlExecution extends DataFlow::Node instanceof SqlExecution::Range { /** Gets the argument that specifies the SQL statements to be executed. */ - DataFlow::Node getSql() { result = range.getSql() } + DataFlow::Node getSql() { result = super.getSql() } } /** Provides a class for modeling new SQL execution APIs. */ @@ -412,22 +372,18 @@ module SqlExecution { * Extend this class to refine existing API models. If you want to model new APIs, * extend `RegexExecution::Range` instead. */ -class RegexExecution extends DataFlow::Node { - RegexExecution::Range range; - - RegexExecution() { this = range } - +class RegexExecution extends DataFlow::Node instanceof RegexExecution::Range { /** Gets the data flow node for the regex being executed by this node. 
*/ - DataFlow::Node getRegex() { result = range.getRegex() } + DataFlow::Node getRegex() { result = super.getRegex() } /** Gets a dataflow node for the string to be searched or matched against. */ - DataFlow::Node getString() { result = range.getString() } + DataFlow::Node getString() { result = super.getString() } /** * Gets the name of this regex execution, typically the name of an executing method. * This is used for nice alert messages and should include the module if possible. */ - string getName() { result = range.getName() } + string getName() { result = super.getName() } } /** Provides classes for modeling new regular-expression execution APIs. */ @@ -466,19 +422,15 @@ module XML { * Extend this class to refine existing API models. If you want to model new APIs, * extend `XPathConstruction::Range` instead. */ - class XPathConstruction extends DataFlow::Node { - XPathConstruction::Range range; - - XPathConstruction() { this = range } - + class XPathConstruction extends DataFlow::Node instanceof XPathConstruction::Range { /** Gets the argument that specifies the XPath expressions to be constructed. */ - DataFlow::Node getXPath() { result = range.getXPath() } + DataFlow::Node getXPath() { result = super.getXPath() } /** * Gets the name of this XPath expression construction, typically the name of an executing method. * This is used for nice alert messages and should include the module if possible. */ - string getName() { result = range.getName() } + string getName() { result = super.getName() } } /** Provides a class for modeling new XPath construction APIs. */ @@ -513,19 +465,15 @@ module XML { * Extend this class to refine existing API models. If you want to model new APIs, * extend `XPathExecution::Range` instead. 
*/ - class XPathExecution extends DataFlow::Node { - XPathExecution::Range range; - - XPathExecution() { this = range } - + class XPathExecution extends DataFlow::Node instanceof XPathExecution::Range { /** Gets the data flow node for the XPath expression being executed by this node. */ - DataFlow::Node getXPath() { result = range.getXPath() } + DataFlow::Node getXPath() { result = super.getXPath() } /** * Gets the name of this XPath expression execution, typically the name of an executing method. * This is used for nice alert messages and should include the module if possible. */ - string getName() { result = range.getName() } + string getName() { result = super.getName() } } /** Provides classes for modeling new regular-expression execution APIs. */ @@ -616,16 +564,12 @@ module LDAP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `LDAPQuery::Range` instead. */ - class LdapExecution extends DataFlow::Node { - LdapExecution::Range range; - - LdapExecution() { this = range } - + class LdapExecution extends DataFlow::Node instanceof LdapExecution::Range { /** Gets the argument containing the filter string. */ - DataFlow::Node getFilter() { result = range.getFilter() } + DataFlow::Node getFilter() { result = super.getFilter() } /** Gets the argument containing the base DN. */ - DataFlow::Node getBaseDn() { result = range.getBaseDn() } + DataFlow::Node getBaseDn() { result = super.getBaseDn() } } /** Provides classes for modeling new LDAP query execution-related APIs. */ @@ -653,26 +597,23 @@ module LDAP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `Escaping::Range` instead. 
*/ -class Escaping extends DataFlow::Node { - Escaping::Range range; - +class Escaping extends DataFlow::Node instanceof Escaping::Range { Escaping() { - this = range and // escapes that don't have _both_ input/output defined are not valid - exists(range.getAnInput()) and - exists(range.getOutput()) + exists(super.getAnInput()) and + exists(super.getOutput()) } /** Gets an input that will be escaped. */ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } /** Gets the output that contains the escaped data. */ - DataFlow::Node getOutput() { result = range.getOutput() } + DataFlow::Node getOutput() { result = super.getOutput() } /** * Gets the context that this function escapes for, such as `html`, or `url`. */ - string getKind() { result = range.getKind() } + string getKind() { result = super.getKind() } } /** Provides a class for modeling new escaping APIs. */ @@ -730,7 +671,7 @@ module Escaping { * `

    {}

    `. */ class HtmlEscaping extends Escaping { - HtmlEscaping() { range.getKind() = Escaping::getHtmlKind() } + HtmlEscaping() { super.getKind() = Escaping::getHtmlKind() } } /** @@ -738,7 +679,7 @@ class HtmlEscaping extends Escaping { * the body of a regex. */ class RegexEscaping extends Escaping { - RegexEscaping() { range.getKind() = Escaping::getRegexKind() } + RegexEscaping() { super.getKind() = Escaping::getRegexKind() } } /** @@ -746,14 +687,14 @@ class RegexEscaping extends Escaping { * in an LDAP search. */ class LdapDnEscaping extends Escaping { - LdapDnEscaping() { range.getKind() = Escaping::getLdapDnKind() } + LdapDnEscaping() { super.getKind() = Escaping::getLdapDnKind() } } /** * An escape of a string so it can be safely used as a filter in an LDAP search. */ class LdapFilterEscaping extends Escaping { - LdapFilterEscaping() { range.getKind() = Escaping::getLdapFilterKind() } + LdapFilterEscaping() { super.getKind() = Escaping::getLdapFilterKind() } } /** Provides classes for modeling HTTP-related APIs. */ @@ -772,29 +713,25 @@ module HTTP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `RouteSetup::Range` instead. */ - class RouteSetup extends DataFlow::Node { - RouteSetup::Range range; - - RouteSetup() { this = range } - + class RouteSetup extends DataFlow::Node instanceof RouteSetup::Range { /** Gets the URL pattern for this route, if it can be statically determined. */ - string getUrlPattern() { result = range.getUrlPattern() } + string getUrlPattern() { result = super.getUrlPattern() } /** * Gets a function that will handle incoming requests for this route, if any. * * NOTE: This will be modified in the near future to have a `RequestHandler` result, instead of a `Function`. 
*/ - Function getARequestHandler() { result = range.getARequestHandler() } + Function getARequestHandler() { result = super.getARequestHandler() } /** * Gets a parameter that will receive parts of the url when handling incoming * requests for this route, if any. These automatically become a `RemoteFlowSource`. */ - Parameter getARoutedParameter() { result = range.getARoutedParameter() } + Parameter getARoutedParameter() { result = super.getARoutedParameter() } /** Gets a string that identifies the framework used for this route setup. */ - string getFramework() { result = range.getFramework() } + string getFramework() { result = super.getFramework() } } /** Provides a class for modeling new HTTP routing APIs. */ @@ -841,19 +778,15 @@ module HTTP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `RequestHandler::Range` instead. */ - class RequestHandler extends Function { - RequestHandler::Range range; - - RequestHandler() { this = range } - + class RequestHandler extends Function instanceof RequestHandler::Range { /** * Gets a parameter that could receive parts of the url when handling incoming * requests, if any. These automatically become a `RemoteFlowSource`. */ - Parameter getARoutedParameter() { result = range.getARoutedParameter() } + Parameter getARoutedParameter() { result = super.getARoutedParameter() } /** Gets a string that identifies the framework used for this route setup. */ - string getFramework() { result = range.getFramework() } + string getFramework() { result = super.getFramework() } } /** Provides a class for modeling new HTTP request handlers. */ @@ -909,16 +842,12 @@ module HTTP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `HttpResponse::Range` instead. 
*/ - class HttpResponse extends DataFlow::Node { - HttpResponse::Range range; - - HttpResponse() { this = range } - + class HttpResponse extends DataFlow::Node instanceof HttpResponse::Range { /** Gets the data-flow node that specifies the body of this HTTP response. */ - DataFlow::Node getBody() { result = range.getBody() } + DataFlow::Node getBody() { result = super.getBody() } /** Gets the mimetype of this HTTP response, if it can be statically determined. */ - string getMimetype() { result = range.getMimetype() } + string getMimetype() { result = super.getMimetype() } } /** Provides a class for modeling new HTTP response APIs. */ @@ -964,13 +893,9 @@ module HTTP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `HttpRedirectResponse::Range` instead. */ - class HttpRedirectResponse extends HttpResponse { - override HttpRedirectResponse::Range range; - - HttpRedirectResponse() { this = range } - + class HttpRedirectResponse extends HttpResponse instanceof HttpRedirectResponse::Range { /** Gets the data-flow node that specifies the location of this HTTP redirect response. */ - DataFlow::Node getRedirectLocation() { result = range.getRedirectLocation() } + DataFlow::Node getRedirectLocation() { result = super.getRedirectLocation() } } /** Provides a class for modeling new HTTP redirect response APIs. */ @@ -996,25 +921,21 @@ module HTTP { * Extend this class to refine existing API models. If you want to model new APIs, * extend `HTTP::CookieWrite::Range` instead. */ - class CookieWrite extends DataFlow::Node { - CookieWrite::Range range; - - CookieWrite() { this = range } - + class CookieWrite extends DataFlow::Node instanceof CookieWrite::Range { /** * Gets the argument, if any, specifying the raw cookie header. */ - DataFlow::Node getHeaderArg() { result = range.getHeaderArg() } + DataFlow::Node getHeaderArg() { result = super.getHeaderArg() } /** * Gets the argument, if any, specifying the cookie name. 
*/ - DataFlow::Node getNameArg() { result = range.getNameArg() } + DataFlow::Node getNameArg() { result = super.getNameArg() } /** * Gets the argument, if any, specifying the cookie value. */ - DataFlow::Node getValueArg() { result = range.getValueArg() } + DataFlow::Node getValueArg() { result = super.getValueArg() } } /** Provides a class for modeling new cookie writes on HTTP responses. */ @@ -1131,27 +1052,23 @@ module Cryptography { * Extend this class to refine existing API models. If you want to model new APIs, * extend `KeyGeneration::Range` instead. */ - class KeyGeneration extends DataFlow::Node { - KeyGeneration::Range range; - - KeyGeneration() { this = range } - + class KeyGeneration extends DataFlow::Node instanceof KeyGeneration::Range { /** Gets the name of the cryptographic algorithm (for example `"RSA"` or `"AES"`). */ - string getName() { result = range.getName() } + string getName() { result = super.getName() } /** Gets the argument that specifies the size of the key in bits, if available. */ - DataFlow::Node getKeySizeArg() { result = range.getKeySizeArg() } + DataFlow::Node getKeySizeArg() { result = super.getKeySizeArg() } /** * Gets the size of the key generated (in bits), as well as the `origin` that * explains how we obtained this specific key size. */ int getKeySizeWithOrigin(DataFlow::Node origin) { - result = range.getKeySizeWithOrigin(origin) + result = super.getKeySizeWithOrigin(origin) } /** Gets the minimum key size (in bits) for this algorithm to be considered secure. */ - int minimumSecureKeySize() { result = range.minimumSecureKeySize() } + int minimumSecureKeySize() { result = super.minimumSecureKeySize() } } /** Provides classes for modeling new key-pair generation APIs. */ @@ -1230,16 +1147,12 @@ module Cryptography { * Extend this class to refine existing API models. If you want to model new APIs, * extend `CryptographicOperation::Range` instead. 
*/ - class CryptographicOperation extends DataFlow::Node { - CryptographicOperation::Range range; - - CryptographicOperation() { this = range } - + class CryptographicOperation extends DataFlow::Node instanceof CryptographicOperation::Range { /** Gets the algorithm used, if it matches a known `CryptographicAlgorithm`. */ - CryptographicAlgorithm getAlgorithm() { result = range.getAlgorithm() } + CryptographicAlgorithm getAlgorithm() { result = super.getAlgorithm() } /** Gets an input the algorithm is used on, for example the plain text input to be encrypted. */ - DataFlow::Node getAnInput() { result = range.getAnInput() } + DataFlow::Node getAnInput() { result = super.getAnInput() } } /** Provides classes for modeling new applications of a cryptographic algorithms. */ From c784f15762b8ea2f749e1f3d92fe29d498b63de3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 6 Apr 2022 15:40:04 +0200 Subject: [PATCH 40/51] Python: Rename more XML classes to follow convention - `XMLEtree` to `XmlEtree` - `XMLSax` to `XmlSax` - `LXML` to `Lxml` - `XMLParser` to `XmlParser` --- .../ql/lib/semmle/python/frameworks/Lxml.qll | 30 +++++++++---------- .../lib/semmle/python/frameworks/Stdlib.qll | 28 ++++++++--------- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 6d310563ade7..24afbd199df8 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -109,7 +109,7 @@ private module Lxml { * * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ - module XMLParser { + module XmlParser { /** * A source of instances of `lxml.etree` parsers, extend this class to model new instances. * @@ -117,7 +117,7 @@ private module Lxml { * calls, or a special parameter that will be set when functions are called by an external * library. 
* - * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. + * Use the predicate `XmlParser::instance()` to get references to instances of `lxml.etree` parsers. */ abstract class InstanceSource extends DataFlow::LocalSourceNode { /** Holds if this instance is vulnerable to `kind`. */ @@ -129,8 +129,8 @@ private module Lxml { * * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ - private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { - LXMLParser() { + private class LxmlParser extends InstanceSource, DataFlow::CallCfgNode { + LxmlParser() { this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() } @@ -159,8 +159,8 @@ private module Lxml { * * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser */ - private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { - LXMLDefaultParser() { + private class LxmlDefaultParser extends InstanceSource, DataFlow::CallCfgNode { + LxmlDefaultParser() { this = API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() } @@ -196,8 +196,8 @@ private module Lxml { /** * A call to the `feed` method of an `lxml` parser. 
*/ - private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { - LXMLParserFeedCall() { this.calls(instance(_), "feed") } + private class LxmlParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { + LxmlParserFeedCall() { this.calls(instance(_), "feed") } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } @@ -233,8 +233,8 @@ private module Lxml { * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid */ - private class LXMLParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { - LXMLParsing() { + private class LxmlParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + LxmlParsing() { this = API::moduleImport("lxml") .getMember("etree") @@ -257,7 +257,7 @@ private module Lxml { DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { - this.getParserArg() = XMLParser::instanceVulnerableTo(kind) + this.getParserArg() = XmlParser::instanceVulnerableTo(kind) or kind.isXxe() and not exists(this.getParserArg()) @@ -284,8 +284,8 @@ private module Lxml { * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parse * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid */ - private class FileAccessFromLXMLParsing extends LXMLParsing, FileSystemAccess::Range { - FileAccessFromLXMLParsing() { + private class FileAccessFromLxmlParsing extends LxmlParsing, FileSystemAccess::Range { + FileAccessFromLxmlParsing() { this = API::moduleImport("lxml").getMember("etree").getMember(["parse", "parseid"]).getACall() // I considered whether we should try to reduce FPs from people passing file-like // objects, which will not be a file system access (and couldn't cause a @@ -305,9 +305,9 @@ private module Lxml { * See * - 
https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse */ - private class LXMLIterparseCall extends DataFlow::CallCfgNode, XML::XmlParsing::Range, + private class LxmlIterparseCall extends DataFlow::CallCfgNode, XML::XmlParsing::Range, FileSystemAccess::Range { - LXMLIterparseCall() { + LxmlIterparseCall() { this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 91ba7bc75b5a..8508aaef5f0d 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3191,7 +3191,7 @@ private module StdlibPrivate { * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser */ - module XMLParser { + module XmlParser { /** * A source of instances of `xml.etree` parsers, extend this class to model new instances. * @@ -3199,7 +3199,7 @@ private module StdlibPrivate { * calls, or a special parameter that will be set when functions are called by an external * library. * - * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. + * Use the predicate `XmlParser::instance()` to get references to instances of `xml.etree` parsers. */ abstract class InstanceSource extends DataFlow::LocalSourceNode { } @@ -3236,8 +3236,8 @@ private module StdlibPrivate { /** * A call to the `feed` method of an `xml.etree` parser. 
*/ - private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { - XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } + private class XmlEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XmlParsing::Range { + XmlEtreeParserFeedCall() { this.calls(instance(), "feed") } override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } @@ -3274,8 +3274,8 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse */ - private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { - XMLEtreeParsing() { + private class XmlEtreeParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + XmlEtreeParsing() { this = API::moduleImport("xml") .getMember("etree") @@ -3325,8 +3325,8 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse */ - private class FileAccessFromXMLEtreeParsing extends XMLEtreeParsing, FileSystemAccess::Range { - FileAccessFromXMLEtreeParsing() { + private class FileAccessFromXmlEtreeParsing extends XmlEtreeParsing, FileSystemAccess::Range { + FileAccessFromXmlEtreeParsing() { this = API::moduleImport("xml") .getMember("etree") @@ -3445,9 +3445,9 @@ private module StdlibPrivate { * * See https://docs.python.org/3/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.parse */ - private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range, + private class XmlSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XmlParsing::Range, FileSystemAccess::Range { - XMLSaxInstanceParsing() { + XmlSaxInstanceParsing() { this = API::moduleImport("xml") .getMember("sax") @@ -3496,8 +3496,8 @@ 
private module StdlibPrivate { * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString */ - private class XMLSaxParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { - XMLSaxParsing() { + private class XmlSaxParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + XmlSaxParsing() { this = API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() } @@ -3535,8 +3535,8 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.parse * - https://docs.python.org/3/library/xml.etree.elementtree.html#xml.etree.ElementTree.iterparse */ - private class FileAccessFromXMLSaxParsing extends XMLSaxParsing, FileSystemAccess::Range { - FileAccessFromXMLSaxParsing() { + private class FileAccessFromXmlSaxParsing extends XmlSaxParsing, FileSystemAccess::Range { + FileAccessFromXmlSaxParsing() { this = API::moduleImport("xml").getMember("sax").getMember("parse").getACall() // I considered whether we should try to reduce FPs from people passing file-like // objects, which will not be a file system access (and couldn't cause a From f2f0873d911dc9bb685fa708707e3f4c1de6fc9d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Wed, 6 Apr 2022 15:49:06 +0200 Subject: [PATCH 41/51] Python: Use new `API::CallNode` for XML constant check This also means that the detection of the values passed to these keyword arguments will no longer just be from a local scope, but can also be across function boundaries. 
--- .../ql/lib/semmle/python/frameworks/Lxml.qll | 21 ++++++++++--------- .../semmle/python/frameworks/Xmltodict.qll | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index 24afbd199df8..a77da9e79150 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -129,7 +129,7 @@ private module Lxml { * * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser */ - private class LxmlParser extends InstanceSource, DataFlow::CallCfgNode { + private class LxmlParser extends InstanceSource, API::CallNode { LxmlParser() { this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() } @@ -141,16 +141,17 @@ private module Lxml { // resolve_entities has default True not exists(this.getArgByName("resolve_entities")) or - this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) + this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = any(True t) ) or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and - not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) + this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) and + not this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = + any(False t) or kind.isDtdRetrieval() and - this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and - this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and + this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t) } } @@ -305,7 +306,7 @@ private module Lxml { * See * - 
https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.iterparse */ - private class LxmlIterparseCall extends DataFlow::CallCfgNode, XML::XmlParsing::Range, + private class LxmlIterparseCall extends API::CallNode, XML::XmlParsing::Range, FileSystemAccess::Range { LxmlIterparseCall() { this = API::moduleImport("lxml").getMember("etree").getMember("iterparse").getACall() @@ -318,11 +319,11 @@ private module Lxml { kind.isXxe() or (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) + this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) or kind.isDtdRetrieval() and - this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and - this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + this.getKeywordParameter("load_dtd").getAValueReachingRhs().asExpr() = any(True t) and + this.getKeywordParameter("no_network").getAValueReachingRhs().asExpr() = any(False t) } override predicate mayExecuteInput() { none() } diff --git a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll index db2c443d8e92..95d44d6d1b0a 100644 --- a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll +++ b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll @@ -20,7 +20,7 @@ private module Xmltodict { /** * A call to `xmltodict.parse`. 
*/ - private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + private class XMLtoDictParsing extends API::CallNode, XML::XmlParsing::Range { XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } override DataFlow::Node getAnInput() { @@ -29,7 +29,7 @@ private module Xmltodict { override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and - this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) + this.getKeywordParameter("disable_entities").getAValueReachingRhs().asExpr() = any(False f) } override predicate mayExecuteInput() { none() } From 7728b6cf1b750eadf462606dbc3ca0660e86417d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Apr 2022 10:45:43 +0200 Subject: [PATCH 42/51] Python: Change XmlBomb vulnerability kind --- python/ql/lib/semmle/python/Concepts.qll | 19 ++++++---- .../ql/lib/semmle/python/frameworks/Lxml.qll | 4 +- .../lib/semmle/python/frameworks/Stdlib.qll | 12 +++--- .../semmle/python/frameworks/Xmltodict.qll | 2 +- .../Security/CWE-611/SimpleXmlRpcServer.ql | 12 ++---- .../dataflow/XmlBombCustomizations.qll | 2 +- .../library-tests/frameworks/lxml/parsing.py | 4 +- .../frameworks/stdlib/XPathExecution.py | 6 +-- .../frameworks/stdlib/xml_dom.py | 24 ++++++------ .../frameworks/stdlib/xml_etree.py | 38 +++++++++---------- .../frameworks/stdlib/xml_sax.py | 26 ++++++------- .../frameworks/xmltodict/test.py | 2 +- 12 files changed, 73 insertions(+), 78 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index eec0cd0d1a0d..4fadc953c3b1 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -507,15 +507,18 @@ module XML { * See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing. 
*/ class XmlParsingVulnerabilityKind extends string { - XmlParsingVulnerabilityKind() { - this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] - } - - /** Holds for Billion Laughs vulnerability kind. */ - predicate isBillionLaughs() { this = "Billion Laughs" } + XmlParsingVulnerabilityKind() { this in ["XML bomb", "XXE", "DTD retrieval"] } - /** Holds for Quadratic Blowup vulnerability kind. */ - predicate isQuadraticBlowup() { this = "Quadratic Blowup" } + /** + * Holds for XML bomb vulnerability kind, such as 'Billion Laughs' and 'Quadratic + * Blowup'. + * + * While a parser could technically be vulnerable to one and not the other, from our + * point of view the interesting part is that it IS vulnerable to these types of + * attacks, and not so much which one specifically works. In practice I haven't seen + * a parser that is vulnerable to one and not the other. + */ + predicate isXmlBomb() { this = "XML bomb" } /** Holds for XXE vulnerability kind. */ predicate isXxe() { this = "XXE" } diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index a77da9e79150..cfb83fd5732d 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -144,7 +144,7 @@ private module Lxml { this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = any(True t) ) or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + kind.isXmlBomb() and this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = any(True t) and not this.getKeywordParameter("resolve_entities").getAValueReachingRhs().asExpr() = any(False t) @@ -318,7 +318,7 @@ private module Lxml { // note that there is no `resolve_entities` argument, so it's not possible to turn off XXE :O kind.isXxe() or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + kind.isXmlBomb() and this.getKeywordParameter("huge_tree").getAValueReachingRhs().asExpr() = 
any(True t) or kind.isDtdRetrieval() and diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 8508aaef5f0d..f4b6915d440a 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3241,9 +3241,7 @@ private module StdlibPrivate { override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } - override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { - kind.isBillionLaughs() or kind.isQuadraticBlowup() - } + override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { kind.isXmlBomb() } override predicate mayExecuteInput() { none() } @@ -3301,7 +3299,7 @@ private module StdlibPrivate { override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // note: it does not matter what `xml.etree` parser you are using, you cannot // change the security features anyway :| - kind.isBillionLaughs() or kind.isQuadraticBlowup() + kind.isXmlBomb() } override predicate mayExecuteInput() { none() } @@ -3461,7 +3459,7 @@ private module StdlibPrivate { override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // always vuln to these - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + kind.isXmlBomb() or // can be vuln to other things if features has been turned on this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and @@ -3514,7 +3512,7 @@ private module StdlibPrivate { override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { // always vuln to these - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + kind.isXmlBomb() } override predicate mayExecuteInput() { none() } @@ -3590,7 +3588,7 @@ private module StdlibPrivate { this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and (kind.isXxe() or kind.isDtdRetrieval()) or - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + kind.isXmlBomb() } override predicate 
mayExecuteInput() { none() } diff --git a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll index 95d44d6d1b0a..f63fec7afe4c 100644 --- a/python/ql/lib/semmle/python/frameworks/Xmltodict.qll +++ b/python/ql/lib/semmle/python/frameworks/Xmltodict.qll @@ -28,7 +28,7 @@ private module Xmltodict { } override predicate vulnerableTo(XML::XmlParsingVulnerabilityKind kind) { - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + kind.isXmlBomb() and this.getKeywordParameter("disable_entities").getAValueReachingRhs().asExpr() = any(False f) } diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql index e638c13853fe..e31fdc88629f 100644 --- a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -13,13 +13,7 @@ private import python private import semmle.python.Concepts private import semmle.python.ApiGraphs -from DataFlow::CallCfgNode call, string kinds +from DataFlow::CallCfgNode call where - call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and - kinds = - strictconcat(XML::XmlParsingVulnerabilityKind kind | - kind.isBillionLaughs() or kind.isQuadraticBlowup() - | - kind, ", " - ) -select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "." 
+ call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() +select call, "SimpleXMLRPCServer is vulnerable to XML bombs" diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index 05f6fc57a345..7cc4ec5bad52 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -41,7 +41,7 @@ module XmlBomb { class XmlParsingWithEntityResolution extends Sink { XmlParsingWithEntityResolution() { exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | - (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + kind.isXmlBomb() and parsing.vulnerableTo(kind) and this = parsing.getAnInput() ) diff --git a/python/ql/test/library-tests/frameworks/lxml/parsing.py b/python/ql/test/library-tests/frameworks/lxml/parsing.py index ca68c99a90ea..63cdc79b4c1d 100644 --- a/python/ql/test/library-tests/frameworks/lxml/parsing.py +++ b/python/ql/test/library-tests/frameworks/lxml/parsing.py @@ -50,7 +50,7 @@ # Billion laughs vuln (also XXE) parser = lxml.etree.XMLParser(huge_tree=True) -lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) +lxml.etree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.fromstring(..) # Safe for both Billion laughs and XXE parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) @@ -63,5 +63,5 @@ # iterparse configurations ... this doesn't use a parser argument but takes MOST (!) of # the normal XMLParser arguments. 
Specifically, it doesn't allow disabling XXE :O -lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file +lxml.etree.iterparse(xml_file, huge_tree=True) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='XML bomb' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file lxml.etree.iterparse(xml_file, load_dtd=True, no_network=False) # $ decodeFormat=XML decodeInput=xml_file xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=lxml.etree.iterparse(..) getAPathArgument=xml_file diff --git a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py index 5faff5ed8689..bf7dd08185b7 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py +++ b/python/ql/test/library-tests/frameworks/stdlib/XPathExecution.py @@ -2,7 +2,7 @@ ns = {'dc': 'http://purl.org/dc/elements/1.1/'} import xml.etree.ElementTree as ET -tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument='country_data.xml' +tree = ET.parse('country_data.xml') # $ decodeFormat=XML decodeInput='country_data.xml' decodeOutput=ET.parse(..) xmlVuln='XML bomb' getAPathArgument='country_data.xml' root = tree.getroot() root.find(match, namespaces=ns) # $ getXPath=match @@ -10,13 +10,13 @@ root.findtext(match, default=None, namespaces=ns) # $ getXPath=match tree = ET.ElementTree() -tree.parse("index.xhtml") # $ decodeFormat=XML decodeInput="index.xhtml" decodeOutput=tree.parse(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument="index.xhtml" +tree.parse("index.xhtml") # $ decodeFormat=XML decodeInput="index.xhtml" decodeOutput=tree.parse(..) 
xmlVuln='XML bomb' getAPathArgument="index.xhtml" tree.find(match, namespaces=ns) # $ getXPath=match tree.findall(match, namespaces=ns) # $ getXPath=match tree.findtext(match, default=None, namespaces=ns) # $ getXPath=match parser = ET.XMLParser() -parser.feed("bar") # $ decodeFormat=XML decodeInput="bar" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed("bar") # $ decodeFormat=XML decodeInput="bar" xmlVuln='XML bomb' tree = parser.close() # $ decodeOutput=parser.close() tree.find(match, namespaces=ns) # $ getXPath=match diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py index b3a1ab7f9309..8d511c517334 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_dom.py @@ -6,26 +6,26 @@ x = "some xml" # minidom -xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) -xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(file=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) -xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) -xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.minidom.parseString(..) 
+xml.dom.minidom.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parseString(..) +xml.dom.minidom.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.minidom.parseString(..) # pulldom -xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) -xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) -xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) -xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.dom.pulldom.parseString(..) +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parseString(..) +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.dom.pulldom.parseString(..) 
# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) -xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.minidom.parse(..) getAPathArgument=StringIO(..) -xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) -xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(StringIO(x), parser) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' decodeOutput=xml.dom.pulldom.parse(..) getAPathArgument=StringIO(..) diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py index 00f3b964b182..441f9adc87a1 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_etree.py @@ -4,43 +4,43 @@ x = "some xml" # Parsing in different ways -xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) -xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) +xml.etree.ElementTree.fromstring(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..) +xml.etree.ElementTree.fromstring(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..) -xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..) -xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstringlist(..) +xml.etree.ElementTree.fromstringlist([x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstringlist(..) +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ decodeFormat=XML decodeInput=List xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstringlist(..) -xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..) 
-xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XML(..) +xml.etree.ElementTree.XML(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XML(..) +xml.etree.ElementTree.XML(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XML(..) -xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) -xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.XMLID(..) +xml.etree.ElementTree.XMLID(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XMLID(..) +xml.etree.ElementTree.XMLID(text=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.XMLID(..) -xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) -xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.parse(..) getAPathArgument=StringIO(..) -xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) -xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.iterparse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.iterparse(..) getAPathArgument=StringIO(..) tree = xml.etree.ElementTree.ElementTree() -tree.parse("file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" -tree.parse(source="file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=tree.parse(..) getAPathArgument="file.xml" +tree.parse("file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='XML bomb' decodeOutput=tree.parse(..) getAPathArgument="file.xml" +tree.parse(source="file.xml") # $ decodeFormat=XML decodeInput="file.xml" xmlVuln='XML bomb' decodeOutput=tree.parse(..) getAPathArgument="file.xml" # With parsers (no options available to disable/enable security features) parser = xml.etree.ElementTree.XMLParser() -xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xml.etree.ElementTree.fromstring(..) +xml.etree.ElementTree.fromstring(x, parser=parser) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xml.etree.ElementTree.fromstring(..) 
# manual use of feed method parser = xml.etree.ElementTree.XMLParser() -parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' parser.close() # $ decodeOutput=parser.close() # manual use of feed method on XMLPullParser parser = xml.etree.ElementTree.XMLPullParser() -parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +parser.feed(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +parser.feed(data=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' parser.close() # $ decodeOutput=parser.close() # note: it's technically possible to use the thing wrapper func `fromstring` with an diff --git a/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py index c08034907a4d..6199fd76cc10 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py +++ b/python/ql/test/library-tests/frameworks/stdlib/xml_sax.py @@ -10,41 +10,41 @@ def __init__(self): def characters(self, data): self._result.append(data) -xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) -xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) +xml.sax.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) +xml.sax.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='XML bomb' getAPathArgument=StringIO(..) -xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' -xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' +xml.sax.parseString(x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' +xml.sax.parseString(string=x) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' parser = xml.sax.make_parser() -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) -parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) +parser.parse(source=StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) # You can make it vuln to both XXE and DTD retrieval by setting this flag # see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) 
xmlVuln='XML bomb' getAPathArgument=StringIO(..) # Forward Type Tracking test def func(cond): parser = xml.sax.make_parser() if cond: parser.setFeature(xml.sax.handler.feature_external_ges, True) - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) else: - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) # make it vuln, then making it safe # a bit of an edge-case, but is nice to be able to handle. parser = xml.sax.make_parser() parser.setFeature(xml.sax.handler.feature_external_ges, True) parser.setFeature(xml.sax.handler.feature_external_ges, False) -parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' getAPathArgument=StringIO(..) +parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' getAPathArgument=StringIO(..) def check_conditional_assignment(cond): parser = xml.sax.make_parser() @@ -52,7 +52,7 @@ def check_conditional_assignment(cond): parser.setFeature(xml.sax.handler.feature_external_ges, True) else: parser.setFeature(xml.sax.handler.feature_external_ges, False) - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) 
def check_conditional_assignment2(cond): parser = xml.sax.make_parser() @@ -61,4 +61,4 @@ def check_conditional_assignment2(cond): else: flag_value = False parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) - parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='Billion Laughs' xmlVuln='DTD retrieval' xmlVuln='Quadratic Blowup' xmlVuln='XXE' getAPathArgument=StringIO(..) + parser.parse(StringIO(x)) # $ decodeFormat=XML decodeInput=StringIO(..) xmlVuln='XML bomb' xmlVuln='DTD retrieval' xmlVuln='XXE' getAPathArgument=StringIO(..) diff --git a/python/ql/test/library-tests/frameworks/xmltodict/test.py b/python/ql/test/library-tests/frameworks/xmltodict/test.py index 01dc2f3c4843..ef236f7796c7 100644 --- a/python/ql/test/library-tests/frameworks/xmltodict/test.py +++ b/python/ql/test/library-tests/frameworks/xmltodict/test.py @@ -5,4 +5,4 @@ xmltodict.parse(x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) xmltodict.parse(xml_input=x) # $ decodeFormat=XML decodeInput=x decodeOutput=xmltodict.parse(..) -xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='Billion Laughs' xmlVuln='Quadratic Blowup' decodeOutput=xmltodict.parse(..) +xmltodict.parse(x, disable_entities=False) # $ decodeFormat=XML decodeInput=x xmlVuln='XML bomb' decodeOutput=xmltodict.parse(..) 
From 405480c41045f943e025aa7d21a33b971b231cf2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Apr 2022 15:34:56 +0200 Subject: [PATCH 43/51] Python: Rename sink definitions for XXE/XML bomb --- .../python/security/dataflow/XmlBombCustomizations.qll | 7 +++---- .../semmle/python/security/dataflow/XxeCustomizations.qll | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll index 7cc4ec5bad52..a2fe1b8ecb27 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll @@ -35,11 +35,10 @@ module XmlBomb { } /** - * A call to an XML parser that performs internal entity expansion, viewed - * as a data flow sink for XML-bomb vulnerabilities. + * A call to an XML parser that is vulnerable to XML bombs. */ - class XmlParsingWithEntityResolution extends Sink { - XmlParsingWithEntityResolution() { + class XmlParsingVulnerableToXmlBomb extends Sink { + XmlParsingVulnerableToXmlBomb() { exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | kind.isXmlBomb() and parsing.vulnerableTo(kind) and diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll index 0fc139ec4f32..1d1ad087f849 100644 --- a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll @@ -35,11 +35,10 @@ module Xxe { } /** - * A call to an XML parser that performs external entity expansion, viewed - * as a data flow sink for XXE vulnerabilities. + * A call to an XML parser that is vulnerable to XXE. 
*/ - class XmlParsingWithExternalEntityResolution extends Sink { - XmlParsingWithExternalEntityResolution() { + class XmlParsingVulnerableToXxe extends Sink { + XmlParsingVulnerableToXxe() { exists(XML::XmlParsing parsing, XML::XmlParsingVulnerabilityKind kind | kind.isXxe() and parsing.vulnerableTo(kind) and From 8191be9d7506bec7909a19f001276d2716d4f600 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Apr 2022 15:36:04 +0200 Subject: [PATCH 44/51] Python: Move last XXE/XML bomb out of experimental --- .../semmle/python/security/dataflow/XmlBombCustomizations.qll | 0 .../semmle/python/security/dataflow/XmlBombQuery.qll | 0 .../semmle/python/security/dataflow/XxeCustomizations.qll | 0 .../semmle/python/security/dataflow/XxeQuery.qll | 0 python/ql/src/Security/CWE-611/Xxe.ql | 2 +- python/ql/src/Security/CWE-776/XmlBomb.ql | 2 +- 6 files changed, 2 insertions(+), 2 deletions(-) rename python/ql/{src/experimental => lib}/semmle/python/security/dataflow/XmlBombCustomizations.qll (100%) rename python/ql/{src/experimental => lib}/semmle/python/security/dataflow/XmlBombQuery.qll (100%) rename python/ql/{src/experimental => lib}/semmle/python/security/dataflow/XxeCustomizations.qll (100%) rename python/ql/{src/experimental => lib}/semmle/python/security/dataflow/XxeQuery.qll (100%) diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/XmlBombCustomizations.qll similarity index 100% rename from python/ql/src/experimental/semmle/python/security/dataflow/XmlBombCustomizations.qll rename to python/ql/lib/semmle/python/security/dataflow/XmlBombCustomizations.qll diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll b/python/ql/lib/semmle/python/security/dataflow/XmlBombQuery.qll similarity index 100% rename from python/ql/src/experimental/semmle/python/security/dataflow/XmlBombQuery.qll rename to 
python/ql/lib/semmle/python/security/dataflow/XmlBombQuery.qll diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll similarity index 100% rename from python/ql/src/experimental/semmle/python/security/dataflow/XxeCustomizations.qll rename to python/ql/lib/semmle/python/security/dataflow/XxeCustomizations.qll diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll b/python/ql/lib/semmle/python/security/dataflow/XxeQuery.qll similarity index 100% rename from python/ql/src/experimental/semmle/python/security/dataflow/XxeQuery.qll rename to python/ql/lib/semmle/python/security/dataflow/XxeQuery.qll diff --git a/python/ql/src/Security/CWE-611/Xxe.ql b/python/ql/src/Security/CWE-611/Xxe.ql index f706ea6e909c..5cc6da254677 100644 --- a/python/ql/src/Security/CWE-611/Xxe.ql +++ b/python/ql/src/Security/CWE-611/Xxe.ql @@ -13,7 +13,7 @@ */ import python -import experimental.semmle.python.security.dataflow.XxeQuery +import semmle.python.security.dataflow.XxeQuery import DataFlow::PathGraph from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink diff --git a/python/ql/src/Security/CWE-776/XmlBomb.ql b/python/ql/src/Security/CWE-776/XmlBomb.ql index 2a1ea5916c44..54d483db17ea 100644 --- a/python/ql/src/Security/CWE-776/XmlBomb.ql +++ b/python/ql/src/Security/CWE-776/XmlBomb.ql @@ -13,7 +13,7 @@ */ import python -import experimental.semmle.python.security.dataflow.XmlBombQuery +import semmle.python.security.dataflow.XmlBombQuery import DataFlow::PathGraph from Configuration cfg, DataFlow::PathNode source, DataFlow::PathNode sink From 517444b5ff3067a178c57bdda5d523bd8c16316c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 7 Apr 2022 16:42:40 +0200 Subject: [PATCH 45/51] Python: Fix `SimpleXmlRpcServer.expected` --- .../CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected index 4a08d61c47af..5f848fb56bb0 100644 --- a/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected +++ b/python/ql/test/experimental/query-tests/Security/CWE-611-SimpleXmlRpcServer/SimpleXmlRpcServer.expected @@ -1 +1 @@ -| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. | +| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to XML bombs | From 714465bf39d97e31aa6f0a7aa01c57e16f3c3078 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 2 May 2022 11:29:00 +0200 Subject: [PATCH 46/51] Python: Refactor `SaxParserSetFeatureCall` Originally made by @erik-krogh in https://github.com/github/codeql/pull/8693/files#diff-9627c1fb9a1cc77fb93e6b7e31af1a4fa908f2a60362cfb34377d24debb97398 Could not be applied directly to this PR, since this PR deletes the file. 
--- .../lib/semmle/python/frameworks/Stdlib.qll | 38 ++++--------------- 1 file changed, 7 insertions(+), 31 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 10eaa9dc3b60..bf2b01930d21 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3408,7 +3408,7 @@ private module StdlibPrivate { * * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature */ - private class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { + private class SaxParserSetFeatureCall extends API::CallNode, DataFlow::MethodCallNode { SaxParserSetFeatureCall() { this = API::moduleImport("xml") @@ -3421,27 +3421,9 @@ private module StdlibPrivate { // The keyword argument names does not match documentation. I checked (with Python // 3.9.5) that the names used here actually works. - DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] } + API::Node getFeatureArg() { result = this.getParameter(0, "name") } - DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] } - } - - /** Gets a back-reference to the `setFeature` state argument `arg`. */ - private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker( - DataFlow::TypeBackTracker t, DataFlow::Node arg - ) { - t.start() and - arg = any(SaxParserSetFeatureCall c).getStateArg() and - result = arg.getALocalSource() - or - exists(DataFlow::TypeBackTracker t2 | - result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t) - ) - } - - /** Gets a back-reference to the `setFeature` state argument `arg`. 
*/ - DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) { - result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) + API::Node getStateArg() { result = this.getParameter(1, "state") } } /** @@ -3452,16 +3434,13 @@ private module StdlibPrivate { private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { t.start() and exists(SaxParserSetFeatureCall call | - call.getFeatureArg() = + call.getFeatureArg().getARhs() = API::moduleImport("xml") .getMember("sax") .getMember("handler") .getMember("feature_external_ges") .getAUse() and - saxParserSetFeatureStateArgBacktracker(call.getStateArg()) - .asExpr() - .(BooleanLiteral) - .booleanValue() = true and + call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = true and result = call.getObject() ) or @@ -3471,16 +3450,13 @@ private module StdlibPrivate { // take account of that we can set the feature to False, which makes the parser safe again not exists(SaxParserSetFeatureCall call | call.getObject() = result and - call.getFeatureArg() = + call.getFeatureArg().getARhs() = API::moduleImport("xml") .getMember("sax") .getMember("handler") .getMember("feature_external_ges") .getAUse() and - saxParserSetFeatureStateArgBacktracker(call.getStateArg()) - .asExpr() - .(BooleanLiteral) - .booleanValue() = false + call.getStateArg().getAValueReachingRhs().asExpr().(BooleanLiteral).booleanValue() = false ) } From f5854f33da4a51c39e4a6bccb778393d92e29efe Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 9 May 2022 10:53:25 +0200 Subject: [PATCH 47/51] Python: Apply suggestions from code review Co-authored-by: yoff --- python/ql/src/Security/CWE-776/XmlBomb.qhelp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/Security/CWE-776/XmlBomb.qhelp b/python/ql/src/Security/CWE-776/XmlBomb.qhelp index f20dd526fdd0..8841f98ab27e 100644 --- 
a/python/ql/src/Security/CWE-776/XmlBomb.qhelp +++ b/python/ql/src/Security/CWE-776/XmlBomb.qhelp @@ -39,7 +39,7 @@ PyPI package, which has been created to prevent XML attacks (both XXE and XML bo

    The following example uses the xml.etree XML parser provided by the Python standard library to -parse a string xml_src. That string is from an untrusted source, so this code is be +parse a string xml_src. That string is from an untrusted source, so this code is vulnerable to a DoS attack, since the xml.etree XML parser expands internal entities by default:

    From f22bd039f3014cb00e2f6211b686f5b2fc9198fd Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 9 May 2022 10:56:39 +0200 Subject: [PATCH 48/51] Python: Slight refactor of `LxmlParsing` --- python/ql/lib/semmle/python/frameworks/Lxml.qll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Lxml.qll b/python/ql/lib/semmle/python/frameworks/Lxml.qll index cfb83fd5732d..70e46a6d3b05 100644 --- a/python/ql/lib/semmle/python/frameworks/Lxml.qll +++ b/python/ql/lib/semmle/python/frameworks/Lxml.qll @@ -235,12 +235,11 @@ private module Lxml { * - https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.parseid */ private class LxmlParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + string functionName; + LxmlParsing() { - this = - API::moduleImport("lxml") - .getMember("etree") - .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"]) - .getACall() + functionName in ["fromstring", "fromstringlist", "XML", "XMLID", "parse", "parseid"] and + this = API::moduleImport("lxml").getMember("etree").getMember(functionName).getACall() } override DataFlow::Node getAnInput() { @@ -287,7 +286,7 @@ private module Lxml { */ private class FileAccessFromLxmlParsing extends LxmlParsing, FileSystemAccess::Range { FileAccessFromLxmlParsing() { - this = API::moduleImport("lxml").getMember("etree").getMember(["parse", "parseid"]).getACall() + functionName in ["parse", "parseid"] // I considered whether we should try to reduce FPs from people passing file-like // objects, which will not be a file system access (and couldn't cause a // path-injection). 
From 36349222a9561c6996fbd6f2e30ab8580313e5ac Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 9 May 2022 11:00:25 +0200 Subject: [PATCH 49/51] Python: Fix casing of `XMLDomParsing` --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index bf2b01930d21..e67e90cc7940 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3591,8 +3591,8 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse */ - private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { - XMLDomParsing() { + private class XmlDomParsing extends DataFlow::CallCfgNode, XML::XmlParsing::Range { + XmlDomParsing() { this = API::moduleImport("xml") .getMember("dom") @@ -3636,8 +3636,8 @@ private module StdlibPrivate { * - https://docs.python.org/3/library/xml.dom.minidom.html#xml.dom.minidom.parse * - https://docs.python.org/3/library/xml.dom.pulldom.html#xml.dom.pulldom.parse */ - private class FileAccessFromXMLDomParsing extends XMLDomParsing, FileSystemAccess::Range { - FileAccessFromXMLDomParsing() { + private class FileAccessFromXmlDomParsing extends XmlDomParsing, FileSystemAccess::Range { + FileAccessFromXmlDomParsing() { this = API::moduleImport("xml") .getMember("dom") From de05b108faaa469952bf8d83cfa0f2b5d6e086b4 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 9 May 2022 11:01:13 +0200 Subject: [PATCH 50/51] Python: Fix singleton set --- python/ql/test/experimental/meta/ConceptsTest.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index 
73bcf8b4aa98..7b8649b7abbc 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -543,7 +543,7 @@ class HttpClientRequestTest extends InlineExpectationsTest { class XmlParsingTest extends InlineExpectationsTest { XmlParsingTest() { this = "XmlParsingTest" } - override string getARelevantTag() { result in ["xmlVuln"] } + override string getARelevantTag() { result = "xmlVuln" } override predicate hasActualResult(Location location, string element, string tag, string value) { exists(location.getFile().getRelativePath()) and From 4a6789182d4d4d18526e05d05cf3028c3a59b92c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 9 May 2022 16:37:12 +0200 Subject: [PATCH 51/51] Python: Apply suggestions from code review Co-authored-by: yoff --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index e67e90cc7940..ef60841acd66 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3265,14 +3265,7 @@ private module StdlibPrivate { API::moduleImport("xml") .getMember("etree") .getMember("ElementTree") - .getMember("XMLParser") - .getACall() - or - this = - API::moduleImport("xml") - .getMember("etree") - .getMember("ElementTree") - .getMember("XMLPullParser") + .getMember(["XMLParser", "XMLPullParser"]) .getACall() } }