diff --git a/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql new file mode 100644 index 000000000000..cda0633690c5 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/SimpleXmlRpcServer.ql @@ -0,0 +1,25 @@ +/** + * @name SimpleXMLRPCServer DoS vulnerability + * @description SimpleXMLRPCServer is vulnerable to DoS attacks from untrusted user input + * @kind problem + * @problem.severity warning + * @precision high + * @id py/simple-xml-rpc-server-dos + * @tags security + * external/cwe/cwe-776 + */ + +private import python +private import experimental.semmle.python.Concepts +private import semmle.python.ApiGraphs + +from DataFlow::CallCfgNode call, string kinds +where + call = API::moduleImport("xmlrpc").getMember("server").getMember("SimpleXMLRPCServer").getACall() and + kinds = + strictconcat(ExperimentalXML::XMLVulnerabilityKind kind | + kind.isBillionLaughs() or kind.isQuadraticBlowup() + | + kind, ", " + ) +select call, "SimpleXMLRPCServer is vulnerable to: " + kinds + "." diff --git a/python/ql/src/experimental/Security/CWE-611/XXE.xml b/python/ql/src/experimental/Security/CWE-611/XXE.xml new file mode 100644 index 000000000000..ddd196f2f137 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XXE.xml @@ -0,0 +1,4 @@ + +]> +&xxe; \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py new file mode 100644 index 000000000000..0e9eec933d7d --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.py @@ -0,0 +1,25 @@ +from flask import request, Flask +import lxml.etree +import xml.etree.ElementTree + +app = Flask(__name__) + +# BAD +@app.route("/bad") +def bad(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser() + parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) + + return parsed_xml.text + +# GOOD +@app.route("/good") +def good(): + xml_content = request.args['xml_content'] + + parser = lxml.etree.XMLParser(resolve_entities=False) + parsed_xml = xml.etree.ElementTree.fromstring(xml_content, parser=parser) + + return parsed_xml.text \ No newline at end of file diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp new file mode 100644 index 000000000000..6da1bf1d3063 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.qhelp @@ -0,0 +1,48 @@ + + + + +

+Parsing untrusted XML files with a weakly configured XML parser may lead to attacks such as XML External Entity (XXE), +Billion Laughs, Quadratic Blowup and DTD retrieval. +This type of attack uses external entity references to access arbitrary files on a system, carry out denial of +service, or server side request forgery. Even when the result of parsing is not returned to the user, out-of-band +data retrieval techniques may allow attackers to steal sensitive data. Denial of services can also be carried out +in this situation. +

+
+ + +

+Use defusedxml, a Python package aimed +to prevent any potentially malicious operation. +

+
+ + +

+The following example calls xml.etree.ElementTree.fromstring using a parser (lxml.etree.XMLParser) +that is not safely configured on untrusted data, and is therefore inherently unsafe. +

+ +

+Providing an input (xml_content) like the following XML content against /bad, the request response would contain the contents of +/etc/passwd. +

+ +
+ + +
  • Python 3 XML Vulnerabilities.
  • +
  • Python 2 XML Vulnerabilities.
  • +
  • Python XML Parsing.
  • +
  • OWASP vulnerability description: XML External Entity (XXE) Processing.
  • +
  • OWASP guidance on parsing xml files: XXE Prevention Cheat Sheet.
  • +
  • Paper by Timothy Morgen: XML Schema, DTD, and Entity Attacks
  • +
  • Out-of-band data retrieval: Timur Yunusov & Alexey Osipov, Black hat EU 2013: XML Out-Of-Band Data Retrieval.
  • +
  • Denial of service attack (Billion laughs): Billion Laughs.
  • +
    + +
    diff --git a/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql new file mode 100644 index 000000000000..922ca346b173 --- /dev/null +++ b/python/ql/src/experimental/Security/CWE-611/XmlEntityInjection.ql @@ -0,0 +1,31 @@ +/** + * @name XML Entity injection + * @description User input should not be parsed allowing the injection of entities. + * @kind path-problem + * @problem.severity error + * @id py/xml-entity-injection + * @tags security + * external/cwe/cwe-611 + * external/cwe/cwe-776 + * external/cwe/cwe-827 + */ + +// determine precision above +import python +import experimental.semmle.python.security.dataflow.XmlEntityInjection +import DataFlow::PathGraph + +from + XmlEntityInjection::XmlEntityInjectionConfiguration config, DataFlow::PathNode source, + DataFlow::PathNode sink, string kinds +where + config.hasFlowPath(source, sink) and + kinds = + strictconcat(string kind | + kind = sink.getNode().(XmlEntityInjection::Sink).getVulnerableKind() + | + kind, ", " + ) +select sink.getNode(), source, sink, + "$@ XML input is constructed from a $@ and is vulnerable to: " + kinds + ".", sink.getNode(), + "This", source.getNode(), "user-provided value" diff --git a/python/ql/src/experimental/semmle/python/Concepts.qll b/python/ql/src/experimental/semmle/python/Concepts.qll index bb83fbda5838..ce5617071845 100644 --- a/python/ql/src/experimental/semmle/python/Concepts.qll +++ b/python/ql/src/experimental/semmle/python/Concepts.qll @@ -14,6 +14,74 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import experimental.semmle.python.Frameworks +/** + * Since there is both XML module in normal and experimental Concepts, + * we have to rename the experimental module as this. + */ +module ExperimentalXML { + /** + * A kind of XML vulnerability. + * + * See https://pypi.org/project/defusedxml/#python-xml-libraries + */ + class XMLVulnerabilityKind extends string { + XMLVulnerabilityKind() { + this in ["Billion Laughs", "Quadratic Blowup", "XXE", "DTD retrieval"] + } + + /** Holds for Billion Laughs vulnerability kind. */ + predicate isBillionLaughs() { this = "Billion Laughs" } + + /** Holds for Quadratic Blowup vulnerability kind. */ + predicate isQuadraticBlowup() { this = "Quadratic Blowup" } + + /** Holds for XXE vulnerability kind. */ + predicate isXxe() { this = "XXE" } + + /** Holds for DTD retrieval vulnerability kind. */ + predicate isDtdRetrieval() { this = "DTD retrieval" } + } + + /** + * A data-flow node that parses XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ + class XMLParsing extends DataFlow::Node instanceof XMLParsing::Range { + /** + * Gets the argument containing the content to parse. + */ + DataFlow::Node getAnInput() { result = super.getAnInput() } + + /** + * Holds if this XML parsing is vulnerable to `kind`. + */ + predicate vulnerableTo(XMLVulnerabilityKind kind) { super.vulnerableTo(kind) } + } + + /** Provides classes for modeling XML parsing APIs. */ + module XMLParsing { + /** + * A data-flow node that parses XML. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `XMLParsing` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets the argument containing the content to parse. + */ + abstract DataFlow::Node getAnInput(); + + /** + * Holds if this XML parsing is vulnerable to `kind`. + */ + abstract predicate vulnerableTo(XMLVulnerabilityKind kind); + } + } +} + /** Provides classes for modeling LDAP query execution-related APIs. */ module LDAPQuery { /** diff --git a/python/ql/src/experimental/semmle/python/Frameworks.qll b/python/ql/src/experimental/semmle/python/Frameworks.qll index 81b2c1bee23d..edbed61c41c4 100644 --- a/python/ql/src/experimental/semmle/python/Frameworks.qll +++ b/python/ql/src/experimental/semmle/python/Frameworks.qll @@ -3,6 +3,7 @@ */ private import experimental.semmle.python.frameworks.Stdlib +private import experimental.semmle.python.frameworks.Xml private import experimental.semmle.python.frameworks.Flask private import experimental.semmle.python.frameworks.Django private import experimental.semmle.python.frameworks.Werkzeug diff --git a/python/ql/src/experimental/semmle/python/frameworks/Xml.qll b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll new file mode 100644 index 000000000000..a2f36f66f2e3 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/frameworks/Xml.qll @@ -0,0 +1,466 @@ +/** + * Provides class and predicates to track external data that + * may represent malicious XML objects. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.ApiGraphs + +module XML = ExperimentalXML; + +private module XmlEtree { + /** + * Provides models for `xml.etree` parsers + * + * See + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLParser + * - https://docs.python.org/3.10/library/xml.etree.elementtree.html#xml.etree.ElementTree.XMLPullParser + */ + module XMLParser { + /** + * A source of instances of `xml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `xml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { } + + /** A direct instantiation of `xml.etree` parsers. */ + private class ClassInstantiation extends InstanceSource, DataFlow::CallCfgNode { + ClassInstantiation() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLParser") + .getACall() + or + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember("XMLPullParser") + .getACall() + } + } + + /** Gets a reference to an `xml.etree` parser instance. */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t) { + t.start() and + result instanceof InstanceSource + or + exists(DataFlow::TypeTracker t2 | result = instance(t2).track(t2, t)) + } + + /** Gets a reference to an `xml.etree` parser instance. */ + DataFlow::Node instance() { instance(DataFlow::TypeTracker::end()).flowsTo(result) } + + /** + * A call to the `feed` method of an `xml.etree` parser. + */ + private class XMLEtreeParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLEtreeParserFeedCall() { this.calls(instance(), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } + } + } + + /** + * A call to either of: + * - `xml.etree.ElementTree.fromstring` + * - `xml.etree.ElementTree.fromstringlist` + * - `xml.etree.ElementTree.XML` + * - `xml.etree.ElementTree.XMLID` + * - `xml.etree.ElementTree.parse` + * - `xml.etree.ElementTree.iterparse` + */ + private class XMLEtreeParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLEtreeParsing() { + this = + API::moduleImport("xml") + .getMember("etree") + .getMember("ElementTree") + .getMember(["fromstring", "fromstringlist", "XML", "XMLID", "parse", "iterparse"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML / XMLID + this.getArgByName("text"), + // fromstringlist + this.getArgByName("sequence"), + // parse / iterparse + this.getArgByName("source"), + ] + } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + // note: it does not matter what `xml.etree` parser you are using, you cannot + // change the security features anyway :| + kind.isBillionLaughs() or kind.isQuadraticBlowup() + } + } +} + +private module SaxBasedParsing { + /** + * A call to the `setFeature` method on a XML sax parser. + * + * See https://docs.python.org/3.10/library/xml.sax.reader.html#xml.sax.xmlreader.XMLReader.setFeature + */ + class SaxParserSetFeatureCall extends DataFlow::MethodCallNode { + SaxParserSetFeatureCall() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("setFeature") + .getACall() + } + + // The keyword argument names does not match documentation. I checked (with Python + // 3.9.5) that the names used here actually works. + DataFlow::Node getFeatureArg() { result in [this.getArg(0), this.getArgByName("name")] } + + DataFlow::Node getStateArg() { result in [this.getArg(1), this.getArgByName("state")] } + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. */ + private DataFlow::TypeTrackingNode saxParserSetFeatureStateArgBacktracker( + DataFlow::TypeBackTracker t, DataFlow::Node arg + ) { + t.start() and + arg = any(SaxParserSetFeatureCall c).getStateArg() and + result = arg.getALocalSource() + or + exists(DataFlow::TypeBackTracker t2 | + result = saxParserSetFeatureStateArgBacktracker(t2, arg).backtrack(t2, t) + ) + } + + /** Gets a back-reference to the `setFeature` state argument `arg`. */ + DataFlow::LocalSourceNode saxParserSetFeatureStateArgBacktracker(DataFlow::Node arg) { + result = saxParserSetFeatureStateArgBacktracker(DataFlow::TypeBackTracker::end(), arg) + } + + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ + private DataFlow::Node saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker t) { + t.start() and + exists(SaxParserSetFeatureCall call | + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = true and + result = call.getObject() + ) + or + exists(DataFlow::TypeTracker t2 | + t = t2.smallstep(saxParserWithFeatureExternalGesTurnedOn(t2), result) + ) and + // take account of that we can set the feature to False, which makes the parser safe again + not exists(SaxParserSetFeatureCall call | + call.getObject() = result and + call.getFeatureArg() = + API::moduleImport("xml") + .getMember("sax") + .getMember("handler") + .getMember("feature_external_ges") + .getAUse() and + saxParserSetFeatureStateArgBacktracker(call.getStateArg()) + .asExpr() + .(BooleanLiteral) + .booleanValue() = false + ) + } + + /** + * Gets a reference to a XML sax parser that has `feature_external_ges` turned on. + * + * See https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges + */ + DataFlow::Node saxParserWithFeatureExternalGesTurnedOn() { + result = saxParserWithFeatureExternalGesTurnedOn(DataFlow::TypeTracker::end()) + } + + /** + * A call to the `parse` method on a SAX XML parser. + */ + private class XMLSaxInstanceParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxInstanceParsing() { + this = + API::moduleImport("xml") + .getMember("sax") + .getMember("make_parser") + .getReturn() + .getMember("parse") + .getACall() + } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("source")] } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + } + + /** + * A call to either `parse` or `parseString` from `xml.sax` module. + * + * See: + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parse + * - https://docs.python.org/3.10/library/xml.sax.html#xml.sax.parseString + */ + private class XMLSaxParsing extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + XMLSaxParsing() { + this = + API::moduleImport("xml").getMember("sax").getMember(["parse", "parseString"]).getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // parse + this.getArgByName("source"), + ] + } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + // always vuln to these + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + or + // can be vuln to other things if features has been turned on + this.getObject() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + } + } + + /** + * A call to the `parse` or `parseString` methods from `xml.dom.minidom` or `xml.dom.pulldom`. + * + * Both of these modules are based on SAX parsers. + */ + private class XMLDomParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLDomParsing() { + this = + API::moduleImport("xml") + .getMember("dom") + .getMember(["minidom", "pulldom"]) + .getMember(["parse", "parseString"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // parseString + this.getArgByName("string"), + // minidom.parse + this.getArgByName("file"), + // pulldom.parse + this.getArgByName("stream_or_string"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + this.getParserArg() = saxParserWithFeatureExternalGesTurnedOn() and + (kind.isXxe() or kind.isDtdRetrieval()) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) + } + } +} + +private module Lxml { + /** + * Provides models for `lxml.etree` parsers. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + module XMLParser { + /** + * A source of instances of `lxml.etree` parsers, extend this class to model new instances. + * + * This can include instantiations of the class, return values from function + * calls, or a special parameter that will be set when functions are called by an external + * library. + * + * Use the predicate `XMLParser::instance()` to get references to instances of `lxml.etree` parsers. + */ + abstract class InstanceSource extends DataFlow::LocalSourceNode { + /** Holds if this instance is vulnerable to `kind`. */ + abstract predicate vulnerableTo(XML::XMLVulnerabilityKind kind); + } + + /** + * A call to `lxml.etree.XMLParser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + */ + private class LXMLParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLParser() { + this = API::moduleImport("lxml").getMember("etree").getMember("XMLParser").getACall() + } + + // NOTE: it's not possible to change settings of a parser after constructing it + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + kind.isXxe() and + ( + // resolve_entities has default True + not exists(this.getArgByName("resolve_entities")) + or + this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(True t) + ) + or + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + this.getArgByName("huge_tree").getALocalSource().asExpr() = any(True t) and + not this.getArgByName("resolve_entities").getALocalSource().asExpr() = any(False t) + or + kind.isDtdRetrieval() and + this.getArgByName("load_dtd").getALocalSource().asExpr() = any(True t) and + this.getArgByName("no_network").getALocalSource().asExpr() = any(False t) + } + } + + /** + * A call to `lxml.etree.get_default_parser`. + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.get_default_parser + */ + private class LXMLDefaultParser extends InstanceSource, DataFlow::CallCfgNode { + LXMLDefaultParser() { + this = + API::moduleImport("lxml").getMember("etree").getMember("get_default_parser").getACall() + } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + // as highlighted by + // https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + // by default XXE is allow. so as long as the default parser has not been + // overridden, the result is also vuln to XXE. + kind.isXxe() + // TODO: take into account that you can override the default parser with `lxml.etree.set_default_parser`. + } + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + private DataFlow::TypeTrackingNode instance(DataFlow::TypeTracker t, InstanceSource origin) { + t.start() and + result = origin + or + exists(DataFlow::TypeTracker t2 | result = instance(t2, origin).track(t2, t)) + } + + /** Gets a reference to an `lxml.etree` parsers instance, with origin in `origin` */ + DataFlow::Node instance(InstanceSource origin) { + instance(DataFlow::TypeTracker::end(), origin).flowsTo(result) + } + + /** Gets a reference to an `lxml.etree` parser instance, that is vulnerable to `kind`. */ + DataFlow::Node instanceVulnerableTo(XML::XMLVulnerabilityKind kind) { + exists(InstanceSource origin | result = instance(origin) and origin.vulnerableTo(kind)) + } + + /** + * A call to the `feed` method of an `lxml` parser. + */ + private class LXMLParserFeedCall extends DataFlow::MethodCallNode, XML::XMLParsing::Range { + LXMLParserFeedCall() { this.calls(instance(_), "feed") } + + override DataFlow::Node getAnInput() { result in [this.getArg(0), this.getArgByName("data")] } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + this.calls(instanceVulnerableTo(kind), "feed") + } + } + } + + /** + * A call to either of: + * - `lxml.etree.fromstring` + * - `lxml.etree.fromstringlist` + * - `lxml.etree.XML` + * - `lxml.etree.parse` + * - `lxml.etree.parseid` + * + * See https://lxml.de/apidoc/lxml.etree.html?highlight=parseids#lxml.etree.fromstring + */ + private class LXMLParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + LXMLParsing() { + this = + API::moduleImport("lxml") + .getMember("etree") + .getMember(["fromstring", "fromstringlist", "XML", "parse", "parseid"]) + .getACall() + } + + override DataFlow::Node getAnInput() { + result in [ + this.getArg(0), + // fromstring / XML + this.getArgByName("text"), + // fromstringlist + this.getArgByName("strings"), + // parse / parseid + this.getArgByName("source"), + ] + } + + DataFlow::Node getParserArg() { result in [this.getArg(1), this.getArgByName("parser")] } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + this.getParserArg() = XMLParser::instanceVulnerableTo(kind) + or + kind.isXxe() and + not exists(this.getParserArg()) + } + } +} + +private module Xmltodict { + /** + * A call to `xmltodict.parse`. + */ + private class XMLtoDictParsing extends DataFlow::CallCfgNode, XML::XMLParsing::Range { + XMLtoDictParsing() { this = API::moduleImport("xmltodict").getMember("parse").getACall() } + + override DataFlow::Node getAnInput() { + result in [this.getArg(0), this.getArgByName("xml_input")] + } + + override predicate vulnerableTo(XML::XMLVulnerabilityKind kind) { + (kind.isBillionLaughs() or kind.isQuadraticBlowup()) and + this.getArgByName("disable_entities").getALocalSource().asExpr() = any(False f) + } + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll new file mode 100644 index 000000000000..35220e153d12 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjection.qll @@ -0,0 +1,28 @@ +import python +import experimental.semmle.python.Concepts +import semmle.python.dataflow.new.DataFlow +import semmle.python.dataflow.new.TaintTracking +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.BarrierGuards + +module XmlEntityInjection { + import XmlEntityInjectionCustomizations::XmlEntityInjection + + class XmlEntityInjectionConfiguration extends TaintTracking::Configuration { + XmlEntityInjectionConfiguration() { this = "XmlEntityInjectionConfiguration" } + + override predicate isSource(DataFlow::Node source) { + source instanceof RemoteFlowSourceAsSource + } + + override predicate isSink(DataFlow::Node sink) { sink instanceof Sink } + + override predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { + guard instanceof SanitizerGuard + } + + override predicate isAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + any(AdditionalTaintStep s).step(nodeFrom, nodeTo) + } + } +} diff --git a/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll new file mode 100644 index 000000000000..e420c738a978 --- /dev/null +++ b/python/ql/src/experimental/semmle/python/security/dataflow/XmlEntityInjectionCustomizations.qll @@ -0,0 +1,86 @@ +/** + * Provides default sources, sinks and sanitizers for detecting + * "ldap injection" + * vulnerabilities, as well as extension points for adding your own. + */ + +private import python +private import semmle.python.dataflow.new.DataFlow +private import experimental.semmle.python.Concepts +private import semmle.python.dataflow.new.RemoteFlowSources +private import semmle.python.dataflow.new.BarrierGuards +private import semmle.python.ApiGraphs + +/** + * Provides default sources, sinks and sanitizers for detecting "xml injection" + * vulnerabilities, as well as extension points for adding your own. + */ +module XmlEntityInjection { + /** + * A data flow source for "xml injection" vulnerabilities. + */ + abstract class Source extends DataFlow::Node { } + + /** + * A data flow sink for "xml injection" vulnerabilities. + */ + abstract class Sink extends DataFlow::Node { + /** Gets the kind of XML injection that this sink is vulnerable to. */ + abstract string getVulnerableKind(); + } + + /** + * A sanitizer guard for "xml injection" vulnerabilities. + */ + abstract class SanitizerGuard extends DataFlow::BarrierGuard { } + + /** + * A unit class for adding additional taint steps. + * + * Extend this class to add additional taint steps that should apply to `XmlEntityInjection` + * taint configuration. + */ + class AdditionalTaintStep extends Unit { + /** + * Holds if the step from `nodeFrom` to `nodeTo` should be considered a taint + * step for `XmlEntityInjection` configuration. + */ + abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo); + } + + /** + * An input to a direct XML parsing function, considered as a flow sink. + * + * See `XML::XMLParsing`. + */ + class XMLParsingInputAsSink extends Sink { + ExperimentalXML::XMLParsing xmlParsing; + + XMLParsingInputAsSink() { this = xmlParsing.getAnInput() } + + override string getVulnerableKind() { xmlParsing.vulnerableTo(result) } + } + + /** + * A source of remote user input, considered as a flow source. + */ + class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + + /** + * A comparison with a constant string, considered as a sanitizer-guard. + */ + class StringConstCompareAsSanitizerGuard extends SanitizerGuard, StringConstCompare { } + + /** + * A taint step for `io`'s `StringIO` and `BytesIO` methods. + */ + class IoAdditionalTaintStep extends AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + exists(DataFlow::CallCfgNode ioCalls | + ioCalls = API::moduleImport("io").getMember(["StringIO", "BytesIO"]).getACall() and + nodeFrom = ioCalls.getArg(0) and + nodeTo = ioCalls + ) + } + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql new file mode 100644 index 000000000000..81bc391d0e55 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/ExperimentalXmlConceptsTests.ql @@ -0,0 +1,33 @@ +import python +import experimental.semmle.python.Concepts +import experimental.semmle.python.frameworks.Xml +import semmle.python.dataflow.new.DataFlow +import TestUtilities.InlineExpectationsTest +private import semmle.python.dataflow.new.internal.PrintNode + +class XmlParsingTest extends InlineExpectationsTest { + XmlParsingTest() { this = "XmlParsingTest" } + + override string getARelevantTag() { result in ["input", "vuln"] } + + override predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(XML::XMLParsing parsing | + exists(DataFlow::Node input | + input = parsing.getAnInput() and + location = input.getLocation() and + element = input.toString() and + value = prettyNodeForInlineTest(input) and + tag = "input" + ) + or + exists(XML::XMLVulnerabilityKind kind | + parsing.vulnerableTo(kind) and + location = parsing.getLocation() and + element = parsing.toString() and + value = "'" + kind + "'" and + tag = "vuln" + ) + ) + } +} diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py new file mode 100644 index 000000000000..22930a58af37 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/lxml_etree.py @@ -0,0 +1,54 @@ +from io import StringIO +import lxml.etree + +x = "some xml" + +# different parsing methods +lxml.etree.fromstring(x) # $ input=x vuln='XXE' +lxml.etree.fromstring(text=x) # $ input=x vuln='XXE' + +lxml.etree.fromstringlist([x]) # $ input=List vuln='XXE' +lxml.etree.fromstringlist(strings=[x]) # $ input=List vuln='XXE' + +lxml.etree.XML(x) # $ input=x vuln='XXE' +lxml.etree.XML(text=x) # $ input=x vuln='XXE' + +lxml.etree.parse(StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' + +lxml.etree.parseid(StringIO(x)) # $ input=StringIO(..) vuln='XXE' +lxml.etree.parseid(source=StringIO(x)) # $ input=StringIO(..) vuln='XXE' + +# With default parsers (nothing changed) +parser = lxml.etree.XMLParser() +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +parser = lxml.etree.get_default_parser() +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +# manual use of feed method +parser = lxml.etree.XMLParser() +parser.feed(x) # $ input=x vuln='XXE' +parser.feed(data=x) # $ input=x vuln='XXE' +parser.close() + +# XXE-safe +parser = lxml.etree.XMLParser(resolve_entities=False) +lxml.etree.fromstring(x, parser) # $ input=x +lxml.etree.fromstring(x, parser=parser) # $ input=x + +# XXE-vuln +parser = lxml.etree.XMLParser(resolve_entities=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='XXE' + +# Billion laughs vuln (also XXE) +parser = lxml.etree.XMLParser(huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' vuln='XXE' + +# Safe for both Billion laughs and XXE +parser = lxml.etree.XMLParser(resolve_entities=False, huge_tree=True) +lxml.etree.fromstring(x, parser=parser) # $ input=x + +# DTD retrival vuln (also XXE) +parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) +lxml.etree.fromstring(x, parser=parser) # $ input=x vuln='DTD retrieval' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py new file mode 100644 index 000000000000..adcace1aa0a6 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/PoC.py @@ -0,0 +1,677 @@ +#!/usr/bin/env python3 + +# this file doesn't have a .py extension so the extractor doesn't pick it up, so it +# doesn't have to be annotated + +# This file shows the ways to make exploit vulnerable XML parsing +# see +# https://pypi.org/project/defusedxml/#python-xml-libraries +# https://docs.python.org/3.10/library/xml.html#xml-vulnerabilities + +import pathlib +from flask import Flask +import threading +import multiprocessing +import time +from io import StringIO +import pytest + +HOST = "localhost" +PORT = 8080 + + +FLAG_PATH = pathlib.Path(__file__).with_name("flag") + +# ============================================================================== +# xml samples + +ok_xml = f""" +hello world +""" + +local_xxe = f""" + +]> +&xxe; +""" + +remote_xxe = f""" + +]> +&remote_xxe; +""" + +billion_laughs = """ + + + + + + + + + + + +]> +&lol9;""" + +quadratic_blowup = f""" + +]> +{"&oops;"*20000}""" + +dtd_retrieval = f""" + +bar +""" + +# ============================================================================== +# other setup + +# we set up local Flask application so we can tests whether loading external resources +# works (such as SSRF from DTD-retrival works) +app = Flask(__name__) + +@app.route("/alive") +def alive(): + return "ok" + +hit_dtd = False +@app.route("/test.dtd") +def test_dtd(): + global hit_dtd + hit_dtd = True + return """""" + +hit_xxe = False +@app.route("/xxe") +def test_xxe(): + global hit_xxe + hit_xxe = True + return "ok" + +def run_app(): + app.run(host=HOST, port=PORT) + +@pytest.fixture(scope="session", autouse=True) +def flask_app_running(): + # run flask in other thread + flask_thread = threading.Thread(target=run_app, daemon=True) + flask_thread.start() + + # give flask a bit of time to start + time.sleep(0.1) + + # ensure that the server works + import requests + requests.get(f"http://{HOST}:{PORT}/alive") + + yield + +def expects_timeout(func): + def inner(): + proc = multiprocessing.Process(target=func) + proc.start() + time.sleep(0.1) + assert proc.exitcode == None + proc.kill() + proc.join() + return inner + + +class TestExpectsTimeout: + "test that expects_timeout works as expected" + + @staticmethod + @expects_timeout + def test_slow(): + time.sleep(1000) + + @staticmethod + def test_fast(): + @expects_timeout + def fast_func(): + return "done!" + + with pytest.raises(AssertionError): + fast_func() + +# ============================================================================== +import xml.sax +import xml.sax.handler + +class SimpleHandler(xml.sax.ContentHandler): + def __init__(self): + self.result = [] + + def characters(self, data): + self.result.append(data) + +class TestSax(): + # always vuln to billion laughs, quadratic + + @staticmethod + @expects_timeout + def test_billion_laughs_allowed_by_default(): + parser = xml.sax.make_parser() + parser.parse(StringIO(billion_laughs)) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_allowed_by_default(): + parser = xml.sax.make_parser() + parser.parse(StringIO(quadratic_blowup)) + + @staticmethod + def test_ok_xml(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.parse(StringIO(ok_xml)) + assert handler.result == ["hello world"], handler.result + + @staticmethod + def test_xxe_disabled_by_default(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.parse(StringIO(local_xxe)) + assert handler.result == [], handler.result + + @staticmethod + def test_local_xxe_manually_enabled(): + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(local_xxe)) + assert handler.result[0] == "SECRET_FLAG", handler.result + + @staticmethod + def test_remote_xxe_manually_enabled(): + global hit_xxe + hit_xxe = False + + handler = SimpleHandler() + parser = xml.sax.make_parser() + parser.setContentHandler(handler) + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(remote_xxe)) + assert handler.result == ["ok"], handler.result + assert hit_xxe == True + + @staticmethod + def test_dtd_disabled_by_default(): + global hit_dtd + hit_dtd = False + + parser = xml.sax.make_parser() + parser.parse(StringIO(dtd_retrieval)) + assert hit_dtd == False + + @staticmethod + def test_dtd_manually_enabled(): + global hit_dtd + hit_dtd = False + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(dtd_retrieval)) + assert hit_dtd == True + + +# ============================================================================== +import xml.etree.ElementTree + +class TestEtree: + + # always vuln to billion laughs, quadratic + @staticmethod + @expects_timeout + def test_billion_laughs_allowed_by_default(): + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(billion_laughs, parser=parser) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_allowed_by_default(): + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(quadratic_blowup, parser=parser) + + @staticmethod + def test_ok_xml(): + parser = xml.etree.ElementTree.XMLParser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_ok_xml_sax_parser(): + # you _can_ pass a SAX parser to xml.etree... but it doesn't give you the output :| + parser = xml.sax.make_parser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root == None + + @staticmethod + def test_ok_xml_lxml_parser(): + # this is technically possible, since parsers follow the same API, and the + # `fromstring` function is just a thin wrapper... seems very unlikely that + # anyone would do this though :| + parser = lxml.etree.XMLParser() + root = xml.etree.ElementTree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_xxe_not_possible(): + parser = xml.etree.ElementTree.XMLParser() + try: + _root = xml.etree.ElementTree.fromstring(local_xxe, parser=parser) + assert False + except xml.etree.ElementTree.ParseError as e: + assert "undefined entity &xxe" in str(e) + + @staticmethod + def test_dtd_not_possible(): + global hit_dtd + hit_dtd = False + + parser = xml.etree.ElementTree.XMLParser() + _root = xml.etree.ElementTree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + +# ============================================================================== +import lxml.etree + +class TestLxml: + # see https://lxml.de/apidoc/lxml.etree.html?highlight=xmlparser#lxml.etree.XMLParser + @staticmethod + def test_billion_laughs_disabled_by_default(): + parser = lxml.etree.XMLParser() + try: + _root = lxml.etree.fromstring(billion_laughs, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + def test_quardratic_blowup_disabled_by_default(): + parser = lxml.etree.XMLParser() + try: + _root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Detected an entity reference loop" in str(e) + + @staticmethod + @expects_timeout + def test_billion_laughs_manually_enabled(): + parser = lxml.etree.XMLParser(huge_tree=True) + root = lxml.etree.fromstring(billion_laughs, parser=parser) + + @staticmethod + @expects_timeout + def test_quadratic_blowup_manually_enabled(): + parser = lxml.etree.XMLParser(huge_tree=True) + root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + + @staticmethod + def test_billion_laughs_huge_tree_not_enough(): + parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False) + root = lxml.etree.fromstring(billion_laughs, parser=parser) + assert root.tag == "lolz" + assert root.text == None + + @staticmethod + def test_quadratic_blowup_huge_tree_not_enough(): + parser = lxml.etree.XMLParser(huge_tree=True, resolve_entities=False) + root = lxml.etree.fromstring(quadratic_blowup, parser=parser) + assert root.tag == "foo" + assert root.text == None + + @staticmethod + def test_ok_xml(): + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(ok_xml, parser=parser) + assert root.tag == "test" + assert root.text == "hello world" + + @staticmethod + def test_local_xxe_enabled_by_default(): + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(local_xxe, parser=parser) + assert root.tag == "test" + assert root.text == "SECRET_FLAG\n", root.text + + @staticmethod + def test_local_xxe_disabled(): + parser = lxml.etree.XMLParser(resolve_entities=False) + root = lxml.etree.fromstring(local_xxe, parser=parser) + assert root.tag == "test" + assert root.text == None + + @staticmethod + def test_remote_xxe_disabled_by_default(): + global hit_xxe + hit_xxe = False + + parser = lxml.etree.XMLParser() + try: + root = lxml.etree.fromstring(remote_xxe, parser=parser) + assert False + except lxml.etree.XMLSyntaxError as e: + assert "Failure to process entity remote_xxe" in str(e) + assert hit_xxe == False + + @staticmethod + def test_remote_xxe_manually_enabled(): + global hit_xxe + hit_xxe = False + + parser = lxml.etree.XMLParser(no_network=False) + root = lxml.etree.fromstring(remote_xxe, parser=parser) + assert root.tag == "test" + assert root.text == "ok" + assert hit_xxe == True + + @staticmethod + def test_dtd_disabled_by_default(): + global hit_dtd + hit_dtd = False + + parser = lxml.etree.XMLParser() + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + @staticmethod + def test_dtd_manually_enabled(): + global hit_dtd + hit_dtd = False + + # Need to set BOTH load_dtd and no_network + parser = lxml.etree.XMLParser(load_dtd=True) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + parser = lxml.etree.XMLParser(no_network=False) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == False + + parser = lxml.etree.XMLParser(load_dtd=True, no_network=False) + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + assert hit_dtd == True + + hit_dtd = False + + # Setting dtd_validation also does not allow the remote access + parser = lxml.etree.XMLParser(dtd_validation=True, load_dtd=True) + try: + root = lxml.etree.fromstring(dtd_retrieval, parser=parser) + except lxml.etree.XMLSyntaxError: + pass + assert hit_dtd == False + + +# ============================================================================== + +import xmltodict + +class TestXmltodict: + @staticmethod + def test_billion_laughs_disabled_by_default(): + d = xmltodict.parse(billion_laughs) + assert d == {"lolz": None}, d + + @staticmethod + def test_quardratic_blowup_disabled_by_default(): + d = xmltodict.parse(quadratic_blowup) + assert d == {"foo": None}, d + + @staticmethod + @expects_timeout + def test_billion_laughs_manually_enabled(): + xmltodict.parse(billion_laughs, disable_entities=False) + + @staticmethod + @expects_timeout + def test_quardratic_blowup_manually_enabled(): + xmltodict.parse(quadratic_blowup, disable_entities=False) + + @staticmethod + def test_ok_xml(): + d = xmltodict.parse(ok_xml) + assert d == {"test": "hello world"}, d + + @staticmethod + def test_local_xxe_not_possible(): + d = xmltodict.parse(local_xxe) + assert d == {"test": None} + + d = xmltodict.parse(local_xxe, disable_entities=False) + assert d == {"test": None} + + @staticmethod + def test_remote_xxe_not_possible(): + global hit_xxe + hit_xxe = False + + d = xmltodict.parse(remote_xxe) + assert d == {"test": None} + assert hit_xxe == False + + d = xmltodict.parse(remote_xxe, disable_entities=False) + assert d == {"test": None} + assert hit_xxe == False + + @staticmethod + def test_dtd_not_possible(): + global hit_dtd + hit_dtd = False + + d = xmltodict.parse(dtd_retrieval) + assert hit_dtd == False + +# ============================================================================== +import xml.dom.minidom + +class TestMinidom: + @staticmethod + @expects_timeout + def test_billion_laughs(): + xml.dom.minidom.parseString(billion_laughs) + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + xml.dom.minidom.parseString(quadratic_blowup) + + @staticmethod + def test_ok_xml(): + doc = xml.dom.minidom.parseString(ok_xml) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes[0].data == "hello world" + + @staticmethod + def test_xxe(): + # disabled by default + doc = xml.dom.minidom.parseString(local_xxe) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes == [] + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.minidom.parseString(local_xxe, parser=parser) + assert doc.documentElement.tagName == "test" + assert doc.documentElement.childNodes[0].data == "SECRET_FLAG" + + # which also works remotely + global hit_xxe + hit_xxe = False + + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + _doc = xml.dom.minidom.parseString(remote_xxe, parser=parser) + assert hit_xxe == True + + @staticmethod + def test_dtd(): + # not possible by default + global hit_dtd + hit_dtd = False + + _doc = xml.dom.minidom.parseString(dtd_retrieval) + assert hit_dtd == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + _doc = xml.dom.minidom.parseString(dtd_retrieval, parser=parser) + assert hit_dtd == True + +# ============================================================================== +import xml.dom.pulldom + +class TestPulldom: + @staticmethod + @expects_timeout + def test_billion_laughs(): + doc = xml.dom.pulldom.parseString(billion_laughs) + # you NEED to iterate over the items for it to take long + for event, node in doc: + pass + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + doc = xml.dom.pulldom.parseString(quadratic_blowup) + for event, node in doc: + pass + + @staticmethod + def test_ok_xml(): + doc = xml.dom.pulldom.parseString(ok_xml) + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + assert node.data == "hello world" + + @staticmethod + def test_xxe(): + # disabled by default + doc = xml.dom.pulldom.parseString(local_xxe) + found_flag = False + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + if node.data == "SECRET_FLAG": + found_flag = True + assert found_flag == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(local_xxe, parser=parser) + found_flag = False + for event, node in doc: + if event == xml.dom.pulldom.START_ELEMENT: + assert node.tagName == "test" + elif event == xml.dom.pulldom.CHARACTERS: + if node.data == "SECRET_FLAG": + found_flag = True + assert found_flag == True + + # which also works remotely + global hit_xxe + hit_xxe = False + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(remote_xxe, parser=parser) + assert hit_xxe == False + for event, node in doc: + pass + assert hit_xxe == True + + @staticmethod + def test_dtd(): + # not possible by default + global hit_dtd + hit_dtd = False + + doc = xml.dom.pulldom.parseString(dtd_retrieval) + for event, node in doc: + pass + assert hit_dtd == False + + # but can be turned on + parser = xml.sax.make_parser() + parser.setFeature(xml.sax.handler.feature_external_ges, True) + doc = xml.dom.pulldom.parseString(dtd_retrieval, parser=parser) + for event, node in doc: + pass + assert hit_dtd == True + +# ============================================================================== +import xml.parsers.expat + +class TestExpat: + # this is the underlying parser implementation used by the rest of the Python + # standard library. But people are probably not using this directly. + + @staticmethod + @expects_timeout + def test_billion_laughs(): + parser = xml.parsers.expat.ParserCreate() + parser.Parse(billion_laughs, True) + + @staticmethod + @expects_timeout + def test_quardratic_blowup(): + parser = xml.parsers.expat.ParserCreate() + parser.Parse(quadratic_blowup, True) + + @staticmethod + def test_ok_xml(): + char_data_recv = [] + def char_data_handler(data): + char_data_recv.append(data) + + parser = xml.parsers.expat.ParserCreate() + parser.CharacterDataHandler = char_data_handler + parser.Parse(ok_xml, True) + + assert char_data_recv == ["hello world"] + + @staticmethod + def test_xxe(): + # not vuln by default + char_data_recv = [] + def char_data_handler(data): + char_data_recv.append(data) + + parser = xml.parsers.expat.ParserCreate() + parser.CharacterDataHandler = char_data_handler + parser.Parse(local_xxe, True) + + assert char_data_recv == [] + + # there might be ways to make it vuln, but I did not investigate futher. + + @staticmethod + def test_dtd(): + # not vuln by default + global hit_dtd + hit_dtd = False + + parser = xml.parsers.expat.ParserCreate() + parser.Parse(dtd_retrieval, True) + assert hit_dtd == False + + # there might be ways to make it vuln, but I did not investigate futher. diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag new file mode 100644 index 000000000000..45c9436ee9f2 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/flag @@ -0,0 +1 @@ +SECRET_FLAG diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted new file mode 100644 index 000000000000..b1925ade1d3a --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/poc/this-dir-is-not-extracted @@ -0,0 +1 @@ +just FYI diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py new file mode 100644 index 000000000000..7dce29fc7b9d --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_dom.py @@ -0,0 +1,31 @@ +from io import StringIO +import xml.dom.minidom +import xml.dom.pulldom +import xml.sax + +x = "some xml" + +# minidom +xml.dom.minidom.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parse(file=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.dom.minidom.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.minidom.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + + +# pulldom +xml.dom.pulldom.parse(StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parse(stream_or_string=StringIO(x))['START_DOCUMENT'][1] # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.dom.pulldom.parseString(x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.dom.pulldom.parseString(string=x)['START_DOCUMENT'][1] # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + + +# These are based on SAX parses, and you can specify your own, so you can expose yourself to XXE (yay/) +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +xml.dom.minidom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +xml.dom.minidom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +xml.dom.pulldom.parse(StringIO(x), parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' +xml.dom.pulldom.parse(StringIO(x), parser=parser) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py new file mode 100644 index 000000000000..df126e458e2d --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_etree.py @@ -0,0 +1,45 @@ +from io import StringIO +import xml.etree.ElementTree + +x = "some xml" + +# Parsing in different ways +xml.etree.ElementTree.fromstring(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstring(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.fromstringlist([x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.fromstringlist(sequence=[x]) # $ input=List vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.XML(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XML(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.XMLID(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.XMLID(text=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.etree.ElementTree.iterparse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.etree.ElementTree.iterparse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + + +# With parsers (no options available to disable/enable security features) +parser = xml.etree.ElementTree.XMLParser() +xml.etree.ElementTree.fromstring(x, parser=parser) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +# manual use of feed method +parser = xml.etree.ElementTree.XMLParser() +parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.close() + +# manual use of feed method on XMLPullParser +parser = xml.etree.ElementTree.XMLPullParser() +parser.feed(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.feed(data=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.close() + +# note: it's technically possible to use the thing wrapper func `fromstring` with an +# `lxml` parser, and thereby change what vulnerabilities you are exposed to.. but it +# seems very unlikely that anyone would do this, so we have intentionally not added any +# tests for this. diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py new file mode 100644 index 000000000000..158e62ffae6b --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xml_sax.py @@ -0,0 +1,64 @@ +from io import StringIO +import xml.sax + +x = "some xml" + +class MainHandler(xml.sax.ContentHandler): + def __init__(self): + self._result = [] + + def characters(self, data): + self._result.append(data) + +xml.sax.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +xml.sax.parseString(x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' +xml.sax.parseString(string=x) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' + +parser = xml.sax.make_parser() +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' +parser.parse(source=StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# You can make it vuln to both XXE and DTD retrieval by setting this flag +# see https://docs.python.org/3/library/xml.sax.handler.html#xml.sax.handler.feature_external_ges +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# Forward Type Tracking test +def func(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + else: + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +# make it vuln, then making it safe +# a bit of an edge-case, but is nice to be able to handle. +parser = xml.sax.make_parser() +parser.setFeature(xml.sax.handler.feature_external_ges, True) +parser.setFeature(xml.sax.handler.feature_external_ges, False) +parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='Quadratic Blowup' + +def check_conditional_assignment(cond): + parser = xml.sax.make_parser() + if cond: + parser.setFeature(xml.sax.handler.feature_external_ges, True) + else: + parser.setFeature(xml.sax.handler.feature_external_ges, False) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' + +def check_conditional_assignment2(cond): + parser = xml.sax.make_parser() + if cond: + flag_value = True + else: + flag_value = False + parser.setFeature(xml.sax.handler.feature_external_ges, flag_value) + parser.parse(StringIO(x)) # $ input=StringIO(..) vuln='Billion Laughs' vuln='DTD retrieval' vuln='Quadratic Blowup' vuln='XXE' diff --git a/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py new file mode 100644 index 000000000000..473e51c9fe66 --- /dev/null +++ b/python/ql/test/experimental/library-tests/frameworks/XML/xmltodict.py @@ -0,0 +1,8 @@ +import xmltodict + +x = "some xml" + +xmltodict.parse(x) # $ input=x +xmltodict.parse(xml_input=x) # $ input=x + +xmltodict.parse(x, disable_entities=False) # $ input=x vuln='Billion Laughs' vuln='Quadratic Blowup' diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected new file mode 100644 index 000000000000..4a08d61c47af --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.expected @@ -0,0 +1 @@ +| xmlrpc_server.py:7:10:7:48 | ControlFlowNode for SimpleXMLRPCServer() | SimpleXMLRPCServer is vulnerable to: Billion Laughs, Quadratic Blowup. | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref new file mode 100644 index 000000000000..a0b30e6d69b8 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/SimpleXmlRpcServer.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/SimpleXmlRpcServer.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected new file mode 100644 index 000000000000..25594b4ddaaf --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.expected @@ -0,0 +1,27 @@ +edges +| test.py:8:19:8:25 | ControlFlowNode for request | test.py:8:19:8:30 | ControlFlowNode for Attribute | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | test.py:8:19:8:45 | ControlFlowNode for Subscript | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | test.py:9:34:9:44 | ControlFlowNode for xml_content | +| test.py:13:19:13:25 | ControlFlowNode for request | test.py:13:19:13:30 | ControlFlowNode for Attribute | +| test.py:13:19:13:30 | ControlFlowNode for Attribute | test.py:13:19:13:45 | ControlFlowNode for Subscript | +| test.py:13:19:13:45 | ControlFlowNode for Subscript | test.py:15:34:15:44 | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | test.py:19:19:19:30 | ControlFlowNode for Attribute | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | test.py:19:19:19:45 | ControlFlowNode for Subscript | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | test.py:30:34:30:44 | ControlFlowNode for xml_content | +nodes +| test.py:8:19:8:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:8:19:8:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:8:19:8:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:9:34:9:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:13:19:13:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:13:19:13:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:13:19:13:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:15:34:15:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +| test.py:19:19:19:25 | ControlFlowNode for request | semmle.label | ControlFlowNode for request | +| test.py:19:19:19:30 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:19:19:19:45 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | semmle.label | ControlFlowNode for xml_content | +subpaths +#select +| test.py:9:34:9:44 | ControlFlowNode for xml_content | test.py:8:19:8:25 | ControlFlowNode for request | test.py:9:34:9:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: XXE. | test.py:9:34:9:44 | ControlFlowNode for xml_content | This | test.py:8:19:8:25 | ControlFlowNode for request | user-provided value | +| test.py:30:34:30:44 | ControlFlowNode for xml_content | test.py:19:19:19:25 | ControlFlowNode for request | test.py:30:34:30:44 | ControlFlowNode for xml_content | $@ XML input is constructed from a $@ and is vulnerable to: Billion Laughs, DTD retrieval, Quadratic Blowup, XXE. | test.py:30:34:30:44 | ControlFlowNode for xml_content | This | test.py:19:19:19:25 | ControlFlowNode for request | user-provided value | diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref new file mode 100644 index 000000000000..36a7c8845fb7 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/XmlEntityInjection.qlref @@ -0,0 +1 @@ +experimental/Security/CWE-611/XmlEntityInjection.ql diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/test.py b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py new file mode 100644 index 000000000000..d9181c4cf346 --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/test.py @@ -0,0 +1,30 @@ +from flask import Flask, request +import lxml.etree + +app = Flask(__name__) + +@app.route("/vuln-handler") +def vuln_handler(): + xml_content = request.args['xml_content'] + return lxml.etree.fromstring(xml_content).text + +@app.route("/safe-handler") +def safe_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser(resolve_entities=False) + return lxml.etree.fromstring(xml_content, parser=parser).text + +@app.route("/super-vuln-handler") +def super_vuln_handler(): + xml_content = request.args['xml_content'] + parser = lxml.etree.XMLParser( + # allows XXE + resolve_entities=True, + # allows remote XXE + no_network=False, + # together with `no_network=False`, allows DTD-retrival + load_dtd=True, + # allows DoS attacks + huge_tree=True, + ) + return lxml.etree.fromstring(xml_content, parser=parser).text diff --git a/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py new file mode 100644 index 000000000000..83c18b549b3d --- /dev/null +++ b/python/ql/test/experimental/query-tests/Security/CWE-611/xmlrpc_server.py @@ -0,0 +1,12 @@ +from xmlrpc.server import SimpleXMLRPCServer + +def foo(n: str): + print("foo called with arg:", n, type(n)) + return "ok" + +server = SimpleXMLRPCServer(("127.0.0.1", 8000)) +server.register_function(foo, "foo") +server.serve_forever() + +# normal: curl 127.0.0.1:8000 --data-raw 'foo42' +# billion_laughs: curl 127.0.0.1:8000 --data-raw ']>foo&lol9;'