Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions python/ql/src/experimental/CWE-643/Xslt.qhelp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<!DOCTYPE qhelp SYSTEM "qhelp.dtd">
<qhelp>
<overview>
<p>
Processing an unvalidated XSL stylesheet can allow an attacker to change the structure and contents of the resultant XML, include arbitrary files from the file system, or execute arbitrary code.
</p>
</overview>
<recommendation>
<p>
This vulnerability can be prevented by not allowing untrusted user input to be passed as a XSL stylesheet.
If the application logic necessiates processing untrusted XSL stylesheets, the input should be properly filtered and sanitized before use.
</p>
</recommendation>
<example>
<p>In the example below, the XSL stylesheet is controlled by the user and hence leads to a vulnerability.</p>
<sample src="xslt.py" />
</example>
</qhelp>
35 changes: 35 additions & 0 deletions python/ql/src/experimental/CWE-643/Xslt.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
* @name Xslt query built from user-controlled sources
* @description Building a XSLT query from user-controlled sources is vulnerable to insertion of
* malicious XSLT code by the user.
* @kind path-problem
* @problem.severity error
* @precision high
* @id py/xslt-injection
* @tags security
* external/cwe/cwe-643
*/

import python
import semmle.python.security.Paths
/* Sources */
import semmle.python.web.HttpRequest
/* Sinks */
import experimental.semmle.python.security.injection.XSLT

class XSLTInjectionConfiguration extends TaintTracking::Configuration {
XSLTInjectionConfiguration() { this = "XSLT injection configuration" }

override predicate isSource(TaintTracking::Source source) {
source instanceof HttpRequestTaintSource
}

override predicate isSink(TaintTracking::Sink sink) {
sink instanceof XSLTInjection::XSLTInjectionSink
}
}

from XSLTInjectionConfiguration config, TaintedPathSource src, TaintedPathSink sink
where config.hasFlowPath(src, sink)
select sink.getSink(), src, sink, "This XSLT query depends on $@.", src.getSource(),
"a user-provided value"
14 changes: 14 additions & 0 deletions python/ql/src/experimental/CWE-643/xslt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from lxml import etree
from io import StringIO
from flask import Flask, request

app = Flask(__name__)


@app.route("/xslt")
def bad():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root) # Not OK
119 changes: 119 additions & 0 deletions python/ql/src/experimental/semmle/python/security/injection/XSLT.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/**
* Provides class and predicates to track external data that
* may represent malicious XSLT query objects.
*
* This module is intended to be imported into a taint-tracking query
* to extend `TaintKind` and `TaintSink`.
*/

import python
import semmle.python.dataflow.TaintTracking
import semmle.python.web.HttpRequest

/** Models XSLT Injection related classes and functions */
module XSLTInjection {
/** Returns a class value which refers to `lxml.etree` */
Value etree() { result = Value::named("lxml.etree") }

/** A generic taint sink that is vulnerable to XSLT injection. */
abstract class XSLTInjectionSink extends TaintSink { }

/**
* A kind of "taint", representing an untrusted XML string
*/
private class ExternalXmlStringKind extends ExternalStringKind {
ExternalXmlStringKind() { this = "etree.XML string" }

override TaintKind getTaintForFlowStep(ControlFlowNode fromnode, ControlFlowNode tonode) {
etreeXML(fromnode, tonode) and result instanceof ExternalXmlKind
or
etreeFromStringList(fromnode, tonode) and result instanceof ExternalXmlKind
or
etreeFromString(fromnode, tonode) and result instanceof ExternalXmlKind
}
}

/**
* A kind of "taint", representing a XML encoded string
*/
class ExternalXmlKind extends TaintKind {
ExternalXmlKind() { this = "lxml etree xml" }
}

private predicate etreeXML(ControlFlowNode fromnode, CallNode tonode) {
exists(CallNode call, AttrNode atr |
atr = etree().getAReference().getASuccessor() and
// XML(text, parser=None, base_url=None)
atr.getName() = "XML" and
atr = call.getFunction()
Comment on lines +44 to +48
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems like I was a bit quick to say "all good", would like you to change this getASuccessor as well 👍

|
call.getArg(0) = fromnode and
call = tonode
)
}

private predicate etreeFromString(ControlFlowNode fromnode, CallNode tonode) {
// fromstring(text, parser=None)
exists(CallNode call | call.getFunction().(AttrNode).getObject("fromstring").pointsTo(etree()) |
call.getArg(0) = fromnode and
call = tonode
)
}

private predicate etreeFromStringList(ControlFlowNode fromnode, CallNode tonode) {
// fromstringlist(strings, parser=None)
exists(CallNode call |
call.getFunction().(AttrNode).getObject("fromstringlist").pointsTo(etree())
|
call.getArg(0) = fromnode and
call = tonode
)
}

/**
* A Sink representing an argument to the `etree.XSLT` call.
*
* from lxml import etree
* root = etree.XML("<xmlContent>")
* find_text = etree.XSLT("`sink`")
*/
private class EtreeXSLTArgument extends XSLTInjectionSink {
override string toString() { result = "lxml.etree.XSLT" }

EtreeXSLTArgument() {
exists(CallNode call | call.getFunction().(AttrNode).getObject("XSLT").pointsTo(etree()) |
call.getArg(0) = this
)
}

override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
}

/**
* A Sink representing an argument to the `XSLT` call to a parsed xml document.
*
* from lxml import etree
* from io import StringIO
* `sink` = etree.XML(xsltQuery)
* tree = etree.parse(f)
* result_tree = tree.xslt(`sink`)
*/
private class ParseXSLTArgument extends XSLTInjectionSink {
override string toString() { result = "lxml.etree.parse.xslt" }

ParseXSLTArgument() {
exists(
CallNode parseCall, CallNode xsltCall, ControlFlowNode obj, Variable var, AssignStmt assign
|
parseCall.getFunction().(AttrNode).getObject("parse").pointsTo(etree()) and
assign.getValue().(Call).getAFlowNode() = parseCall and
xsltCall.getFunction().(AttrNode).getObject("xslt") = obj and
var.getAUse() = obj and
assign.getATarget() = var.getAStore() and
xsltCall.getArg(0) = this
)
}

override predicate sinks(TaintKind kind) { kind instanceof ExternalXmlKind }
}
}
47 changes: 47 additions & 0 deletions python/ql/test/experimental/CWE-074/Xslt.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
edges
| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string |
| xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:10:17:10:43 | etree.XML string |
| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string |
| xslt.py:10:17:10:43 | etree.XML string | xslt.py:11:27:11:35 | etree.XML string |
| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml |
| xslt.py:11:17:11:36 | lxml etree xml | xslt.py:14:29:14:37 | lxml etree xml |
| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml |
| xslt.py:11:27:11:35 | etree.XML string | xslt.py:11:17:11:36 | lxml etree xml |
| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string |
| xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:10:17:10:43 | etree.XML string |
| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string |
| xsltInjection.py:10:17:10:43 | etree.XML string | xsltInjection.py:11:27:11:35 | etree.XML string |
| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml |
| xsltInjection.py:11:17:11:36 | lxml etree xml | xsltInjection.py:12:28:12:36 | lxml etree xml |
| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml |
| xsltInjection.py:11:27:11:35 | etree.XML string | xsltInjection.py:11:17:11:36 | lxml etree xml |
| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string |
| xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:17:17:17:43 | etree.XML string |
| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string |
| xsltInjection.py:17:17:17:43 | etree.XML string | xsltInjection.py:18:27:18:35 | etree.XML string |
| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml |
| xsltInjection.py:18:17:18:36 | lxml etree xml | xsltInjection.py:21:29:21:37 | lxml etree xml |
| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml |
| xsltInjection.py:18:27:18:35 | etree.XML string | xsltInjection.py:18:17:18:36 | lxml etree xml |
| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string |
| xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:26:17:26:43 | etree.XML string |
| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string |
| xsltInjection.py:26:17:26:43 | etree.XML string | xsltInjection.py:27:27:27:35 | etree.XML string |
| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml |
| xsltInjection.py:27:17:27:36 | lxml etree xml | xsltInjection.py:31:24:31:32 | lxml etree xml |
| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml |
| xsltInjection.py:27:27:27:35 | etree.XML string | xsltInjection.py:27:17:27:36 | lxml etree xml |
| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string |
| xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:35:17:35:43 | etree.XML string |
| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string |
| xsltInjection.py:35:17:35:43 | etree.XML string | xsltInjection.py:36:34:36:42 | etree.XML string |
| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml |
| xsltInjection.py:36:17:36:43 | lxml etree xml | xsltInjection.py:40:24:40:32 | lxml etree xml |
| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml |
| xsltInjection.py:36:34:36:42 | etree.XML string | xsltInjection.py:36:17:36:43 | lxml etree xml |
#select
| xslt.py:14:29:14:37 | xslt_root | xslt.py:10:17:10:28 | dict of etree.XML string | xslt.py:14:29:14:37 | lxml etree xml | This XSLT query depends on $@. | xslt.py:10:17:10:28 | Attribute | a user-provided value |
| xsltInjection.py:12:28:12:36 | xslt_root | xsltInjection.py:10:17:10:28 | dict of etree.XML string | xsltInjection.py:12:28:12:36 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:10:17:10:28 | Attribute | a user-provided value |
| xsltInjection.py:21:29:21:37 | xslt_root | xsltInjection.py:17:17:17:28 | dict of etree.XML string | xsltInjection.py:21:29:21:37 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:17:17:17:28 | Attribute | a user-provided value |
| xsltInjection.py:31:24:31:32 | xslt_root | xsltInjection.py:26:17:26:28 | dict of etree.XML string | xsltInjection.py:31:24:31:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:26:17:26:28 | Attribute | a user-provided value |
| xsltInjection.py:40:24:40:32 | xslt_root | xsltInjection.py:35:17:35:28 | dict of etree.XML string | xsltInjection.py:40:24:40:32 | lxml etree xml | This XSLT query depends on $@. | xsltInjection.py:35:17:35:28 | Attribute | a user-provided value |
1 change: 1 addition & 0 deletions python/ql/test/experimental/CWE-074/Xslt.qlref
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
experimental/CWE-643/Xslt.ql
12 changes: 12 additions & 0 deletions python/ql/test/experimental/CWE-074/XsltSinks.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
| xslt.py:14:29:14:37 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:12:28:12:36 | lxml.etree.XSLT | lxml etree xml |
| xsltInjection.py:21:29:21:37 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:31:24:31:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:40:24:40:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:50:24:50:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:60:24:60:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:69:24:69:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltInjection.py:79:24:79:32 | lxml.etree.parse.xslt | lxml etree xml |
| xsltSinks.py:17:28:17:36 | lxml.etree.XSLT | lxml etree xml |
| xsltSinks.py:30:29:30:37 | lxml.etree.parse.xslt | lxml etree xml |
| xsltSinks.py:44:24:44:32 | lxml.etree.parse.xslt | lxml etree xml |
6 changes: 6 additions & 0 deletions python/ql/test/experimental/CWE-074/XsltSinks.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import python
import experimental.semmle.python.security.injection.XSLT

from XSLTInjection::XSLTInjectionSink sink, TaintKind kind
where sink.sinks(kind)
select sink, kind
1 change: 1 addition & 0 deletions python/ql/test/experimental/CWE-074/options
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
semmle-extractor-options: -p ../../query-tests/Security/lib/ --max-import-depth=3
14 changes: 14 additions & 0 deletions python/ql/test/experimental/CWE-074/xslt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from lxml import etree
from io import StringIO
from flask import Flask, request

app = Flask(__name__)


@app.route("/xslt")
def bad():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root) # Not OK
79 changes: 79 additions & 0 deletions python/ql/test/experimental/CWE-074/xsltInjection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from lxml import etree
from io import StringIO
from flask import Flask, request

app = Flask(__name__)


@app.route("/xslt1")
def a():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
transform = etree.XSLT(xslt_root) # Not OK


@app.route("/xslt2")
def b():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)
f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result_tree = tree.xslt(xslt_root) # Not OK


@app.route("/xslt3")
def c():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.XML(xsltQuery)

f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # Not OK

@app.route("/xslt4")
def d():
xsltQuery = request.args.get('xml', '')
xslt_root = etree.fromstring(xsltQuery)

f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # Not OK

@app.route("/xslt5")
def e():
xsltQuery = request.args.get('xml', '')
xsltStrings = [xsltQuery,"asd","random"]
xslt_root = etree.fromstringlist(xsltStrings)

f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # Not OK


@app.route("/xslt6")
def f():
xsltQuery = '<non><remote><query></query></remote></non>'
xslt_root = etree.XML(xsltQuery)

f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # OK

@app.route("/xslt7")
def g():
xsltQuery = '<non><remote><query></query></remote></non>'
xslt_root = etree.fromstring(xsltQuery)

f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # OK

@app.route("/xslt8")
def h():
xsltQuery = '<non><remote><query></query></remote></non>'
xsltStrings = [xsltQuery,"asd","random"]
xslt_root = etree.fromstringlist(xsltStrings)

f = StringIO('<foo><bar></bar></foo>')
tree = etree.parse(f)
result = tree.xslt(xslt_root, a="'A'") # OK
Loading