Skip to content

Commit

Permalink
[SPARK-24542][SQL] UDF series UDFXPathXXXX allow users to pass carefu…
Browse files Browse the repository at this point in the history
…lly crafted XML to access arbitrary files

## What changes were proposed in this pull request?

UDF series UDFXPathXXXX allow users to pass carefully crafted XML to access arbitrary files. Spark does not have built-in access control. When users use the external access control library, users might bypass them and access the file contents.

This PR basically patches the Hive fix to Apache Spark. https://issues.apache.org/jira/browse/HIVE-18879

## How was this patch tested?

A unit test case

Author: Xiao Li <[email protected]>

Closes apache#21549 from gatorsmile/xpathSecurity.
  • Loading branch information
gatorsmile authored and cloud-fan committed Jun 19, 2018
1 parent 1737d45 commit 9a75c18
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import java.io.Reader;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
Expand All @@ -37,9 +40,15 @@
* This is based on Hive's UDFXPathUtil implementation.
*/
public class UDFXPathUtil {
public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";
private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
private DocumentBuilder builder = null;
private XPath xpath = XPathFactory.newInstance().newXPath();
private ReusableStringReader reader = new ReusableStringReader();
private InputSource inputSource = new InputSource(reader);

private XPathExpression expression = null;
private String oldPath = null;

Expand All @@ -65,14 +74,31 @@ public Object eval(String xml, String path, QName qname) throws XPathExpressionE
return null;
}

if (builder == null){
try {
initializeDocumentBuilderFactory();
builder = dbf.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new RuntimeException(
"Error instantiating DocumentBuilder, cannot build xml parser", e);
}
}

reader.set(xml);
try {
return expression.evaluate(inputSource, qname);
return expression.evaluate(builder.parse(inputSource), qname);
} catch (XPathExpressionException e) {
throw new RuntimeException("Invalid XML document: " + e.getMessage() + "\n" + xml, e);
} catch (Exception e) {
throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
}
}

private void initializeDocumentBuilderFactory() throws ParserConfigurationException {
dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false);
dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false);
}

public Boolean evalBoolean(String xml, String path) throws XPathExpressionException {
return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,27 @@ class UDFXPathUtilSuite extends SparkFunSuite {
assert(ret == "foo")
}

test("embedFailure") {
import org.apache.commons.io.FileUtils
import java.io.File
val secretValue = String.valueOf(Math.random)
val tempFile = File.createTempFile("verifyembed", ".tmp")
tempFile.deleteOnExit()
val fname = tempFile.getAbsolutePath

FileUtils.writeStringToFile(tempFile, secretValue)

val xml =
s"""<?xml version="1.0" encoding="utf-8"?>
|<!DOCTYPE test [
| <!ENTITY embed SYSTEM "$fname">
|]>
|<foo>&embed;</foo>
""".stripMargin
val evaled = new UDFXPathUtil().evalString(xml, "/foo")
assert(evaled.isEmpty)
}

test("number eval") {
var ret =
util.evalNumber("<a><b>true</b><b>false</b><b>b3</b><c>c1</c><c>-77</c></a>", "a/c[2]")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ class XPathExpressionSuite extends SparkFunSuite with ExpressionEvalHelper {

// Test error message for invalid XML document
val e1 = intercept[RuntimeException] { testExpr("<a>/a>", "a", null.asInstanceOf[T]) }
assert(e1.getCause.getMessage.contains("Invalid XML document") &&
e1.getCause.getMessage.contains("<a>/a>"))
assert(e1.getCause.getCause.getMessage.contains(
"XML document structures must start and end within the same entity."))
assert(e1.getMessage.contains("<a>/a>"))

// Test error message for invalid xpath
val e2 = intercept[RuntimeException] { testExpr("<a></a>", "!#$", null.asInstanceOf[T]) }
Expand Down

0 comments on commit 9a75c18

Please sign in to comment.