Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-24542] [SQL] UDF series UDFXPathXXXX allow users to pass carefully crafted XML to access arbitrary files #21549

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import java.io.Reader;

import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
Expand All @@ -37,9 +40,15 @@
* This is based on Hive's UDFXPathUtil implementation.
*/
public class UDFXPathUtil {
// Common URI prefix that is prepended to the SAX feature names below.
public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
// SAX feature name (without prefix) for external general entities;
// initializeDocumentBuilderFactory() sets it to false.
public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
// SAX feature name (without prefix) for external parameter entities;
// initializeDocumentBuilderFactory() sets it to false.
public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";
// Factory from which the DocumentBuilder is obtained.
private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
// Created on first use in eval() (while still null) and kept for later calls.
private DocumentBuilder builder = null;
// NOTE(review): presumably used to compile the XPath expression; the compiling code is not visible here.
private XPath xpath = XPathFactory.newInstance().newXPath();
// Reader that eval() re-points at the current XML string via reader.set(xml).
private ReusableStringReader reader = new ReusableStringReader();
// Input source wrapping the reusable reader above; must be declared after it.
private InputSource inputSource = new InputSource(reader);

private XPathExpression expression = null;
private String oldPath = null;

Expand All @@ -65,14 +74,30 @@ public Object eval(String xml, String path, QName qname) throws XPathExpressionE
return null;
}

if (builder == null){
try {
initializeDocumentBuilderFactory();
builder = dbf.newDocumentBuilder();
} catch (ParserConfigurationException e) {
throw new RuntimeException("Error instantiating DocumentBuilder, cannot build xml parser", e);
}
}

reader.set(xml);
try {
return expression.evaluate(inputSource, qname);
return expression.evaluate(builder.parse(inputSource), qname);
} catch (XPathExpressionException e) {
throw new RuntimeException("Invalid XML document: " + e.getMessage() + "\n" + xml, e);
} catch (Exception e) {
throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
}
}

/**
 * Turns off the external general and external parameter entity features on the
 * document builder factory, so that a builder created from it afterwards is
 * configured with both features set to {@code false}.
 *
 * @throws ParserConfigurationException if the factory rejects one of the feature settings
 */
private void initializeDocumentBuilderFactory() throws ParserConfigurationException {
  // Same order as before: general entities first, then parameter entities.
  final String[] disabledFeatures = {
    EXTERNAL_GENERAL_ENTITIES_FEATURE,
    EXTERNAL_PARAMETER_ENTITIES_FEATURE
  };
  for (String featureName : disabledFeatures) {
    dbf.setFeature(SAX_FEATURE_PREFIX + featureName, false);
  }
}

/**
 * Evaluates {@code path} against {@code xml} and returns the result as a Boolean.
 * Delegates to {@code eval} with {@link XPathConstants#BOOLEAN} as the result type.
 *
 * @param xml the XML document text
 * @param path the XPath expression
 * @return the result of {@code eval} cast to {@code Boolean}; {@code null} if {@code eval} returns null
 * @throws XPathExpressionException declared because {@code eval} declares it
 */
public Boolean evalBoolean(String xml, String path) throws XPathExpressionException {
  final Object result = eval(xml, path, XPathConstants.BOOLEAN);
  return (Boolean) result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,23 @@ class UDFXPathUtilSuite extends SparkFunSuite {
assert(ret == "foo")
}

test("embedFailure") {
import org.apache.commons.io.FileUtils
import java.io.File
val secretValue = String.valueOf(Math.random)
val tempFile = File.createTempFile("verifyembed", ".tmp")
tempFile.deleteOnExit()
val fname = tempFile.getAbsolutePath

FileUtils.writeStringToFile(tempFile, secretValue)

val xml =
"<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + "<!DOCTYPE test [ \n" +
" <!ENTITY embed SYSTEM \"" + fname + "\"> \n" + "]>\n" + "<foo>&embed;</foo>"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we use a multiline string to make this easier to read?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

XML has a unique syntax, which sometimes makes it a little hard to get this to work.

val evaled = new UDFXPathUtil().evalString(xml, "/foo")
assert(evaled.isEmpty)
}

test("number eval") {
var ret =
util.evalNumber("<a><b>true</b><b>false</b><b>b3</b><c>c1</c><c>-77</c></a>", "a/c[2]")
Expand Down