diff options
author | Jon Bratseth <bratseth@oath.com> | 2018-10-14 20:39:29 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@oath.com> | 2018-10-14 20:39:29 +0200 |
commit | 9346bb603c94a76fae04c9ea873f53cc00cb55cf (patch) | |
tree | 28d2219032b7ab133e11669a7f58e8807d21d2b4 /vespajlib | |
parent | c0386da43b7104a9ccfea9d429561613f05f406e (diff) |
Add XML parse utility
Diffstat (limited to 'vespajlib')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/XML.java | 21 |
1 files changed, 21 insertions, 0 deletions
```diff
diff --git a/vespajlib/src/main/java/com/yahoo/text/XML.java b/vespajlib/src/main/java/com/yahoo/text/XML.java
index f4cd355b0e1..3203c74cbc6 100644
--- a/vespajlib/src/main/java/com/yahoo/text/XML.java
+++ b/vespajlib/src/main/java/com/yahoo/text/XML.java
@@ -1,6 +1,7 @@
 // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
 package com.yahoo.text;
 
+import java.io.File;
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
@@ -210,6 +211,26 @@ public class XML {
     private static final Scan scanner = new Scan();
 
     /**
+     * Parses an XML file without allowing external DTD's
+     *
+     * @throws IllegalArgumentException if parsing fails
+     */
+    public static Document parse(File xmlFile) {
+        try {
+            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
+            // Prevent XXE
+            dbFactory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+            return dbFactory.newDocumentBuilder().parse(xmlFile);
+        }
+        catch (ParserConfigurationException e) {
+            throw new IllegalStateException("Could not disallow-doctype-decl", e);
+        }
+        catch (IOException | SAXException e) {
+            throw new IllegalArgumentException("Cannot parse '" + xmlFile + "'", e);
+        }
+    }
+
+    /**
      * Replaces the characters that need to be escaped with their corresponding
      * character entities.
      *
```