// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.config.model.builder.xml; import com.yahoo.component.ComponentId; import com.yahoo.component.ComponentSpecification; import com.yahoo.text.XML; import com.yahoo.yolean.Exceptions; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Optional; import java.util.logging.Level; import java.util.logging.Logger; /** * Static methods for helping dom building * * @author bratseth */ public final class XmlHelper { private static final Logger log = Logger.getLogger(XmlHelper.class.getPackage().toString()); private static final String idReference = "idref"; // Access to this needs to be synchronized (as it is in getDocumentBuilder() below) public static final DocumentBuilderFactory factory = createDocumentBuilderFactory(); private XmlHelper() {} public static String nullIfEmpty(String attribute) { if (attribute.isEmpty()) return null; else return attribute; } /** * For searchers inside search chains, the id may be both a reference and an id at once, or just a reference. * In other cases, it is clear which one it is from context, so I think the difference is not worth bothering users * with, unless they are XML purists in which case they will have the option of writing this correctly. * - Jon */ public static String getIdString(Element element) { String idString = element.getAttribute("id"); if (idString == null || idString.trim().equals("")) idString = element.getAttribute(idReference); if (idString == null || idString.trim().equals("")) idString = element.getAttribute("ident"); return idString; } public static ComponentId getId(Element element) { return new ComponentId(getIdString(element)); } public static ComponentSpecification getIdRef(Element element) { return new ComponentSpecification(getIdString(element)); } public static Document getDocument(Reader reader) { return getDocument(reader, "unknown source"); } public static Document getDocument(Reader reader, String source) { Document doc; try { InputSource inputSource = new InputSource(reader); inputSource.setPublicId(source); doc = getDocumentBuilder().parse(inputSource); } catch (SAXException | IOException e) { throw new IllegalArgumentException(e); } return doc; } public static List splitAndDiscardEmpty(String field, String regex) { List ret = new ArrayList<>(); for (String t : field.split(regex)) { if (!t.isEmpty()) { ret.add(t); } } return ret; } public static List spaceSeparatedSymbols(String field) { return splitAndDiscardEmpty(field, " "); } public static Collection spaceSeparatedSymbolsFromAttribute(Element spec, String name) { return spaceSeparatedSymbols(spec.getAttribute(name)); } public static Collection valuesFromElements(Element parent, String elementName) { List symbols = new ArrayList<>(); for (Element symbol : XML.getChildren(parent, elementName)) { symbols.add(XML.getValue(symbol).trim()); } return symbols; } public static boolean isReference(Element element) { return element.hasAttribute(idReference); } /** * Creates a new XML document builder. * * @return A new DocumentBuilder instance, or null if we fail to get one. */ public static synchronized DocumentBuilder getDocumentBuilder() { try { DocumentBuilder docBuilder = factory.newDocumentBuilder(); docBuilder.setErrorHandler(new CustomErrorHandler(log)); log.log(Level.FINE, "XML parser now operational!"); return docBuilder; } catch (ParserConfigurationException e) { log.log(Level.WARNING, "No XML parser available - " + e); return null; } } public static Optional getOptionalAttribute(Element element, String name) { return Optional.ofNullable(element.getAttribute(name)).filter(s -> !s.isEmpty()); } public static Optional getOptionalChild(Element parent, String childName) { return Optional.ofNullable(XML.getChild(parent, childName)); } public static Optional getOptionalChildValue(Element parent, String childName) { Element child = XML.getChild(parent, childName); if (child == null) return Optional.empty(); if (child.getFirstChild() == null) return Optional.empty(); return Optional.ofNullable(child.getFirstChild().getNodeValue()); } /** Error handler which will output name of source for warnings and errors */ private static class CustomErrorHandler implements ErrorHandler { private final Logger logger; CustomErrorHandler(Logger logger) { super(); this.logger = logger; } public void warning(SAXParseException e) { logger.log(Level.WARNING, message(e)); } public void error(SAXParseException e) { throw new IllegalArgumentException(message(e)); } public void fatalError(SAXParseException e) { throw new IllegalArgumentException(message(e)); } private String message(SAXParseException e) { String sourceId = e.getPublicId() == null ? "" : e.getPublicId(); return "Invalid XML" + (sourceId.isEmpty() ? " (unknown source)" : " in " + sourceId) + ": " + Exceptions.toMessageString(e) + " [" + e.getLineNumber() + ":" + e.getColumnNumber() + "]"; } } private static DocumentBuilderFactory createDocumentBuilderFactory() { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setNamespaceAware(true); factory.setXIncludeAware(false); try { // XXE prevention factory.setFeature("http://xml.org/sax/features/external-general-entities", false); factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); return factory; } catch (ParserConfigurationException e) { log.log(Level.SEVERE, "Could not initialize XML parser", e); throw new RuntimeException(e); } } }