summaryrefslogtreecommitdiffstats
path: root/vespajlib
diff options
context:
space:
mode:
author: Jon Bratseth <bratseth@gmail.com> 2022-08-26 09:58:10 +0200
committer: GitHub <noreply@github.com> 2022-08-26 09:58:10 +0200
commit: 40bb8680dbef01e603b8947a194c86e9acc14e30 (patch)
tree: 3badd5c97ff41449514805921e567c218661ab79 /vespajlib
parent: d227d62f0cef26ebdb30c0d5280a2462cd39767d (diff)
parent: ffab68b3f5c28034eaf3a606c1b220c14f7204fa (diff)
Merge pull request #23770 from vespa-engine/bratseth/embedder-syntax-5
Bratseth/embedder syntax 5
Diffstat (limited to 'vespajlib')
-rw-r--r-- vespajlib/abi-spec.json 1
-rw-r--r-- vespajlib/src/main/java/com/yahoo/text/XML.java 504
2 files changed, 219 insertions, 286 deletions
diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json
index fbf1203acdf..8714285acd8 100644
--- a/vespajlib/abi-spec.json
+++ b/vespajlib/abi-spec.json
@@ -3362,6 +3362,7 @@
"public static javax.xml.parsers.DocumentBuilder getDocumentBuilder(java.lang.String, java.lang.ClassLoader, boolean)",
"public static java.util.List getChildren(org.w3c.dom.Element)",
"public static java.util.List getChildren(org.w3c.dom.Element, java.lang.String)",
+ "public static java.util.Optional attribute(java.lang.String, org.w3c.dom.Element)",
"public static java.lang.String getValue(org.w3c.dom.Element)",
"public static org.w3c.dom.Element getChild(org.w3c.dom.Element, java.lang.String)",
"public static java.lang.String getNodePath(org.w3c.dom.Node, java.lang.String)",
diff --git a/vespajlib/src/main/java/com/yahoo/text/XML.java b/vespajlib/src/main/java/com/yahoo/text/XML.java
index 6aa42773ac0..255e6a67429 100644
--- a/vespajlib/src/main/java/com/yahoo/text/XML.java
+++ b/vespajlib/src/main/java/com/yahoo/text/XML.java
@@ -18,6 +18,7 @@ import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
+import java.util.Optional;
/**
* Static XML utility methods
@@ -29,280 +30,39 @@ import java.util.List;
*/
public class XML {
- /**
- * The point of this weird class and the jumble of abstract methods is
- * linking the scan for characters that must be quoted into the quoting
- * table, and making it actual work to make them go out of sync again.
- */
- private static abstract class LegalCharacters {
-
- // To quote http://www.w3.org/TR/REC-xml/ :
- // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
- // [#x10000-#x10FFFF]
- final boolean isLegal(int codepoint, boolean escapeLow, int stripCodePoint, boolean isAttribute) {
- if (codepoint == stripCodePoint) {
- return removeCodePoint();
- } else if (codepoint < ' ') {
- if (!escapeLow) {
- return true;
- }
- switch (codepoint) {
- case 0x09:
- case 0x0a:
- case 0x0d:
- return true;
- default:
- return ctrlEscapeCodePoint(codepoint);
- }
- } else if (codepoint >= 0x20 && codepoint <= 0xd7ff) {
- switch (codepoint) {
- case '&':
- return ampCodePoint();
- case '<':
- return ltCodePoint();
- case '>':
- return gtCodePoint();
- case '"':
- return quotCodePoint(isAttribute);
- default:
- return true;
- }
- } else if ((codepoint >= 0xe000 && codepoint <= 0xfffd)
- || (codepoint >= 0x10000 && codepoint <= 0x10ffff)) {
- return true;
- } else {
- return filterCodePoint(codepoint);
-
- }
- }
-
- private boolean quotCodePoint(boolean isAttribute) {
- if (isAttribute) {
- quoteQuot();
- return false;
- } else {
- return true;
- }
- }
-
- private boolean filterCodePoint(int codepoint) {
- replace(codepoint);
- return false;
- }
-
- private boolean gtCodePoint() {
- quoteGt();
- return false;
- }
-
- private boolean ltCodePoint() {
- quoteLt();
- return false;
- }
-
- private boolean ampCodePoint() {
- quoteAmp();
- return false;
- }
-
- private boolean ctrlEscapeCodePoint(int codepoint) {
- ctrlEscape(codepoint);
- return false;
- }
-
- private boolean removeCodePoint() {
- remove();
- return false;
- }
-
- protected abstract void quoteQuot();
-
- protected abstract void quoteGt();
-
- protected abstract void quoteLt();
-
- protected abstract void quoteAmp();
-
- protected abstract void remove();
-
- protected abstract void ctrlEscape(int codepoint);
-
- protected abstract void replace(int codepoint);
- }
-
- private static final class Quote extends LegalCharacters {
-
- char[] lastQuoted;
- private static final char[] EMPTY = new char[0];
- private static final char[] REPLACEMENT_CHARACTER = "\ufffd".toCharArray();
- private static final char[] AMP = "&amp;".toCharArray();
- private static final char[] LT = "&lt;".toCharArray();
- private static final char[] GT = "&gt;".toCharArray();
- private static final char[] QUOT = "&quot;".toCharArray();
-
- @Override
- protected void remove() {
- lastQuoted = EMPTY;
- }
-
- @Override
- protected void replace(final int codepoint) {
- lastQuoted = REPLACEMENT_CHARACTER;
- }
-
- @Override
- protected void quoteQuot() {
- lastQuoted = QUOT;
- }
-
- @Override
- protected void quoteGt() {
- lastQuoted = GT;
- }
-
- @Override
- protected void quoteLt() {
- lastQuoted = LT;
- }
-
- @Override
- protected void quoteAmp() {
- lastQuoted = AMP;
- }
-
- @Override
- protected void ctrlEscape(final int codepoint) {
- lastQuoted = REPLACEMENT_CHARACTER;
- }
- }
-
- private static final class Scan extends LegalCharacters {
-
- @Override
- protected void quoteQuot() {
- }
-
- @Override
- protected void quoteGt() {
- }
-
- @Override
- protected void quoteLt() {
- }
-
- @Override
- protected void quoteAmp() {
- }
-
- @Override
- protected void remove() {
- }
-
- @Override
- protected void ctrlEscape(final int codepoint) {
- }
-
- @Override
- protected void replace(final int codepoint) {
- }
- }
-
private static final Scan scanner = new Scan();
- /**
- * Replaces the characters that need to be escaped with their corresponding
- * character entities.
- *
- * @param s1
- * String possibly containing characters that need to be escaped
- * in XML
- *
- * @return Returns the input string with special characters that need to be
- * escaped replaced by character entities.
- */
- public static String xmlEscape(String s1) {
- return xmlEscape(s1, true, true, null, -1);
+ /** Replaces the characters that need to be escaped with their corresponding character entities. */
+ public static String xmlEscape(String string) {
+ return xmlEscape(string, true, true, null, -1);
}
- /**
- * Replaces the characters that need to be escaped with their corresponding
- * character entities.
- *
- * @param s1
- * String possibly containing characters that need to be escaped
- * in XML
- * @param isAttribute
- * Is the input string to be used as an attribute?
- *
- * @return Returns the input string with special characters that need to be
- * escaped replaced by character entities
- */
- public static String xmlEscape(String s1, boolean isAttribute) {
- return xmlEscape(s1, isAttribute, true, null, -1);
+ /** Replaces the characters that need to be escaped with their corresponding character entities. */
+ public static String xmlEscape(String string, boolean isAttribute) {
+ return xmlEscape(string, isAttribute, true, null, -1);
}
- /**
- * Replaces the characters that need to be escaped with their corresponding
- * character entities.
- *
- * @param s1
- * String possibly containing characters that need to be escaped
- * in XML
- * @param isAttribute
- * Is the input string to be used as an attribute?
- *
- *
- * @param stripCharacter
- * any occurrence of this character is removed from the string
- *
- * @return Returns the input string with special characters that need to be
- * escaped replaced by character entities
- */
- public static String xmlEscape(String s1, boolean isAttribute, char stripCharacter) {
- return xmlEscape(s1, isAttribute, true, null, (int) stripCharacter);
+ /** Replaces the characters that need to be escaped with their corresponding character entities. */
+ public static String xmlEscape(String string, boolean isAttribute, char stripCharacter) {
+ return xmlEscape(string, isAttribute, true, null, (int) stripCharacter);
}
- /**
- * Replaces the characters that need to be escaped with their corresponding
- * character entities.
- *
- * @param s1
- * String possibly containing characters that need to be escaped
- * in XML
- * @param isAttribute
- * Is the input string to be used as an attribute?
- *
- * @param escapeLowAscii
- * Should ascii characters below 32 be escaped as well
- *
- * @return Returns the input string with special characters that need to be
- * escaped replaced by character entities
- */
- public static String xmlEscape(String s1, boolean isAttribute, boolean escapeLowAscii) {
- return xmlEscape(s1, isAttribute, escapeLowAscii, null, -1);
+ /** Replaces the characters that need to be escaped with their corresponding character entities. */
+ public static String xmlEscape(String string, boolean isAttribute, boolean escapeLowAscii) {
+ return xmlEscape(string, isAttribute, escapeLowAscii, null, -1);
}
/**
- * Replaces the characters that need to be escaped with their corresponding
- * character entities.
- *
- * @param s1
- * String possibly containing characters that need to be escaped
- * in XML
- * @param isAttribute
- * Is the input string to be used as an attribute?
- *
- * @param escapeLowAscii
- * Should ascii characters below 32 be escaped as well
+ * Replaces the characters that need to be escaped with their corresponding character entities.
*
- * @param stripCharacter
- * any occurrence of this character is removed from the string
- *
- * @return Returns the input string with special characters that need to be
- * escaped replaced by character entities
+ * @param string the string possibly containing characters that need to be escaped in XML
+ * @param isAttribute whether the input string is to be used as an attribute
+ * @param escapeLowAscii whether ascii characters below 32 should be escaped as well
+ * @param stripCharacter any occurrence of this character is removed from the string
+ * @return the input string with special characters that need to be escaped replaced by character entities
*/
- public static String xmlEscape(String s1, boolean isAttribute, boolean escapeLowAscii, char stripCharacter) {
- return xmlEscape(s1, isAttribute, escapeLowAscii, null, (int) stripCharacter);
+ public static String xmlEscape(String string, boolean isAttribute, boolean escapeLowAscii, char stripCharacter) {
+ return xmlEscape(string, isAttribute, escapeLowAscii, null, (int) stripCharacter);
}
/**
@@ -315,7 +75,6 @@ public class XML {
* <li>double quotes (&quot;) if isAttribute is <code>true</code>
* </ul>
* with character entities.
- *
*/
public static String xmlEscape(String string, boolean isAttribute, StringBuilder buffer) {
return xmlEscape(string, isAttribute, true, buffer, -1);
@@ -332,7 +91,6 @@ public class XML {
* <li>double quotes (&quot;) if isAttribute is <code>true</code>
* </ul>
* with character entities.
- *
*/
public static String xmlEscape(String string, boolean isAttribute, boolean escapeLowAscii, StringBuilder buffer) {
return xmlEscape(string, isAttribute, escapeLowAscii, buffer, -1);
@@ -438,15 +196,13 @@ public class XML {
}
}
- /**
- * Returns the Document of the string XML payload
- */
+ /** Returns the Document of the string XML payload. */
public static Document getDocument(String xmlString) {
return getDocument(new StringReader(xmlString));
}
/**
- * Creates a new XML DocumentBuilder
+ * Creates a new XML DocumentBuilder.
*
* @return a DocumentBuilder
* @throws RuntimeException if we fail to create one
@@ -456,7 +212,7 @@ public class XML {
}
/**
- * Creates a new XML DocumentBuilder
+ * Creates a new XML DocumentBuilder.
*
* @param implementation which jaxp implementation should be used
* @param classLoader which class loader should be used when getting a new DocumentBuilder
@@ -468,7 +224,7 @@ public class XML {
}
/**
- * Creates a new XML DocumentBuilder
+ * Creates a new XML DocumentBuilder.
*
* @return a DocumentBuilder
* @throws RuntimeException if we fail to create one
@@ -479,7 +235,7 @@ public class XML {
}
/**
- * Creates a new XML DocumentBuilder
+ * Creates a new XML DocumentBuilder.
*
* @param implementation which jaxp implementation should be used
* @param classLoader which class loader should be used when getting a new DocumentBuilder
@@ -508,7 +264,7 @@ public class XML {
}
/**
- * Returns the child Element objects from a w3c dom spec
+ * Returns the child Element objects from a w3c dom spec.
*
* @return List of elements. Empty list (never null) if none found or if the given element is null
*/
@@ -554,18 +310,21 @@ public class XML {
return ret;
}
+ /** Returns the value of the attribute with the given name on element, or empty if the element does not have it. */
+ public static Optional<String> attribute(String name, Element element) {
+ if ( ! element.hasAttribute(name)) return Optional.empty();
+ return Optional.of(element.getAttribute(name));
+ }
+
/**
* Gets the string contents of the given Element. Returns "" (never null) if
* the element is null, has no child nodes, or its first child has no node value
*/
public static String getValue(Element e) {
- if (e == null) {
- return "";
- }
+ if (e == null) return "";
Node child = e.getFirstChild();
- if (child == null) {
- return "";
- }
+ if (child == null) return "";
+ if (child.getNodeValue() == null) return "";
return child.getNodeValue();
}
@@ -575,14 +334,11 @@ public class XML {
}
/**
- * Returns the path to the given xml node, where each node name is separated
- * by the given separator string.
+ * Returns the path to the given xml node, where each node name is separated by the given separator string.
*
- * @param n
- * The xml node to find path to
- * @param sep
- * The separator string
- * @return The path to the xml node as a String
+ * @param n the xml node to find path to
+ * @param sep the separator string
+ * @return the path to the xml node as a String
*/
public static String getNodePath(Node n, String sep) {
if (n == null) {
@@ -657,8 +413,7 @@ public class XML {
* 1.1 (Second Edition)</a>. This does not check against reserved names, it
* only checks the set of characters used.
*
- * @param possibleName
- * a possibly valid XML name
+ * @param possibleName a possibly valid XML name
* @return true if the name may be used as an XML tag or attribute name
*/
public static boolean isName(CharSequence possibleName) {
@@ -683,4 +438,181 @@ public class XML {
return valid;
}
+ /**
+ * The point of this weird class and the jumble of abstract methods is
+ * linking the scan for characters that must be quoted into the quoting
+ * table, and making it actual work to make them go out of sync again.
+ */
+ private static abstract class LegalCharacters {
+ // To quote http://www.w3.org/TR/REC-xml/ :
+ // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
+ // [#x10000-#x10FFFF]
+ final boolean isLegal(int codepoint, boolean escapeLow, int stripCodePoint, boolean isAttribute) {
+ if (codepoint == stripCodePoint) {
+ return removeCodePoint();
+ } else if (codepoint < ' ') {
+ if (!escapeLow) {
+ return true;
+ }
+ switch (codepoint) {
+ case 0x09:
+ case 0x0a:
+ case 0x0d:
+ return true;
+ default:
+ return ctrlEscapeCodePoint(codepoint);
+ }
+ } else if (codepoint >= 0x20 && codepoint <= 0xd7ff) {
+ switch (codepoint) {
+ case '&':
+ return ampCodePoint();
+ case '<':
+ return ltCodePoint();
+ case '>':
+ return gtCodePoint();
+ case '"':
+ return quotCodePoint(isAttribute);
+ default:
+ return true;
+ }
+ } else if ((codepoint >= 0xe000 && codepoint <= 0xfffd)
+ || (codepoint >= 0x10000 && codepoint <= 0x10ffff)) {
+ return true;
+ } else {
+ return filterCodePoint(codepoint);
+
+ }
+ }
+
+ private boolean quotCodePoint(boolean isAttribute) {
+ if (isAttribute) {
+ quoteQuot();
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ private boolean filterCodePoint(int codepoint) {
+ replace(codepoint);
+ return false;
+ }
+
+ private boolean gtCodePoint() {
+ quoteGt();
+ return false;
+ }
+
+ private boolean ltCodePoint() {
+ quoteLt();
+ return false;
+ }
+
+ private boolean ampCodePoint() {
+ quoteAmp();
+ return false;
+ }
+
+ private boolean ctrlEscapeCodePoint(int codepoint) {
+ ctrlEscape(codepoint);
+ return false;
+ }
+
+ private boolean removeCodePoint() {
+ remove();
+ return false;
+ }
+
+ protected abstract void quoteQuot();
+
+ protected abstract void quoteGt();
+
+ protected abstract void quoteLt();
+
+ protected abstract void quoteAmp();
+
+ protected abstract void remove();
+
+ protected abstract void ctrlEscape(int codepoint);
+
+ protected abstract void replace(int codepoint);
+ }
+
+ private static final class Quote extends LegalCharacters {
+
+ char[] lastQuoted;
+ private static final char[] EMPTY = new char[0];
+ private static final char[] REPLACEMENT_CHARACTER = "\ufffd".toCharArray();
+ private static final char[] AMP = "&amp;".toCharArray();
+ private static final char[] LT = "&lt;".toCharArray();
+ private static final char[] GT = "&gt;".toCharArray();
+ private static final char[] QUOT = "&quot;".toCharArray();
+
+ @Override
+ protected void remove() {
+ lastQuoted = EMPTY;
+ }
+
+ @Override
+ protected void replace(final int codepoint) {
+ lastQuoted = REPLACEMENT_CHARACTER;
+ }
+
+ @Override
+ protected void quoteQuot() {
+ lastQuoted = QUOT;
+ }
+
+ @Override
+ protected void quoteGt() {
+ lastQuoted = GT;
+ }
+
+ @Override
+ protected void quoteLt() {
+ lastQuoted = LT;
+ }
+
+ @Override
+ protected void quoteAmp() {
+ lastQuoted = AMP;
+ }
+
+ @Override
+ protected void ctrlEscape(final int codepoint) {
+ lastQuoted = REPLACEMENT_CHARACTER;
+ }
+ }
+
+ private static final class Scan extends LegalCharacters {
+
+ @Override
+ protected void quoteQuot() {
+ }
+
+ @Override
+ protected void quoteGt() {
+ }
+
+ @Override
+ protected void quoteLt() {
+ }
+
+ @Override
+ protected void quoteAmp() {
+ }
+
+ @Override
+ protected void remove() {
+ }
+
+ @Override
+ protected void ctrlEscape(final int codepoint) {
+ }
+
+ @Override
+ protected void replace(final int codepoint) {
+ }
+ }
+
}