From 6e0094d2b126f72295bcecb854d3f5b56b7df02c Mon Sep 17 00:00:00 2001 From: Jon Bratseth Date: Wed, 24 Aug 2022 13:59:39 +0200 Subject: Cleanup --- vespajlib/src/main/java/com/yahoo/text/XML.java | 495 ++++++++++-------------- 1 file changed, 215 insertions(+), 280 deletions(-) (limited to 'vespajlib/src/main/java') diff --git a/vespajlib/src/main/java/com/yahoo/text/XML.java b/vespajlib/src/main/java/com/yahoo/text/XML.java index ec2791eefe8..255e6a67429 100644 --- a/vespajlib/src/main/java/com/yahoo/text/XML.java +++ b/vespajlib/src/main/java/com/yahoo/text/XML.java @@ -18,6 +18,7 @@ import java.io.Reader; import java.io.StringReader; import java.util.ArrayList; import java.util.List; +import java.util.Optional; /** * Static XML utility methods @@ -29,280 +30,39 @@ import java.util.List; */ public class XML { - /** - * The point of this weird class and the jumble of abstract methods is - * linking the scan for characters that must be quoted into the quoting - * table, and making it actual work to make them go out of sync again. - */ - private static abstract class LegalCharacters { - - // To quote http://www.w3.org/TR/REC-xml/ : - // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | - // [#x10000-#x10FFFF] - final boolean isLegal(int codepoint, boolean escapeLow, int stripCodePoint, boolean isAttribute) { - if (codepoint == stripCodePoint) { - return removeCodePoint(); - } else if (codepoint < ' ') { - if (!escapeLow) { - return true; - } - switch (codepoint) { - case 0x09: - case 0x0a: - case 0x0d: - return true; - default: - return ctrlEscapeCodePoint(codepoint); - } - } else if (codepoint >= 0x20 && codepoint <= 0xd7ff) { - switch (codepoint) { - case '&': - return ampCodePoint(); - case '<': - return ltCodePoint(); - case '>': - return gtCodePoint(); - case '"': - return quotCodePoint(isAttribute); - default: - return true; - } - } else if ((codepoint >= 0xe000 && codepoint <= 0xfffd) - || (codepoint >= 0x10000 && codepoint <= 0x10ffff)) { - return true; - } else { - return filterCodePoint(codepoint); - - } - } - - private boolean quotCodePoint(boolean isAttribute) { - if (isAttribute) { - quoteQuot(); - return false; - } else { - return true; - } - } - - private boolean filterCodePoint(int codepoint) { - replace(codepoint); - return false; - } - - private boolean gtCodePoint() { - quoteGt(); - return false; - } - - private boolean ltCodePoint() { - quoteLt(); - return false; - } - - private boolean ampCodePoint() { - quoteAmp(); - return false; - } - - private boolean ctrlEscapeCodePoint(int codepoint) { - ctrlEscape(codepoint); - return false; - } - - private boolean removeCodePoint() { - remove(); - return false; - } - - protected abstract void quoteQuot(); - - protected abstract void quoteGt(); - - protected abstract void quoteLt(); - - protected abstract void quoteAmp(); - - protected abstract void remove(); - - protected abstract void ctrlEscape(int codepoint); - - protected abstract void replace(int codepoint); - } - - private static final class Quote extends LegalCharacters { - - char[] lastQuoted; - private static final char[] EMPTY = new char[0]; - private static final char[] REPLACEMENT_CHARACTER = "\ufffd".toCharArray(); - private static final char[] AMP = "&".toCharArray(); - private static final char[] LT = "<".toCharArray(); - private static final char[] GT = ">".toCharArray(); - private static final char[] QUOT = """.toCharArray(); - - @Override - protected void remove() { - lastQuoted = EMPTY; - } - - @Override - protected void replace(final int codepoint) { - lastQuoted = REPLACEMENT_CHARACTER; - } - - @Override - protected void quoteQuot() { - lastQuoted = QUOT; - } - - @Override - protected void quoteGt() { - lastQuoted = GT; - } - - @Override - protected void quoteLt() { - lastQuoted = LT; - } - - @Override - protected void quoteAmp() { - lastQuoted = AMP; - } - - @Override - protected void ctrlEscape(final int codepoint) { - lastQuoted = REPLACEMENT_CHARACTER; - } - } - - private static final class Scan extends LegalCharacters { - - @Override - protected void quoteQuot() { - } - - @Override - protected void quoteGt() { - } - - @Override - protected void quoteLt() { - } - - @Override - protected void quoteAmp() { - } - - @Override - protected void remove() { - } - - @Override - protected void ctrlEscape(final int codepoint) { - } - - @Override - protected void replace(final int codepoint) { - } - } - private static final Scan scanner = new Scan(); - /** - * Replaces the characters that need to be escaped with their corresponding - * character entities. - * - * @param s1 - * String possibly containing characters that need to be escaped - * in XML - * - * @return Returns the input string with special characters that need to be - * escaped replaced by character entities. - */ - public static String xmlEscape(String s1) { - return xmlEscape(s1, true, true, null, -1); + /** Replaces the characters that need to be escaped with their corresponding character entities. */ + public static String xmlEscape(String string) { + return xmlEscape(string, true, true, null, -1); } - /** - * Replaces the characters that need to be escaped with their corresponding - * character entities. - * - * @param s1 - * String possibly containing characters that need to be escaped - * in XML - * @param isAttribute - * Is the input string to be used as an attribute? - * - * @return Returns the input string with special characters that need to be - * escaped replaced by character entities - */ - public static String xmlEscape(String s1, boolean isAttribute) { - return xmlEscape(s1, isAttribute, true, null, -1); + /** Replaces the characters that need to be escaped with their corresponding character entities. */ + public static String xmlEscape(String string, boolean isAttribute) { + return xmlEscape(string, isAttribute, true, null, -1); } - /** - * Replaces the characters that need to be escaped with their corresponding - * character entities. - * - * @param s1 - * String possibly containing characters that need to be escaped - * in XML - * @param isAttribute - * Is the input string to be used as an attribute? - * - * - * @param stripCharacter - * any occurrence of this character is removed from the string - * - * @return Returns the input string with special characters that need to be - * escaped replaced by character entities - */ - public static String xmlEscape(String s1, boolean isAttribute, char stripCharacter) { - return xmlEscape(s1, isAttribute, true, null, (int) stripCharacter); + /** Replaces the characters that need to be escaped with their corresponding character entities. */ + public static String xmlEscape(String string, boolean isAttribute, char stripCharacter) { + return xmlEscape(string, isAttribute, true, null, (int) stripCharacter); } - /** - * Replaces the characters that need to be escaped with their corresponding - * character entities. - * - * @param s1 - * String possibly containing characters that need to be escaped - * in XML - * @param isAttribute - * Is the input string to be used as an attribute? - * - * @param escapeLowAscii - * Should ascii characters below 32 be escaped as well - * - * @return Returns the input string with special characters that need to be - * escaped replaced by character entities - */ - public static String xmlEscape(String s1, boolean isAttribute, boolean escapeLowAscii) { - return xmlEscape(s1, isAttribute, escapeLowAscii, null, -1); + /** Replaces the characters that need to be escaped with their corresponding character entities. */ + public static String xmlEscape(String string, boolean isAttribute, boolean escapeLowAscii) { + return xmlEscape(string, isAttribute, escapeLowAscii, null, -1); } /** - * Replaces the characters that need to be escaped with their corresponding - * character entities. + * Replaces the characters that need to be escaped with their corresponding character entities. * - * @param s1 - * String possibly containing characters that need to be escaped - * in XML - * @param isAttribute - * Is the input string to be used as an attribute? - * - * @param escapeLowAscii - * Should ascii characters below 32 be escaped as well - * - * @param stripCharacter - * any occurrence of this character is removed from the string - * - * @return Returns the input string with special characters that need to be - * escaped replaced by character entities + * @param string the string possibly containing characters that need to be escaped in XML + * @param isAttribute whether the input string to be used as an attribute + * @param escapeLowAscii whether ascii characters below 32 should be escaped as well + * @param stripCharacter any occurrence of this character is removed from the string + * @return the input string with special characters that need to be escaped replaced by character entities */ - public static String xmlEscape(String s1, boolean isAttribute, boolean escapeLowAscii, char stripCharacter) { - return xmlEscape(s1, isAttribute, escapeLowAscii, null, (int) stripCharacter); + public static String xmlEscape(String string, boolean isAttribute, boolean escapeLowAscii, char stripCharacter) { + return xmlEscape(string, isAttribute, escapeLowAscii, null, (int) stripCharacter); } /** @@ -315,7 +75,6 @@ public class XML { *
  • double quotes (") if isAttribute is true * * with character entities. - * */ public static String xmlEscape(String string, boolean isAttribute, StringBuilder buffer) { return xmlEscape(string, isAttribute, true, buffer, -1); @@ -332,7 +91,6 @@ public class XML { *
  • double quotes (") if isAttribute is true * * with character entities. - * */ public static String xmlEscape(String string, boolean isAttribute, boolean escapeLowAscii, StringBuilder buffer) { return xmlEscape(string, isAttribute, escapeLowAscii, buffer, -1); @@ -438,15 +196,13 @@ public class XML { } } - /** - * Returns the Document of the string XML payload - */ + /** Returns the Document of the string XML payload. */ public static Document getDocument(String xmlString) { return getDocument(new StringReader(xmlString)); } /** - * Creates a new XML DocumentBuilder + * Creates a new XML DocumentBuilder. * * @return a DocumentBuilder * @throws RuntimeException if we fail to create one @@ -456,7 +212,7 @@ public class XML { } /** - * Creates a new XML DocumentBuilder + * Creates a new XML DocumentBuilder. * * @param implementation which jaxp implementation should be used * @param classLoader which class loader should be used when getting a new DocumentBuilder @@ -468,7 +224,7 @@ public class XML { } /** - * Creates a new XML DocumentBuilder + * Creates a new XML DocumentBuilder. * * @return a DocumentBuilder * @throws RuntimeException if we fail to create one @@ -479,7 +235,7 @@ public class XML { } /** - * Creates a new XML DocumentBuilder + * Creates a new XML DocumentBuilder. * * @param implementation which jaxp implementation should be used * @param classLoader which class loader should be used when getting a new DocumentBuilder @@ -508,7 +264,7 @@ public class XML { } /** - * Returns the child Element objects from a w3c dom spec + * Returns the child Element objects from a w3c dom spec. * * @return List of elements. Empty list (never null) if none found or if the given element is null */ @@ -554,6 +310,12 @@ public class XML { return ret; } + /** Returns the given attribute name from element, or empty if the element does not have it. */ + public static Optional attribute(String name, Element element) { + if ( ! element.hasAttribute(name)) return Optional.empty(); + return Optional.of(element.getAttribute(name)); + } + /** * Gets the string contents of the given Element. Returns "", never null if * the element is null, or has no content @@ -572,14 +334,11 @@ public class XML { } /** - * Returns the path to the given xml node, where each node name is separated - * by the given separator string. + * Returns the path to the given xml node, where each node name is separated by the given separator string. * - * @param n - * The xml node to find path to - * @param sep - * The separator string - * @return The path to the xml node as a String + * @param n the xml node to find path to + * @param sep the separator string + * @return the path to the xml node as a String */ public static String getNodePath(Node n, String sep) { if (n == null) { @@ -654,8 +413,7 @@ public class XML { * 1.1 (Second Edition). This does not check against reserved names, it * only checks the set of characters used. * - * @param possibleName - * a possibly valid XML name + * @param possibleName a possibly valid XML name * @return true if the name may be used as an XML tag or attribute name */ public static boolean isName(CharSequence possibleName) { @@ -680,4 +438,181 @@ public class XML { return valid; } + /** + * The point of this weird class and the jumble of abstract methods is + * linking the scan for characters that must be quoted into the quoting + * table, and making it actual work to make them go out of sync again. + */ + private static abstract class LegalCharacters { + // To quote http://www.w3.org/TR/REC-xml/ : + // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | + // [#x10000-#x10FFFF] + final boolean isLegal(int codepoint, boolean escapeLow, int stripCodePoint, boolean isAttribute) { + if (codepoint == stripCodePoint) { + return removeCodePoint(); + } else if (codepoint < ' ') { + if (!escapeLow) { + return true; + } + switch (codepoint) { + case 0x09: + case 0x0a: + case 0x0d: + return true; + default: + return ctrlEscapeCodePoint(codepoint); + } + } else if (codepoint >= 0x20 && codepoint <= 0xd7ff) { + switch (codepoint) { + case '&': + return ampCodePoint(); + case '<': + return ltCodePoint(); + case '>': + return gtCodePoint(); + case '"': + return quotCodePoint(isAttribute); + default: + return true; + } + } else if ((codepoint >= 0xe000 && codepoint <= 0xfffd) + || (codepoint >= 0x10000 && codepoint <= 0x10ffff)) { + return true; + } else { + return filterCodePoint(codepoint); + + } + } + + private boolean quotCodePoint(boolean isAttribute) { + if (isAttribute) { + quoteQuot(); + return false; + } else { + return true; + } + } + + private boolean filterCodePoint(int codepoint) { + replace(codepoint); + return false; + } + + private boolean gtCodePoint() { + quoteGt(); + return false; + } + + private boolean ltCodePoint() { + quoteLt(); + return false; + } + + private boolean ampCodePoint() { + quoteAmp(); + return false; + } + + private boolean ctrlEscapeCodePoint(int codepoint) { + ctrlEscape(codepoint); + return false; + } + + private boolean removeCodePoint() { + remove(); + return false; + } + + protected abstract void quoteQuot(); + + protected abstract void quoteGt(); + + protected abstract void quoteLt(); + + protected abstract void quoteAmp(); + + protected abstract void remove(); + + protected abstract void ctrlEscape(int codepoint); + + protected abstract void replace(int codepoint); + } + + private static final class Quote extends LegalCharacters { + + char[] lastQuoted; + private static final char[] EMPTY = new char[0]; + private static final char[] REPLACEMENT_CHARACTER = "\ufffd".toCharArray(); + private static final char[] AMP = "&".toCharArray(); + private static final char[] LT = "<".toCharArray(); + private static final char[] GT = ">".toCharArray(); + private static final char[] QUOT = """.toCharArray(); + + @Override + protected void remove() { + lastQuoted = EMPTY; + } + + @Override + protected void replace(final int codepoint) { + lastQuoted = REPLACEMENT_CHARACTER; + } + + @Override + protected void quoteQuot() { + lastQuoted = QUOT; + } + + @Override + protected void quoteGt() { + lastQuoted = GT; + } + + @Override + protected void quoteLt() { + lastQuoted = LT; + } + + @Override + protected void quoteAmp() { + lastQuoted = AMP; + } + + @Override + protected void ctrlEscape(final int codepoint) { + lastQuoted = REPLACEMENT_CHARACTER; + } + } + + private static final class Scan extends LegalCharacters { + + @Override + protected void quoteQuot() { + } + + @Override + protected void quoteGt() { + } + + @Override + protected void quoteLt() { + } + + @Override + protected void quoteAmp() { + } + + @Override + protected void remove() { + } + + @Override + protected void ctrlEscape(final int codepoint) { + } + + @Override + protected void replace(final int codepoint) { + } + } + } -- cgit v1.2.3