diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2019-12-16 10:07:30 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2019-12-16 10:07:30 +0100 |
commit | baa6a81aa07f37a543c836710b4c65b7831fd9db (patch) | |
tree | 7e0724c298a1b45f356956b897133c07b618dff5 /vespajlib/src/main/java/com/yahoo | |
parent | 45992cec4b915513372b1d8e777e505aaee3f4a1 (diff) | |
parent | 65dd6eb4932a7c29cbad7d717bc34da73d0bb723 (diff) |
Merge with master
Diffstat (limited to 'vespajlib/src/main/java/com/yahoo')
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java | 13 | ||||
-rw-r--r-- | vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java | 64 |
2 files changed, 72 insertions, 5 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index 43d1bb0e468..4770ad1b1f0 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -1,11 +1,14 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.tensor; +import com.yahoo.text.Ascii7BitMatcher; + import java.util.Arrays; import java.util.Optional; -import java.util.regex.Pattern; import java.util.stream.Collectors; +import static com.yahoo.text.Ascii7BitMatcher.charsAndNumbers; + /** * An immutable address to a tensor cell. This simply supplies a value to each dimension * in a particular tensor type. By itself it is just a list of cell labels, its meaning depends on its accompanying type. @@ -163,8 +166,8 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { /** Supports building of a tensor address */ public static class Builder { - - private Pattern labelPattern = Pattern.compile("[-,A-Za-z0-9_@]([A-Z,a-z0-9_@$])*"); + static private final Ascii7BitMatcher labelMatcher = new Ascii7BitMatcher("-_@" + charsAndNumbers(), + "_@$" + charsAndNumbers()); private final TensorType type; private final String[] labels; @@ -206,10 +209,10 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return TensorAddress.of(labels); } - private void requireIdentifier(String s, String parameterName) { + static private void requireIdentifier(String s, String parameterName) { if (s == null) throw new IllegalArgumentException(parameterName + " can not be null"); - if ( ! labelPattern.matcher(s).matches()) + if ( ! 
labelMatcher.matches(s)) throw new IllegalArgumentException(parameterName + " must be an identifier or integer, not '" + s + "'"); } diff --git a/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java b/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java new file mode 100644 index 00000000000..b821d57de00 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java @@ -0,0 +1,64 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text; + +import java.util.BitSet; + +/** + * Fast replacement for regex based validators of simple expressions. + * It can take a list of legal characters for the first character, + * and another list for the following. Limited to 7bit ascii. + * @author baldersheim + */ +public class Ascii7BitMatcher { + private final BitSet legalFirst; + private final BitSet legalRest; + private static BitSet createBitSet(String legal) { + BitSet legalChars = new BitSet(128); + for (int i=0; i < legal.length(); i++) { + char c = legal.charAt(i); + if (c < 128) { + legalChars.set(c); + } else { + throw new IllegalArgumentException("Char '" + c + "' at position " + i + " is not valid ascii 7 bit char"); + } + } + return legalChars; + } + public Ascii7BitMatcher(String legal) { + this(legal, legal); + } + public Ascii7BitMatcher(String legalFirstChar, String legalChars) { + legalFirst = createBitSet(legalFirstChar); + legalRest = createBitSet(legalChars); + } + private static boolean isAscii7Bit(char c) { return c < 128;} + private boolean isLegalFirst(char c) { + return isAscii7Bit(c) && legalFirst.get(c); + } + private boolean isLegalRest(char c) { + return isAscii7Bit(c) && legalRest.get(c); + } + public boolean matches(String s) { + if (s == null || s.isEmpty() || ! isLegalFirst(s.charAt(0))) return false; + for (int i = 1; i < s.length(); i++) { + if ( ! 
isLegalRest(s.charAt(i))) { + return false; + } + } + return true; + } + static public String charsAndNumbers() { + char [] chars = new char[26*2+10]; + int i = 0; + for (char c = 'A'; c <= 'Z'; c++) { + chars[i++] = c; + } + for (char c = 'a'; c <= 'z'; c++) { + chars[i++] = c; + } + for (char c = '0'; c <= '9'; c++) { + chars[i++] = c; + } + return new String(chars); + } +} |