diff options
author | Jon Bratseth <bratseth@verizonmedia.com> | 2019-12-16 10:07:30 +0100 |
---|---|---|
committer | Jon Bratseth <bratseth@verizonmedia.com> | 2019-12-16 10:07:30 +0100 |
commit | baa6a81aa07f37a543c836710b4c65b7831fd9db (patch) | |
tree | 7e0724c298a1b45f356956b897133c07b618dff5 /vespajlib | |
parent | 45992cec4b915513372b1d8e777e505aaee3f4a1 (diff) | |
parent | 65dd6eb4932a7c29cbad7d717bc34da73d0bb723 (diff) |
Merge with master
Diffstat (limited to 'vespajlib')
5 files changed, 145 insertions, 6 deletions
diff --git a/vespajlib/abi-spec.json b/vespajlib/abi-spec.json index 1fcdf7f5cca..a4a9a1e1b24 100644 --- a/vespajlib/abi-spec.json +++ b/vespajlib/abi-spec.json @@ -2683,6 +2683,20 @@ "public static final char ESCAPE_CHAR" ] }, + "com.yahoo.text.Ascii7BitMatcher": { + "superClass": "java.lang.Object", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>(java.lang.String)", + "public void <init>(java.lang.String, java.lang.String)", + "public boolean matches(java.lang.String)", + "public static java.lang.String charsAndNumbers()" + ], + "fields": [] + }, "com.yahoo.text.BooleanParser": { "superClass": "java.lang.Object", "interfaces": [], diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java index 43d1bb0e468..4770ad1b1f0 100644 --- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java +++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java @@ -1,11 +1,14 @@ // Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.tensor; +import com.yahoo.text.Ascii7BitMatcher; + import java.util.Arrays; import java.util.Optional; -import java.util.regex.Pattern; import java.util.stream.Collectors; +import static com.yahoo.text.Ascii7BitMatcher.charsAndNumbers; + /** * An immutable address to a tensor cell. This simply supplies a value to each dimension * in a particular tensor type. By itself it is just a list of cell labels, it's meaning depends on its accompanying type. @@ -163,8 +166,8 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { /** Supports building of a tensor address */ public static class Builder { - - private Pattern labelPattern = Pattern.compile("[-,A-Za-z0-9_@]([A-Z,a-z0-9_@$])*"); + static private final Ascii7BitMatcher labelMatcher = new Ascii7BitMatcher("-_@" + charsAndNumbers(), + "_@$" + charsAndNumbers()); private final TensorType type; private final String[] labels; @@ -206,10 +209,10 @@ public abstract class TensorAddress implements Comparable<TensorAddress> { return TensorAddress.of(labels); } - private void requireIdentifier(String s, String parameterName) { + static private void requireIdentifier(String s, String parameterName) { if (s == null) throw new IllegalArgumentException(parameterName + " can not be null"); - if ( ! labelPattern.matcher(s).matches()) + if ( ! labelMatcher.matches(s)) throw new IllegalArgumentException(parameterName + " must be an identifier or integer, not '" + s + "'"); } diff --git a/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java b/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java new file mode 100644 index 00000000000..b821d57de00 --- /dev/null +++ b/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java @@ -0,0 +1,64 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.text; + +import java.util.BitSet; + +/** + * Fast replacement for regex based validators of simple expressions. + * It can take a list of legal characters for the the first character, + * and another list for the following. Limited to 7bit ascii. + * @author baldersheim + */ +public class Ascii7BitMatcher { + private final BitSet legalFirst; + private final BitSet legalRest; + private static BitSet createBitSet(String legal) { + BitSet legalChars = new BitSet(128); + for (int i=0; i < legal.length(); i++) { + char c = legal.charAt(i); + if (c < 128) { + legalChars.set(c); + } else { + throw new IllegalArgumentException("Char '" + c + "' at position " + i + " is not valid ascii 7 bit char"); + } + } + return legalChars; + } + public Ascii7BitMatcher(String legal) { + this(legal, legal); + } + public Ascii7BitMatcher(String legalFirstChar, String legalChars) { + legalFirst = createBitSet(legalFirstChar); + legalRest = createBitSet(legalChars); + } + private static boolean isAscii7Bit(char c) { return c < 128;} + private boolean isLegalFirst(char c) { + return isAscii7Bit(c) && legalFirst.get(c); + } + private boolean isLegalRest(char c) { + return isAscii7Bit(c) && legalRest.get(c); + } + public boolean matches(String s) { + if (s == null || s.isEmpty() || ! isLegalFirst(s.charAt(0))) return false; + for (int i = 1; i < s.length(); i++) { + if ( ! isLegalRest(s.charAt(i))) { + return false; + } + } + return true; + } + static public String charsAndNumbers() { + char [] chars = new char[26*2+10]; + int i = 0; + for (char c = 'A'; c <= 'Z'; c++) { + chars[i++] = c; + } + for (char c = 'a'; c <= 'z'; c++) { + chars[i++] = c; + } + for (char c = '0'; c <= '9'; c++) { + chars[i++] = c; + } + return new String(chars); + } +} diff --git a/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java index 7cddeab1641..2231d32281a 100644 --- a/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java +++ b/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java @@ -6,6 +6,7 @@ import com.yahoo.tensor.TensorAddress; import com.yahoo.tensor.TensorType; import com.yahoo.tensor.evaluation.EvaluationContext; import com.yahoo.tensor.evaluation.Name; +import org.junit.Ignore; import org.junit.Test; import java.util.Collections; @@ -46,10 +47,23 @@ public class DynamicTensorTestCase { new Constant(5)); values.put(new TensorAddress.Builder(sparse).add("x", "a").add("y", "c").build(), new Constant(7)); - DynamicTensor<Name> t2 = DynamicTensor.from(sparse, values); + DynamicTensor<Name> t2 = DynamicTensor.from(sparse, values); assertEquals(Tensor.from(sparse, "{{x:a,y:b}:5, {x:a,y:c}:7}"), t2.evaluate()); } + @Ignore // Enable for benchmarking + public void benchMarkTensorAddressBuilder() { + long start = System.nanoTime(); + TensorType sparse = TensorType.fromSpec("tensor(x{})"); + for (int i=0; i < 10000; i++) { + TensorAddress.Builder builder = new TensorAddress.Builder(sparse); + for (int j=0; j < 1000; j++) { + builder.add("x", String.valueOf(j)); + } + } + System.out.println("Took " + (System.nanoTime() - start) + " ns"); + } + private static class Constant implements ScalarFunction<Name> { private final double value; diff --git a/vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java b/vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java new file mode 100644 index 00000000000..3f628b109f5 --- /dev/null +++ b/vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java @@ -0,0 +1,44 @@ +package com.yahoo.text; + +import org.junit.Assert; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class Ascii7BitMatcherTestCase { + @Test + public void testThatListedCharsAreLegal() { + assertTrue(new Ascii7BitMatcher("a").matches("aaaa")); + assertTrue(new Ascii7BitMatcher("ab").matches("abbbbbbbbb")); + assertTrue(new Ascii7BitMatcher("ab").matches("bbbbbbbbbba")); + assertTrue(new Ascii7BitMatcher("1").matches("1")); + } + @Test + public void requireThatNotListedCharsFail() { + assertFalse(new Ascii7BitMatcher("a").matches("b")); + } + + @Test + public void testThatLegalFirstAndRestPass() { + assertTrue(new Ascii7BitMatcher("a", "").matches("a")); + assertTrue(new Ascii7BitMatcher("a", "b").matches("abbbbbbbbb")); + assertTrue(new Ascii7BitMatcher("abc", "0123").matches("a123120")); + } + @Test + public void requireThatIllegalFirstOrSecondFail() { + assertFalse(new Ascii7BitMatcher("a", "").matches("aa")); + assertFalse(new Ascii7BitMatcher("a", "b").matches("aa")); + assertFalse(new Ascii7BitMatcher("", "a").matches("a")); + assertFalse(new Ascii7BitMatcher("a", "b").matches("bb")); + assertFalse(new Ascii7BitMatcher("a", "b").matches("abbbbbx")); + } + @Test + public void requireThatNonAsciiFailConstruction() { + try { + new Ascii7BitMatcher("aæb"); + Assert.fail("'æ' should not be allowed"); + } catch (IllegalArgumentException e) { + assertEquals("Char 'æ' at position 1 is not valid ascii 7 bit char", e.getMessage()); + } + } +} |