aboutsummaryrefslogtreecommitdiffstats
path: root/vespajlib/src
diff options
context:
space:
mode:
authorJon Bratseth <bratseth@verizonmedia.com>2019-12-16 10:07:30 +0100
committerJon Bratseth <bratseth@verizonmedia.com>2019-12-16 10:07:30 +0100
commitbaa6a81aa07f37a543c836710b4c65b7831fd9db (patch)
tree7e0724c298a1b45f356956b897133c07b618dff5 /vespajlib/src
parent45992cec4b915513372b1d8e777e505aaee3f4a1 (diff)
parent65dd6eb4932a7c29cbad7d717bc34da73d0bb723 (diff)
Merge with master
Diffstat (limited to 'vespajlib/src')
-rw-r--r--vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java13
-rw-r--r--vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java64
-rw-r--r--vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java16
-rw-r--r--vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java44
4 files changed, 131 insertions, 6 deletions
diff --git a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
index 43d1bb0e468..4770ad1b1f0 100644
--- a/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
+++ b/vespajlib/src/main/java/com/yahoo/tensor/TensorAddress.java
@@ -1,11 +1,14 @@
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.tensor;
+import com.yahoo.text.Ascii7BitMatcher;
+
import java.util.Arrays;
import java.util.Optional;
-import java.util.regex.Pattern;
import java.util.stream.Collectors;
+import static com.yahoo.text.Ascii7BitMatcher.charsAndNumbers;
+
/**
* An immutable address to a tensor cell. This simply supplies a value to each dimension
* in a particular tensor type. By itself it is just a list of cell labels, it's meaning depends on its accompanying type.
@@ -163,8 +166,8 @@ public abstract class TensorAddress implements Comparable<TensorAddress> {
/** Supports building of a tensor address */
public static class Builder {
-
- private Pattern labelPattern = Pattern.compile("[-,A-Za-z0-9_@]([A-Z,a-z0-9_@$])*");
+ static private final Ascii7BitMatcher labelMatcher = new Ascii7BitMatcher("-_@" + charsAndNumbers(),
+ "_@$" + charsAndNumbers());
private final TensorType type;
private final String[] labels;
@@ -206,10 +209,10 @@ public abstract class TensorAddress implements Comparable<TensorAddress> {
return TensorAddress.of(labels);
}
- private void requireIdentifier(String s, String parameterName) {
+ static private void requireIdentifier(String s, String parameterName) {
if (s == null)
throw new IllegalArgumentException(parameterName + " can not be null");
- if ( ! labelPattern.matcher(s).matches())
+ if ( ! labelMatcher.matches(s))
throw new IllegalArgumentException(parameterName + " must be an identifier or integer, not '" + s + "'");
}
diff --git a/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java b/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java
new file mode 100644
index 00000000000..b821d57de00
--- /dev/null
+++ b/vespajlib/src/main/java/com/yahoo/text/Ascii7BitMatcher.java
@@ -0,0 +1,64 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.text;
+
+import java.util.BitSet;
+
+/**
+ * Fast replacement for regex based validators of simple expressions.
+ * It can take a list of legal characters for the first character,
+ * and another list for the following. Limited to 7bit ascii.
+ * @author baldersheim
+ */
+public class Ascii7BitMatcher {
+ private final BitSet legalFirst; // bit c is set when char c is accepted at index 0
+ private final BitSet legalRest; // bit c is set when char c is accepted at index 1 and onwards
+ private static BitSet createBitSet(String legal) { // builds the lookup set from a string listing every legal char
+ BitSet legalChars = new BitSet(128); // one bit per 7-bit code point
+ for (int i=0; i < legal.length(); i++) {
+ char c = legal.charAt(i);
+ if (c < 128) {
+ legalChars.set(c);
+ } else { // chars outside the 7-bit range are rejected when the matcher is constructed
+ throw new IllegalArgumentException("Char '" + c + "' at position " + i + " is not valid ascii 7 bit char");
+ }
+ }
+ return legalChars;
+ }
+ public Ascii7BitMatcher(String legal) { // same legal set for the first and all following chars
+ this(legal, legal);
+ }
+ public Ascii7BitMatcher(String legalFirstChar, String legalChars) { // throws IllegalArgumentException if either list has a char >= 128
+ legalFirst = createBitSet(legalFirstChar);
+ legalRest = createBitSet(legalChars);
+ }
+ private static boolean isAscii7Bit(char c) { return c < 128;} // range guard evaluated before each BitSet lookup
+ private boolean isLegalFirst(char c) {
+ return isAscii7Bit(c) && legalFirst.get(c);
+ }
+ private boolean isLegalRest(char c) {
+ return isAscii7Bit(c) && legalRest.get(c);
+ }
+ public boolean matches(String s) { // true iff s is non-empty, s[0] is in the first set and every later char is in the rest set
+ if (s == null || s.isEmpty() || ! isLegalFirst(s.charAt(0))) return false; // null and "" never match
+ for (int i = 1; i < s.length(); i++) { // index 0 was already checked against the first set
+ if ( ! isLegalRest(s.charAt(i))) {
+ return false;
+ }
+ }
+ return true;
+ }
+ static public String charsAndNumbers() { // returns "A..Za..z0..9" (62 chars), handy for composing legal-char lists
+ char [] chars = new char[26*2+10]; // 26 upper case + 26 lower case + 10 digits
+ int i = 0;
+ for (char c = 'A'; c <= 'Z'; c++) {
+ chars[i++] = c;
+ }
+ for (char c = 'a'; c <= 'z'; c++) {
+ chars[i++] = c;
+ }
+ for (char c = '0'; c <= '9'; c++) {
+ chars[i++] = c;
+ }
+ return new String(chars);
+ }
+}
diff --git a/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java b/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java
index 7cddeab1641..2231d32281a 100644
--- a/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java
+++ b/vespajlib/src/test/java/com/yahoo/tensor/functions/DynamicTensorTestCase.java
@@ -6,6 +6,7 @@ import com.yahoo.tensor.TensorAddress;
import com.yahoo.tensor.TensorType;
import com.yahoo.tensor.evaluation.EvaluationContext;
import com.yahoo.tensor.evaluation.Name;
+import org.junit.Ignore;
import org.junit.Test;
import java.util.Collections;
@@ -46,10 +47,23 @@ public class DynamicTensorTestCase {
new Constant(5));
values.put(new TensorAddress.Builder(sparse).add("x", "a").add("y", "c").build(),
new Constant(7));
- DynamicTensor<Name> t2 = DynamicTensor.from(sparse, values);
+ DynamicTensor<Name> t2 = DynamicTensor.from(sparse, values);
assertEquals(Tensor.from(sparse, "{{x:a,y:b}:5, {x:a,y:c}:7}"), t2.evaluate());
}
+ @Ignore // Enable for benchmarking. NOTE(review): there is no @Test on this method, so removing @Ignore alone will not make JUnit 4 run it - add @Test when enabling
+ public void benchMarkTensorAddressBuilder() {
+ long start = System.nanoTime(); // the timed region also covers the type parsing on the next line
+ TensorType sparse = TensorType.fromSpec("tensor(x{})");
+ for (int i=0; i < 10000; i++) { // 10000 builders ...
+ TensorAddress.Builder builder = new TensorAddress.Builder(sparse);
+ for (int j=0; j < 1000; j++) { // ... with 1000 add() calls each, always on dimension x with a numeric label
+ builder.add("x", String.valueOf(j));
+ }
+ }
+ System.out.println("Took " + (System.nanoTime() - start) + " ns"); // elapsed time for the whole run, in nanoseconds
+ }
+
private static class Constant implements ScalarFunction<Name> {
private final double value;
diff --git a/vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java b/vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java
new file mode 100644
index 00000000000..3f628b109f5
--- /dev/null
+++ b/vespajlib/src/test/java/com/yahoo/text/Ascii7BitMatcherTestCase.java
@@ -0,0 +1,44 @@
+package com.yahoo.text;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import static org.junit.Assert.*;
+
+public class Ascii7BitMatcherTestCase {
+ @Test
+ public void testThatListedCharsAreLegal() { // single-list constructor: the same set applies to every position
+ assertTrue(new Ascii7BitMatcher("a").matches("aaaa"));
+ assertTrue(new Ascii7BitMatcher("ab").matches("abbbbbbbbb"));
+ assertTrue(new Ascii7BitMatcher("ab").matches("bbbbbbbbbba"));
+ assertTrue(new Ascii7BitMatcher("1").matches("1"));
+ }
+ @Test
+ public void requireThatNotListedCharsFail() {
+ assertFalse(new Ascii7BitMatcher("a").matches("b"));
+ }
+
+ @Test
+ public void testThatLegalFirstAndRestPass() { // two-list constructor: first list for index 0, second list for the rest
+ assertTrue(new Ascii7BitMatcher("a", "").matches("a")); // an empty rest list is fine for a one-char input
+ assertTrue(new Ascii7BitMatcher("a", "b").matches("abbbbbbbbb"));
+ assertTrue(new Ascii7BitMatcher("abc", "0123").matches("a123120"));
+ }
+ @Test
+ public void requireThatIllegalFirstOrSecondFail() {
+ assertFalse(new Ascii7BitMatcher("a", "").matches("aa")); // empty rest list rejects any second char
+ assertFalse(new Ascii7BitMatcher("a", "b").matches("aa")); // 'a' is legal first but not legal later
+ assertFalse(new Ascii7BitMatcher("", "a").matches("a")); // empty first list rejects every input
+ assertFalse(new Ascii7BitMatcher("a", "b").matches("bb")); // 'b' is legal later but not legal first
+ assertFalse(new Ascii7BitMatcher("a", "b").matches("abbbbbx")); // an illegal char at the very end is still caught
+ }
+ @Test
+ public void requireThatNonAsciiFailConstruction() { // chars >= 128 in a legal list fail in the constructor, not in matches()
+ try {
+ new Ascii7BitMatcher("aæb");
+ Assert.fail("'æ' should not be allowed");
+ } catch (IllegalArgumentException e) {
+ assertEquals("Char 'æ' at position 1 is not valid ascii 7 bit char", e.getMessage()); // pins the exact message, including the index
+ }
+ }
+}