Remove carriage return

author: Jon Bratseth <bratseth@yahoo-inc.com> 2017-06-14 14:41:18 +0200
committer: Jon Bratseth <bratseth@yahoo-inc.com> 2017-06-14 14:41:18 +0200
commit: 6ff3df19226036b8ee1bb559f9d73cab40e8d2a0 (patch)
tree: 355a7b0623b58983ba655b868341fe479a22eb3d /indexinglanguage
parent: b7f9e7ceaef72489d76683537973b639f8895b84 (diff)
6 files changed, 424 insertions, 424 deletions
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/MathResolver.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/MathResolver.java
index f648a0e38e4..9596bf60cae 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/MathResolver.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/MathResolver.java
@@ -1,55 +1,55 @@
 // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.indexinglanguage.expressions;
-
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Stack;
-
-/**
- * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
- */
-public class MathResolver {
-
-    private final List<Item> items = new LinkedList<>();
-
-    public void push(ArithmeticExpression.Operator op, Expression exp) {
-        op.getClass(); // throws NullPointerException
-        if (items.isEmpty() && op != ArithmeticExpression.Operator.ADD) {
-            throw new IllegalArgumentException("First item in an arithmetic operation must be an addition.");
-        }
-        items.add(new Item(op, exp));
-    }
-
-    public Expression resolve() {
-        Stack<Item> stack = new Stack<>();
-        stack.push(items.remove(0));
-        while (!items.isEmpty()) {
-            Item item = items.remove(0);
-            while (stack.size() > 1 && stack.peek().op.precedes(item.op)) {
-                pop(stack);
-            }
-            stack.push(item);
-        }
-        while (stack.size() > 1) {
-            pop(stack);
-        }
-        return stack.remove(0).exp;
-    }
-
-    private void pop(Stack<Item> stack) {
-        Item rhs = stack.pop();
-        Item lhs = stack.peek();
-        lhs.exp = new ArithmeticExpression(lhs.exp, rhs.op, rhs.exp);
-    }
-
-    private static class Item {
-
-        final ArithmeticExpression.Operator op;
-        Expression exp;
-
-        Item(ArithmeticExpression.Operator op, Expression exp) {
-            this.op = op;
-            this.exp = exp;
-        }
-    }
-}
-\ No newline at end of file
+package com.yahoo.vespa.indexinglanguage.expressions;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Stack;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class MathResolver {
+
+    private final List<Item> items = new LinkedList<>();
+
+    public void push(ArithmeticExpression.Operator op, Expression exp) {
+        op.getClass(); // throws NullPointerException
+        if (items.isEmpty() && op != ArithmeticExpression.Operator.ADD) {
+            throw new IllegalArgumentException("First item in an arithmetic operation must be an addition.");
+        }
+        items.add(new Item(op, exp));
+    }
+
+    public Expression resolve() {
+        Stack<Item> stack = new Stack<>();
+        stack.push(items.remove(0));
+        while (!items.isEmpty()) {
+            Item item = items.remove(0);
+            while (stack.size() > 1 && stack.peek().op.precedes(item.op)) {
+                pop(stack);
+            }
+            stack.push(item);
+        }
+        while (stack.size() > 1) {
+            pop(stack);
+        }
+        return stack.remove(0).exp;
+    }
+
+    private void pop(Stack<Item> stack) {
+        Item rhs = stack.pop();
+        Item lhs = stack.peek();
+        lhs.exp = new ArithmeticExpression(lhs.exp, rhs.op, rhs.exp);
+    }
+
+    private static class Item {
+
+        final ArithmeticExpression.Operator op;
+        Expression exp;
+
+        Item(ArithmeticExpression.Operator op, Expression exp) {
+            this.op = op;
+            this.exp = exp;
+        }
+    }
+}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/PassthroughExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/PassthroughExpression.java
index 427c777db5a..703cf30f6e0 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/PassthroughExpression.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/PassthroughExpression.java
@@ -18,4 +18,4 @@ public class PassthroughExpression extends OutputExpression {
     public boolean equals(Object obj) {
         return super.equals(obj) && obj instanceof PassthroughExpression;
     }
-}
-\ No newline at end of file
+}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
index 09364c796f0..48e78392498 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java
@@ -1,106 +1,106 @@
-// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.indexinglanguage.linguistics;
-
-import com.yahoo.language.Language;
-import com.yahoo.language.process.StemMode;
-import com.yahoo.vespa.configdefinition.IlscriptsConfig;
-
-/**
- * @author Simon Thoresen
- */
-public class AnnotatorConfig implements Cloneable {
-
-    private Language language;
-    private StemMode stemMode;
-    private boolean removeAccents;
-    private int maxTermOccurences;
-
-    public static final int DEFAULT_MAX_TERM_OCCURRENCES;
-
-    static {
-        IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder());
-        DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences();
-    }
-
-    public AnnotatorConfig() {
-        language = Language.ENGLISH;
-        stemMode = StemMode.NONE;
-        removeAccents = false;
-        maxTermOccurences = DEFAULT_MAX_TERM_OCCURRENCES;
-    }
-
-    public AnnotatorConfig(AnnotatorConfig rhs) {
-        language = rhs.language;
-        stemMode = rhs.stemMode;
-        removeAccents = rhs.removeAccents;
-        maxTermOccurences = rhs.maxTermOccurences;
-    }
-
-    public Language getLanguage() {
-        return language;
-    }
-
-    public AnnotatorConfig setLanguage(Language language) {
-        this.language = language;
-        return this;
-    }
-
-    public StemMode getStemMode() {
-        return stemMode;
-    }
-
-    public AnnotatorConfig setStemMode(StemMode stemMode) {
-        this.stemMode = stemMode;
-        return this;
-    }
-
-    public AnnotatorConfig setStemMode(String name) {
-        this.stemMode = StemMode.valueOf(name);
-        return this;
-    }
-
-    public boolean getRemoveAccents() {
-        return removeAccents;
-    }
-
-    public AnnotatorConfig setRemoveAccents(boolean removeAccents) {
-        this.removeAccents = removeAccents;
-        return this;
-    }
-
-    public int getMaxTermOccurrences() {
-        return maxTermOccurences;
-    }
-
-    public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) {
-        this.maxTermOccurences = maxTermCount;
-        return this;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (!(obj instanceof AnnotatorConfig)) {
-            return false;
-        }
-        AnnotatorConfig rhs = (AnnotatorConfig)obj;
-        if (!language.equals(rhs.language)) {
-            return false;
-        }
-        if (!stemMode.equals(rhs.stemMode)) {
-            return false;
-        }
-        if (removeAccents != rhs.removeAccents) {
-            return false;
-        }
-        if (maxTermOccurences != rhs.maxTermOccurences) {
-            return false;
-        }
-        return true;
-    }
-
-    @Override
-    public int hashCode() {
-        return getClass().hashCode() + language.hashCode() + stemMode.hashCode() +
-               Boolean.valueOf(removeAccents).hashCode() + maxTermOccurences;
-    }
-}
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+package com.yahoo.vespa.indexinglanguage.linguistics;
+
+import com.yahoo.language.Language;
+import com.yahoo.language.process.StemMode;
+import com.yahoo.vespa.configdefinition.IlscriptsConfig;
+
+/**
+ * @author Simon Thoresen
+ */
+public class AnnotatorConfig implements Cloneable {
+
+    private Language language;
+    private StemMode stemMode;
+    private boolean removeAccents;
+    private int maxTermOccurences;
+
+    public static final int DEFAULT_MAX_TERM_OCCURRENCES;
+
+    static {
+        IlscriptsConfig defaults = new IlscriptsConfig(new IlscriptsConfig.Builder());
+        DEFAULT_MAX_TERM_OCCURRENCES = defaults.maxtermoccurrences();
+    }
+
+    public AnnotatorConfig() {
+        language = Language.ENGLISH;
+        stemMode = StemMode.NONE;
+        removeAccents = false;
+        maxTermOccurences = DEFAULT_MAX_TERM_OCCURRENCES;
+    }
+
+    public AnnotatorConfig(AnnotatorConfig rhs) {
+        language = rhs.language;
+        stemMode = rhs.stemMode;
+        removeAccents = rhs.removeAccents;
+        maxTermOccurences = rhs.maxTermOccurences;
+    }
+
+    public Language getLanguage() {
+        return language;
+    }
+
+    public AnnotatorConfig setLanguage(Language language) {
+        this.language = language;
+        return this;
+    }
+
+    public StemMode getStemMode() {
+        return stemMode;
+    }
+
+    public AnnotatorConfig setStemMode(StemMode stemMode) {
+        this.stemMode = stemMode;
+        return this;
+    }
+
+    public AnnotatorConfig setStemMode(String name) {
+        this.stemMode = StemMode.valueOf(name);
+        return this;
+    }
+
+    public boolean getRemoveAccents() {
+        return removeAccents;
+    }
+
+    public AnnotatorConfig setRemoveAccents(boolean removeAccents) {
+        this.removeAccents = removeAccents;
+        return this;
+    }
+
+    public int getMaxTermOccurrences() {
+        return maxTermOccurences;
+    }
+
+    public AnnotatorConfig setMaxTermOccurrences(int maxTermCount) {
+        this.maxTermOccurences = maxTermCount;
+        return this;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (!(obj instanceof AnnotatorConfig)) {
+            return false;
+        }
+        AnnotatorConfig rhs = (AnnotatorConfig)obj;
+        if (!language.equals(rhs.language)) {
+            return false;
+        }
+        if (!stemMode.equals(rhs.stemMode)) {
+            return false;
+        }
+        if (removeAccents != rhs.removeAccents) {
+            return false;
+        }
+        if (maxTermOccurences != rhs.maxTermOccurences) {
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return getClass().hashCode() + language.hashCode() + stemMode.hashCode() +
+               Boolean.valueOf(removeAccents).hashCode() + maxTermOccurences;
+    }
+}
diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/parser/IndexingInput.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/parser/IndexingInput.java
index de9d26547da..5e9ffd677cf 100644
--- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/parser/IndexingInput.java
+++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/parser/IndexingInput.java
@@ -1,14 +1,14 @@
 // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.indexinglanguage.parser;
-
-import com.yahoo.javacc.FastCharStream;
-
-/**
- * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
- */
-public final class IndexingInput extends FastCharStream implements CharStream {
-
-    public IndexingInput(String input) {
-        super(input);
-    }
-}
+package com.yahoo.vespa.indexinglanguage.parser;
+
+import com.yahoo.javacc.FastCharStream;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public final class IndexingInput extends FastCharStream implements CharStream {
+
+    public IndexingInput(String input) {
+        super(input);
+    }
+}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/AttributeExpressionTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/AttributeExpressionTestCase.java
index 097e0f21bc1..95eab5f9ef8 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/AttributeExpressionTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/expressions/AttributeExpressionTestCase.java
@@ -38,4 +38,4 @@ public class AttributeExpressionTestCase {
     public void requireThatExpressionCanBeExecuted() {
         assertExecute(new AttributeExpression("foo"));
     }
-}
-\ No newline at end of file
+}
diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
index 805bdc96904..5882e2c19c6 100644
--- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
+++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java
@@ -1,250 +1,250 @@
 // Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-package com.yahoo.vespa.indexinglanguage.linguistics;
-
-import com.yahoo.document.annotation.Annotation;
-import com.yahoo.document.annotation.AnnotationTypes;
-import com.yahoo.document.annotation.SpanTree;
-import com.yahoo.document.annotation.SpanTrees;
-import com.yahoo.document.datatypes.StringFieldValue;
-import com.yahoo.language.Language;
-import com.yahoo.language.Linguistics;
-import com.yahoo.language.process.StemMode;
-import com.yahoo.language.process.Token;
-import com.yahoo.language.process.TokenType;
-import com.yahoo.language.process.Tokenizer;
-import com.yahoo.language.simple.SimpleToken;
-
-import org.junit.Test;
-import org.mockito.Mockito;
-
-import java.util.*;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-/**
- * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
- */
-public class LinguisticsAnnotatorTestCase {
-
-    private static final AnnotatorConfig CONFIG = new AnnotatorConfig();
-
-    // --------------------------------------------------------------------------------
-    //
-    // Tests
-    //
-    // --------------------------------------------------------------------------------
-
-    @Test
-    public void requireThatAnnotateFailsWithZeroTokens() {
-        assertAnnotations(null, "foo");
-    }
-
-    @Test
-    public void requireThatAnnotateFailsWithoutIndexableTokenString() {
-        for (TokenType type : TokenType.values()) {
-            if (type.isIndexable()) {
-                continue;
-            }
-            assertAnnotations(null, "foo", newToken("foo", "bar", type));
-        }
-    }
-
-    @Test
-    public void requireThatIndexableTokenStringsAreAnnotated() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
-        for (TokenType type : TokenType.values()) {
-            if (!type.isIndexable()) {
-                continue;
-            }
-            assertAnnotations(expected, "foo", newToken("foo", "bar", type));
-        }
-    }
-
-    @Test
-    public void requireThatSpecialTokenStringsAreAnnotatedRegardlessOfType() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
-        for (TokenType type : TokenType.values()) {
-            assertAnnotations(expected, "foo", newToken("foo", "bar", type, true));
-        }
-    }
-
-    @Test
-    public void requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM));
-        for (boolean specialToken : Arrays.asList(true, false)) {
-            for (TokenType type : TokenType.values()) {
-                if (!specialToken && !type.isIndexable()) {
-                    continue;
-                }
-                assertAnnotations(expected, "foo", newToken("foo", "foo", type, specialToken));
-            }
-        }
-    }
-
-    @Test
-    public void requireThatTermAnnotationsAreLowerCased() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
-        for (boolean specialToken : Arrays.asList(true, false)) {
-            for (TokenType type : TokenType.values()) {
-                if (!specialToken && !type.isIndexable()) {
-                    continue;
-                }
-                assertAnnotations(expected, "foo", newToken("foo", "BAR", type, specialToken));
-            }
-        }
-    }
-
-    @Test
-    public void requireThatCompositeTokensAreFlattened() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("foo")));
-        expected.spanList().span(3, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
-        expected.spanList().span(6, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz")));
-
-        SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC)
-                .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
-                .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
-                                      .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
-                                      .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
-        assertAnnotations(expected, "foobarbaz", token);
-    }
-
-    @Test
-    public void requireThatCompositeSpecialTokensAreNotFlattened() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 9).annotate(new Annotation(AnnotationTypes.TERM,
-                                                               new StringFieldValue("foobarbaz")));
-
-        SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC).setSpecialToken(true)
-                .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
-                .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
-                                      .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
-                                      .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
-        assertAnnotations(expected, "foobarbaz", token);
-    }
-
-    @Test
-    public void requireThatErrorTokensAreSkipped() {
-        assertAnnotations(null, "foo", new SimpleToken("foo").setType(TokenType.ALPHABETIC)
-                                                             .setOffset(-1));
-    }
-
-    @Test
-    public void requireThatTermReplacementsAreApplied() {
-        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
-        for (boolean specialToken : Arrays.asList(true, false)) {
-            for (TokenType type : TokenType.values()) {
-                if (!specialToken && !type.isIndexable()) {
-                    continue;
-                }
-                assertAnnotations(expected, "foo",
-                                  newLinguistics(Arrays.asList(newToken("foo", "foo", type, specialToken)),
-                                                 Collections.singletonMap("foo", "bar")));
-            }
-        }
-    }
-
-    @Test
-    public void requireThatExistingAnnotationsAreKept() {
-        SpanTree spanTree = new SpanTree(SpanTrees.LINGUISTICS);
-        spanTree.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz")));
-
-        StringFieldValue val = new StringFieldValue("foo");
-        val.setSpanTree(spanTree);
-
-        Linguistics linguistics = newLinguistics(Arrays.asList(newToken("foo", "bar", TokenType.ALPHABETIC, false)),
-                                                 Collections.<String, String>emptyMap());
-        new LinguisticsAnnotator(linguistics, CONFIG).annotate(val);
-
-        assertTrue(new LinguisticsAnnotator(linguistics, CONFIG).annotate(val));
-        assertEquals(spanTree, val.getSpanTree(SpanTrees.LINGUISTICS));
-    }
-
-    @Test
-    public void requireThatMaxTermOccurencesIsHonored() {
-        final String inputTerm = "foo";
-        final String stemmedInputTerm = "bar"; // completely different from
-                                               // inputTerm for safer test
-        final String paddedInputTerm = inputTerm + " ";
-        final SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
-        final int inputTermOccurence = AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES * 2;
-        for (int i = 0; i < AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES; ++i) {
-            expected.spanList().span(i * paddedInputTerm.length(), inputTerm.length())
-                    .annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue(stemmedInputTerm)));
-        }
-        for (TokenType type : TokenType.values()) {
-            if (!type.isIndexable()) {
-                continue;
-            }
-            StringBuilder input = new StringBuilder();
-            Token[] tokens = new Token[inputTermOccurence];
-            for (int i = 0; i < inputTermOccurence; ++i) {
-                SimpleToken t = newToken(inputTerm, stemmedInputTerm, type);
-                t.setOffset(i * paddedInputTerm.length());
-                tokens[i] = t;
-                input.append(paddedInputTerm);
-            }
-            assertAnnotations(expected, input.toString(), tokens);
-        }
-    }
-
-    // --------------------------------------------------------------------------------
-    //
-    // Utilities
-    //
-    // --------------------------------------------------------------------------------
-
-    private static SimpleToken newToken(String orig, String stem, TokenType type) {
-        return newToken(orig, stem, type, false);
-    }
-
-    private static SimpleToken newToken(String orig, String stem, TokenType type, boolean specialToken) {
-        return new SimpleToken(orig).setTokenString(stem)
-                                    .setType(type)
-                                    .setSpecialToken(specialToken);
-    }
-
-    private static void assertAnnotations(SpanTree expected, String value, Token... tokens) {
-        assertAnnotations(expected, value, newLinguistics(Arrays.asList(tokens), Collections.<String, String>emptyMap()));
-    }
-
-    private static void assertAnnotations(SpanTree expected, String str, Linguistics linguistics) {
-        StringFieldValue val = new StringFieldValue(str);
-        assertEquals(expected != null, new LinguisticsAnnotator(linguistics, CONFIG).annotate(val));
-        assertEquals(expected, val.getSpanTree(SpanTrees.LINGUISTICS));
-    }
-
-    private static Linguistics newLinguistics(List<? extends Token> tokens, Map<String, String> replacementTerms) {
-        Linguistics linguistics = Mockito.mock(Linguistics.class);
-        Mockito.when(linguistics.getTokenizer()).thenReturn(new MyTokenizer(tokens, replacementTerms));
-        return linguistics;
-    }
-
-    private static class MyTokenizer implements Tokenizer {
-
-        final List<Token> tokens;
-        final Map<String, String> replacementTerms;
-
-        public MyTokenizer(List<? extends Token> tokens, Map<String, String> replacementTerms) {
-            this.tokens = new ArrayList<>(tokens);
-            this.replacementTerms = replacementTerms;
-        }
-
-        @Override
-        public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
-            return tokens;
-        }
-
-        @Override
-        public String getReplacementTerm(String term) {
-            String replacement = replacementTerms.get(term);
-            return replacement != null ? replacement : term;
-        }
-    }
-}
+package com.yahoo.vespa.indexinglanguage.linguistics;
+
+import com.yahoo.document.annotation.Annotation;
+import com.yahoo.document.annotation.AnnotationTypes;
+import com.yahoo.document.annotation.SpanTree;
+import com.yahoo.document.annotation.SpanTrees;
+import com.yahoo.document.datatypes.StringFieldValue;
+import com.yahoo.language.Language;
+import com.yahoo.language.Linguistics;
+import com.yahoo.language.process.StemMode;
+import com.yahoo.language.process.Token;
+import com.yahoo.language.process.TokenType;
+import com.yahoo.language.process.Tokenizer;
+import com.yahoo.language.simple.SimpleToken;
+
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import java.util.*;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * @author <a href="mailto:simon@yahoo-inc.com">Simon Thoresen</a>
+ */
+public class LinguisticsAnnotatorTestCase {
+
+    private static final AnnotatorConfig CONFIG = new AnnotatorConfig();
+
+    // --------------------------------------------------------------------------------
+    //
+    // Tests
+    //
+    // --------------------------------------------------------------------------------
+
+    @Test
+    public void requireThatAnnotateFailsWithZeroTokens() {
+        assertAnnotations(null, "foo");
+    }
+
+    @Test
+    public void requireThatAnnotateFailsWithoutIndexableTokenString() {
+        for (TokenType type : TokenType.values()) {
+            if (type.isIndexable()) {
+                continue;
+            }
+            assertAnnotations(null, "foo", newToken("foo", "bar", type));
+        }
+    }
+
+    @Test
+    public void requireThatIndexableTokenStringsAreAnnotated() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
+        for (TokenType type : TokenType.values()) {
+            if (!type.isIndexable()) {
+                continue;
+            }
+            assertAnnotations(expected, "foo", newToken("foo", "bar", type));
+        }
+    }
+
+    @Test
+    public void requireThatSpecialTokenStringsAreAnnotatedRegardlessOfType() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
+        for (TokenType type : TokenType.values()) {
+            assertAnnotations(expected, "foo", newToken("foo", "bar", type, true));
+        }
+    }
+
+    @Test
+    public void requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM));
+        for (boolean specialToken : Arrays.asList(true, false)) {
+            for (TokenType type : TokenType.values()) {
+                if (!specialToken && !type.isIndexable()) {
+                    continue;
+                }
+                assertAnnotations(expected, "foo", newToken("foo", "foo", type, specialToken));
+            }
+        }
+    }
+
+    @Test
+    public void requireThatTermAnnotationsAreLowerCased() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
+        for (boolean specialToken : Arrays.asList(true, false)) {
+            for (TokenType type : TokenType.values()) {
+                if (!specialToken && !type.isIndexable()) {
+                    continue;
+                }
+                assertAnnotations(expected, "foo", newToken("foo", "BAR", type, specialToken));
+            }
+        }
+    }
+
+    @Test
+    public void requireThatCompositeTokensAreFlattened() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("foo")));
+        expected.spanList().span(3, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar")));
+        expected.spanList().span(6, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz")));
+
+        SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC)
+                .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
+                .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
+                                      .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
+                                      .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
+        assertAnnotations(expected, "foobarbaz", token);
+    }
+
+    @Test
+    public void requireThatCompositeSpecialTokensAreNotFlattened() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        expected.spanList().span(0, 9).annotate(new Annotation(AnnotationTypes.TERM,
+                                                               new StringFieldValue("foobarbaz")));
+
+        SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC).setSpecialToken(true)
+                .addComponent(newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0))
+                .addComponent(newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
+                                      .addComponent(newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3))
+                                      .addComponent(newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6)));
+        assertAnnotations(expected, "foobarbaz", token);
+    }
+
+    @Test
+    public void requireThatErrorTokensAreSkipped() {
+        assertAnnotations(null, "foo", new SimpleToken("foo").setType(TokenType.ALPHABETIC)
+                                                             .setOffset(-1));
+    }
+
+    /**
+     * With a tokenizer that maps "foo" to "bar", the annotation written for the token "foo" must
+     * carry "bar". Checked for special tokens of every type and for regular tokens of every
+     * indexable type.
+     */
+    @Test
+    public void requireThatTermReplacementsAreApplied() {
+        SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        Annotation replacedTerm = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
+        expected.spanList().span(0, 3).annotate(replacedTerm);
+
+        Map<String, String> replacements = Collections.singletonMap("foo", "bar");
+        for (boolean specialToken : new boolean[] { true, false }) {
+            for (TokenType type : TokenType.values()) {
+                // Regular (non-special) tokens are only exercised when their type is indexable.
+                if (specialToken || type.isIndexable()) {
+                    List<SimpleToken> tokens = Arrays.asList(newToken("foo", "foo", type, specialToken));
+                    assertAnnotations(expected, "foo", newLinguistics(tokens, replacements));
+                }
+            }
+        }
+    }
+
+    /**
+     * A value that already has a linguistics span tree must still have an equal tree after
+     * annotation, and {@code annotate} must report {@code true} for it.
+     */
+    @Test
+    public void requireThatExistingAnnotationsAreKept() {
+        SpanTree spanTree = new SpanTree(SpanTrees.LINGUISTICS);
+        Annotation existing = new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz"));
+        spanTree.spanList().span(0, 3).annotate(existing);
+
+        StringFieldValue val = new StringFieldValue("foo");
+        val.setSpanTree(spanTree);
+
+        // The tokenizer would stem "foo" to "bar"; the pre-existing "baz" annotation must win.
+        List<SimpleToken> tokens = Arrays.asList(newToken("foo", "bar", TokenType.ALPHABETIC, false));
+        Linguistics linguistics = newLinguistics(tokens, Collections.<String, String>emptyMap());
+
+        // First pass; its result is ignored, the assertions below look at the second pass.
+        new LinguisticsAnnotator(linguistics, CONFIG).annotate(val);
+
+        boolean secondPassResult = new LinguisticsAnnotator(linguistics, CONFIG).annotate(val);
+        assertTrue(secondPassResult);
+        assertEquals(spanTree, val.getSpanTree(SpanTrees.LINGUISTICS));
+    }
+
+    /**
+     * Feeds twice {@link AnnotatorConfig#DEFAULT_MAX_TERM_OCCURRENCES} occurrences of one term and
+     * expects annotations for only the first {@code DEFAULT_MAX_TERM_OCCURRENCES} of them, for
+     * every indexable token type.
+     */
+    @Test
+    public void requireThatMaxTermOccurencesIsHonored() {
+        final String term = "foo";
+        final String stem = "bar"; // unrelated to the term on purpose, so the two cannot be mixed up
+        final String paddedTerm = term + " ";
+        final int stride = paddedTerm.length();
+        final int maxOccurrences = AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES;
+        final int occurrences = maxOccurrences * 2;
+
+        // Expectation: one annotation per occurrence, but only up to the configured maximum.
+        final SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
+        for (int i = 0; i < maxOccurrences; ++i) {
+            Annotation stemmed = new Annotation(AnnotationTypes.TERM, new StringFieldValue(stem));
+            expected.spanList().span(i * stride, term.length()).annotate(stemmed);
+        }
+
+        // The input text does not depend on the token type, so it is assembled once.
+        StringBuilder text = new StringBuilder();
+        for (int i = 0; i < occurrences; ++i) {
+            text.append(paddedTerm);
+        }
+        final String input = text.toString();
+
+        for (TokenType type : TokenType.values()) {
+            if (type.isIndexable()) {
+                Token[] tokens = new Token[occurrences];
+                for (int i = 0; i < occurrences; ++i) {
+                    SimpleToken occurrence = newToken(term, stem, type);
+                    occurrence.setOffset(i * stride);
+                    tokens[i] = occurrence;
+                }
+                assertAnnotations(expected, input, tokens);
+            }
+        }
+    }
+
+    // --------------------------------------------------------------------------------
+    //
+    // Utilities
+    //
+    // --------------------------------------------------------------------------------
+
+    /** Creates a non-special token; shorthand for the four-argument overload with {@code false}. */
+    private static SimpleToken newToken(String orig, String stem, TokenType type) {
+        final boolean specialToken = false;
+        return newToken(orig, stem, type, specialToken);
+    }
+
+    /**
+     * Creates a token from {@code orig} and sets {@code stem} as its token string, together with
+     * the given type and special-token flag.
+     */
+    private static SimpleToken newToken(String orig, String stem, TokenType type, boolean specialToken) {
+        SimpleToken token = new SimpleToken(orig);
+        return token.setTokenString(stem).setType(type).setSpecialToken(specialToken);
+    }
+
+    /**
+     * Convenience overload: wraps the given tokens in a mocked linguistics that has no term
+     * replacements, then delegates to the {@link Linguistics}-based overload.
+     */
+    private static void assertAnnotations(SpanTree expected, String value, Token... tokens) {
+        List<Token> tokenList = Arrays.asList(tokens);
+        Map<String, String> noReplacements = Collections.emptyMap();
+        Linguistics linguistics = newLinguistics(tokenList, noReplacements);
+        assertAnnotations(expected, value, linguistics);
+    }
+
+    /**
+     * Annotates a fresh string value holding {@code str} and checks the outcome.
+     *
+     * @param expected    the linguistics span tree the value must end up with, or {@code null} if
+     *                    {@code annotate} is expected to return false and leave no such tree
+     * @param str         the text to annotate
+     * @param linguistics the linguistics handed to the annotator
+     */
+    private static void assertAnnotations(SpanTree expected, String str, Linguistics linguistics) {
+        StringFieldValue val = new StringFieldValue(str);
+        LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, CONFIG);
+
+        boolean annotated = annotator.annotate(val);
+        boolean annotationsExpected = expected != null;
+
+        assertEquals(annotationsExpected, annotated);
+        assertEquals(expected, val.getSpanTree(SpanTrees.LINGUISTICS));
+    }
+
+    /**
+     * Builds a Mockito mock of {@link Linguistics} whose {@code getTokenizer()} returns a
+     * {@link MyTokenizer} backed by the given tokens and replacement terms.
+     */
+    private static Linguistics newLinguistics(List<? extends Token> tokens, Map<String, String> replacementTerms) {
+        Tokenizer cannedTokenizer = new MyTokenizer(tokens, replacementTerms);
+        Linguistics mocked = Mockito.mock(Linguistics.class);
+        Mockito.when(mocked.getTokenizer()).thenReturn(cannedTokenizer);
+        return mocked;
+    }
+
+    /**
+     * Tokenizer stub for these tests: it ignores its input and always hands back the token list
+     * it was constructed with, and resolves replacement terms from a fixed map.
+     */
+    private static class MyTokenizer implements Tokenizer {
+
+        final List<Token> tokens;
+        final Map<String, String> replacementTerms;
+
+        /**
+         * @param tokens           the tokens to return from every {@code tokenize} call; copied
+         * @param replacementTerms term replacement lookup table; kept by reference, not copied
+         */
+        public MyTokenizer(List<? extends Token> tokens, Map<String, String> replacementTerms) {
+            this.replacementTerms = replacementTerms;
+            this.tokens = new ArrayList<Token>(tokens);
+        }
+
+        /** Returns the canned tokens; all arguments are ignored. */
+        @Override
+        public Iterable<Token> tokenize(String input, Language language, StemMode stemMode, boolean removeAccents) {
+            return tokens;
+        }
+
+        /** Returns the mapped replacement for {@code term}, or {@code term} itself when the lookup yields null. */
+        @Override
+        public String getReplacementTerm(String term) {
+            String mapped = replacementTerms.get(term);
+            if (mapped == null) {
+                return term;
+            }
+            return mapped;
+        }
+    }
+}
author	Jon Bratseth <bratseth@yahoo-inc.com>	2017-06-14 14:41:18 +0200
committer	Jon Bratseth <bratseth@yahoo-inc.com>	2017-06-14 14:41:18 +0200
commit	6ff3df19226036b8ee1bb559f9d73cab40e8d2a0 (patch)
tree	355a7b0623b58983ba655b868341fe479a22eb3d /indexinglanguage
parent	b7f9e7ceaef72489d76683537973b639f8895b84 (diff)