diff options
author | Jon Bratseth <bratseth@gmail.com> | 2021-05-04 16:17:07 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@gmail.com> | 2021-05-04 16:17:07 +0200 |
commit | b399aa85883146aa3ba1396769d8e82c88877674 (patch) | |
tree | 5628548eb45d7ef6aed4561360dc51563cfd380e | |
parent | 20d71c1dd96cd74803504f22df3f100b63e9d838 (diff) |
Move specialtokens to linguistics
13 files changed, 113 insertions, 66 deletions
diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index b5933936adf..74ed9d33f04 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -5574,8 +5574,8 @@ "public com.yahoo.search.query.parser.ParserEnvironment setIndexFacts(com.yahoo.prelude.IndexFacts)", "public com.yahoo.language.Linguistics getLinguistics()", "public com.yahoo.search.query.parser.ParserEnvironment setLinguistics(com.yahoo.language.Linguistics)", - "public com.yahoo.prelude.query.parser.SpecialTokens getSpecialTokens()", - "public com.yahoo.search.query.parser.ParserEnvironment setSpecialTokens(com.yahoo.prelude.query.parser.SpecialTokens)", + "public com.yahoo.language.process.SpecialTokens getSpecialTokens()", + "public com.yahoo.search.query.parser.ParserEnvironment setSpecialTokens(com.yahoo.language.process.SpecialTokens)", "public static com.yahoo.search.query.parser.ParserEnvironment fromExecutionContext(com.yahoo.search.searchchain.Execution$Context)", "public static com.yahoo.search.query.parser.ParserEnvironment fromParserEnvironment(com.yahoo.search.query.parser.ParserEnvironment)" ], @@ -7765,7 +7765,7 @@ "final" ], "methods": [ - "public void <init>(com.yahoo.search.searchchain.SearchChainRegistry, com.yahoo.prelude.IndexFacts, com.yahoo.prelude.query.parser.SpecialTokenRegistry, com.yahoo.search.rendering.RendererRegistry, com.yahoo.language.Linguistics)", + "public void <init>(com.yahoo.search.searchchain.SearchChainRegistry, com.yahoo.prelude.IndexFacts, com.yahoo.language.process.SpecialTokenRegistry, com.yahoo.search.rendering.RendererRegistry, com.yahoo.language.Linguistics)", "public static com.yahoo.search.searchchain.Execution$Context createContextStub()", "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.prelude.IndexFacts)", "public static com.yahoo.search.searchchain.Execution$Context createContextStub(com.yahoo.search.searchchain.SearchChainRegistry, com.yahoo.prelude.IndexFacts)", @@ -7779,8 +7779,8 @@ "public void setIndexFacts(com.yahoo.prelude.IndexFacts)", "public com.yahoo.search.searchchain.SearchChainRegistry searchChainRegistry()", "public com.yahoo.search.rendering.RendererRegistry rendererRegistry()", - "public com.yahoo.prelude.query.parser.SpecialTokenRegistry getTokenRegistry()", - "public void setTokenRegistry(com.yahoo.prelude.query.parser.SpecialTokenRegistry)", + "public com.yahoo.language.process.SpecialTokenRegistry getTokenRegistry()", + "public void setTokenRegistry(com.yahoo.language.process.SpecialTokenRegistry)", "public void setDetailedDiagnostics(boolean)", "public boolean getDetailedDiagnostics()", "public boolean getBreakdown()", diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java index f0656efa59a..b71bd57539f 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/Tokenizer.java @@ -3,6 +3,7 @@ package com.yahoo.prelude.query.parser; import com.yahoo.language.Linguistics; import com.yahoo.language.process.CharacterClasses; +import com.yahoo.language.process.SpecialTokens; import com.yahoo.prelude.Index; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.query.Substring; diff --git a/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java b/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java index 1b35296082e..df96d314455 100644 --- a/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java +++ b/container-search/src/main/java/com/yahoo/search/query/parser/ParserEnvironment.java @@ -4,7 +4,7 @@ package com.yahoo.search.query.parser; import com.yahoo.language.Linguistics; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.prelude.IndexFacts; -import com.yahoo.prelude.query.parser.SpecialTokens; +import com.yahoo.language.process.SpecialTokens; import com.yahoo.search.Searcher; import com.yahoo.search.searchchain.Execution; diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java b/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java index 7adfccc2ed1..1954e6e657e 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/Execution.java @@ -6,7 +6,7 @@ import com.yahoo.language.Linguistics; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.Ping; import com.yahoo.prelude.Pong; -import com.yahoo.prelude.query.parser.SpecialTokenRegistry; +import com.yahoo.language.process.SpecialTokenRegistry; import com.yahoo.processing.Processor; import com.yahoo.processing.Request; import com.yahoo.processing.Response; diff --git a/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java b/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java index 31b6d06f78e..a813229c984 100644 --- a/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java +++ b/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java @@ -13,7 +13,7 @@ import com.yahoo.language.Linguistics; import com.yahoo.language.simple.SimpleLinguistics; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.IndexModel; -import com.yahoo.prelude.query.parser.SpecialTokenRegistry; +import com.yahoo.language.process.SpecialTokenRegistry; import com.yahoo.processing.rendering.Renderer; import com.yahoo.search.Searcher; import com.yahoo.search.config.IndexInfoConfig; diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java index eb86af993d7..cef8ae1751c 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParseTestCase.java @@ -18,16 +18,14 @@ import com.yahoo.prelude.query.PhraseSegmentItem; import com.yahoo.prelude.query.PrefixItem; import com.yahoo.prelude.query.RankItem; import com.yahoo.prelude.query.SubstringItem; -import com.yahoo.prelude.query.SubstringItem; import com.yahoo.prelude.query.SuffixItem; import com.yahoo.prelude.query.TaggableItem; import com.yahoo.prelude.query.WordItem; -import com.yahoo.prelude.query.parser.SpecialTokens; +import com.yahoo.language.process.SpecialTokens; import com.yahoo.prelude.query.parser.TestLinguistics; import com.yahoo.search.Query; import org.junit.Test; -import java.util.Collections; import java.util.Iterator; import static org.junit.Assert.assertEquals; diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParsingTester.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParsingTester.java index a17d791f906..fd7e4cbe0e6 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParsingTester.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/ParsingTester.java @@ -11,8 +11,8 @@ import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.IndexModel; import com.yahoo.prelude.query.Item; import com.yahoo.prelude.query.NullItem; -import com.yahoo.prelude.query.parser.SpecialTokenRegistry; -import com.yahoo.prelude.query.parser.SpecialTokens; +import com.yahoo.language.process.SpecialTokenRegistry; +import com.yahoo.language.process.SpecialTokens; import com.yahoo.search.Query; import com.yahoo.search.config.IndexInfoConfig; import com.yahoo.search.query.parser.Parsable; diff --git a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java index 5634cf67212..e10fbd71c72 100644 --- a/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java +++ b/container-search/src/test/java/com/yahoo/prelude/query/parser/test/TokenizerTestCase.java @@ -6,11 +6,10 @@ import com.yahoo.prelude.Index; import com.yahoo.prelude.IndexFacts; import com.yahoo.prelude.IndexModel; import com.yahoo.prelude.SearchDefinition; -import com.yahoo.prelude.query.parser.SpecialTokenRegistry; -import com.yahoo.prelude.query.parser.SpecialTokens; +import com.yahoo.language.process.SpecialTokenRegistry; +import com.yahoo.language.process.SpecialTokens; import com.yahoo.prelude.query.parser.Token; import com.yahoo.prelude.query.parser.Tokenizer; -import com.yahoo.vespa.configdefinition.SpecialtokensConfig; import org.junit.Test; import java.util.ArrayList; @@ -745,34 +744,6 @@ public class TokenizerTestCase { assertEquals(new Token(WORD, "a'"), tokens.get(30)); } - @Test - public void testSpecialTokensConfig() { - var builder = new SpecialtokensConfig.Builder(); - var tokenBuilder = new SpecialtokensConfig.Tokenlist.Builder(); - tokenBuilder.name("default"); - - var tokenListBuilder1 = new SpecialtokensConfig.Tokenlist.Tokens.Builder(); - tokenListBuilder1.token("c++"); - tokenListBuilder1.replace("cpp"); - tokenBuilder.tokens(tokenListBuilder1); - - var tokenListBuilder2 = new SpecialtokensConfig.Tokenlist.Tokens.Builder(); - tokenListBuilder2.token("..."); - tokenBuilder.tokens(tokenListBuilder2); - - builder.tokenlist(tokenBuilder); - - var registry = new SpecialTokenRegistry(builder.build()); - - var defaultTokens = registry.getSpecialTokens("default"); - assertEquals("default", defaultTokens.name()); - assertEquals(2, defaultTokens.tokens().size()); - assertEquals("c++", defaultTokens.tokens().get(0).token()); - assertEquals("cpp", defaultTokens.tokens().get(0).replacement()); - assertEquals("...", defaultTokens.tokens().get(1).token()); - assertEquals("...", defaultTokens.tokens().get(1).replacement()); - } - private SpecialTokenRegistry createSpecialTokens() { List<SpecialTokens.Token> tokens = new ArrayList<>(); tokens.add(new SpecialTokens.Token("c+")); diff --git a/container-search/src/test/java/com/yahoo/search/query/rewrite/RewriterFeaturesTestCase.java b/container-search/src/test/java/com/yahoo/search/query/rewrite/RewriterFeaturesTestCase.java index 5508c2a73a7..08146bbe069 100644 --- a/container-search/src/test/java/com/yahoo/search/query/rewrite/RewriterFeaturesTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/query/rewrite/RewriterFeaturesTestCase.java @@ -8,7 +8,7 @@ import org.junit.Test; import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.CompositeItem; import com.yahoo.prelude.query.Item; -import com.yahoo.prelude.query.parser.SpecialTokenRegistry; +import com.yahoo.language.process.SpecialTokenRegistry; import com.yahoo.search.Query; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.searchchain.Execution.Context; diff --git a/linguistics/abi-spec.json b/linguistics/abi-spec.json index 58b838d7332..34c388b8a2e 100644 --- a/linguistics/abi-spec.json +++ b/linguistics/abi-spec.json @@ -427,6 +427,57 @@ ], "fields": [] }, + "com.yahoo.language.process.SpecialTokenRegistry": { + "superClass": "java.lang.Object", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>()", + "public void <init>(com.yahoo.vespa.configdefinition.SpecialtokensConfig)", + "public void <init>(java.util.List)", + "public com.yahoo.language.process.SpecialTokens getSpecialTokens(java.lang.String)" + ], + "fields": [] + }, + "com.yahoo.language.process.SpecialTokens$Token": { + "superClass": "java.lang.Object", + "interfaces": [ + "java.lang.Comparable" + ], + "attributes": [ + "public", + "final" + ], + "methods": [ + "public void <init>(java.lang.String)", + "public void <init>(java.lang.String, java.lang.String)", + "public java.lang.String token()", + "public java.lang.String replacement()", + "public int compareTo(com.yahoo.language.process.SpecialTokens$Token)", + "public boolean equals(java.lang.Object)", + "public int hashCode()", + "public java.lang.String toString()", + "public bridge synthetic int compareTo(java.lang.Object)" + ], + "fields": [] + }, + "com.yahoo.language.process.SpecialTokens": { + "superClass": "java.lang.Object", + "interfaces": [], + "attributes": [ + "public" + ], + "methods": [ + "public void <init>(java.lang.String, java.util.List)", + "public java.lang.String name()", + "public java.util.List tokens()", + "public com.yahoo.language.process.SpecialTokens$Token tokenize(java.lang.String, boolean)", + "public static com.yahoo.language.process.SpecialTokens empty()" + ], + "fields": [] + }, "com.yahoo.language.process.StemList": { "superClass": "java.util.AbstractList", "interfaces": [], diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java index 9c735a031d7..b65c3ba663c 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokenRegistry.java +++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokenRegistry.java @@ -1,12 +1,11 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.prelude.query.parser; +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.process; import com.yahoo.vespa.configdefinition.SpecialtokensConfig; import com.yahoo.vespa.configdefinition.SpecialtokensConfig.Tokenlist; import com.yahoo.vespa.configdefinition.SpecialtokensConfig.Tokenlist.Tokens; import java.util.ArrayList; -import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -24,9 +23,7 @@ public class SpecialTokenRegistry { * The current special token lists, indexed on name. * These lists are unmodifiable and used directly by clients of this */ - private Map<String, SpecialTokens> specialTokenMap; - - private boolean frozen = false; + private final Map<String, SpecialTokens> specialTokenMap; /** Creates an empty special token registry */ public SpecialTokenRegistry() { @@ -40,11 +37,6 @@ public class SpecialTokenRegistry { public SpecialTokenRegistry(List<SpecialTokens> specialTokensList) { specialTokenMap = specialTokensList.stream().collect(Collectors.toMap(t -> t.name(), t -> t)); - freeze(); - } - - private void freeze() { - frozen = true; } private static List<SpecialTokens> specialTokensFrom(SpecialtokensConfig config) { @@ -62,12 +54,6 @@ public class SpecialTokenRegistry { return specialTokensList; } - private void ensureNotFrozen() { - if (frozen) { - throw new IllegalStateException("Tried to modify a frozen SpecialTokenRegistry instance."); - } - } - /** * Returns the list of special tokens for a given name. * diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java index 4b29b50f095..c1b05a00377 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/SpecialTokens.java +++ b/linguistics/src/main/java/com/yahoo/language/process/SpecialTokens.java @@ -1,5 +1,5 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -package com.yahoo.prelude.query.parser; +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.process; import java.util.ArrayList; import java.util.Collections; @@ -8,8 +8,6 @@ import java.util.List; import java.util.Locale; import java.util.Objects; -import com.yahoo.prelude.query.Substring; - import static com.yahoo.language.LinguisticsCase.toLowerCase; /** diff --git a/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java b/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java new file mode 100644 index 00000000000..fee70e3a407 --- /dev/null +++ b/linguistics/src/test/java/com/yahoo/language/process/SpecialTokensTestCase.java @@ -0,0 +1,42 @@ +// Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package com.yahoo.language.process; + +import com.yahoo.vespa.configdefinition.SpecialtokensConfig; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +/** + * @author bratseth + */ +public class SpecialTokensTestCase { + + @Test + public void testSpecialTokensConfig() { + var builder = new SpecialtokensConfig.Builder(); + var tokenBuilder = new SpecialtokensConfig.Tokenlist.Builder(); + tokenBuilder.name("default"); + + var tokenListBuilder1 = new SpecialtokensConfig.Tokenlist.Tokens.Builder(); + tokenListBuilder1.token("c++"); + tokenListBuilder1.replace("cpp"); + tokenBuilder.tokens(tokenListBuilder1); + + var tokenListBuilder2 = new SpecialtokensConfig.Tokenlist.Tokens.Builder(); + tokenListBuilder2.token("..."); + tokenBuilder.tokens(tokenListBuilder2); + + builder.tokenlist(tokenBuilder); + + var registry = new SpecialTokenRegistry(builder.build()); + + var defaultTokens = registry.getSpecialTokens("default"); + assertEquals("default", defaultTokens.name()); + assertEquals(2, defaultTokens.tokens().size()); + assertEquals("c++", defaultTokens.tokens().get(0).token()); + assertEquals("cpp", defaultTokens.tokens().get(0).replacement()); + assertEquals("...", defaultTokens.tokens().get(1).token()); + assertEquals("...", defaultTokens.tokens().get(1).replacement()); + } + +} |