diff options
author | Tor Egge <Tor.Egge@online.no> | 2023-10-16 12:58:04 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2023-10-16 12:58:04 +0200 |
commit | f67d01124f2a19e77c94039e571db3e4c60f4ed1 (patch) | |
tree | 3563782080658bec8986658822c8621e34e79b71 /config-model/src | |
parent | 0ccfe8aab8c12ecd518f882a048f8a13fb2084f1 (diff) |
Add linguistics tokens document field writer.
Diffstat (limited to 'config-model/src')
8 files changed, 30 insertions, 3 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java index ddb6b004070..94b456b3f5e 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java @@ -155,7 +155,8 @@ public class SummaryClass extends Derived { summaryField.getTransform() == SummaryTransform.GEOPOS || summaryField.getTransform() == SummaryTransform.POSITIONS || summaryField.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER || - summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER) + summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER || + summaryField.getTransform() == SummaryTransform.LINGUISTICS_TOKENS) { return summaryField.getSingleSource(); } else if (summaryField.getTransform().isDynamic()) { diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java index c1e6dd2aea3..54a4883fa00 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java @@ -92,6 +92,8 @@ public class SummaryClassField { return Type.FEATUREDATA; } else if (transform != null && transform.equals(SummaryTransform.SUMMARYFEATURES)) { return Type.FEATUREDATA; + } else if (transform != null && transform.equals(SummaryTransform.LINGUISTICS_TOKENS)) { + return Type.JSONSTRING; } else { return Type.LONGSTRING; } diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java index 7c6d62580cb..61f68defe40 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java @@ -217,6 +217,8 @@ public class ConvertParsedFields { transform = SummaryTransform.MATCHED_ELEMENTS_FILTER; } else if (parsed.getDynamic()) { transform = SummaryTransform.DYNAMICTEASER; + } else if (parsed.getLinguisticsTokens()) { + transform = SummaryTransform.LINGUISTICS_TOKENS; } if (parsed.getBolded()) { transform = transform.bold(); diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java index 1d5d73635e7..446981f1ba4 100644 --- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java +++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java @@ -18,6 +18,7 @@ class ParsedSummaryField extends ParsedBlock { private boolean isMEO = false; private boolean isFull = false; private boolean isBold = false; + private boolean isLinguisticsTokens = false; private final List<String> sources = new ArrayList<>(); private final List<String> destinations = new ArrayList<>(); @@ -37,6 +38,7 @@ class ParsedSummaryField extends ParsedBlock { boolean getDynamic() { return isDyn; } boolean getFull() { return isFull; } boolean getMatchedElementsOnly() { return isMEO; } + boolean getLinguisticsTokens() { return isLinguisticsTokens; } void addDestination(String dst) { destinations.add(dst); } void addSource(String src) { sources.add(src); } @@ -44,6 +46,7 @@ class ParsedSummaryField extends ParsedBlock { void setDynamic() { this.isDyn = true; } void setFull() { this.isFull = true; } void setMatchedElementsOnly() { this.isMEO = true; } + void setLinguisticsTokens() { this.isLinguisticsTokens = true; } void setType(ParsedType value) { verifyThat(type == null, "Cannot change type from ", type, "to", value); this.type = value; diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java index 1d279242895..e54f8d3e881 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java @@ -78,7 +78,8 @@ public class IndexingOutputs extends Processor { return; } dynamicSummary.add(summaryName); - } else if (summaryTransform != SummaryTransform.ATTRIBUTE) { + } else if (summaryTransform != SummaryTransform.ATTRIBUTE && + summaryTransform != SummaryTransform.LINGUISTICS_TOKENS) { staticSummary.add(summaryName); } } diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java index 575a3a748e6..c7c1606951e 100644 --- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java +++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java @@ -23,7 +23,8 @@ public enum SummaryTransform { MATCHED_ELEMENTS_FILTER("matchedelementsfilter"), MATCHED_ATTRIBUTE_ELEMENTS_FILTER("matchedattributeelementsfilter"), COPY("copy"), - DOCUMENT_ID("documentid"); + DOCUMENT_ID("documentid"), + LINGUISTICS_TOKENS("linguistics-tokens"); private final String name; diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj index ae4c3b365d8..a5238afc86a 100644 --- a/config-model/src/main/javacc/SchemaParser.jj +++ b/config-model/src/main/javacc/SchemaParser.jj @@ -201,6 +201,7 @@ TOKEN : | < FULL: "full" > | < STATIC: "static" > | < DYNAMIC: "dynamic" > +| < LINGUISTICS_TOKENS: "linguistics-tokens" > | < MATCHED_ELEMENTS_ONLY: "matched-elements-only" > | < SSCONTEXTUAL: "contextual" > | < SSOVERRIDE: "override" > @@ -1128,6 +1129,7 @@ void summaryInFieldShort(ParsedField field) : <COLON> ( <DYNAMIC> { psf.setDynamic(); } | <MATCHED_ELEMENTS_ONLY> { psf.setMatchedElementsOnly(); } | (<FULL> | <STATIC>) { psf.setFull(); } + | <LINGUISTICS_TOKENS> { psf.setLinguisticsTokens(); } ) } @@ -1173,6 +1175,7 @@ void summaryTransform(ParsedSummaryField field) : { } ( <DYNAMIC> { field.setDynamic(); } | <MATCHED_ELEMENTS_ONLY> { field.setMatchedElementsOnly(); } | (<FULL> | <STATIC>) { field.setFull(); } + | <LINGUISTICS_TOKENS> { field.setLinguisticsTokens(); } ) } @@ -2712,6 +2715,7 @@ String identifier() : { } | <INLINE> | <INPUTS> | <INTEGER> + | <LINGUISTICS_TOKENS> | <LITERAL> | <LOCALE> | <LONG> diff --git a/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java index 1f18a5ed49b..4128baddcb7 100644 --- a/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java +++ b/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java @@ -227,6 +227,19 @@ public class SummaryTestCase extends AbstractSchemaTestCase { } @Test + void linguistics_tokenizer_override() throws ParseException { + var schema = buildSchema("field foo type string { indexing: summary }", + joinLines("document-summary bar {", + " summary baz type string {", + " source: foo ", + " linguistics-tokens", + " }", + " from-disk", + "}")); + assertOverride(schema, "baz", SummaryTransform.LINGUISTICS_TOKENS.getName(), "foo", "bar"); + } + + @Test void documentid_summary_transform_requires_disk_access() { assertFalse(SummaryTransform.DOCUMENT_ID.isInMemory()); } |