summary | refs | log | tree | commit | diff | stats
path: root/config-model
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@yahooinc.com> 2023-10-16 16:32:02 +0200
committer GitHub <noreply@github.com> 2023-10-16 16:32:02 +0200
commit 17149c1354cc0a94290a5600bea2f1303d2dda31 (patch)
tree 4b7ef2738420d0d866dcb7779f26e4b48c3cbd75 /config-model
parent 25d1b91aa42852e0c0dfa46a747a297b2b8bab9a (diff)
parent 89a4db5e76d6d31d43e168eec5066c6c3f81b5c0 (diff)
Merge pull request #28943 from vespa-engine/toregge/add-linguistics-tokens-dfw
Add linguistics tokens document field writer.
Diffstat (limited to 'config-model')
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java 3
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java 2
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java 3
-rw-r--r-- config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java 3
-rw-r--r-- config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java 3
-rw-r--r-- config-model/src/main/javacc/SchemaParser.jj 4
-rw-r--r-- config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java 13
8 files changed, 30 insertions, 3 deletions
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java
index ddb6b004070..94b456b3f5e 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClass.java
@@ -155,7 +155,8 @@ public class SummaryClass extends Derived {
summaryField.getTransform() == SummaryTransform.GEOPOS ||
summaryField.getTransform() == SummaryTransform.POSITIONS ||
summaryField.getTransform() == SummaryTransform.MATCHED_ELEMENTS_FILTER ||
- summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER)
+ summaryField.getTransform() == SummaryTransform.MATCHED_ATTRIBUTE_ELEMENTS_FILTER ||
+ summaryField.getTransform() == SummaryTransform.LINGUISTICS_TOKENS)
{
return summaryField.getSingleSource();
} else if (summaryField.getTransform().isDynamic()) {
diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java
index c1e6dd2aea3..54a4883fa00 100644
--- a/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java
+++ b/config-model/src/main/java/com/yahoo/schema/derived/SummaryClassField.java
@@ -92,6 +92,8 @@ public class SummaryClassField {
return Type.FEATUREDATA;
} else if (transform != null && transform.equals(SummaryTransform.SUMMARYFEATURES)) {
return Type.FEATUREDATA;
+ } else if (transform != null && transform.equals(SummaryTransform.LINGUISTICS_TOKENS)) {
+ return Type.JSONSTRING;
} else {
return Type.LONGSTRING;
}
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
index 7c6d62580cb..61f68defe40 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ConvertParsedFields.java
@@ -217,6 +217,8 @@ public class ConvertParsedFields {
transform = SummaryTransform.MATCHED_ELEMENTS_FILTER;
} else if (parsed.getDynamic()) {
transform = SummaryTransform.DYNAMICTEASER;
+ } else if (parsed.getLinguisticsTokens()) {
+ transform = SummaryTransform.LINGUISTICS_TOKENS;
}
if (parsed.getBolded()) {
transform = transform.bold();
diff --git a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java
index 1d5d73635e7..446981f1ba4 100644
--- a/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java
+++ b/config-model/src/main/java/com/yahoo/schema/parser/ParsedSummaryField.java
@@ -18,6 +18,7 @@ class ParsedSummaryField extends ParsedBlock {
private boolean isMEO = false;
private boolean isFull = false;
private boolean isBold = false;
+ private boolean isLinguisticsTokens = false;
private final List<String> sources = new ArrayList<>();
private final List<String> destinations = new ArrayList<>();
@@ -37,6 +38,7 @@ class ParsedSummaryField extends ParsedBlock {
boolean getDynamic() { return isDyn; }
boolean getFull() { return isFull; }
boolean getMatchedElementsOnly() { return isMEO; }
+ boolean getLinguisticsTokens() { return isLinguisticsTokens; }
void addDestination(String dst) { destinations.add(dst); }
void addSource(String src) { sources.add(src); }
@@ -44,6 +46,7 @@ class ParsedSummaryField extends ParsedBlock {
void setDynamic() { this.isDyn = true; }
void setFull() { this.isFull = true; }
void setMatchedElementsOnly() { this.isMEO = true; }
+ void setLinguisticsTokens() { this.isLinguisticsTokens = true; }
void setType(ParsedType value) {
verifyThat(type == null, "Cannot change type from ", type, "to", value);
this.type = value;
diff --git a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java
index 1d279242895..e54f8d3e881 100644
--- a/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java
+++ b/config-model/src/main/java/com/yahoo/schema/processing/IndexingOutputs.java
@@ -78,7 +78,8 @@ public class IndexingOutputs extends Processor {
return;
}
dynamicSummary.add(summaryName);
- } else if (summaryTransform != SummaryTransform.ATTRIBUTE) {
+ } else if (summaryTransform != SummaryTransform.ATTRIBUTE &&
+ summaryTransform != SummaryTransform.LINGUISTICS_TOKENS) {
staticSummary.add(summaryName);
}
}
diff --git a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java
index 575a3a748e6..c7c1606951e 100644
--- a/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java
+++ b/config-model/src/main/java/com/yahoo/vespa/documentmodel/SummaryTransform.java
@@ -23,7 +23,8 @@ public enum SummaryTransform {
MATCHED_ELEMENTS_FILTER("matchedelementsfilter"),
MATCHED_ATTRIBUTE_ELEMENTS_FILTER("matchedattributeelementsfilter"),
COPY("copy"),
- DOCUMENT_ID("documentid");
+ DOCUMENT_ID("documentid"),
+ LINGUISTICS_TOKENS("linguistics-tokens");
private final String name;
diff --git a/config-model/src/main/javacc/SchemaParser.jj b/config-model/src/main/javacc/SchemaParser.jj
index ae4c3b365d8..a5238afc86a 100644
--- a/config-model/src/main/javacc/SchemaParser.jj
+++ b/config-model/src/main/javacc/SchemaParser.jj
@@ -201,6 +201,7 @@ TOKEN :
| < FULL: "full" >
| < STATIC: "static" >
| < DYNAMIC: "dynamic" >
+| < LINGUISTICS_TOKENS: "linguistics-tokens" >
| < MATCHED_ELEMENTS_ONLY: "matched-elements-only" >
| < SSCONTEXTUAL: "contextual" >
| < SSOVERRIDE: "override" >
@@ -1128,6 +1129,7 @@ void summaryInFieldShort(ParsedField field) :
<COLON> ( <DYNAMIC> { psf.setDynamic(); }
| <MATCHED_ELEMENTS_ONLY> { psf.setMatchedElementsOnly(); }
| (<FULL> | <STATIC>) { psf.setFull(); }
+ | <LINGUISTICS_TOKENS> { psf.setLinguisticsTokens(); }
)
}
@@ -1173,6 +1175,7 @@ void summaryTransform(ParsedSummaryField field) : { }
( <DYNAMIC> { field.setDynamic(); }
| <MATCHED_ELEMENTS_ONLY> { field.setMatchedElementsOnly(); }
| (<FULL> | <STATIC>) { field.setFull(); }
+ | <LINGUISTICS_TOKENS> { field.setLinguisticsTokens(); }
)
}
@@ -2712,6 +2715,7 @@ String identifier() : { }
| <INLINE>
| <INPUTS>
| <INTEGER>
+ | <LINGUISTICS_TOKENS>
| <LITERAL>
| <LOCALE>
| <LONG>
diff --git a/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java b/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java
index 1f18a5ed49b..4128baddcb7 100644
--- a/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java
+++ b/config-model/src/test/java/com/yahoo/schema/derived/SummaryTestCase.java
@@ -227,6 +227,19 @@ public class SummaryTestCase extends AbstractSchemaTestCase {
}
@Test
+ void linguistics_tokenizer_override() throws ParseException {
+ var schema = buildSchema("field foo type string { indexing: summary }",
+ joinLines("document-summary bar {",
+ " summary baz type string {",
+ " source: foo ",
+ " linguistics-tokens",
+ " }",
+ " from-disk",
+ "}"));
+ assertOverride(schema, "baz", SummaryTransform.LINGUISTICS_TOKENS.getName(), "foo", "bar");
+ }
+
+ @Test
void documentid_summary_transform_requires_disk_access() {
assertFalse(SummaryTransform.DOCUMENT_ID.isInMemory());
}