diff options
author | Geir Storli <geirst@verizonmedia.com> | 2019-05-02 12:26:45 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-05-02 12:26:45 +0200 |
commit | cc987cc9321f64a058ed9a2519495bb701ff527a (patch) | |
tree | 7b0f7ebc23b6fd4aaf8032ca681285772bafaac7 | |
parent | 05fb9663a0e78ee98f50e8ec57229654b5a4bb3c (diff) | |
parent | 3ad304534d9d3b8b9fe04f66a83b4fa28ccb0334 (diff) |
Merge pull request #9256 from vespa-engine/geirst/experimental-posting-list-format-flag
Add flag to trigger use of experimental posting list format for an in…
11 files changed, 97 insertions, 1 deletion
diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java index 1620c90acd1..d7e9e0da081 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/Index.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/Index.java @@ -56,6 +56,9 @@ public class Index implements Cloneable, Serializable { /** The boolean index definition, if set */ private BooleanIndexDefinition boolIndex; + // TODO: Remove when experimental posting list format is made default + private boolean experimentalPostingListFormat = false; + public Index(String name) { this(name, false); } @@ -181,4 +184,12 @@ public class Index implements Cloneable, Serializable { boolIndex = def; } + public void setExperimentalPostingListFormat(boolean value) { + experimentalPostingListFormat = value; + } + + public boolean useExperimentalPostingListFormat() { + return experimentalPostingListFormat; + } + } diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java index f8766afbc7b..6f6e97a0876 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/derived/IndexSchema.java @@ -113,7 +113,8 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { .datatype(IndexschemaConfig.Indexfield.Datatype.Enum.valueOf(f.getType())) .prefix(f.hasPrefix()) .phrases(f.hasPhrases()) - .positions(f.hasPositions()); + .positions(f.hasPositions()) + .experimentalpostinglistformat(f.useExperimentalPostingListFormat()); if (!f.getCollectionType().equals("SINGLE")) { ifB.collectiontype(IndexschemaConfig.Indexfield.Collectiontype.Enum.valueOf(f.getCollectionType())); } @@ -174,6 +175,8 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { private boolean phrases = false; 
// TODO dead, but keep a while to ensure config compatibility? private boolean positions = true;// TODO dead, but keep a while to ensure config compatibility? private BooleanIndexDefinition boolIndex = null; + // TODO: Remove when experimental posting list format is made default + private boolean experimentalPostingListFormat = false; public IndexField(String name, Index.Type type, DataType sdFieldType) { this.name = name; @@ -183,6 +186,7 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { public void setIndexSettings(com.yahoo.searchdefinition.Index index) { if (type.equals(Index.Type.TEXT)) { prefix = index.isPrefix(); + experimentalPostingListFormat = index.useExperimentalPostingListFormat(); } sdType = index.getType(); boolIndex = index.getBooleanIndexDefiniton(); @@ -205,6 +209,7 @@ public class IndexSchema extends Derived implements IndexschemaConfig.Producer { public boolean hasPrefix() { return prefix; } public boolean hasPhrases() { return phrases; } public boolean hasPositions() { return positions; } + public boolean useExperimentalPostingListFormat() { return experimentalPostingListFormat; } public BooleanIndexDefinition getBooleanIndexDefinition() { return boolIndex; diff --git a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java index 6df4ca2a6e1..459bb247e5f 100644 --- a/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java +++ b/config-model/src/main/java/com/yahoo/searchdefinition/fieldoperation/IndexOperation.java @@ -29,6 +29,8 @@ public class IndexOperation implements FieldOperation { private OptionalLong lowerBound = OptionalLong.empty(); private OptionalLong upperBound = OptionalLong.empty(); private OptionalDouble densePostingListThreshold = OptionalDouble.empty(); + // TODO: Remove when experimental posting list format is made default + private 
Optional<Boolean> experimentalPostingListFormat = Optional.empty(); public String getIndexName() { return indexName; @@ -87,6 +89,9 @@ public class IndexOperation implements FieldOperation { index.setBooleanIndexDefiniton( new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold)); } + if (experimentalPostingListFormat.isPresent()) { + index.setExperimentalPostingListFormat(experimentalPostingListFormat.get()); + } } public Type getType() { @@ -112,5 +117,8 @@ public class IndexOperation implements FieldOperation { public void setDensePostingListThreshold(double densePostingListThreshold) { this.densePostingListThreshold = OptionalDouble.of(densePostingListThreshold); } + public void setExperimentalPostingListFormat(boolean value) { + experimentalPostingListFormat = Optional.of(value); + } } diff --git a/config-model/src/main/javacc/SDParser.jj b/config-model/src/main/javacc/SDParser.jj index de5146ec7d2..571ad452b01 100644 --- a/config-model/src/main/javacc/SDParser.jj +++ b/config-model/src/main/javacc/SDParser.jj @@ -331,6 +331,7 @@ TOKEN : | < LOWERBOUND: "lower-bound" > | < UPPERBOUND: "upper-bound" > | < DENSEPOSTINGLISTTHRESHOLD: "dense-posting-list-threshold" > +| < EXPERIMENTALPOSTINGLISTFORMAT: "experimental-posting-list-format" > | < SUMMARYFEATURES_SL: "summary-features" (" ")* ":" (~["}","\n"])* ("\n")? > | < SUMMARYFEATURES_ML: "summary-features" (<SEARCHLIB_SKIP>)? "{" (~["}"])* "}" > | < RANKFEATURES_SL: "rank-features" (" ")* ":" (~["}","\n"])* ("\n")? 
> @@ -1781,6 +1782,7 @@ Object indexBody(IndexOperation index) : | <LOWERBOUND> <COLON> num = consumeLong() { index.setLowerBound(num); } | <UPPERBOUND> <COLON> num = consumeLong() { index.setUpperBound(num); } | <DENSEPOSTINGLISTTHRESHOLD> <COLON> threshold = consumeFloat() { index.setDensePostingListThreshold(threshold); } + | <EXPERIMENTALPOSTINGLISTFORMAT> { index.setExperimentalPostingListFormat(true); } ) { return null; } } diff --git a/config-model/src/test/derived/indexschema/index-info.cfg b/config-model/src/test/derived/indexschema/index-info.cfg index 2ba3a5a99ee..3a420e12a24 100644 --- a/config-model/src/test/derived/indexschema/index-info.cfg +++ b/config-model/src/test/derived/indexschema/index-info.cfg @@ -133,6 +133,16 @@ indexinfo[].command[].indexname "exact2" indexinfo[].command[].command "lowercase" indexinfo[].command[].indexname "exact2" indexinfo[].command[].command "exact @@" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "index" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "lowercase" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "stem:BEST" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "normalize" +indexinfo[].command[].indexname "experimental" +indexinfo[].command[].command "plain-tokens" indexinfo[].command[].indexname "ia" indexinfo[].command[].command "index" indexinfo[].command[].indexname "ia" diff --git a/config-model/src/test/derived/indexschema/indexschema.cfg b/config-model/src/test/derived/indexschema/indexschema.cfg index d1f43c4a81e..612af087b0c 100644 --- a/config-model/src/test/derived/indexschema/indexschema.cfg +++ b/config-model/src/test/derived/indexschema/indexschema.cfg @@ -5,6 +5,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sb" 
indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -12,6 +13,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sc" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -19,6 +21,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sd" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -26,6 +29,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sf" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -33,6 +37,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sg" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -40,6 +45,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -47,6 +53,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "si" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -54,6 +61,7 @@ indexfield[].prefix true indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "exact1" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -61,6 +69,7 @@ 
indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "exact2" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -68,6 +77,15 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false +indexfield[].name "experimental" +indexfield[].datatype STRING +indexfield[].collectiontype SINGLE +indexfield[].prefix false +indexfield[].phrases false +indexfield[].positions true +indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat true indexfield[].name "nostemstring1" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -75,6 +93,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "nostemstring2" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -82,6 +101,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "nostemstring3" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -89,6 +109,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "nostemstring4" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -96,6 +117,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "fs9" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -103,6 +125,7 @@ indexfield[].prefix false indexfield[].phrases 
false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sd_literal" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -110,6 +133,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.fragment" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -117,6 +141,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.host" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -124,6 +149,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.hostname" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -131,6 +157,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.path" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -138,6 +165,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.port" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -145,6 +173,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.query" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -152,6 +181,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true 
indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "sh.scheme" indexfield[].datatype STRING indexfield[].collectiontype SINGLE @@ -159,6 +189,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false fieldset[].name "fs9" fieldset[].field[].name "se" fieldset[].name "fs1" diff --git a/config-model/src/test/derived/indexschema/indexschema.sd b/config-model/src/test/derived/indexschema/indexschema.sd index c28711813c3..44956f30e9e 100644 --- a/config-model/src/test/derived/indexschema/indexschema.sd +++ b/config-model/src/test/derived/indexschema/indexschema.sd @@ -56,6 +56,10 @@ search indexschema { exact } } + field experimental type string { + indexing: index + index: experimental-posting-list-format + } # integer fields field ia type int { diff --git a/config-model/src/test/derived/indexschema/vsmfields.cfg b/config-model/src/test/derived/indexschema/vsmfields.cfg index b6c50869e34..30ed67f61b7 100644 --- a/config-model/src/test/derived/indexschema/vsmfields.cfg +++ b/config-model/src/test/derived/indexschema/vsmfields.cfg @@ -55,6 +55,11 @@ fieldspec[].searchmethod AUTOUTF8 fieldspec[].arg1 "exact" fieldspec[].maxlength 1048576 fieldspec[].fieldtype INDEX +fieldspec[].name "experimental" +fieldspec[].searchmethod AUTOUTF8 +fieldspec[].arg1 "" +fieldspec[].maxlength 1048576 +fieldspec[].fieldtype INDEX fieldspec[].name "ia" fieldspec[].searchmethod INT32 fieldspec[].arg1 "" @@ -133,6 +138,8 @@ documenttype[].index[].name "exact1" documenttype[].index[].field[].name "exact1" documenttype[].index[].name "exact2" documenttype[].index[].field[].name "exact2" +documenttype[].index[].name "experimental" +documenttype[].index[].field[].name "experimental" documenttype[].index[].name "ia" documenttype[].index[].field[].name "ia" documenttype[].index[].name "ib" diff --git 
a/config-model/src/test/derived/uri_array/indexschema.cfg b/config-model/src/test/derived/uri_array/indexschema.cfg index 8593c594e3c..1a556daf558 100644 --- a/config-model/src/test/derived/uri_array/indexschema.cfg +++ b/config-model/src/test/derived/uri_array/indexschema.cfg @@ -5,6 +5,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.fragment" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -12,6 +13,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.host" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -19,6 +21,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.hostname" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -26,6 +29,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.path" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -33,6 +37,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.port" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -40,6 +45,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.query" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -47,6 +53,7 @@ indexfield[].prefix 
false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.scheme" indexfield[].datatype STRING indexfield[].collectiontype ARRAY @@ -54,3 +61,4 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false diff --git a/config-model/src/test/derived/uri_wset/indexschema.cfg b/config-model/src/test/derived/uri_wset/indexschema.cfg index a432556bb6b..7fe7f7a4941 100644 --- a/config-model/src/test/derived/uri_wset/indexschema.cfg +++ b/config-model/src/test/derived/uri_wset/indexschema.cfg @@ -5,6 +5,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.fragment" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -12,6 +13,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.host" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -19,6 +21,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.hostname" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -26,6 +29,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.path" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -33,6 +37,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 
+indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.port" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -40,6 +45,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.query" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -47,6 +53,7 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false indexfield[].name "my_uri.scheme" indexfield[].datatype STRING indexfield[].collectiontype WEIGHTEDSET @@ -54,3 +61,4 @@ indexfield[].prefix false indexfield[].phrases false indexfield[].positions true indexfield[].averageelementlen 512 +indexfield[].experimentalpostinglistformat false diff --git a/configdefinitions/src/vespa/indexschema.def b/configdefinitions/src/vespa/indexschema.def index 7153164bfab..c0c9f175837 100644 --- a/configdefinitions/src/vespa/indexschema.def +++ b/configdefinitions/src/vespa/indexschema.def @@ -16,6 +16,8 @@ indexfield[].phrases bool default=false indexfield[].positions bool default=true ## Average element length indexfield[].averageelementlen int default=512 +## Whether we should use a new experimental posting list format for this field. +indexfield[].experimentalpostinglistformat bool default=false ## The name of the field collection (aka logical view). fieldset[].name string |