diff options
76 files changed, 453 insertions, 169 deletions
diff --git a/client/go/go.mod b/client/go/go.mod index ba0af5a763e..7327dafcf16 100644 --- a/client/go/go.mod +++ b/client/go/go.mod @@ -17,7 +17,7 @@ require ( github.com/stretchr/testify v1.9.0 github.com/zalando/go-keyring v0.2.4 golang.org/x/net v0.24.0 - golang.org/x/sys v0.19.0 + golang.org/x/sys v0.20.0 gopkg.in/yaml.v3 v3.0.1 ) diff --git a/client/go/go.sum b/client/go/go.sum index d985c9e7ffc..ac0ab46d1a9 100644 --- a/client/go/go.sum +++ b/client/go/go.sum @@ -91,6 +91,8 @@ golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= golang.org/x/term v0.15.0 h1:y/Oo/a/q3IXu26lQgl04j/gjuBDOBlx7X6Om1j2CPW4= diff --git a/config-model/src/main/java/com/yahoo/schema/RankProfile.java b/config-model/src/main/java/com/yahoo/schema/RankProfile.java index cdefbbf8174..60674b5487c 100644 --- a/config-model/src/main/java/com/yahoo/schema/RankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/RankProfile.java @@ -222,9 +222,10 @@ public class RankProfile implements Cloneable { this.useSignificanceModel = useSignificanceModel; } - public Boolean useSignificanceModel() { + public boolean useSignificanceModel() { if (useSignificanceModel != null) return useSignificanceModel; - return uniquelyInherited(p -> p.useSignificanceModel(), "use-model").orElse(null); + return uniquelyInherited(p -> p.useSignificanceModel(), "use-model") + .orElse(false); // Disabled by default } /** diff --git a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java index 42586fa7d75..b057624f055 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/RawRankProfile.java @@ -186,7 +186,6 @@ public class RawRankProfile { private RankingExpression globalPhaseRanking; private final int globalPhaseRerankCount; private final SerializationContext functionSerializationContext; - private Boolean useSignificanceModel; /** * Creates a raw rank profile from the given rank profile @@ -222,7 +221,6 @@ public class RawRankProfile { rankScoreDropLimit = compiled.getRankScoreDropLimit(); ignoreDefaultRankFeatures = compiled.getIgnoreDefaultRankFeatures(); rankProperties = new ArrayList<>(compiled.getRankProperties()); - useSignificanceModel = compiled.useSignificanceModel(); Map<String, RankProfile.RankingExpressionFunction> functions = compiled.getFunctions(); List<ExpressionFunction> functionExpressions = functions.values().stream().map(RankProfile.RankingExpressionFunction::function).toList(); @@ -481,9 +479,6 @@ public class RawRankProfile { if (targetHitsMaxAdjustmentFactor.isPresent()) { properties.add(new Pair<>("vespa.matching.nns.target_hits_max_adjustment_factor", String.valueOf(targetHitsMaxAdjustmentFactor.getAsDouble()))); } - if (useSignificanceModel != null) { - properties.add(new Pair<>("vespa.significance.use_model", String.valueOf(useSignificanceModel))); - } if (matchPhaseSettings != null) { properties.add(new Pair<>("vespa.matchphase.degradation.attribute", matchPhaseSettings.getAttribute())); properties.add(new Pair<>("vespa.matchphase.degradation.ascendingorder", matchPhaseSettings.getAscending() + "")); diff --git a/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java index f996b2624db..b91404be2dd 100644 --- a/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java +++ b/config-model/src/main/java/com/yahoo/schema/derived/SchemaInfo.java @@ -183,10 +183,12 @@ public final class SchemaInfo extends Derived { private void addRankProfilesConfig(SchemaInfoConfig.Schema.Builder schemaBuilder) { for (RankProfileInfo rankProfile : rankProfiles().values()) { - var rankProfileConfig = new SchemaInfoConfig.Schema.Rankprofile.Builder(); - rankProfileConfig.name(rankProfile.name()); - rankProfileConfig.hasSummaryFeatures(rankProfile.hasSummaryFeatures()); - rankProfileConfig.hasRankFeatures(rankProfile.hasRankFeatures()); + var rankProfileConfig = new SchemaInfoConfig.Schema.Rankprofile.Builder() + .name(rankProfile.name()) + .hasSummaryFeatures(rankProfile.hasSummaryFeatures()) + .hasRankFeatures(rankProfile.hasRankFeatures()) + .significance(new SchemaInfoConfig.Schema.Rankprofile.Significance.Builder() + .useModel(rankProfile.useSignificanceModel())); for (var input : rankProfile.inputs().entrySet()) { var inputConfig = new SchemaInfoConfig.Schema.Rankprofile.Input.Builder(); inputConfig.name(input.getKey().toString()); @@ -226,6 +228,7 @@ public final class SchemaInfo extends Derived { private final String name; private final boolean hasSummaryFeatures; private final boolean hasRankFeatures; + private final boolean useSignificanceModel; private final Map<Reference, RankProfile.Input> inputs; public RankProfileInfo(RankProfile profile) { @@ -233,11 +236,13 @@ public final class SchemaInfo extends Derived { this.hasSummaryFeatures = ! profile.getSummaryFeatures().isEmpty(); this.hasRankFeatures = ! profile.getRankFeatures().isEmpty(); this.inputs = profile.inputs(); + useSignificanceModel = profile.useSignificanceModel(); } public String name() { return name; } public boolean hasSummaryFeatures() { return hasSummaryFeatures; } public boolean hasRankFeatures() { return hasRankFeatures; } + public boolean useSignificanceModel() { return useSignificanceModel; } public Map<Reference, RankProfile.Input> inputs() { return inputs; } } diff --git a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java index e29f683761f..1ff019038fc 100644 --- a/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java +++ b/config-model/src/main/java/com/yahoo/schema/processing/TextMatch.java @@ -64,7 +64,7 @@ public class TextMatch extends Processor { if (fieldMatching != null) { var maxLength = fieldMatching.maxLength(); if (maxLength != null) { - ret.setMaxTokenLength(maxLength); + ret.setMaxTokenizeLength(maxLength); } var maxTermOccurrences = fieldMatching.maxTermOccurrences(); if (maxTermOccurrences != null) { diff --git a/config-model/src/test/derived/advanced/ilscripts.cfg b/config-model/src/test/derived/advanced/ilscripts.cfg index 51a49502b64..d633cd97f0c 100644 --- a/config-model/src/test/derived/advanced/ilscripts.cfg +++ b/config-model/src/test/derived/advanced/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "advanced" ilscript[].docfield[] "debug_src" diff --git a/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg b/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg index 767c3af3c19..53dc789fbb7 100644 --- a/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsimplicitstruct/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsimplicitstruct" diff --git a/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg b/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg index d8e6c882b80..b0a69c5408a 100644 --- a/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsinheritance/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsinheritance" diff --git a/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg b/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg index ae4ea621583..5ec1f839429 100644 --- a/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsinheritance2/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsinheritance2" diff --git a/config-model/src/test/derived/annotationsreference/ilscripts.cfg b/config-model/src/test/derived/annotationsreference/ilscripts.cfg index 812f5e44545..eaa20043be8 100644 --- a/config-model/src/test/derived/annotationsreference/ilscripts.cfg +++ b/config-model/src/test/derived/annotationsreference/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsreference" diff --git a/config-model/src/test/derived/annotationssimple/ilscripts.cfg b/config-model/src/test/derived/annotationssimple/ilscripts.cfg index 9d0962df5be..af179221eb4 100644 --- a/config-model/src/test/derived/annotationssimple/ilscripts.cfg +++ b/config-model/src/test/derived/annotationssimple/ilscripts.cfg @@ -1,3 +1,4 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationssimple" diff --git a/config-model/src/test/derived/arrays/ilscripts.cfg b/config-model/src/test/derived/arrays/ilscripts.cfg index 98cff642d9e..3f2dae48552 100644 --- a/config-model/src/test/derived/arrays/ilscripts.cfg +++ b/config-model/src/test/derived/arrays/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "arrays" ilscript[].docfield[] "tags" diff --git a/config-model/src/test/derived/attributeprefetch/ilscripts.cfg b/config-model/src/test/derived/attributeprefetch/ilscripts.cfg index dec054b33f0..5a3784f7cb9 100644 --- a/config-model/src/test/derived/attributeprefetch/ilscripts.cfg +++ b/config-model/src/test/derived/attributeprefetch/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "prefetch" ilscript[].docfield[] "singlebyte" diff --git a/config-model/src/test/derived/attributes/ilscripts.cfg b/config-model/src/test/derived/attributes/ilscripts.cfg index 6d3ef2799d9..58279759e5f 100644 --- a/config-model/src/test/derived/attributes/ilscripts.cfg +++ b/config-model/src/test/derived/attributes/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "attributes" ilscript[].docfield[] "a1" diff --git a/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg b/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg index c20c321ebcf..0b925da4778 100644 --- a/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg +++ b/config-model/src/test/derived/bolding_dynamic_summary/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "test" ilscript[].docfield[] "str_1" diff --git a/config-model/src/test/derived/complex/ilscripts.cfg b/config-model/src/test/derived/complex/ilscripts.cfg index 4405d2fda40..7d025e15703 100644 --- a/config-model/src/test/derived/complex/ilscripts.cfg +++ b/config-model/src/test/derived/complex/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "complex" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/emptydefault/ilscripts.cfg b/config-model/src/test/derived/emptydefault/ilscripts.cfg index e4242153bce..bbb8e5c556c 100644 --- a/config-model/src/test/derived/emptydefault/ilscripts.cfg +++ b/config-model/src/test/derived/emptydefault/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "emptydefault" ilscript[].docfield[] "one" diff --git a/config-model/src/test/derived/exactmatch/ilscripts.cfg b/config-model/src/test/derived/exactmatch/ilscripts.cfg index 21dfbd1371b..1d1bd6d5e8a 100644 --- a/config-model/src/test/derived/exactmatch/ilscripts.cfg +++ b/config-model/src/test/derived/exactmatch/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "exactmatch" ilscript[].docfield[] "tag" diff --git a/config-model/src/test/derived/hnsw_index/ilscripts.cfg b/config-model/src/test/derived/hnsw_index/ilscripts.cfg index e48f116f468..c811b93c3df 100644 --- a/config-model/src/test/derived/hnsw_index/ilscripts.cfg +++ b/config-model/src/test/derived/hnsw_index/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "test" ilscript[].docfield[] "t1" diff --git a/config-model/src/test/derived/id/ilscripts.cfg b/config-model/src/test/derived/id/ilscripts.cfg index d3ab29f6cd8..121e305059e 100644 --- a/config-model/src/test/derived/id/ilscripts.cfg +++ b/config-model/src/test/derived/id/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "id" ilscript[].docfield[] "uri" diff --git a/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg b/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg index f820ad9720b..5a474f62e07 100644 --- a/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg +++ b/config-model/src/test/derived/imported_position_field_summary/schema-info.cfg @@ -53,6 +53,8 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false diff --git a/config-model/src/test/derived/indexswitches/ilscripts.cfg b/config-model/src/test/derived/indexswitches/ilscripts.cfg index 472c1f95cb0..454f675c0a2 100644 --- a/config-model/src/test/derived/indexswitches/ilscripts.cfg +++ b/config-model/src/test/derived/indexswitches/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "indexswitches" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/inheritance/ilscripts.cfg b/config-model/src/test/derived/inheritance/ilscripts.cfg index d4c804773f0..c966f32a502 100644 --- a/config-model/src/test/derived/inheritance/ilscripts.cfg +++ b/config-model/src/test/derived/inheritance/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "child" ilscript[].docfield[] "onlygrandparent" diff --git a/config-model/src/test/derived/language/ilscripts.cfg b/config-model/src/test/derived/language/ilscripts.cfg index 1860f180839..d0abc08f1e0 100644 --- a/config-model/src/test/derived/language/ilscripts.cfg +++ b/config-model/src/test/derived/language/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "language" ilscript[].docfield[] "language" diff --git a/config-model/src/test/derived/lowercase/ilscripts.cfg b/config-model/src/test/derived/lowercase/ilscripts.cfg index 8ba4bfa3349..49515e50df4 100644 --- a/config-model/src/test/derived/lowercase/ilscripts.cfg +++ b/config-model/src/test/derived/lowercase/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "lowercase" ilscript[].docfield[] "single_field_source" diff --git a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg index 0cdf921de25..4a6de4154f8 100644 --- a/config-model/src/test/derived/multiplesummaries/ilscripts.cfg +++ b/config-model/src/test/derived/multiplesummaries/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "multiplesummaries" ilscript[].docfield[] "a" diff --git a/config-model/src/test/derived/music/ilscripts.cfg b/config-model/src/test/derived/music/ilscripts.cfg index f90cdb15baa..f79e8824b69 100644 --- a/config-model/src/test/derived/music/ilscripts.cfg +++ b/config-model/src/test/derived/music/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "music" ilscript[].docfield[] "bgndata" diff --git a/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg b/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg index 728856abbf2..8f59c21e97f 100644 --- a/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg +++ b/config-model/src/test/derived/neuralnet_noqueryprofile/schema-info.cfg @@ -156,6 +156,7 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(W_0)" schema[].rankprofile[].input[].type "tensor(hidden[9],x[9])" schema[].rankprofile[].input[].name "query(b_0)" @@ -173,9 +174,11 @@ schema[].rankprofile[].input[].type "tensor()" schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "defaultRankProfile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(W_0)" schema[].rankprofile[].input[].type "tensor(hidden[9],x[9])" schema[].rankprofile[].input[].name "query(b_0)" @@ -193,6 +196,7 @@ schema[].rankprofile[].input[].type "tensor()" schema[].rankprofile[].name "neuralNetworkProfile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(W_0)" schema[].rankprofile[].input[].type "tensor(hidden[9],x[9])" schema[].rankprofile[].input[].name "query(b_0)" diff --git a/config-model/src/test/derived/newrank/ilscripts.cfg b/config-model/src/test/derived/newrank/ilscripts.cfg index b02e09a0496..487d2fca902 100644 --- a/config-model/src/test/derived/newrank/ilscripts.cfg +++ b/config-model/src/test/derived/newrank/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "newrank" ilscript[].docfield[] "bgndata" diff --git a/config-model/src/test/derived/orderilscripts/ilscripts.cfg b/config-model/src/test/derived/orderilscripts/ilscripts.cfg index 0ed1589af0a..4918e23efc6 100644 --- a/config-model/src/test/derived/orderilscripts/ilscripts.cfg +++ b/config-model/src/test/derived/orderilscripts/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "orderilscripts" ilscript[].docfield[] "foo" diff --git a/config-model/src/test/derived/position_array/ilscripts.cfg b/config-model/src/test/derived/position_array/ilscripts.cfg index ecafbc4a025..3f7611b25d8 100644 --- a/config-model/src/test/derived/position_array/ilscripts.cfg +++ b/config-model/src/test/derived/position_array/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_array" ilscript[].docfield[] "pos" diff --git a/config-model/src/test/derived/position_attribute/ilscripts.cfg b/config-model/src/test/derived/position_attribute/ilscripts.cfg index d2fc8503ce5..fbd1a293418 100644 --- a/config-model/src/test/derived/position_attribute/ilscripts.cfg +++ b/config-model/src/test/derived/position_attribute/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_attribute" ilscript[].docfield[] "pos" diff --git a/config-model/src/test/derived/position_extra/ilscripts.cfg b/config-model/src/test/derived/position_extra/ilscripts.cfg index a86dcec92ec..4645798723c 100644 --- a/config-model/src/test/derived/position_extra/ilscripts.cfg +++ b/config-model/src/test/derived/position_extra/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "position_extra" ilscript[].docfield[] "pos_str" diff --git a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg index 40c7843a0a4..2d1904cf9d8 100644 --- a/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg +++ b/config-model/src/test/derived/prefixexactattribute/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "prefixexactattribute" ilscript[].docfield[] "indexfield0" diff --git a/config-model/src/test/derived/rankingexpression/schema-info.cfg b/config-model/src/test/derived/rankingexpression/schema-info.cfg index 5bf01f10ede..f78eb7de310 100644 --- a/config-model/src/test/derived/rankingexpression/schema-info.cfg +++ b/config-model/src/test/derived/rankingexpression/schema-info.cfg @@ -148,96 +148,125 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures true +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "static" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "overflow" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "duplicates" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "whitespace1" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "whitespace2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros2" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros3" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros3-inherited" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-inherited" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-inherited2" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-inherited3" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros-inherited" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros-inherited2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "macros-refering-macros-inherited-two-levels" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withmf" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withboolean" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withglobalphase" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "layered" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(v)" schema[].rankprofile[].input[].type "tensor(v[3])" schema[].rankprofile[].name "withtfl" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(v)" schema[].rankprofile[].input[].type "tensor(v[3])" schema[].rankprofile[].name "withtfl2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(v)" schema[].rankprofile[].input[].type "tensor(v[3])" schema[].rankprofile[].name "withnorm" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withfusion" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "function-with-arg-as-summary-feature" schema[].rankprofile[].hasSummaryFeatures true schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "function-with-arg-in-global-phase" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "withstringcompare" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].input[].name "query(myquerystring)" schema[].rankprofile[].input[].type "string" schema[].rankprofile[].input[].name "query(mybadlong)" diff --git a/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg b/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg index 377c10d3293..68892737e63 100644 --- a/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg +++ b/config-model/src/test/derived/rankprofilemodularity/schema-info.cfg @@ -18,24 +18,32 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema0" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema1" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "in_schema3" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "outside_schema1" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "outside_schema2" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false diff --git a/config-model/src/test/derived/ranktypes/ilscripts.cfg b/config-model/src/test/derived/ranktypes/ilscripts.cfg index adcd2f70c70..22526d1aa23 100644 --- a/config-model/src/test/derived/ranktypes/ilscripts.cfg +++ b/config-model/src/test/derived/ranktypes/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "ranktypes" ilscript[].docfield[] "title" diff --git a/config-model/src/test/derived/schemainheritance/ilscripts.cfg b/config-model/src/test/derived/schemainheritance/ilscripts.cfg index f7324920fe7..b1ba947f1dc 100644 --- a/config-model/src/test/derived/schemainheritance/ilscripts.cfg +++ b/config-model/src/test/derived/schemainheritance/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "child" ilscript[].docfield[] "pf1" diff --git a/config-model/src/test/derived/schemainheritance/schema-info.cfg b/config-model/src/test/derived/schemainheritance/schema-info.cfg index 9fe71780c7a..466e66ad0bb 100644 --- a/config-model/src/test/derived/schemainheritance/schema-info.cfg +++ b/config-model/src/test/derived/schemainheritance/schema-info.cfg @@ -116,12 +116,16 @@ schema[].summaryclass[].fields[].dynamic false schema[].rankprofile[].name "default" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "unranked" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "child_profile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false schema[].rankprofile[].name "parent_profile" schema[].rankprofile[].hasSummaryFeatures false schema[].rankprofile[].hasRankFeatures false +schema[].rankprofile[].significance.useModel false diff --git a/config-model/src/test/derived/structanyorder/ilscripts.cfg b/config-model/src/test/derived/structanyorder/ilscripts.cfg index c07f04b3021..a806bc1b712 100644 --- a/config-model/src/test/derived/structanyorder/ilscripts.cfg +++ b/config-model/src/test/derived/structanyorder/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "annotationsimplicitstruct" ilscript[].docfield[] "structfield" diff --git a/config-model/src/test/derived/tokenization/ilscripts.cfg b/config-model/src/test/derived/tokenization/ilscripts.cfg index c08b6a54c83..cad8ec81879 100644 --- a/config-model/src/test/derived/tokenization/ilscripts.cfg +++ b/config-model/src/test/derived/tokenization/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "tokenization" ilscript[].docfield[] "text" diff --git a/config-model/src/test/derived/types/ilscripts.cfg b/config-model/src/test/derived/types/ilscripts.cfg index 17bed90deb4..73befb221ce 100644 --- a/config-model/src/test/derived/types/ilscripts.cfg +++ b/config-model/src/test/derived/types/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "types" ilscript[].docfield[] "abyte" diff --git a/config-model/src/test/derived/uri_array/ilscripts.cfg b/config-model/src/test/derived/uri_array/ilscripts.cfg index 3dd97e5c11f..0dc87b513ce 100644 --- a/config-model/src/test/derived/uri_array/ilscripts.cfg +++ b/config-model/src/test/derived/uri_array/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "uri_array" ilscript[].docfield[] "my_uri" diff --git a/config-model/src/test/derived/uri_wset/ilscripts.cfg b/config-model/src/test/derived/uri_wset/ilscripts.cfg index 48e07ef9959..cc45ee5ad8f 100644 --- a/config-model/src/test/derived/uri_wset/ilscripts.cfg +++ b/config-model/src/test/derived/uri_wset/ilscripts.cfg @@ -1,4 +1,5 @@ maxtermoccurrences 10000 +maxtokenlength 1000 fieldmatchmaxlength 1000000 ilscript[].doctype "uri_wset" ilscript[].docfield[] "my_uri" diff --git a/configdefinitions/src/vespa/ilscripts.def b/configdefinitions/src/vespa/ilscripts.def index acb06abb755..7a286773564 100644 --- a/configdefinitions/src/vespa/ilscripts.def +++ b/configdefinitions/src/vespa/ilscripts.def @@ -3,6 +3,8 @@ namespace=vespa.configdefinition ## The maximum number of occurrences of a given term to index per field maxtermoccurrences int default=10000 +## The maximum number of characters for a token +maxtokenlength int default=1000 fieldmatchmaxlength int default=1000000 ilscript[].doctype string diff --git a/container-core/src/main/java/com/yahoo/processing/request/CompoundName.java b/container-core/src/main/java/com/yahoo/processing/request/CompoundName.java index b4536a1c56b..440df4f9be9 100644 --- a/container-core/src/main/java/com/yahoo/processing/request/CompoundName.java +++ b/container-core/src/main/java/com/yahoo/processing/request/CompoundName.java @@ -8,6 +8,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Objects; import static com.yahoo.text.Lowercase.toLowerCase; @@ -74,41 +75,52 @@ public final class CompoundName { * @param compounds the compounds of this name */ private CompoundName(String name, String [] compounds, boolean useCache) { - if (name == null) throw new NullPointerException("Name can not be null"); - - this.name = name; + this.name = Objects.requireNonNull(name, "Name can not be null"); this.lowerCasedName = toLowerCase(name); - if (compounds.length == 1 && compounds[0].isEmpty()) { - this.compounds = List.of(); - this.hashCode = 0; - rest = this; - first = this; + if (compounds.length == 1) { + if (compounds[0].isEmpty()) { + this.compounds = List.of(); + this.hashCode = 0; + rest = first = this; + return; + } + this.compounds = new ImmutableArrayList(compounds); + this.hashCode = this.compounds.hashCode(); + rest = first = empty; return; } - this.compounds = new ImmutableArrayList(compounds); - this.hashCode = this.compounds.hashCode(); - - if (compounds.length > 1) { - String restName = name.substring(compounds[0].length()+1); - if (useCache) { - rest = cache.computeIfAbsent(restName, (key) -> new CompoundName(key, Arrays.copyOfRange(compounds, 1, compounds.length), useCache)); - } else { - rest = new CompoundName(restName, Arrays.copyOfRange(compounds, 1, compounds.length), useCache); + CompoundName[] children = new CompoundName[compounds.length]; + for (int i = 0; i + 1 < children.length; i++) { + int start = 0, end = i == 0 ? -1 : children[0].name.length(); + for (int j = 0; j + i < children.length; j++) { + end += compounds[j + i].length() + 1; + if (end == start) throw new IllegalArgumentException("'" + name + "' is not a legal compound name. " + + "Consecutive, leading or trailing dots are not allowed."); + String subName = this.name.substring(start, end); + CompoundName cached = cache.get(subName); + children[j] = cached != null ? cached + : new CompoundName(subName, + this.lowerCasedName.substring(start, end), + Arrays.copyOfRange(compounds, j, j + i + 1), + i == 0 ? empty : children[j + 1], + i == 0 ? empty : children[j]); + if (useCache && cached == null) cache.put(subName, children[j]); + start += compounds[j].length() + 1; } - } else { - rest = empty; } + this.compounds = new ImmutableArrayList(compounds); + this.hashCode = this.compounds.hashCode(); + this.rest = children[1]; + this.first = children[0]; + } - if (compounds.length > 1) { - String firstName = name.substring(0, name.length() - (compounds[compounds.length-1].length()+1)); - if (useCache) { - first = cache.computeIfAbsent(firstName, (key) -> new CompoundName(key, Arrays.copyOfRange(compounds, 0, compounds.length-1), useCache)); - } else { - first = new CompoundName(firstName, Arrays.copyOfRange(compounds, 0, compounds.length-1), useCache); - } - } else { - first = empty; - } + private CompoundName(String name, String lowerCasedName, String[] compounds, CompoundName rest, CompoundName first) { + this.name = name; + this.lowerCasedName = lowerCasedName; + this.compounds = new ImmutableArrayList(compounds); + this.hashCode = this.compounds.hashCode(); + this.rest = rest; + this.first = first; } private static List<String> parse(String s) { diff --git a/container-core/src/test/java/com/yahoo/processing/request/CompoundNameTestCase.java b/container-core/src/test/java/com/yahoo/processing/request/CompoundNameTestCase.java index b5143f89c78..7523a68501f 100644 --- a/container-core/src/test/java/com/yahoo/processing/request/CompoundNameTestCase.java +++ b/container-core/src/test/java/com/yahoo/processing/request/CompoundNameTestCase.java @@ -13,7 +13,7 @@ import static org.junit.jupiter.api.Assertions.*; /** * Module local test of the basic property name building block. * - * @author <a href="mailto:steinar@yahoo-inc.com">Steinar Knutsen</a> + * @author Steinar Knutsen */ public class CompoundNameTestCase { @@ -30,22 +30,22 @@ public class CompoundNameTestCase { } @Test - final void testLast() { + void testLast() { assertEquals(NAME.substring(NAME.lastIndexOf('.') + 1), C_NAME.last()); } @Test - final void testFirst() { + void testFirst() { assertEquals(NAME.substring(0, NAME.indexOf('.')), C_NAME.first()); } @Test - final void testRest() { + void testRest() { verifyStrict(NAME.substring(NAME.indexOf('.') + 1), C_NAME.rest()); } @Test - final void testRestN() { + void testRestN() { verifyStrict("a.b.c.d.e", C_abcde.rest(0)); verifyStrict("b.c.d.e", C_abcde.rest(1)); verifyStrict("c.d.e", C_abcde.rest(2)); @@ -53,8 +53,9 @@ public class CompoundNameTestCase { verifyStrict("e", C_abcde.rest(4)); verifyStrict(CompoundName.empty, C_abcde.rest(5)); } + @Test - final void testFirstN() { + void testFirstN() { verifyStrict("a.b.c.d.e", C_abcde.first(5)); verifyStrict("a.b.c.d", C_abcde.first(4)); verifyStrict("a.b.c", C_abcde.first(3)); @@ -64,15 +65,32 @@ public class CompoundNameTestCase { } @Test - final void testPrefix() { - CompoundName abc = CompoundName.from("a.b.c"); - assertTrue(abc.hasPrefix(CompoundName.empty)); - assertTrue(abc.hasPrefix(CompoundName.from("a"))); - assertTrue(abc.hasPrefix(CompoundName.from("a.b"))); - assertTrue(abc.hasPrefix(CompoundName.from("a.b.c"))); + void testPrefix() { + CompoundName abcc = CompoundName.from("a.b.cc"); + assertTrue(abcc.hasPrefix(CompoundName.empty)); + assertTrue(abcc.hasPrefix(CompoundName.from("a"))); + assertTrue(abcc.hasPrefix(CompoundName.from("a.b"))); + assertTrue(abcc.hasPrefix(CompoundName.from("a.b.cc"))); - assertFalse(abc.hasPrefix(CompoundName.from("a.b.c.d"))); - assertFalse(abc.hasPrefix(CompoundName.from("a.b.d"))); + assertFalse(abcc.hasPrefix(CompoundName.from("a.b.c"))); + assertFalse(abcc.hasPrefix(CompoundName.from("a.b.c.d"))); + assertFalse(abcc.hasPrefix(CompoundName.from("a.b.d"))); + } + + @Test + void testIllegalCompound() { + assertEquals("'a.' is not a legal compound name. Names can not end with a dot.", + assertThrows(IllegalArgumentException.class, + () -> CompoundName.from("a.")) + .getMessage()); + assertEquals("'.b' is not a legal compound name. Consecutive, leading or trailing dots are not allowed.", + assertThrows(IllegalArgumentException.class, + () -> CompoundName.from(".b")) + .getMessage()); + assertEquals("'a..b' is not a legal compound name. Consecutive, leading or trailing dots are not allowed.", + assertThrows(IllegalArgumentException.class, + () -> CompoundName.from("a..b")) + .getMessage()); } @Test @@ -82,7 +100,7 @@ public class CompoundNameTestCase { } @Test - final void testSize() { + void testSize() { Splitter s = Splitter.on('.'); Iterable<String> i = s.split(NAME); int n = 0; @@ -93,23 +111,23 @@ public class CompoundNameTestCase { } @Test - final void testGet() { + void testGet() { String s = C_NAME.get(0); assertEquals(NAME.substring(0, NAME.indexOf('.')), s); } @Test - final void testIsCompound() { + void testIsCompound() { assertTrue(C_NAME.isCompound()); } @Test - final void testIsEmpty() { + void testIsEmpty() { assertFalse(C_NAME.isEmpty()); } @Test - final void testAsList() { + void testAsList() { List<String> l = C_NAME.asList(); Splitter peoplesFront = Splitter.on('.'); Iterable<String> answer = peoplesFront.split(NAME); @@ -121,7 +139,7 @@ public class CompoundNameTestCase { } @Test - final void testEqualsObject() { + void testEqualsObject() { assertNotEquals(C_NAME, NAME); assertNotEquals(C_NAME, null); verifyStrict(C_NAME, C_NAME); @@ -129,7 +147,7 @@ public class CompoundNameTestCase { } @Test - final void testEmptyNonEmpty() { + void testEmptyNonEmpty() { assertTrue(CompoundName.empty.isEmpty()); assertEquals(0, CompoundName.empty.size()); assertFalse(CompoundName.from("a").isEmpty()); @@ -140,7 +158,7 @@ public class CompoundNameTestCase { } @Test - final void testGetLowerCasedName() { + void testGetLowerCasedName() { assertEquals(Lowercase.toLowerCase(NAME), C_NAME.getLowerCasedName()); } @@ -223,4 +241,5 @@ public class CompoundNameTestCase { assertEquals("[one]", CompoundName.from("one").asList().toString()); assertEquals("[one, two, three]", CompoundName.from("one.two.three").asList().toString()); } + } diff --git a/container-search/abi-spec.json b/container-search/abi-spec.json index d85f1844b18..1c6c773afd9 100644 --- a/container-search/abi-spec.json +++ b/container-search/abi-spec.json @@ -8539,6 +8539,7 @@ "public com.yahoo.search.schema.RankProfile$Builder setHasSummaryFeatures(boolean)", "public com.yahoo.search.schema.RankProfile$Builder setHasRankFeatures(boolean)", "public com.yahoo.search.schema.RankProfile$Builder addInput(java.lang.String, com.yahoo.search.schema.RankProfile$InputType)", + "public com.yahoo.search.schema.RankProfile$Builder setUseSignificanceModel(boolean)", "public com.yahoo.search.schema.RankProfile build()" ], "fields" : [ ] @@ -8573,6 +8574,7 @@ "public com.yahoo.search.schema.Schema schema()", "public boolean hasSummaryFeatures()", "public boolean hasRankFeatures()", + "public boolean useSignificanceModel()", "public java.util.Map inputs()", "public boolean equals(java.lang.Object)", "public int hashCode()", diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index d7fad148c8c..bfcf0af325d 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -79,7 +79,7 @@ public abstract class InvokerFactory { success.add(node); } } - if ( ! cluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) { + if ( ! cluster.isPartialGroupCoverageSufficient(group.hasSufficientCoverage(), success) && !acceptIncompleteCoverage) { return Optional.empty(); } if (invokers.isEmpty()) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index 965ce4aeb94..c7af37b3a26 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -23,7 +23,7 @@ public class Group { // Using volatile to ensure visibility for reader. // All updates are done in a single writer thread - private volatile boolean hasSufficientCoverage = true; + private volatile boolean hasSufficientCoverage = false; private volatile boolean hasFullCoverage = true; private volatile long activeDocuments = 0; private volatile long targetActiveDocuments = 0; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 56545a32831..8f83d8ef5ce 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -226,17 +226,20 @@ public class SearchCluster implements NodeManager<Node> { // With just one group sufficient coverage may not be the same as full coverage, as the // group will always be marked sufficient for use. updateSufficientCoverage(group, true); - boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), group.activeDocuments()); - trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments()); + boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(), + group.activeDocuments(), group.activeDocuments(), group.activeDocuments()); + trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments(), group.activeDocuments()); } private void pingIterationCompletedMultipleGroups(SearchGroupsImpl groups) { groups.groups().forEach(Group::aggregateNodeValues); - long medianDocuments = groups.medianDocumentsPerGroup(); + long medianDocuments = groups.medianDocumentCount(); + long maxDocuments = groups.maxDocumentCount(); for (Group group : groups.groups()) { - boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), medianDocuments); + boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(), + group.activeDocuments(), medianDocuments, maxDocuments); updateSufficientCoverage(group, sufficientCoverage); - trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments); + trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments, maxDocuments); } } @@ -261,7 +264,7 @@ public class SearchCluster implements NodeManager<Node> { /** * Calculate whether a subset of nodes in a group has enough coverage */ - private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) { + private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments, long maxDocuments) { if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about. boolean changed = group.fullCoverageStatusChanged(fullCoverage); if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) { @@ -278,7 +281,7 @@ public class SearchCluster implements NodeManager<Node> { unresponsive.append('\n').append(node); } String message = "Cluster " + clusterId + ": " + group + " has reduced coverage: " + - "Active documents: " + group.activeDocuments() + "/" + medianDocuments + ", " + + "Active documents: " + group.activeDocuments() + "/" + maxDocuments + ", " + "Target active documents: " + group.targetActiveDocuments() + ", " + "working nodes: " + group.workingNodes() + "/" + group.nodes().size() + ", unresponsive nodes: " + (unresponsive.toString().isEmpty() ? " none" : unresponsive); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java index 85063b8ef57..0bb694f610e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java @@ -13,21 +13,30 @@ import static java.util.stream.Collectors.toCollection; import static java.util.stream.Collectors.toSet; /** - * Simple interface for groups and their nodes in the content cluster + * Simple interface for groups and their nodes in the content cluster. + * * @author baldersheim */ public interface SearchGroups { + Group get(int id); + Set<Integer> keys(); + Collection<Group> groups(); + default boolean isEmpty() { return size() == 0; } + default Set<Node> nodes() { return groups().stream().flatMap(group -> group.nodes().stream()) .sorted(comparingInt(Node::key)) .collect(toCollection(LinkedHashSet::new)); } + int size(); - boolean isPartialGroupCoverageSufficient(Collection<Node> nodes); + + boolean isPartialGroupCoverageSufficient(boolean currentCoverageSufficient, Collection<Node> nodes); + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java index c49a140804c..6528c5d2ae4 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java @@ -7,14 +7,17 @@ import java.util.Collection; import java.util.Map; import java.util.Set; +/** + * @author baldersheim + */ public class SearchGroupsImpl implements SearchGroups { private final Map<Integer, Group> groups; - private final double minActivedocsPercentage; + private final double minActiveDocsPercentage; - public SearchGroupsImpl(Map<Integer, Group> groups, double minActivedocsPercentage) { + public SearchGroupsImpl(Map<Integer, Group> groups, double minActiveDocsPercentage) { this.groups = Map.copyOf(groups); - this.minActivedocsPercentage = minActivedocsPercentage; + this.minActiveDocsPercentage = minActiveDocsPercentage; } @Override public Group get(int id) { return groups.get(id); } @@ -23,23 +26,38 @@ public class SearchGroupsImpl implements SearchGroups { @Override public int size() { return groups.size(); } @Override - public boolean isPartialGroupCoverageSufficient(Collection<Node> nodes) { - if (size() == 1) - return true; - long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); - return isGroupCoverageSufficient(activeDocuments, medianDocumentsPerGroup()); + public boolean isPartialGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, Collection<Node> nodes) { + if (size() == 1) return true; + long groupDocumentCount = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); + return isGroupCoverageSufficient(currentIsGroupCoverageSufficient, + groupDocumentCount, medianDocumentCount(), maxDocumentCount()); } - public boolean isGroupCoverageSufficient(long activeDocuments, long medianDocuments) { - if (medianDocuments <= 0) return true; - double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments; - return documentCoverage >= minActivedocsPercentage; + public boolean isGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, + long groupDocumentCount, long medianDocumentCount, long maxDocumentCount) { + if (medianDocumentCount <= 0) return true; + if (currentIsGroupCoverageSufficient) { + // To take a group *out of* rotation, require that it has less active documents than the median. + // This avoids scenarios where incorrect accounting in a single group takes all other groups offline. + double documentCoverage = 100.0 * (double) groupDocumentCount / medianDocumentCount; + return documentCoverage >= minActiveDocsPercentage; + } + else { + // to put a group *in* rotation, require that it has as many documents as the largest group, + // to avoid taking groups in too early when the majority of the groups have just been added. + double documentCoverage = 100.0 * (double) groupDocumentCount / maxDocumentCount; + return documentCoverage >= minActiveDocsPercentage; + } } - public long medianDocumentsPerGroup() { + public long medianDocumentCount() { if (isEmpty()) return 0; double[] activeDocuments = groups().stream().mapToDouble(Group::activeDocuments).toArray(); return (long) Quantiles.median().computeInPlace(activeDocuments); } + public long maxDocumentCount() { + return (long)groups().stream().mapToDouble(Group::activeDocuments).max().orElse(0); + } + } diff --git a/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java b/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java index a5b8d328a7a..9583e9885e7 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java +++ b/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java @@ -36,6 +36,7 @@ public class RankProfile { private final String name; private final boolean hasSummaryFeatures; private final boolean hasRankFeatures; + private final boolean useSignificanceModel; private final Map<String, InputType> inputs; // Assigned when this is added to a schema @@ -45,6 +46,7 @@ public class RankProfile { this.name = builder.name; this.hasSummaryFeatures = builder.hasSummaryFeatures; this.hasRankFeatures = builder.hasRankFeatures; + this.useSignificanceModel = builder.useSignificanceModel; this.inputs = Collections.unmodifiableMap(builder.inputs); } @@ -66,6 +68,9 @@ public class RankProfile { /** Returns true if this rank profile has rank features. */ public boolean hasRankFeatures() { return hasRankFeatures; } + /** Returns true if this rank profile should use significance models. */ + public boolean useSignificanceModel() { return useSignificanceModel; } + /** Returns the inputs explicitly declared in this rank profile. */ public Map<String, InputType> inputs() { return inputs; } @@ -76,13 +81,14 @@ public class RankProfile { if ( ! other.name.equals(this.name)) return false; if ( other.hasSummaryFeatures != this.hasSummaryFeatures) return false; if ( other.hasRankFeatures != this.hasRankFeatures) return false; + if ( other.useSignificanceModel != this.useSignificanceModel) return false; if ( ! other.inputs.equals(this.inputs)) return false; return true; } @Override public int hashCode() { - return Objects.hash(name, hasSummaryFeatures, hasRankFeatures, inputs); + return Objects.hash(name, hasSummaryFeatures, hasRankFeatures, useSignificanceModel, inputs); } @Override @@ -95,6 +101,7 @@ public class RankProfile { private final String name; private boolean hasSummaryFeatures = true; private boolean hasRankFeatures = true; + private boolean useSignificanceModel = false; private final Map<String, InputType> inputs = new LinkedHashMap<>(); public Builder(String name) { @@ -116,6 +123,8 @@ public class RankProfile { return this; } + public Builder setUseSignificanceModel(boolean use) { this.useSignificanceModel = use; return this; } + public RankProfile build() { return new RankProfile(this); } diff --git a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java index d28c2db2b9e..77f27d3d411 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java +++ b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java @@ -22,9 +22,10 @@ class SchemaInfoConfigurer { Schema.Builder builder = new Schema.Builder(schemaInfoConfig.name()); for (var profileConfig : schemaInfoConfig.rankprofile()) { - RankProfile.Builder profileBuilder = new RankProfile.Builder(profileConfig.name()); - profileBuilder.setHasSummaryFeatures(profileConfig.hasSummaryFeatures()); - profileBuilder.setHasRankFeatures(profileConfig.hasRankFeatures()); + RankProfile.Builder profileBuilder = new RankProfile.Builder(profileConfig.name()) + .setHasSummaryFeatures(profileConfig.hasSummaryFeatures()) + .setHasRankFeatures(profileConfig.hasRankFeatures()) + .setUseSignificanceModel(profileConfig.significance().useModel()); for (var inputConfig : profileConfig.input()) profileBuilder.addInput(inputConfig.name(), RankProfile.InputType.fromSpec(inputConfig.type())); builder.add(profileBuilder.build()); diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java index 6cef576f967..f6025dc6ba7 100644 --- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -14,12 +14,16 @@ import com.yahoo.prelude.query.WordItem; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; -import com.yahoo.search.query.Ranking; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.schema.RankProfile; +import com.yahoo.search.schema.Schema; +import com.yahoo.search.schema.SchemaInfo; import com.yahoo.search.searchchain.Execution; -import com.yahoo.vespa.config.search.RankProfilesConfig; -import java.util.HashMap; +import java.util.HashSet; import java.util.Optional; +import java.util.logging.Logger; +import java.util.stream.Collectors; import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; @@ -34,30 +38,48 @@ import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; public class SignificanceSearcher extends Searcher { public final static String SIGNIFICANCE = "Significance"; - private final SignificanceModelRegistry significanceModelRegistry; - private final RankProfilesConfig rankProfilesConfig; - private final HashMap<String, Boolean> useModel = new HashMap<>(); + private static final Logger log = Logger.getLogger(SignificanceSearcher.class.getName()); + private final SignificanceModelRegistry significanceModelRegistry; + private final SchemaInfo schemaInfo; @Inject - public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry, RankProfilesConfig rankProfilesConfig) { + public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry, SchemaInfo schemaInfo) { this.significanceModelRegistry = significanceModelRegistry; - this.rankProfilesConfig = rankProfilesConfig; - - for (RankProfilesConfig.Rankprofile profile : rankProfilesConfig.rankprofile()) { - for (RankProfilesConfig.Rankprofile.Fef.Property property : profile.fef().property()) { - if (property.name().equals("vespa.significance.use_model")) { - useModel.put(profile.name(), Boolean.parseBoolean(property.value())); - } - } - } + this.schemaInfo = schemaInfo; } @Override public Result search(Query query, Execution execution) { - Ranking ranking = query.getRanking(); - if (!useModel.containsKey(ranking.getProfile()) || !useModel.get(ranking.getProfile())) return execution.search(query); + var rankProfileName = query.getRanking().getProfile(); + + // Determine significance setup per schema for the given rank profile + var perSchemaSetup = schemaInfo.newSession(query).schemas().stream() + .collect(Collectors.toMap(Schema::name, schema -> + // Fallback to disabled if the rank profile is not found in the schema + // This will result in a failure later (in a "backend searcher") anyway. + Optional.ofNullable(schema.rankProfiles().get(rankProfileName)) + .map(RankProfile::useSignificanceModel).orElse(false))); + var uniqueSetups = new HashSet<>(perSchemaSetup.values()); + + // Fail if the significance setup for the selected schemas are conflicting + if (uniqueSetups.size() > 1) { + var result = new Result(query); + result.hits().addError( + ErrorMessage.createIllegalQuery( + ("Inconsistent 'significance' configuration for the rank profile '%s' in the schemas %s. " + + "Use 'restrict' to limit the query to a subset of schemas " + + "(https://docs.vespa.ai/en/schemas.html#multiple-schemas). " + + "Specify same 'significance' configuration for all selected schemas " + + "(https://docs.vespa.ai/en/reference/schema-reference.html#significance).") + .formatted(rankProfileName, perSchemaSetup.keySet()))); + return result; + } + + if (perSchemaSetup.isEmpty()) return execution.search(query); + var useSignificanceModel = uniqueSetups.iterator().next(); + if (!useSignificanceModel) return execution.search(query); Language language = query.getModel().getParsingLanguage(); Optional<SignificanceModel> model = significanceModelRegistry.getModel(language); diff --git a/container-search/src/main/resources/configdefinitions/container.search.schema-info.def b/container-search/src/main/resources/configdefinitions/container.search.schema-info.def index 989fbb16973..086b47f5ae5 100644 --- a/container-search/src/main/resources/configdefinitions/container.search.schema-info.def +++ b/container-search/src/main/resources/configdefinitions/container.search.schema-info.def @@ -28,6 +28,7 @@ schema[].summaryclass[].fields[].dynamic bool default=false schema[].rankprofile[].name string schema[].rankprofile[].hasSummaryFeatures bool default=true schema[].rankprofile[].hasRankFeatures bool default=true +schema[].rankprofile[].significance.useModel bool default=false # The name of an input (query rank feature) accepted by this profile schema[].rankprofile[].input[].name string # The tensor type of an input (query rank feature) accepted by this profile diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java index 2a9eaa86674..e7085b093f3 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterCoverageTest.java @@ -48,6 +48,19 @@ public class SearchClusterCoverageTest { } @Test + void three_groups_of_which_two_were_just_added() { + var tester = new SearchClusterTester(3, 3); + + tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(80, 1); + tester.setDocsPerNode(80, 2); + tester.pingIterationCompleted(); + assertTrue(tester.group(0).hasSufficientCoverage()); + assertFalse(tester.group(1).hasSufficientCoverage()); + assertFalse(tester.group(2).hasSufficientCoverage()); + } + + @Test void three_groups_one_missing_docs_but_too_few() { var tester = new SearchClusterTester(3, 3); @@ -65,6 +78,10 @@ public class SearchClusterCoverageTest { var tester = new SearchClusterTester(3, 3); tester.setDocsPerNode(100, 0); + tester.setDocsPerNode(100, 1); + tester.setDocsPerNode(100, 2); + tester.pingIterationCompleted(); + tester.setDocsPerNode(100, 0); tester.setDocsPerNode(150, 1); tester.setDocsPerNode(100, 2); tester.pingIterationCompleted(); diff --git a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java index 1b36c2b8151..8ac4f067876 100644 --- a/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java +++ b/container-search/src/test/java/com/yahoo/search/dispatch/searchcluster/SearchClusterTest.java @@ -200,8 +200,6 @@ public class SearchClusterTest { @Test void requireThatVipStatusIsDefaultDownWithLocalDispatch() { try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "b")) { - assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); - assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); @@ -211,8 +209,6 @@ public class SearchClusterTest { @Test void requireThatVipStatusStaysUpWithLocalDispatchAndClusterSize1() { try (State test = new State("cluster.1", 1, HostName.getLocalhost())) { - assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); - assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); @@ -225,8 +221,6 @@ public class SearchClusterTest { @Test void requireThatVipStatusIsDefaultDownWithLocalDispatchAndClusterSize2() { try (State test = new State("cluster.1", 1, HostName.getLocalhost(), "otherhost")) { - assertTrue(test.searchCluster.localCorpusDispatchTarget().isPresent()); - assertFalse(test.vipStatus.isInRotation()); test.waitOneFullPingRound(); assertTrue(test.vipStatus.isInRotation()); diff --git a/container-search/src/test/java/com/yahoo/search/schema/SchemaInfoTester.java b/container-search/src/test/java/com/yahoo/search/schema/SchemaInfoTester.java index 3e98b911fc8..2ba399cf42d 100644 --- a/container-search/src/test/java/com/yahoo/search/schema/SchemaInfoTester.java +++ b/container-search/src/test/java/com/yahoo/search/schema/SchemaInfoTester.java @@ -77,6 +77,7 @@ public class SchemaInfoTester { .addInput("query(myTensor1)", InputType.fromSpec("tensor(x[10])")) .build()) .add(new RankProfile.Builder("bOnly") + .setUseSignificanceModel(true) .addInput("query(myTensor1)", InputType.fromSpec("tensor(a{},b{})")) .build()) .build()); @@ -129,7 +130,8 @@ public class SchemaInfoTester { rankProfileInconsistentB.input(new SchemaInfoConfig.Schema.Rankprofile.Input.Builder().name("query(myTensor1)").type("tensor(x[10])")); schemaB.rankprofile(rankProfileInconsistentB); var rankProfileBOnly = new SchemaInfoConfig.Schema.Rankprofile.Builder(); - rankProfileBOnly.name("bOnly"); + rankProfileBOnly.name("bOnly") + .significance(new SchemaInfoConfig.Schema.Rankprofile.Significance.Builder().useModel(true)); rankProfileBOnly.input(new SchemaInfoConfig.Schema.Rankprofile.Input.Builder().name("query(myTensor1)").type("tensor(a{},b{})")); schemaB.rankprofile(rankProfileBOnly); diff --git a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java index ed67798b4b1..cb5722074ff 100644 --- a/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java +++ b/container-search/src/test/java/com/yahoo/search/significance/test/SignificanceSearcherTest.java @@ -11,6 +11,10 @@ import com.yahoo.prelude.query.AndItem; import com.yahoo.prelude.query.WordItem; import com.yahoo.search.Query; import com.yahoo.search.Result; +import com.yahoo.search.schema.DocumentSummary; +import com.yahoo.search.schema.RankProfile; +import com.yahoo.search.schema.Schema; +import com.yahoo.search.schema.SchemaInfo; import com.yahoo.search.searchchain.Execution; import com.yahoo.search.significance.SignificanceSearcher; import com.yahoo.vespa.config.search.RankProfilesConfig; @@ -33,24 +37,18 @@ public class SignificanceSearcherTest { SignificanceModelRegistry significanceModelRegistry; SignificanceSearcher searcher; - private static final String CONFIG_DIR = "src/test/resources/config/"; public SignificanceSearcherTest() { List<Path> models = new ArrayList<>(); models.add( Path.of("src/test/java/com/yahoo/search/significance/model/en.json")); - RankProfilesConfig rpCfg = readConfig("with_significance"); - - assertEquals(1, rpCfg.rankprofile().size()); - + var schema = new Schema.Builder("music") + .add(new DocumentSummary.Builder("default").build()) + .add(new RankProfile.Builder("significance-ranking") + .setUseSignificanceModel(true) + .build()); significanceModelRegistry = new DefaultSignificanceModelRegistry(models); - searcher = new SignificanceSearcher(significanceModelRegistry, rpCfg); - } - - @SuppressWarnings("deprecation") - private RankProfilesConfig readConfig(String subDir) { - String cfgId = "file:" + CONFIG_DIR + subDir + "/rank-profiles.cfg"; - return ConfigGetter.getConfig(RankProfilesConfig.class, cfgId); + searcher = new SignificanceSearcher(significanceModelRegistry, new SchemaInfo(List.of(schema.build()), List.of())); } private Execution createExecution(SignificanceSearcher searcher) { @@ -168,4 +166,36 @@ public class SignificanceSearcherTest { assertEquals(w0_1.getSignificance(), w1.getSignificance()); } + + @Test + public void failsOnConflictingSignificanceConfiguration() { + var musicSchema = new Schema.Builder("music") + .add(new DocumentSummary.Builder("default").build()) + .add(new RankProfile.Builder("significance-ranking") + .setUseSignificanceModel(true) + .build()) + .build(); + var albumSchema = new Schema.Builder("album") + .add(new DocumentSummary.Builder("default").build()) + .add(new RankProfile.Builder("significance-ranking") + .setUseSignificanceModel(false) + .build()) + .build(); + var searcher = new SignificanceSearcher( + significanceModelRegistry, new SchemaInfo(List.of(musicSchema, albumSchema), List.of())); + + var query = new Query(); + query.getRanking().setProfile("significance-ranking"); + + var result = createExecution(searcher).search(query); + assertEquals(1, result.hits().getErrorHit().errors().size()); + + var errorMessage = result.hits().getError(); + assertEquals("Inconsistent 'significance' configuration for the rank profile 'significance-ranking' in the schemas [music, album]. " + + "Use 'restrict' to limit the query to a subset of schemas " + + "(https://docs.vespa.ai/en/schemas.html#multiple-schemas). " + + "Specify same 'significance' configuration for all selected schemas " + + "(https://docs.vespa.ai/en/reference/schema-reference.html#significance).", + errorMessage.getDetailedMessage()); + } } diff --git a/container-search/src/test/resources/config/with_significance/rank-profiles.cfg b/container-search/src/test/resources/config/with_significance/rank-profiles.cfg deleted file mode 100644 index 1dc1be62862..00000000000 --- a/container-search/src/test/resources/config/with_significance/rank-profiles.cfg +++ /dev/null @@ -1,3 +0,0 @@ -rankprofile[0].name "significance-ranking" -rankprofile[0].fef.property[0].name "vespa.significance.use_model" -rankprofile[0].fef.property[0].value "true"
\ No newline at end of file diff --git a/dependency-versions/pom.xml b/dependency-versions/pom.xml index 9c21a9809b1..0876674e8c6 100644 --- a/dependency-versions/pom.xml +++ b/dependency-versions/pom.xml @@ -102,7 +102,7 @@ <felix.log.vespa.version>1.3.0</felix.log.vespa.version> <findbugs.vespa.version>3.0.2</findbugs.vespa.version> <!-- Should be kept in sync with guava --> <hamcrest.vespa.version>2.2</hamcrest.vespa.version> - <hdrhistogram.vespa.version>2.1.12</hdrhistogram.vespa.version> + <hdrhistogram.vespa.version>2.2.1</hdrhistogram.vespa.version> <huggingface.vespa.version>0.27.0</huggingface.vespa.version> <icu4j.vespa.version>75.1</icu4j.vespa.version> <java-jjwt.vespa.version>0.11.5</java-jjwt.vespa.version> @@ -179,7 +179,7 @@ <maven-jar-plugin.vespa.version>3.4.1</maven-jar-plugin.vespa.version> <maven-javadoc-plugin.vespa.version>3.6.3</maven-javadoc-plugin.vespa.version> <maven-plugin-api.vespa.version>${maven-core.vespa.version}</maven-plugin-api.vespa.version> - <maven-plugin-tools.vespa.version>3.12.0</maven-plugin-tools.vespa.version> + <maven-plugin-tools.vespa.version>3.13.0</maven-plugin-tools.vespa.version> <maven-resources-plugin.vespa.version>3.3.1</maven-resources-plugin.vespa.version> <maven-resolver.vespa.version>1.9.20</maven-resolver.vespa.version> <maven-shade-plugin.vespa.version>3.5.3</maven-shade-plugin.vespa.version> diff --git a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java index 86b0a2e78ad..3088083912b 100644 --- a/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java +++ b/docprocs/src/main/java/com/yahoo/docprocs/indexing/ScriptManager.java @@ -72,7 +72,7 @@ public class ScriptManager { Map<String, Map<String, DocumentScript>> documentFieldScripts = new HashMap<>(config.ilscript().size()); ScriptParserContext parserContext = new ScriptParserContext(linguistics, embedders); parserContext.getAnnotatorConfig().setMaxTermOccurrences(config.maxtermoccurrences()); - parserContext.getAnnotatorConfig().setMaxTokenLength(config.fieldmatchmaxlength()); + parserContext.getAnnotatorConfig().setMaxTokenizeLength(config.fieldmatchmaxlength()); for (IlscriptsConfig.Ilscript ilscript : config.ilscript()) { DocumentType documentType = docTypeMgr.getDocumentType(ilscript.doctype()); diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java index b807ad4cb65..849bc075a64 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/expressions/TokenizeExpression.java @@ -66,7 +66,7 @@ public final class TokenizeExpression extends Expression { if (config.getStemMode() != StemMode.NONE) { ret.append(" stem:\""+config.getStemMode()+"\""); } - if (config.hasNonDefaultMaxTokenLength()) { + if (config.hasNonDefaultMaxTokenizeLength()) { ret.append(" max-length:" + config.getMaxTokenizeLength()); } if (config.hasNonDefaultMaxTermOccurrences()) { diff --git a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java index 7b6f350d831..4e5ef0d90df 100644 --- a/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java +++ b/indexinglanguage/src/main/java/com/yahoo/vespa/indexinglanguage/linguistics/AnnotatorConfig.java @@ -82,7 +82,7 @@ public class AnnotatorConfig implements Cloneable { return this; } - public AnnotatorConfig setMaxTokenLength(int maxTokenizeLength) { + public AnnotatorConfig setMaxTokenizeLength(int maxTokenizeLength) { this.maxTokenizeLength = maxTokenizeLength; return this; } @@ -91,7 +91,7 @@ public class AnnotatorConfig implements Cloneable { return maxTokenizeLength; } - public boolean hasNonDefaultMaxTokenLength() { + public boolean hasNonDefaultMaxTokenizeLength() { return maxTokenizeLength != DEFAULT_MAX_TOKENIZE_LENGTH; } diff --git a/indexinglanguage/src/main/javacc/IndexingParser.jj b/indexinglanguage/src/main/javacc/IndexingParser.jj index 469d96ead60..77591d3e54e 100644 --- a/indexinglanguage/src/main/javacc/IndexingParser.jj +++ b/indexinglanguage/src/main/javacc/IndexingParser.jj @@ -689,7 +689,7 @@ AnnotatorConfig tokenizeCfg() : } { ( <STEM> ( <COLON> str = string() ) ? { val.setStemMode(str); } | - <MAX_LENGTH> <COLON> maxLength = integer() { val.setMaxTokenLength(maxLength); } | + <MAX_LENGTH> <COLON> maxLength = integer() { val.setMaxTokenizeLength(maxLength); } | <MAX_OCCURRENCES> <COLON> maxTermOccurrences = integer() { val.setMaxTermOccurrences(maxTermOccurrences); } | <NORMALIZE> { val.setRemoveAccents(true); } )+ { return val; } diff --git a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java index 136e71564d8..461c915acef 100644 --- a/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java +++ b/indexinglanguage/src/test/java/com/yahoo/vespa/indexinglanguage/linguistics/LinguisticsAnnotatorTestCase.java @@ -194,7 +194,7 @@ public class LinguisticsAnnotatorTestCase { Linguistics linguistics = new SimpleLinguistics(); - LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenLength(12)); + LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, new AnnotatorConfig().setMaxTokenizeLength(12)); assertTrue(annotator.annotate(shortValue)); assertEquals(spanTree, shortValue.getSpanTree(SpanTrees.LINGUISTICS)); diff --git a/model-integration/abi-spec.json b/model-integration/abi-spec.json index e7130d9c777..31f2b64d728 100644 --- a/model-integration/abi-spec.json +++ b/model-integration/abi-spec.json @@ -94,6 +94,7 @@ "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder model(com.yahoo.config.ModelReference)", "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder parallelRequests(int)", "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder maxQueueSize(int)", + "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder maxQueueWait(int)", "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder useGpu(boolean)", "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder gpuLayers(int)", "public ai.vespa.llm.clients.LlmLocalClientConfig$Builder threads(int)", @@ -139,6 +140,7 @@ "public java.nio.file.Path model()", "public int parallelRequests()", "public int maxQueueSize()", + "public int maxQueueWait()", "public boolean useGpu()", "public int gpuLayers()", "public int threads()", diff --git a/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java b/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java index aa7c071b93a..b6409b5466d 100644 --- a/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java +++ b/model-integration/src/main/java/ai/vespa/llm/clients/LocalLLM.java @@ -3,6 +3,7 @@ package ai.vespa.llm.clients; import ai.vespa.llm.InferenceParameters; import ai.vespa.llm.LanguageModel; +import ai.vespa.llm.LanguageModelException; import ai.vespa.llm.completion.Completion; import ai.vespa.llm.completion.Prompt; import com.yahoo.component.AbstractComponent; @@ -14,10 +15,14 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.RejectedExecutionException; +import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.SynchronousQueue; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Consumer; import java.util.logging.Logger; @@ -29,14 +34,19 @@ import java.util.logging.Logger; public class LocalLLM extends AbstractComponent implements LanguageModel { private final static Logger logger = Logger.getLogger(LocalLLM.class.getName()); + + private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor(); + private final LlamaModel model; private final ThreadPoolExecutor executor; + private final long queueTimeoutMilliseconds; private final int contextSize; private final int maxTokens; @Inject public LocalLLM(LlmLocalClientConfig config) { executor = createExecutor(config); + queueTimeoutMilliseconds = config.maxQueueWait(); // Maximum number of tokens to generate - need this since some models can just generate infinitely maxTokens = config.maxTokens(); @@ -74,6 +84,7 @@ public class LocalLLM extends AbstractComponent implements LanguageModel { logger.info("Closing LLM model..."); model.close(); executor.shutdownNow(); + scheduler.shutdownNow(); } @Override @@ -104,22 +115,39 @@ public class LocalLLM extends AbstractComponent implements LanguageModel { // Todo: more options? var completionFuture = new CompletableFuture<Completion.FinishReason>(); + var hasStarted = new AtomicBoolean(false); try { - executor.submit(() -> { + Future<?> future = executor.submit(() -> { + hasStarted.set(true); for (LlamaModel.Output output : model.generate(inferParams)) { consumer.accept(Completion.from(output.text, Completion.FinishReason.none)); } completionFuture.complete(Completion.FinishReason.stop); }); + + if (queueTimeoutMilliseconds > 0) { + scheduler.schedule(() -> { + if ( ! hasStarted.get()) { + future.cancel(false); + String error = rejectedExecutionReason("Rejected completion due to timeout waiting to start"); + completionFuture.completeExceptionally(new LanguageModelException(504, error)); + } + }, queueTimeoutMilliseconds, TimeUnit.MILLISECONDS); + } + } catch (RejectedExecutionException e) { // If we have too many requests (active + any waiting in queue), we reject the completion - int activeCount = executor.getActiveCount(); - int queueSize = executor.getQueue().size(); - String error = String.format("Rejected completion due to too many requests, " + - "%d active, %d in queue", activeCount, queueSize); + String error = rejectedExecutionReason("Rejected completion due to too many requests"); throw new RejectedExecutionException(error); } return completionFuture; } + private String rejectedExecutionReason(String prepend) { + int activeCount = executor.getActiveCount(); + int queueSize = executor.getQueue().size(); + return String.format("%s, %d active, %d in queue", prepend, activeCount, queueSize); + } + + } diff --git a/model-integration/src/main/resources/configdefinitions/llm-local-client.def b/model-integration/src/main/resources/configdefinitions/llm-local-client.def index 4823a53ec46..6b83ffd0751 100755 --- a/model-integration/src/main/resources/configdefinitions/llm-local-client.def +++ b/model-integration/src/main/resources/configdefinitions/llm-local-client.def @@ -8,7 +8,10 @@ model model parallelRequests int default=1 # Additional number of requests to put in queue for processing before starting to reject new requests -maxQueueSize int default=10 +maxQueueSize int default=100 + +# Max number of milliseoncds to wait in the queue before rejecting a request +maxQueueWait int default=10000 # Use GPU useGpu bool default=true @@ -24,6 +27,6 @@ threads int default=-1 # Context is divided between parallel requests. So for 10 parallel requests, each "slot" gets 1/10 of the context contextSize int default=4096 -# Maximum number of tokens to process in one request - overriden by inference parameters +# Maximum number of tokens to process in one request - overridden by inference parameters maxTokens int default=512 diff --git a/model-integration/src/test/java/ai/vespa/llm/clients/LocalLLMTest.java b/model-integration/src/test/java/ai/vespa/llm/clients/LocalLLMTest.java index a3b260f3fb5..4db1140d171 100644 --- a/model-integration/src/test/java/ai/vespa/llm/clients/LocalLLMTest.java +++ b/model-integration/src/test/java/ai/vespa/llm/clients/LocalLLMTest.java @@ -2,6 +2,7 @@ package ai.vespa.llm.clients; import ai.vespa.llm.InferenceParameters; +import ai.vespa.llm.LanguageModelException; import ai.vespa.llm.completion.Completion; import ai.vespa.llm.completion.Prompt; import ai.vespa.llm.completion.StringPrompt; @@ -96,7 +97,6 @@ public class LocalLLMTest { try { for (int i = 0; i < promptsToUse; i++) { final var seq = i; - completions.set(seq, new StringBuilder()); futures.set(seq, llm.completeAsync(StringPrompt.from(prompts.get(seq)), defaultOptions(), completion -> { completions.get(seq).append(completion.text()); @@ -122,8 +122,9 @@ public class LocalLLMTest { var prompts = testPrompts(); var promptsToUse = prompts.size(); var parallelRequests = 2; - var additionalQueue = 1; - // 7 should be rejected + var additionalQueue = 100; + var queueWaitTime = 10; + // 8 should be rejected due to queue wait time var futures = new ArrayList<CompletableFuture<Completion.FinishReason>>(Collections.nCopies(promptsToUse, null)); var completions = new ArrayList<StringBuilder>(Collections.nCopies(promptsToUse, null)); @@ -131,10 +132,12 @@ public class LocalLLMTest { var config = new LlmLocalClientConfig.Builder() .parallelRequests(parallelRequests) .maxQueueSize(additionalQueue) + .maxQueueWait(queueWaitTime) .model(ModelReference.valueOf(model)); var llm = new LocalLLM(config.build()); var rejected = new AtomicInteger(0); + var timedOut = new AtomicInteger(0); try { for (int i = 0; i < promptsToUse; i++) { final var seq = i; @@ -143,7 +146,14 @@ public class LocalLLMTest { try { var future = llm.completeAsync(StringPrompt.from(prompts.get(seq)), defaultOptions(), completion -> { completions.get(seq).append(completion.text()); - }).exceptionally(exception -> Completion.FinishReason.error); + }).exceptionally(exception -> { + if (exception instanceof LanguageModelException lme) { + if (lme.code() == 504) { + timedOut.incrementAndGet(); + } + } + return Completion.FinishReason.error; + }); futures.set(seq, future); } catch (RejectedExecutionException e) { rejected.incrementAndGet(); @@ -151,13 +161,14 @@ public class LocalLLMTest { } for (int i = 0; i < promptsToUse; i++) { if (futures.get(i) != null) { - assertNotEquals(futures.get(i).join(), Completion.FinishReason.error); + futures.get(i).join(); } } } finally { llm.deconstruct(); } - assertEquals(7, rejected.get()); + assertEquals(0, rejected.get()); + assertEquals(8, timedOut.get()); } private static InferenceParameters defaultOptions() { diff --git a/vespamalloc/src/vespamalloc/malloc/mmappool.cpp b/vespamalloc/src/vespamalloc/malloc/mmappool.cpp index cee709ed0ed..ba330d14125 100644 --- a/vespamalloc/src/vespamalloc/malloc/mmappool.cpp +++ b/vespamalloc/src/vespamalloc/malloc/mmappool.cpp @@ -58,9 +58,9 @@ MMapPool::mmap(size_t sz) { } buf = ::mmap(nullptr, sz, prot, flags, -1, 0); if (buf == MAP_FAILED) { - fprintf(_G_logFile, "Failed mmaping anonymous of size %ld errno(%d) from : ", sz, errno); + fprintf(_G_logFile, "Will exit due to: Failed mmaping anonymous of size %ld errno(%d) from : ", sz, errno); logStackTrace(); - abort(); + std::quick_exit(66); } } else { if (_has_hugepage_failure_just_happened) { diff --git a/vespamalloc/src/vespamalloc/util/callstack.cpp b/vespamalloc/src/vespamalloc/util/callstack.cpp index b8449c89a72..56b634bca33 100644 --- a/vespamalloc/src/vespamalloc/util/callstack.cpp +++ b/vespamalloc/src/vespamalloc/util/callstack.cpp @@ -1,39 +1,59 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include <dlfcn.h> -#include <ctype.h> +#include <cctype> #include <vespamalloc/util/callstack.h> +#include <string> +#include <cxxabi.h> namespace vespamalloc { -const char * -dlAddr(const void * func) { - static const char * _unknown = "UNKNOWN"; - const char * funcName = _unknown; +namespace { + +std::string +demangle(const char *native) { + int status = 0; + size_t size = 0; + char *unmangled = abi::__cxa_demangle(native, nullptr, &size, &status); + if (unmangled == nullptr) { + return ""; // Demangling failed for some reason. TODO return `native` instead? + } + std::string result(unmangled); + free(unmangled); + return result; +} + + +std::string +dlAddr(const void *func) { + static std::string _unknown = "UNKNOWN"; Dl_info info; int ret = dladdr(func, &info); if (ret != 0) { - funcName = info.dli_sname; + return demangle(info.dli_sname); } - return funcName; + return _unknown; +} + } namespace { void verifyAndCopy(const void *addr, char *v, size_t sz) { size_t pos(0); - const char *sym = dlAddr(addr); - for (; sym && (sym[pos] != '\0') && (pos < sz - 1); pos++) { + std::string sym = dlAddr(addr); + for (; (pos < sym.size()) && (pos < sz - 1); pos++) { char c(sym[pos]); v[pos] = isprint(c) ? c : '.'; } v[pos] = '\0'; } + } void StackReturnEntry::info(FILE * os) const { - static char tmp[0x400]; + char tmp[0x400]; verifyAndCopy(_return, tmp, sizeof(tmp)); fprintf(os, "%s(%p)", tmp, _return); } @@ -41,8 +61,8 @@ StackReturnEntry::info(FILE * os) const asciistream & operator << (asciistream & os, const StackReturnEntry & v) { - static char tmp[0x100]; - static char t[0x200]; + char tmp[0x100]; + char t[0x200]; verifyAndCopy(v._return, tmp, sizeof(tmp)); snprintf(t, sizeof(t), "%s(%p)", tmp, v._return); return os << t; diff --git a/vespamalloc/src/vespamalloc/util/callstack.h b/vespamalloc/src/vespamalloc/util/callstack.h index 3773d3c08b2..f3b177ea5f6 100644 --- a/vespamalloc/src/vespamalloc/util/callstack.h +++ b/vespamalloc/src/vespamalloc/util/callstack.h @@ -9,8 +9,6 @@ namespace vespamalloc { -const char * dlAddr(const void * addr); - class StackReturnEntry { public: StackReturnEntry(const void * returnAddress = nullptr, |