diff options
Diffstat (limited to 'container-search/src/main/java/com/yahoo')
10 files changed, 114 insertions, 53 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java index ea0cd2312a6..d3e6241a6e5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/parser/SimpleParser.java @@ -134,10 +134,8 @@ abstract class SimpleParser extends StructuredParser { if (topLevelItem != null && topLevelItem != not) { // => neutral rank items becomes implicit positives not.addPositiveItem(getItemAsPositiveItem(topLevelItem, not)); - return not; - } else { - return not; } + return not; } if (topLevelItem != null) { return topLevelItem; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java index d7fad148c8c..bfcf0af325d 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/InvokerFactory.java @@ -79,7 +79,7 @@ public abstract class InvokerFactory { success.add(node); } } - if ( ! cluster.isPartialGroupCoverageSufficient(success) && !acceptIncompleteCoverage) { + if ( ! cluster.isPartialGroupCoverageSufficient(group.hasSufficientCoverage(), success) && !acceptIncompleteCoverage) { return Optional.empty(); } if (invokers.isEmpty()) { diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java index 965ce4aeb94..c7af37b3a26 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/Group.java @@ -23,7 +23,7 @@ public class Group { // Using volatile to ensure visibility for reader. // All updates are done in a single writer thread - private volatile boolean hasSufficientCoverage = true; + private volatile boolean hasSufficientCoverage = false; private volatile boolean hasFullCoverage = true; private volatile long activeDocuments = 0; private volatile long targetActiveDocuments = 0; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java index 56545a32831..8f83d8ef5ce 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchCluster.java @@ -226,17 +226,20 @@ public class SearchCluster implements NodeManager<Node> { // With just one group sufficient coverage may not be the same as full coverage, as the // group will always be marked sufficient for use. updateSufficientCoverage(group, true); - boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), group.activeDocuments()); - trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments()); + boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(), + group.activeDocuments(), group.activeDocuments(), group.activeDocuments()); + trackGroupCoverageChanges(group, sufficientCoverage, group.activeDocuments(), group.activeDocuments()); } private void pingIterationCompletedMultipleGroups(SearchGroupsImpl groups) { groups.groups().forEach(Group::aggregateNodeValues); - long medianDocuments = groups.medianDocumentsPerGroup(); + long medianDocuments = groups.medianDocumentCount(); + long maxDocuments = groups.maxDocumentCount(); for (Group group : groups.groups()) { - boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.activeDocuments(), medianDocuments); + boolean sufficientCoverage = groups.isGroupCoverageSufficient(group.hasSufficientCoverage(), + group.activeDocuments(), medianDocuments, maxDocuments); updateSufficientCoverage(group, sufficientCoverage); - trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments); + trackGroupCoverageChanges(group, sufficientCoverage, medianDocuments, maxDocuments); } } @@ -261,7 +264,7 @@ public class SearchCluster implements NodeManager<Node> { /** * Calculate whether a subset of nodes in a group has enough coverage */ - private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments) { + private void trackGroupCoverageChanges(Group group, boolean fullCoverage, long medianDocuments, long maxDocuments) { if ( ! hasInformationAboutAllNodes()) return; // Be silent until we know what we are talking about. boolean changed = group.fullCoverageStatusChanged(fullCoverage); if (changed || (!fullCoverage && System.currentTimeMillis() > nextLogTime)) { @@ -278,7 +281,7 @@ public class SearchCluster implements NodeManager<Node> { unresponsive.append('\n').append(node); } String message = "Cluster " + clusterId + ": " + group + " has reduced coverage: " + - "Active documents: " + group.activeDocuments() + "/" + medianDocuments + ", " + + "Active documents: " + group.activeDocuments() + "/" + maxDocuments + ", " + "Target active documents: " + group.targetActiveDocuments() + ", " + "working nodes: " + group.workingNodes() + "/" + group.nodes().size() + ", unresponsive nodes: " + (unresponsive.toString().isEmpty() ? " none" : unresponsive); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java index 85063b8ef57..0bb694f610e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroups.java @@ -13,21 +13,30 @@ import static java.util.stream.Collectors.toCollection; import static java.util.stream.Collectors.toSet; /** - * Simple interface for groups and their nodes in the content cluster + * Simple interface for groups and their nodes in the content cluster. + * * @author baldersheim */ public interface SearchGroups { + Group get(int id); + Set<Integer> keys(); + Collection<Group> groups(); + default boolean isEmpty() { return size() == 0; } + default Set<Node> nodes() { return groups().stream().flatMap(group -> group.nodes().stream()) .sorted(comparingInt(Node::key)) .collect(toCollection(LinkedHashSet::new)); } + int size(); - boolean isPartialGroupCoverageSufficient(Collection<Node> nodes); + + boolean isPartialGroupCoverageSufficient(boolean currentCoverageSufficient, Collection<Node> nodes); + } diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java index c49a140804c..6528c5d2ae4 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/searchcluster/SearchGroupsImpl.java @@ -7,14 +7,17 @@ import java.util.Collection; import java.util.Map; import java.util.Set; +/** + * @author baldersheim + */ public class SearchGroupsImpl implements SearchGroups { private final Map<Integer, Group> groups; - private final double minActivedocsPercentage; + private final double minActiveDocsPercentage; - public SearchGroupsImpl(Map<Integer, Group> groups, double minActivedocsPercentage) { + public SearchGroupsImpl(Map<Integer, Group> groups, double minActiveDocsPercentage) { this.groups = Map.copyOf(groups); - this.minActivedocsPercentage = minActivedocsPercentage; + this.minActiveDocsPercentage = minActiveDocsPercentage; } @Override public Group get(int id) { return groups.get(id); } @@ -23,23 +26,38 @@ public class SearchGroupsImpl implements SearchGroups { @Override public int size() { return groups.size(); } @Override - public boolean isPartialGroupCoverageSufficient(Collection<Node> nodes) { - if (size() == 1) - return true; - long activeDocuments = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); - return isGroupCoverageSufficient(activeDocuments, medianDocumentsPerGroup()); + public boolean isPartialGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, Collection<Node> nodes) { + if (size() == 1) return true; + long groupDocumentCount = nodes.stream().mapToLong(Node::getActiveDocuments).sum(); + return isGroupCoverageSufficient(currentIsGroupCoverageSufficient, + groupDocumentCount, medianDocumentCount(), maxDocumentCount()); } - public boolean isGroupCoverageSufficient(long activeDocuments, long medianDocuments) { - if (medianDocuments <= 0) return true; - double documentCoverage = 100.0 * (double) activeDocuments / medianDocuments; - return documentCoverage >= minActivedocsPercentage; + public boolean isGroupCoverageSufficient(boolean currentIsGroupCoverageSufficient, + long groupDocumentCount, long medianDocumentCount, long maxDocumentCount) { + if (medianDocumentCount <= 0) return true; + if (currentIsGroupCoverageSufficient) { + // To take a group *out of* rotation, require that it has less active documents than the median. + // This avoids scenarios where incorrect accounting in a single group takes all other groups offline. + double documentCoverage = 100.0 * (double) groupDocumentCount / medianDocumentCount; + return documentCoverage >= minActiveDocsPercentage; + } + else { + // to put a group *in* rotation, require that it has as many documents as the largest group, + // to avoid taking groups in too early when the majority of the groups have just been added. + double documentCoverage = 100.0 * (double) groupDocumentCount / maxDocumentCount; + return documentCoverage >= minActiveDocsPercentage; + } } - public long medianDocumentsPerGroup() { + public long medianDocumentCount() { if (isEmpty()) return 0; double[] activeDocuments = groups().stream().mapToDouble(Group::activeDocuments).toArray(); return (long) Quantiles.median().computeInPlace(activeDocuments); } + public long maxDocumentCount() { + return (long)groups().stream().mapToDouble(Group::activeDocuments).max().orElse(0); + } + } diff --git a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java index 01167be6b8b..fdedbdc2fd9 100644 --- a/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java +++ b/container-search/src/main/java/com/yahoo/search/handler/Json2SingleLevelMap.java @@ -64,8 +64,8 @@ class Json2SingleLevelMap { } void parse(Map<String, String> map, String parent) throws IOException { - for (parser.nextToken(); parser.getCurrentToken() != JsonToken.END_OBJECT; parser.nextToken()) { - String fieldName = parent + parser.getCurrentName(); + for (parser.nextToken(); parser.currentToken() != JsonToken.END_OBJECT; parser.nextToken()) { + String fieldName = parent + parser.currentName(); JsonToken token = parser.nextToken(); if ((token == JsonToken.VALUE_STRING) || (token == JsonToken.VALUE_NUMBER_FLOAT) || @@ -89,9 +89,9 @@ class Json2SingleLevelMap { } private String skipChildren(JsonParser parser, byte [] input) throws IOException { - JsonLocation start = parser.getCurrentLocation(); + JsonLocation start = parser.currentLocation(); parser.skipChildren(); - JsonLocation end = parser.getCurrentLocation(); + JsonLocation end = parser.currentLocation(); int offset = (int)start.getByteOffset() - 1; return new String(input, offset, (int)(end.getByteOffset() - offset), StandardCharsets.UTF_8); } diff --git a/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java b/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java index a5b8d328a7a..9583e9885e7 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java +++ b/container-search/src/main/java/com/yahoo/search/schema/RankProfile.java @@ -36,6 +36,7 @@ public class RankProfile { private final String name; private final boolean hasSummaryFeatures; private final boolean hasRankFeatures; + private final boolean useSignificanceModel; private final Map<String, InputType> inputs; // Assigned when this is added to a schema @@ -45,6 +46,7 @@ public class RankProfile { this.name = builder.name; this.hasSummaryFeatures = builder.hasSummaryFeatures; this.hasRankFeatures = builder.hasRankFeatures; + this.useSignificanceModel = builder.useSignificanceModel; this.inputs = Collections.unmodifiableMap(builder.inputs); } @@ -66,6 +68,9 @@ public class RankProfile { /** Returns true if this rank profile has rank features. */ public boolean hasRankFeatures() { return hasRankFeatures; } + /** Returns true if this rank profile should use significance models. */ + public boolean useSignificanceModel() { return useSignificanceModel; } + /** Returns the inputs explicitly declared in this rank profile. */ public Map<String, InputType> inputs() { return inputs; } @@ -76,13 +81,14 @@ public class RankProfile { if ( ! other.name.equals(this.name)) return false; if ( other.hasSummaryFeatures != this.hasSummaryFeatures) return false; if ( other.hasRankFeatures != this.hasRankFeatures) return false; + if ( other.useSignificanceModel != this.useSignificanceModel) return false; if ( ! other.inputs.equals(this.inputs)) return false; return true; } @Override public int hashCode() { - return Objects.hash(name, hasSummaryFeatures, hasRankFeatures, inputs); + return Objects.hash(name, hasSummaryFeatures, hasRankFeatures, useSignificanceModel, inputs); } @Override @@ -95,6 +101,7 @@ public class RankProfile { private final String name; private boolean hasSummaryFeatures = true; private boolean hasRankFeatures = true; + private boolean useSignificanceModel = false; private final Map<String, InputType> inputs = new LinkedHashMap<>(); public Builder(String name) { @@ -116,6 +123,8 @@ public class RankProfile { return this; } + public Builder setUseSignificanceModel(boolean use) { this.useSignificanceModel = use; return this; } + public RankProfile build() { return new RankProfile(this); } diff --git a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java index d28c2db2b9e..77f27d3d411 100644 --- a/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java +++ b/container-search/src/main/java/com/yahoo/search/schema/SchemaInfoConfigurer.java @@ -22,9 +22,10 @@ class SchemaInfoConfigurer { Schema.Builder builder = new Schema.Builder(schemaInfoConfig.name()); for (var profileConfig : schemaInfoConfig.rankprofile()) { - RankProfile.Builder profileBuilder = new RankProfile.Builder(profileConfig.name()); - profileBuilder.setHasSummaryFeatures(profileConfig.hasSummaryFeatures()); - profileBuilder.setHasRankFeatures(profileConfig.hasRankFeatures()); + RankProfile.Builder profileBuilder = new RankProfile.Builder(profileConfig.name()) + .setHasSummaryFeatures(profileConfig.hasSummaryFeatures()) + .setHasRankFeatures(profileConfig.hasRankFeatures()) + .setUseSignificanceModel(profileConfig.significance().useModel()); for (var inputConfig : profileConfig.input()) profileBuilder.addInput(inputConfig.name(), RankProfile.InputType.fromSpec(inputConfig.type())); builder.add(profileBuilder.build()); diff --git a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java index da0f98c50f5..e3a559da8f9 100644 --- a/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java +++ b/container-search/src/main/java/com/yahoo/search/significance/SignificanceSearcher.java @@ -14,12 +14,16 @@ import com.yahoo.prelude.query.WordItem; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; -import com.yahoo.search.query.Ranking; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.schema.RankProfile; +import com.yahoo.search.schema.Schema; +import com.yahoo.search.schema.SchemaInfo; import com.yahoo.search.searchchain.Execution; -import com.yahoo.vespa.config.search.RankProfilesConfig; -import java.util.HashMap; +import java.util.HashSet; import java.util.Optional; +import java.util.logging.Logger; +import java.util.stream.Collectors; import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; @@ -33,29 +37,48 @@ import static com.yahoo.prelude.querytransform.StemmingSearcher.STEMMING; public class SignificanceSearcher extends Searcher { public final static String SIGNIFICANCE = "Significance"; - private final SignificanceModelRegistry significanceModelRegistry; - private final RankProfilesConfig rankProfilesConfig; - private final HashMap<String, Boolean> useModel = new HashMap<>(); + private static final Logger log = Logger.getLogger(SignificanceSearcher.class.getName()); + + private final SignificanceModelRegistry significanceModelRegistry; + private final SchemaInfo schemaInfo; @Inject - public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry, RankProfilesConfig rankProfilesConfig) { + public SignificanceSearcher(SignificanceModelRegistry significanceModelRegistry, SchemaInfo schemaInfo) { this.significanceModelRegistry = significanceModelRegistry; - this.rankProfilesConfig = rankProfilesConfig; - - for (RankProfilesConfig.Rankprofile profile : rankProfilesConfig.rankprofile()) { - for (RankProfilesConfig.Rankprofile.Fef.Property property : profile.fef().property()) { - if (property.name().equals("vespa.significance.use_model")) { - useModel.put(profile.name(), Boolean.parseBoolean(property.value())); - } - } - } + this.schemaInfo = schemaInfo; } @Override public Result search(Query query, Execution execution) { - Ranking ranking = query.getRanking(); - if (!useModel.containsKey(ranking.getProfile()) || !useModel.get(ranking.getProfile())) return execution.search(query); + var rankProfileName = query.getRanking().getProfile(); + + // Determine significance setup per schema for the given rank profile + var perSchemaSetup = schemaInfo.newSession(query).schemas().stream() + .collect(Collectors.toMap(Schema::name, schema -> + // Fallback to disabled if the rank profile is not found in the schema + // This will result in a failure later (in a "backend searcher") anyway. + Optional.ofNullable(schema.rankProfiles().get(rankProfileName)) + .map(RankProfile::useSignificanceModel).orElse(false))); + var uniqueSetups = new HashSet<>(perSchemaSetup.values()); + + // Fail if the significance setup for the selected schemas are conflicting + if (uniqueSetups.size() > 1) { + var result = new Result(query); + result.hits().addError( + ErrorMessage.createIllegalQuery( + ("Inconsistent 'significance' configuration for the rank profile '%s' in the schemas %s. " + + "Use 'restrict' to limit the query to a subset of schemas " + + "(https://docs.vespa.ai/en/schemas.html#multiple-schemas). " + + "Specify same 'significance' configuration for all selected schemas " + + "(https://docs.vespa.ai/en/reference/schema-reference.html#significance).") + .formatted(rankProfileName, perSchemaSetup.keySet()))); + return result; + } + + if (perSchemaSetup.isEmpty()) return execution.search(query); + var useSignificanceModel = uniqueSetups.iterator().next(); + if (!useSignificanceModel) return execution.search(query); Language language = query.getModel().getParsingLanguage(); Optional<SignificanceModel> model = significanceModelRegistry.getModel(language); |