diff options
56 files changed, 894 insertions, 402 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java index 55601059901..adfb1d06ae3 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocsumDefinitionSet.java @@ -35,8 +35,8 @@ public final class DocsumDefinitionSet { public DocsumDefinitionSet(Collection<DocumentSummary> docsumDefinitions) { this.definitionsByName = docsumDefinitions.stream() - .map(summary -> new DocsumDefinition(summary)) - .collect(Collectors.toUnmodifiableMap(summary -> summary.name(), + .map(DocsumDefinition::new) + .collect(Collectors.toUnmodifiableMap(DocsumDefinition::name, summary -> summary)); } @@ -95,7 +95,7 @@ public final class DocsumDefinitionSet { public String toString() { StringBuilder sb = new StringBuilder(); for (Map.Entry<String, DocsumDefinition> e : definitionsByName.entrySet() ) { - if (sb.length() != 0) { + if (!sb.isEmpty()) { sb.append(","); } sb.append("[").append(e.getKey()).append(",").append(e.getValue().name()).append("]"); diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java index 8d1c3bcd678..ea6b6a8ee38 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/DocumentDatabase.java @@ -1,16 +1,8 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.fastsearch; -import com.yahoo.search.schema.RankProfile; import com.yahoo.search.schema.Schema; -import com.yahoo.search.schema.SchemaInfo; -import com.yahoo.tensor.TensorType; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; + /** * Representation of a document database realizing a schema in a content cluster. diff --git a/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java b/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java index 11fbedfc0dd..7e60f8a41b6 100644 --- a/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java +++ b/container-search/src/main/java/com/yahoo/prelude/fastsearch/GroupingListHit.java @@ -3,28 +3,32 @@ package com.yahoo.prelude.fastsearch; import java.util.List; +import com.yahoo.search.Query; import com.yahoo.search.result.Hit; +import com.yahoo.search.schema.Schema; import com.yahoo.searchlib.aggregation.Grouping; public class GroupingListHit extends Hit { /** for unit tests only, may give problems if grouping contains docsums */ public GroupingListHit(List<Grouping> groupingList) { - this(groupingList, null); + this(groupingList, null, null); } - public GroupingListHit(List<Grouping> groupingList, DocsumDefinitionSet defs) { - super("meta:grouping", 0); + public GroupingListHit(List<Grouping> groupingList, DocumentDatabase documentDatabase, Query query) { + super("meta:grouping", 0, query); this.groupingList = groupingList; - this.defs = defs; + this.documentDatabase = documentDatabase; } public boolean isMeta() { return true; } public List<Grouping> getGroupingList() { return groupingList; } - public DocsumDefinitionSet getDocsumDefinitionSet() { return defs; } + public DocsumDefinitionSet getDocsumDefinitionSet() { return documentDatabase.getDocsumDefinitionSet(); } + public Schema getSchema() { return documentDatabase.schema(); } + public DocumentDatabase getDocumentDatBase() { return documentDatabase; } private final List<Grouping> groupingList; - private final DocsumDefinitionSet defs; + private final DocumentDatabase documentDatabase; } diff --git a/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java index 927fa37cc77..3c4e8107df5 100644 --- a/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java +++ b/container-search/src/main/java/com/yahoo/prelude/searcher/JuniperSearcher.java @@ -83,9 +83,7 @@ public class JuniperSearcher extends Searcher { List<Hit> hits = new ArrayList<>(worstCase); for (Iterator<Hit> i = result.hits().deepIterator(); i.hasNext();) { Hit hit = i.next(); - if ( ! (hit instanceof FastHit)) continue; - - FastHit fastHit = (FastHit)hit; + if ( ! (hit instanceof FastHit fastHit)) continue; if (fastHit.isFilled(summaryClass)) continue; hits.add(fastHit); @@ -99,9 +97,8 @@ public class JuniperSearcher extends Searcher { String summaryClass, IndexFacts.Session indexFacts) { while (hitsToHighlight.hasNext()) { Hit hit = hitsToHighlight.next(); - if ( ! (hit instanceof FastHit)) continue; + if ( ! (hit instanceof FastHit fastHit)) continue; - FastHit fastHit = (FastHit) hit; if (summaryClass != null && ! fastHit.isFilled(summaryClass)) continue; Object searchDefinitionField = fastHit.getField(Hit.SDDOCNAME_FIELD); @@ -125,9 +122,9 @@ public class JuniperSearcher extends Searcher { private class StringArrayConverter implements ArrayTraverser { - private Index index; - private boolean bolding; - private Value.ArrayValue convertedField = new Value.ArrayValue(); + private final Index index; + private final boolean bolding; + private final Value.ArrayValue convertedField = new Value.ArrayValue(); /** * This converts the backend binary highlighting of each item in an array of string field, @@ -189,8 +186,8 @@ public class JuniperSearcher extends Searcher { } if (newFieldParts != null) { i.remove(); - for (Iterator<FieldPart> j = newFieldParts.iterator(); j.hasNext();) { - i.add(j.next()); + for (FieldPart newFieldPart : newFieldParts) { + i.add(newFieldPart); } } } @@ -220,7 +217,7 @@ public class JuniperSearcher extends Searcher { if (insideHighlight) { newFieldParts.add(new BoldCloseFieldPart(boldCloseTag)); } else { - if (newFieldParts.size() > 0 + if (!newFieldParts.isEmpty() && newFieldParts.get(newFieldParts.size() - 1) instanceof BoldCloseFieldPart) { newFieldParts.remove(newFieldParts.size() - 1); } else { @@ -230,7 +227,7 @@ public class JuniperSearcher extends Searcher { } } - private List<FieldPart> initFieldParts(List<FieldPart> newFieldParts) { + private static List<FieldPart> initFieldParts(List<FieldPart> newFieldParts) { if (newFieldParts == null) newFieldParts = new ArrayList<>(); return newFieldParts; diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/GroupingResultAggregator.java b/container-search/src/main/java/com/yahoo/search/dispatch/GroupingResultAggregator.java index 8db0ec46f95..8311c84930e 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/GroupingResultAggregator.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/GroupingResultAggregator.java @@ -1,7 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.search.dispatch; -import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; +import com.yahoo.prelude.fastsearch.DocumentDatabase; import com.yahoo.prelude.fastsearch.GroupingListHit; import com.yahoo.search.Query; import com.yahoo.searchlib.aggregation.Grouping; @@ -23,13 +23,13 @@ class GroupingResultAggregator { private static final Logger log = Logger.getLogger(GroupingResultAggregator.class.getName()); private final Map<Integer, Grouping> groupings = new LinkedHashMap<>(); - private DocsumDefinitionSet docsumDefinitions = null; + private DocumentDatabase documentDatabase = null; private Query query = null; private int groupingHitsMerged = 0; void mergeWith(GroupingListHit result) { ++groupingHitsMerged; - if (docsumDefinitions == null) docsumDefinitions = result.getDocsumDefinitionSet(); + if (documentDatabase == null) documentDatabase = result.getDocumentDatBase(); if (query == null) query = result.getQuery(); log.log(Level.FINE, () -> String.format("Merging hit #%d having %d groupings", @@ -46,8 +46,8 @@ class GroupingResultAggregator { if (groupingHitsMerged == 0) return Optional.empty(); log.log(Level.FINE, () -> String.format("Creating aggregated hit containing %d groupings from %d hits with docsums '%s' and %s", - groupings.size(), groupingHitsMerged, docsumDefinitions, query)); - GroupingListHit groupingHit = new GroupingListHit(List.copyOf(groupings.values()), docsumDefinitions); + groupings.size(), groupingHitsMerged, documentDatabase.getDocsumDefinitionSet(), query)); + GroupingListHit groupingHit = new GroupingListHit(List.copyOf(groupings.values()), documentDatabase, query); groupingHit.setQuery(query); groupingHit.getGroupingList().forEach(g -> { g.select(o -> o instanceof Hit, o -> ((Hit)o).setContext(groupingHit)); diff --git a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/ProtobufSerialization.java b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/ProtobufSerialization.java index 9cad9d19f4c..9da4c91cd16 100644 --- a/container-search/src/main/java/com/yahoo/search/dispatch/rpc/ProtobufSerialization.java +++ b/container-search/src/main/java/com/yahoo/search/dispatch/rpc/ProtobufSerialization.java @@ -255,8 +255,7 @@ public class ProtobufSerialization { g.deserialize(buf); list.add(g); } - GroupingListHit hit = new GroupingListHit(list, documentDatabase.getDocsumDefinitionSet()); - hit.setQuery(query); + GroupingListHit hit = new GroupingListHit(list, documentDatabase, query); result.getResult().hits().add(hit); } for (var replyHit : protobuf.getHitsList()) { diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java index 3b5399fccc9..be4f2f786e5 100644 --- a/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/GroupingExecutor.java @@ -91,7 +91,7 @@ public class GroupingExecutor extends Searcher { Result result = performSearch(query, execution, groupingMap); // Convert Vespa style results to hits. - HitConverter hitConverter = new HitConverter(this, query); + HitConverter hitConverter = new HitConverter(this); for (RequestContext context : requestContextList) { RootGroup group = convertResult(context, groupingMap, hitConverter); result.hits().add(group); diff --git a/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java b/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java index 13d635b5772..5f5f8266ab0 100644 --- a/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java +++ b/container-search/src/main/java/com/yahoo/search/grouping/vespa/HitConverter.java @@ -4,7 +4,6 @@ package com.yahoo.search.grouping.vespa; import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; import com.yahoo.prelude.fastsearch.FastHit; import com.yahoo.prelude.fastsearch.GroupingListHit; -import com.yahoo.search.Query; import com.yahoo.search.Searcher; import com.yahoo.search.result.Hit; import com.yahoo.search.result.Relevance; @@ -19,17 +18,14 @@ import com.yahoo.searchlib.aggregation.VdsHit; class HitConverter implements ResultBuilder.HitConverter { private final Searcher searcher; - private final Query query; /** * Creates a new instance of this class. * * @param searcher The searcher that owns this converter. - * @param query The query that returned the hits. */ - public HitConverter(Searcher searcher, Query query) { + public HitConverter(Searcher searcher) { this.searcher = searcher; - this.query = query; } @Override @@ -59,19 +55,21 @@ class HitConverter implements ResultBuilder.HitConverter { } private Hit convertVdsHit(String summaryClass, VdsHit grpHit) { - FastHit ret = new FastHit(); - ret.setRelevance(grpHit.getRank()); + FastHit hit = new FastHit(); + hit.setRelevance(grpHit.getRank()); if (grpHit.getSummary().getData().length > 0) { - GroupingListHit ctxHit = (GroupingListHit)grpHit.getContext(); - if (ctxHit == null) { + GroupingListHit hitContext = (GroupingListHit)grpHit.getContext(); + if (hitContext == null) { throw new NullPointerException("Hit has no context."); } - DocsumDefinitionSet defs = ctxHit.getDocsumDefinitionSet(); - defs.lazyDecode(summaryClass, grpHit.getSummary().getData(), ret); - ret.setFilled(summaryClass); - ret.setFilled(query.getPresentation().getSummary()); + DocsumDefinitionSet defs = hitContext.getDocsumDefinitionSet(); + defs.lazyDecode(summaryClass, grpHit.getSummary().getData(), hit); + hit.setField(Hit.SDDOCNAME_FIELD, hitContext.getSchema().name()); + hit.setFilled(summaryClass); + hit.setFilled(hitContext.getQuery().getPresentation().getSummary()); + hit.setQuery(hitContext.getQuery()); } - return ret; + return hit; } } diff --git a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingSearcher.java b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingSearcher.java index f7f3f97f3ac..e5674a80eac 100644 --- a/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingSearcher.java +++ b/container-search/src/main/java/com/yahoo/vespa/streamingvisitors/StreamingSearcher.java @@ -245,7 +245,7 @@ public class StreamingSearcher extends VespaBackEndSearcher { List<Grouping> groupingList = visitor.getGroupings(); lazyTrace(query, 8, "Grouping list=", groupingList); if ( ! groupingList.isEmpty() ) { - GroupingListHit groupHit = new GroupingListHit(groupingList, getDocsumDefinitionSet(query)); + GroupingListHit groupHit = new GroupingListHit(groupingList, getDocumentDatabase(query), query); result.hits().add(groupHit); } diff --git a/container-search/src/test/java/com/yahoo/search/grouping/result/FlatteningSearcherTestCase.java b/container-search/src/test/java/com/yahoo/search/grouping/result/FlatteningSearcherTestCase.java index 7ec35151eab..9e8fcb0ea21 100644 --- a/container-search/src/test/java/com/yahoo/search/grouping/result/FlatteningSearcherTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/grouping/result/FlatteningSearcherTestCase.java @@ -76,8 +76,8 @@ public class FlatteningSearcherTestCase { Execution execution = newExecution(new FlatteningSearcher(), new GroupingExecutor(ComponentId.fromString("grouping")), new ResultProvider(List.of( - new GroupingListHit(List.of(group0), null), - new GroupingListHit(List.of(group1), null))), + new GroupingListHit(List.of(group0), null, query), + new GroupingListHit(List.of(group1), null, query))), new HitsProvider(List.of( new DefaultErrorHit("source 1", ErrorMessage.createBackendCommunicationError("backend communication error 1")), new DefaultErrorHit("source 2", ErrorMessage.createBackendCommunicationError("backend communication error 1"))))); diff --git a/container-search/src/test/java/com/yahoo/search/grouping/vespa/GroupingExecutorTestCase.java b/container-search/src/test/java/com/yahoo/search/grouping/vespa/GroupingExecutorTestCase.java index 17ab3823d57..ef2ef9724a9 100644 --- a/container-search/src/test/java/com/yahoo/search/grouping/vespa/GroupingExecutorTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/grouping/vespa/GroupingExecutorTestCase.java @@ -8,9 +8,7 @@ import com.yahoo.document.DocumentId; import com.yahoo.document.GlobalId; import com.yahoo.prelude.fastsearch.FastHit; import com.yahoo.prelude.fastsearch.GroupingListHit; -import com.yahoo.prelude.query.NotItem; import com.yahoo.prelude.query.NullItem; -import com.yahoo.prelude.query.WordItem; import com.yahoo.search.Query; import com.yahoo.search.Result; import com.yahoo.search.Searcher; @@ -183,8 +181,8 @@ public class GroupingExecutorTestCase { ); Execution exec = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grpA), null), - new GroupingListHit(List.of(grpB), null)))); + new GroupingListHit(List.of(grpA), null, query), + new GroupingListHit(List.of(grpB), null, query)))); Group grp = req.getResultGroup(exec.search(query)); assertEquals(1, grp.size()); Hit hit = grp.get(0); @@ -216,8 +214,8 @@ public class GroupingExecutorTestCase { ); Execution exec = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grpExpected), null), - new GroupingListHit(List.of(grpUnexpected), null)))); + new GroupingListHit(List.of(grpExpected), null, query), + new GroupingListHit(List.of(grpUnexpected), null, query)))); Group grp = req.getResultGroup(exec.search(query)); assertEquals(1, grp.size()); Hit hit = grp.get(0); @@ -247,8 +245,8 @@ public class GroupingExecutorTestCase { )); Execution exec = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grp0), null), - new GroupingListHit(List.of(grp1), null))), + new GroupingListHit(List.of(grp0), null, query), + new GroupingListHit(List.of(grp1), null, query))), new FillRequestThrower()); Result res = exec.search(query); @@ -287,8 +285,8 @@ public class GroupingExecutorTestCase { .addHit(new com.yahoo.searchlib.aggregation.FS4Hit())))); Execution exec = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grp0), null), - new GroupingListHit(List.of(grp1), null))), + new GroupingListHit(List.of(grp0), null, query), + new GroupingListHit(List.of(grp1), null, query))), new FillErrorProvider()); Result res = exec.search(query); exec.fill(res); @@ -313,8 +311,8 @@ public class GroupingExecutorTestCase { .addOrderBy(new AggregationRefNode(0), true))); Result res = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grp), null), - new GroupingListHit(List.of(grp), null)))).search(query); + new GroupingListHit(List.of(grp), null, query), + new GroupingListHit(List.of(grp), null, query)))).search(query); GroupList groupList = (GroupList) req.getResultGroup(res).get(0); assertEquals(1.0, groupList.get(0).getRelevance().getScore(), 1E-6); @@ -342,8 +340,8 @@ public class GroupingExecutorTestCase { Execution exec = newExecution(new GroupingExecutor(), err, new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grp0), null), - new GroupingListHit(List.of(grp1), null)))); + new GroupingListHit(List.of(grp0), null, query), + new GroupingListHit(List.of(grp1), null, query)))); Result res = exec.search(query); assertNotNull(res.hits().getError()); assertEquals(Error.TIMEOUT.code, res.hits().getError().getCode()); @@ -353,8 +351,8 @@ public class GroupingExecutorTestCase { exec = newExecution(new GroupingExecutor(), err, new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(grp0), null), - new GroupingListHit(List.of(grp1), null)))); + new GroupingListHit(List.of(grp0), null, query), + new GroupingListHit(List.of(grp1), null, query)))); res = exec.search(query); assertNotNull(res.hits().getError()); assertEquals(Error.TIMEOUT.code, res.hits().getError().getCode()); @@ -392,8 +390,8 @@ public class GroupingExecutorTestCase { SummaryMapper sm = new SummaryMapper(); Execution exec = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(Arrays.asList(pass0A, pass0B), null), - new GroupingListHit(Arrays.asList(pass1A, pass1B), null))), + new GroupingListHit(Arrays.asList(pass0A, pass0B), null, query), + new GroupingListHit(Arrays.asList(pass1A, pass1B), null, query))), sm); exec.fill(exec.search(query), "default"); assertEquals(2, sm.hitsBySummary.size()); @@ -436,8 +434,8 @@ public class GroupingExecutorTestCase { .addHit(new com.yahoo.searchlib.aggregation.FS4Hit())))); Execution exec = newExecution(new GroupingExecutor(), new ResultProvider(Arrays.asList( - new GroupingListHit(List.of(pass0), null), - new GroupingListHit(List.of(pass1), null)))); + new GroupingListHit(List.of(pass0), null, query), + new GroupingListHit(List.of(pass1), null, query)))); Result res = exec.search(query); exec.fill(res); @@ -457,7 +455,7 @@ public class GroupingExecutorTestCase { .addChild(new com.yahoo.searchlib.aggregation.Group().setId(new StringResultNode("foo")) .addAggregationResult(new HitsAggregationResult(1, "bar")) )); - GroupingListHit pass0 = new GroupingListHit(List.of(grp), null); + GroupingListHit pass0 = new GroupingListHit(List.of(grp), null, queryA); GlobalId gid = new GlobalId((new DocumentId("id:ns:type::1")).getGlobalId()); grp = new Grouping(0); @@ -465,9 +463,8 @@ public class GroupingExecutorTestCase { .addChild(new com.yahoo.searchlib.aggregation.Group().setId(new StringResultNode("foo")) .addAggregationResult(new HitsAggregationResult(1, "bar").addHit(new com.yahoo.searchlib.aggregation.FS4Hit(4, gid, 6))) )); - GroupingListHit pass1 = new GroupingListHit(List.of(grp), null); Query queryB = newQuery(); // required by GroupingListHit.getSearchQuery() - pass1.setQuery(queryB); + GroupingListHit pass1 = new GroupingListHit(List.of(grp), null, queryB); QueryMapper qm = new QueryMapper(); Execution exec = newExecution(new GroupingExecutor(), @@ -569,10 +566,10 @@ public class GroupingExecutorTestCase { .addChild(new com.yahoo.searchlib.aggregation.Group().setId(new StringResultNode("uniqueC")).addAggregationResult(new MaxAggregationResult().setMax(new IntegerResultNode(7)).setTag(4))) .addChild(new com.yahoo.searchlib.aggregation.Group().setId(new StringResultNode("common")).addAggregationResult(new MaxAggregationResult().setMax(new IntegerResultNode(11)).setTag(4))) ); - resultsByDocumentType.put("typeA", List.of(new GroupingListHit(List.of(groupA1), null), - new GroupingListHit(List.of(groupA2), null))); - resultsByDocumentType.put("typeB", List.of(new GroupingListHit(List.of(groupB1), null), - new GroupingListHit(List.of(groupB2), null))); + resultsByDocumentType.put("typeA", List.of(new GroupingListHit(List.of(groupA1), null, query), + new GroupingListHit(List.of(groupA2), null, query))); + resultsByDocumentType.put("typeB", List.of(new GroupingListHit(List.of(groupB1), null, query), + new GroupingListHit(List.of(groupB2), null, query))); Execution execution = newExecution(new GroupingExecutor(), new MockClusterSearcher(), new MultiDocumentTypeResultProvider(resultsByDocumentType)); @@ -742,11 +739,7 @@ public class GroupingExecutorTestCase { for (Iterator<Hit> it = result.hits().deepIterator(); it.hasNext();) { Hit hit = it.next(); Query query = hit.getQuery(); - List<Hit> lst = hitsByQuery.get(query); - if (lst == null) { - lst = new LinkedList<>(); - hitsByQuery.put(query, lst); - } + List<Hit> lst = hitsByQuery.computeIfAbsent(query, k -> new LinkedList<>()); lst.add(hit); } } @@ -767,11 +760,7 @@ public class GroupingExecutorTestCase { public void fill(Result result, String summaryClass, Execution exec) { for (Iterator<Hit> it = result.hits().deepIterator(); it.hasNext();) { Hit hit = it.next(); - List<Hit> lst = hitsBySummary.get(summaryClass); - if (lst == null) { - lst = new LinkedList<>(); - hitsBySummary.put(summaryClass, lst); - } + List<Hit> lst = hitsBySummary.computeIfAbsent(summaryClass, k -> new LinkedList<>()); lst.add(hit); } } diff --git a/container-search/src/test/java/com/yahoo/search/grouping/vespa/HitConverterTestCase.java b/container-search/src/test/java/com/yahoo/search/grouping/vespa/HitConverterTestCase.java index 8f775e9923a..2df821b7f0b 100644 --- a/container-search/src/test/java/com/yahoo/search/grouping/vespa/HitConverterTestCase.java +++ b/container-search/src/test/java/com/yahoo/search/grouping/vespa/HitConverterTestCase.java @@ -4,8 +4,8 @@ package com.yahoo.search.grouping.vespa; import com.yahoo.document.DocumentId; import com.yahoo.document.GlobalId; import com.yahoo.net.URI; +import com.yahoo.prelude.fastsearch.DocumentDatabase; import com.yahoo.prelude.fastsearch.GroupingListHit; -import com.yahoo.prelude.fastsearch.DocsumDefinitionSet; import com.yahoo.prelude.fastsearch.FastHit; import com.yahoo.search.Query; import com.yahoo.search.Result; @@ -19,7 +19,7 @@ import com.yahoo.searchlib.aggregation.FS4Hit; import com.yahoo.searchlib.aggregation.VdsHit; import org.junit.jupiter.api.Test; -import java.util.Collections; +import java.util.List; import static org.junit.jupiter.api.Assertions.*; @@ -34,24 +34,24 @@ public class HitConverterTestCase { @Test void requireThatHitsAreConverted() { - HitConverter converter = new HitConverter(new MySearcher(), new Query()); - Hit hit = converter.toSearchHit("default", new FS4Hit(1, createGlobalId(2), 3).setContext(context())); + Query query = new Query(); + HitConverter converter = new HitConverter(new MySearcher()); + Hit hit = converter.toSearchHit("default", new FS4Hit(1, createGlobalId(2), 3).setContext(context(query))); assertNotNull(hit); assertEquals(new URI("index:null/1/" + asHexString(createGlobalId(2))), hit.getId()); - hit = converter.toSearchHit("default", new FS4Hit(4, createGlobalId(5), 6).setContext(context())); + hit = converter.toSearchHit("default", new FS4Hit(4, createGlobalId(5), 6).setContext(context(query))); assertNotNull(hit); assertEquals(new URI("index:null/4/" + asHexString(createGlobalId(5))), hit.getId()); } @Test void requireThatContextDataIsCopied() { - Hit ctxHit = context(); - ctxHit.setSource("69"); Query ctxQuery = new Query(); - ctxHit.setQuery(ctxQuery); + Hit ctxHit = context(ctxQuery); + ctxHit.setSource("69"); - HitConverter converter = new HitConverter(new MySearcher(), new Query()); + HitConverter converter = new HitConverter(new MySearcher()); Hit hit = converter.toSearchHit("default", new FS4Hit(1, createGlobalId(2), 3).setContext(ctxHit)); assertNotNull(hit); assertTrue(hit instanceof FastHit); @@ -63,9 +63,10 @@ public class HitConverterTestCase { @Test void requireThatSummaryClassIsSet() { + Query query = new Query(); Searcher searcher = new MySearcher(); - HitConverter converter = new HitConverter(searcher, new Query()); - Hit hit = converter.toSearchHit("69", new FS4Hit(1, createGlobalId(2), 3).setContext(context())); + HitConverter converter = new HitConverter(searcher); + Hit hit = converter.toSearchHit("69", new FS4Hit(1, createGlobalId(2), 3).setContext(context(query))); assertNotNull(hit); assertTrue(hit instanceof FastHit); assertEquals("69", hit.getSearcherSpecificMetaData(searcher)); @@ -73,7 +74,7 @@ public class HitConverterTestCase { @Test void requireThatHitHasContext() { - HitConverter converter = new HitConverter(new MySearcher(), new Query()); + HitConverter converter = new HitConverter(new MySearcher()); try { converter.toSearchHit("69", new FS4Hit(1, createGlobalId(2), 3)); fail(); @@ -84,7 +85,7 @@ public class HitConverterTestCase { @Test void requireThatUnsupportedHitClassThrows() { - HitConverter converter = new HitConverter(new MySearcher(), new Query()); + HitConverter converter = new HitConverter(new MySearcher()); try { converter.toSearchHit("69", new com.yahoo.searchlib.aggregation.Hit() { @@ -95,21 +96,22 @@ public class HitConverterTestCase { } } - private static GroupingListHit context() { - return new GroupingListHit(Collections.emptyList(), null); + private static GroupingListHit context(Query query) { + return new GroupingListHit(List.of(), null, query); } - private static DocsumDefinitionSet sixtynine() { + private static DocumentDatabase sixtynine() { var schema = new Schema.Builder("none"); var summary = new DocumentSummary.Builder("69"); schema.add(summary.build()); - return new DocsumDefinitionSet(schema.build()); + return new DocumentDatabase(schema.build()); } @Test void requireThatVdsHitCanBeConverted() { - HitConverter converter = new HitConverter(new MySearcher(), new Query()); - GroupingListHit context = new GroupingListHit(null, sixtynine()); + Query query = new Query(); + HitConverter converter = new HitConverter(new MySearcher()); + GroupingListHit context = new GroupingListHit(null, sixtynine(), query); VdsHit lowHit = new VdsHit("id:ns:type::", new byte[]{0x55, 0x55, 0x55, 0x55}, 1); lowHit.setContext(context); Hit hit = converter.toSearchHit("69", lowHit); @@ -117,6 +119,8 @@ public class HitConverterTestCase { assertTrue(hit instanceof FastHit); assertEquals(new Relevance(1), hit.getRelevance()); assertTrue(hit.isFilled("69")); + assertEquals("none", hit.getField(Hit.SDDOCNAME_FIELD)); + assertSame(query, hit.getQuery()); } private static String asHexString(GlobalId gid) { diff --git a/flags/src/main/java/com/yahoo/vespa/flags/Dimension.java b/flags/src/main/java/com/yahoo/vespa/flags/Dimension.java index 0f81fd4640b..328d581aed3 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/Dimension.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/Dimension.java @@ -35,6 +35,9 @@ public enum Dimension { /** Machine architecture: either arm64 or x86_64. */ ARCHITECTURE("architecture"), + /** A provider of TLS certificates. */ + CERTIFICATE_PROVIDER("certificate-provider"), + /** Whether "enclave" (or "inclave" or "exclave"), or not ("noclave"). */ CLAVE("clave"), diff --git a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java index 4edda472531..05f2062136b 100644 --- a/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java +++ b/flags/src/main/java/com/yahoo/vespa/flags/PermanentFlags.java @@ -14,6 +14,7 @@ import java.util.function.Predicate; import java.util.regex.Pattern; import static com.yahoo.vespa.flags.Dimension.APPLICATION; +import static com.yahoo.vespa.flags.Dimension.CERTIFICATE_PROVIDER; import static com.yahoo.vespa.flags.Dimension.CLOUD_ACCOUNT; import static com.yahoo.vespa.flags.Dimension.INSTANCE_ID; import static com.yahoo.vespa.flags.Dimension.CLUSTER_ID; @@ -418,7 +419,8 @@ public class PermanentFlags { public static final UnboundIntFlag CERT_POOL_SIZE = defineIntFlag( "cert-pool-size", 0, "Target number of preprovisioned endpoints certificates to maintain", - "Takes effect on next run of CertPoolMaintainer" + "Takes effect on next run of CertificatePoolMaintainer", + CERTIFICATE_PROVIDER ); public static final UnboundBooleanFlag ENCLAVE_WITHOUT_WIREGUARD = defineFeatureFlag( diff --git a/flags/src/test/java/com/yahoo/vespa/flags/DimensionTest.java b/flags/src/test/java/com/yahoo/vespa/flags/DimensionTest.java index 4b64c8a198e..032874dffac 100644 --- a/flags/src/test/java/com/yahoo/vespa/flags/DimensionTest.java +++ b/flags/src/test/java/com/yahoo/vespa/flags/DimensionTest.java @@ -12,9 +12,9 @@ class DimensionTest { @SuppressWarnings("unused") public String remember_to_update_SystemFlagsDataArchive(Dimension dimension) { return switch (dimension) { - case APPLICATION, ARCHITECTURE, CLAVE, CLOUD, CLOUD_ACCOUNT, CLUSTER_ID, CLUSTER_TYPE, + case APPLICATION, ARCHITECTURE, CERTIFICATE_PROVIDER, CLAVE, CLOUD, CLOUD_ACCOUNT, CLUSTER_ID, CLUSTER_TYPE, CONSOLE_USER_EMAIL, ENVIRONMENT, HOSTNAME, INSTANCE_ID, NODE_TYPE, SYSTEM, TENANT_ID, VESPA_VERSION, ZONE_ID -> dimension.toWire(); }; } -}
\ No newline at end of file +} diff --git a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp index f7f5b2328c4..9ab8f4829a0 100644 --- a/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp +++ b/searchcore/src/apps/vespa-gen-testdocs/vespa-gen-testdocs.cpp @@ -14,7 +14,6 @@ #include <getopt.h> #include <vector> #include <limits> -#include <unistd.h> #include <filesystem> #include <vespa/log/log.h> @@ -552,7 +551,7 @@ DocumentGenerator::generate(uint32_t docMin, uint32_t docIdLimit, { string fullName(prependBaseDir(baseDir, feedFileName)); std::filesystem::remove(std::filesystem::path(fullName)); - Fast_BufferedFile f(new FastOS_File); + Fast_BufferedFile f; f.WriteOpen(fullName.c_str()); if (json) { bool first = true; diff --git a/searchcore/src/vespa/searchcorespi/index/indexreadutilities.cpp b/searchcore/src/vespa/searchcorespi/index/indexreadutilities.cpp index d3f45ec081b..6683e9aa935 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexreadutilities.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexreadutilities.cpp @@ -36,8 +36,7 @@ scanForIndexes(const vespalib::string &baseDir, if (name.find(IndexDiskLayout::FusionDirPrefix) == 0) { if (!fusionDir.empty()) { // Should never happen, since we run cleanup before load. - LOG(warning, "Base directory '%s' contains multiple fusion indexes", - baseDir.c_str()); + LOG(warning, "Base directory '%s' contains multiple fusion indexes", baseDir.c_str()); } fusionDir = name; } @@ -59,8 +58,8 @@ IndexReadUtilities::readFusionSpec(const vespalib::string &baseDir) fusionId = atoi(fusionDir.substr(IndexDiskLayout::FusionDirPrefix.size()).c_str()); } std::set<uint32_t> flushIds; - for (size_t i = 0; i < flushDirs.size(); ++i) { - uint32_t id = atoi(flushDirs[i].substr(IndexDiskLayout::FlushDirPrefix.size()).c_str()); + for (const auto & flushDir : flushDirs) { + uint32_t id = atoi(flushDir.substr(IndexDiskLayout::FlushDirPrefix.size()).c_str()); flushIds.insert(id); } @@ -74,7 +73,7 @@ SerialNum IndexReadUtilities::readSerialNum(const vespalib::string &dir) { const vespalib::string fileName = IndexDiskLayout::getSerialNumFileName(dir); - Fast_BufferedFile file; + Fast_BufferedFile file(16_Ki); file.ReadOpen(fileName.c_str()); FileHeader fileHeader; diff --git a/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp b/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp index d2f77cfaf29..dfef52cc8e8 100644 --- a/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp +++ b/searchcore/src/vespa/searchcorespi/index/indexwriteutilities.cpp @@ -40,12 +40,11 @@ IndexWriteUtilities::writeSerialNum(SerialNum serialNum, const vespalib::string &dir, const FileHeaderContext &fileHeaderContext) { - const vespalib::string fileName = - IndexDiskLayout::getSerialNumFileName(dir); + const vespalib::string fileName = IndexDiskLayout::getSerialNumFileName(dir); const vespalib::string tmpFileName = fileName + ".tmp"; SerialNumFileHeaderContext snFileHeaderContext(fileHeaderContext, serialNum); - Fast_BufferedFile file; + Fast_BufferedFile file(16_Ki); file.WriteOpen(tmpFileName.c_str()); FileHeader fileHeader; snFileHeaderContext.addTags(fileHeader, fileName); @@ -103,17 +102,13 @@ IndexWriteUtilities::copySerialNumFile(const vespalib::string &sourceDir, } void -IndexWriteUtilities::writeSourceSelector(FixedSourceSelector::SaveInfo & - saveInfo, +IndexWriteUtilities::writeSourceSelector(FixedSourceSelector::SaveInfo & saveInfo, uint32_t sourceId, - const TuneFileAttributes & - tuneFileAttributes, - const FileHeaderContext & - fileHeaderContext, + const TuneFileAttributes & tuneFileAttributes, + const FileHeaderContext & fileHeaderContext, SerialNum serialNum) { - SerialNumFileHeaderContext snFileHeaderContext(fileHeaderContext, - serialNum); + SerialNumFileHeaderContext snFileHeaderContext(fileHeaderContext, serialNum); if (!saveInfo.save(tuneFileAttributes, snFileHeaderContext)) { std::ostringstream msg; msg << "Flush of sourceselector failed. Source id = " << sourceId; @@ -122,20 +117,16 @@ IndexWriteUtilities::writeSourceSelector(FixedSourceSelector::SaveInfo & } void -IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir, - const Schema &schema, - SerialNum serialNum) +IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir, const Schema &schema, SerialNum serialNum) { vespalib::string schemaName = IndexDiskLayout::getSchemaFileName(indexDir); Schema oldSchema; if (!oldSchema.loadFromFile(schemaName)) { - LOG(error, "Could not open schema '%s'", - schemaName.c_str()); + LOG(error, "Could not open schema '%s'", schemaName.c_str()); return; } if (!SchemaUtil::validateSchema(oldSchema)) { - LOG(error, "Could not validate schema loaded from '%s'", - schemaName.c_str()); + LOG(error, "Could not validate schema loaded from '%s'", schemaName.c_str()); return; } Schema::UP newSchema = Schema::intersect(oldSchema, schema); @@ -152,8 +143,7 @@ IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir, vespalib::string schemaOrigName = schemaName + ".orig"; fs::remove(fs::path(schemaTmpName)); if (!newSchema->saveToFile(schemaTmpName)) { - LOG(error, "Could not save schema to '%s'", - schemaTmpName.c_str()); + LOG(error, "Could not save schema to '%s'", schemaTmpName.c_str()); } FastOS_StatInfo statInfo; bool statres; @@ -161,15 +151,12 @@ IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir, if (!statres) { if (statInfo._error != FastOS_StatInfo::FileNotFound) { LOG(error, "Failed to stat orig schema '%s': %s", - schemaOrigName.c_str(), - FastOS_File::getLastErrorString().c_str()); + schemaOrigName.c_str(), FastOS_File::getLastErrorString().c_str()); } int linkres = ::link(schemaName.c_str(), schemaOrigName.c_str()); if (linkres != 0) { LOG(error, "Could not link '%s' to '%s': %s", - schemaOrigName.c_str(), - schemaName.c_str(), - FastOS_File::getLastErrorString().c_str()); + schemaOrigName.c_str(), schemaName.c_str(), FastOS_File::getLastErrorString().c_str()); } vespalib::File::sync(indexDir); } @@ -178,9 +165,7 @@ IndexWriteUtilities::updateDiskIndexSchema(const vespalib::string &indexDir, int error = errno; std::string errString = FastOS_File::getErrorString(error); LOG(error, "Could not rename '%s' to '%s': %s", - schemaTmpName.c_str(), - schemaName.c_str(), - errString.c_str()); + schemaTmpName.c_str(), schemaName.c_str(), errString.c_str()); } vespalib::File::sync(indexDir); } diff --git a/searchlib/src/tests/query/streaming/CMakeLists.txt b/searchlib/src/tests/query/streaming/CMakeLists.txt index 7568e45d00a..5ed450ecbc8 100644 --- a/searchlib/src/tests/query/streaming/CMakeLists.txt +++ b/searchlib/src/tests/query/streaming/CMakeLists.txt @@ -1,5 +1,14 @@ # Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_query_streaming_equiv_query_node_test_app TEST + SOURCES + equiv_query_node_test.cpp + DEPENDS + searchlib + GTest::gtest +) +vespa_add_test(NAME searchlib_query_streaming_equiv_query_node_test_app COMMAND searchlib_query_streaming_equiv_query_node_test_app) + vespa_add_executable(searchlib_query_streaming_hit_iterator_test_app TEST SOURCES hit_iterator_test.cpp diff --git a/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp new file mode 100644 index 00000000000..72378385c78 --- /dev/null +++ b/searchlib/src/tests/query/streaming/equiv_query_node_test.cpp @@ -0,0 +1,209 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/searchlib/query/streaming/equiv_query_node.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/simpletermdata.h> +#include <vespa/searchlib/query/streaming/phrase_query_node.h> +#include <vespa/searchlib/query/streaming/query.h> +#include <vespa/searchlib/query/streaming/queryterm.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/vespalib/gtest/gtest.h> + +using search::fef::MatchData; +using search::fef::SimpleTermData; +using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchDataPosition; +using search::query::QueryBuilder; +using search::query::Node; +using search::query::SimpleQueryNodeTypes; +using search::query::StackDumpCreator; +using search::query::Weight; +using search::streaming::EquivQueryNode; +using search::streaming::HitList; +using search::streaming::PhraseQueryNode; +using search::streaming::Query; +using search::streaming::QueryNodeResultFactory; +using search::streaming::QueryTerm; +using search::streaming::QueryTermList; + +class AllowRewrite : public QueryNodeResultFactory +{ +public: + bool allow_float_terms_rewrite(vespalib::stringref) const noexcept override { return true; } +}; + +class EquivQueryNodeTest : public ::testing::Test +{ +public: + EquivQueryNodeTest(); + ~EquivQueryNodeTest(); + + void assert_tfmd_pos(const vespalib::string label, + const TermFieldMatchDataPosition &tfmd_pos, + uint32_t exp_element_id, + uint32_t exp_position, + int32_t exp_element_weight, + uint32_t exp_element_length); + vespalib::string make_simple_equiv_stack_dump(); +}; + +EquivQueryNodeTest::EquivQueryNodeTest() + : ::testing::Test() +{ +} + +EquivQueryNodeTest::~EquivQueryNodeTest() = default; + +void +EquivQueryNodeTest::assert_tfmd_pos(const vespalib::string label, + const TermFieldMatchDataPosition &tfmd_pos, + uint32_t exp_element_id, + uint32_t exp_position, + int32_t exp_element_weight, + uint32_t exp_element_length) +{ + SCOPED_TRACE(label); + EXPECT_EQ(exp_element_id, tfmd_pos.getElementId()); + EXPECT_EQ(exp_position, tfmd_pos.getPosition()); + EXPECT_EQ(exp_element_weight, tfmd_pos.getElementWeight()); + EXPECT_EQ(exp_element_length, tfmd_pos.getElementLen()); +} + +vespalib::string +EquivQueryNodeTest::make_simple_equiv_stack_dump() +{ + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addEquiv(3, 0, Weight(0)); + { + builder.addStringTerm("2", "", 0, Weight(0)); + builder.addStringTerm("2.5", "", 0, Weight(0)); + builder.addStringTerm("3", "", 0, Weight(0)); + } + Node::UP node = builder.build(); + return StackDumpCreator::create(*node); +} + +TEST_F(EquivQueryNodeTest, test_equiv_evaluate_and_unpack) +{ + auto stack_dump = make_simple_equiv_stack_dump(); + QueryNodeResultFactory empty; + Query q(empty, stack_dump); + auto& eqn = dynamic_cast<EquivQueryNode&>(q.getRoot()); + auto& terms = eqn.get_terms(); + EXPECT_EQ(3, terms.size()); + for (auto& qt : terms) { + qt->resizeFieldId(1); + } + + /* + * Populate hit lists in query terms, emulating the result of + * having performed a streaming search. + */ + constexpr uint32_t field0 = 0; + constexpr uint32_t field1 = 1; + constexpr uint32_t elem0 = 0; + constexpr uint32_t elem1 = 1; + constexpr int32_t weight1 = 1; + constexpr int32_t weight2 = 2; + constexpr uint32_t pos5 = 5; + constexpr uint32_t pos6 = 6; + constexpr uint32_t pos3 = 3; + constexpr uint32_t pos4 = 4; + constexpr uint32_t field0_len = 100; + constexpr uint32_t field1_len = 200; + constexpr uint32_t field0_element0_len = 10; + constexpr uint32_t field0_element1_len = 30; + constexpr uint32_t field1_element0_len = 31; + // field 0 + terms[0]->add(field0, elem0, weight1, pos5); + terms[1]->add(field0, elem0, weight1, pos6); + terms[2]->add(field0, elem1, weight1, pos3); + // field 1 + terms[1]->add(field1, elem0, weight1, pos4); + terms[2]->add(field1, elem0, weight2, pos4); + + terms[0]->set_element_length(0, field0_element0_len); + terms[1]->set_element_length(0, field0_element0_len); + terms[1]->set_element_length(1, field1_element0_len); + terms[2]->set_element_length(0, field0_element1_len); + terms[2]->set_element_length(1, field1_element0_len); + + /* + * evaluateHits() should get the union of the hits for each query term + * but without duplicates. + */ + HitList hits; + eqn.evaluateHits(hits); + auto exp_hits = HitList{{field0,elem0,weight1,pos5},{field0,elem0,weight1,pos6},{field0,elem1,weight1,pos3},{field1,elem0,weight2,pos4}}; + exp_hits[0].set_element_length(field0_element0_len); + exp_hits[1].set_element_length(field0_element0_len); + exp_hits[2].set_element_length(field0_element1_len); + exp_hits[3].set_element_length(field1_element0_len); + ASSERT_EQ(exp_hits, hits); + EXPECT_TRUE(eqn.evaluate()); + + /* + * Verify that unpack_match_data() gives the expected term field + * match data information. + */ + SimpleTermData td; + constexpr TermFieldHandle handle0 = 27; + constexpr TermFieldHandle handle1 = 29; + constexpr TermFieldHandle handle_max = std::max(handle0, handle1); + td.addField(0).setHandle(handle0); + td.addField(1).setHandle(handle1); + terms[0]->resizeFieldId(field0); + terms[0]->getFieldInfo(field0).setFieldLength(field0_len); + terms[1]->resizeFieldId(field1); + terms[1]->getFieldInfo(field0).setFieldLength(field0_len); + terms[1]->getFieldInfo(field1).setFieldLength(field1_len); + terms[2]->resizeFieldId(field1); + terms[2]->getFieldInfo(field0).setFieldLength(field0_len); + terms[2]->getFieldInfo(field1).setFieldLength(field1_len); + auto md = MatchData::makeTestInstance(handle_max + 1, handle_max + 1); + auto tfmd0 = md->resolveTermField(handle0); + auto tfmd1 = md->resolveTermField(handle1); + tfmd0->setNeedInterleavedFeatures(true); + tfmd1->setNeedInterleavedFeatures(true); + eqn.unpack_match_data(2, td, *md); + EXPECT_EQ(2, tfmd0->getDocId()); + EXPECT_EQ(3, tfmd0->getNumOccs()); + EXPECT_EQ(3, tfmd0->end() - tfmd0->begin()); + auto itr = tfmd0->begin(); + assert_tfmd_pos("tmfd0[0]", *itr, elem0, pos5, weight1, field0_element0_len); + ++itr; + assert_tfmd_pos("tmfd0[1]", *itr, elem0, pos6, weight1, field0_element0_len); + ++itr; + assert_tfmd_pos("tmfd0[2]", *itr, elem1, pos3, weight1, field0_element1_len); + EXPECT_EQ(field0_len, tfmd0->getFieldLength()); + EXPECT_EQ(2, tfmd1->getDocId()); + EXPECT_EQ(1, tfmd1->getNumOccs()); + EXPECT_EQ(1, tfmd1->end() - tfmd1->begin()); + itr = tfmd1->begin(); + assert_tfmd_pos("tmfd1[0]", *itr, elem0, pos4, weight2, field1_element0_len); + EXPECT_EQ(field1_len, tfmd1->getFieldLength()); +} + +TEST_F(EquivQueryNodeTest, test_equiv_flattening) +{ + auto stack_dump = make_simple_equiv_stack_dump(); + AllowRewrite allow_rewrite; + Query q(allow_rewrite, stack_dump); + auto& eqn = dynamic_cast<EquivQueryNode&>(q.getRoot()); + auto& terms = eqn.get_terms(); + // Query is flattened to equiv("2", "2.5", phrase("2","5"), "3") + EXPECT_EQ(4, terms.size()); + EXPECT_EQ("2", terms[0]->getTermString()); + EXPECT_EQ("2.5", terms[1]->getTermString()); + auto phrase = dynamic_cast<PhraseQueryNode*>(terms[2].get()); + EXPECT_NE(phrase, nullptr); + EXPECT_EQ(2, phrase->get_terms().size()); + EXPECT_EQ("2", phrase->get_terms()[0]->getTermString()); + EXPECT_EQ("5", phrase->get_terms()[1]->getTermString()); + EXPECT_EQ("3", terms[3]->getTermString()); +} + + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 19a2a0876c6..5559e194c5e 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -3,6 +3,7 @@ #include <vespa/searchlib/fef/simpletermdata.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/query/streaming/dot_product_term.h> +#include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/searchlib/query/streaming/in_term.h> #include <vespa/searchlib/query/streaming/phrase_query_node.h> #include <vespa/searchlib/query/streaming/query.h> @@ -352,17 +353,17 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr); const auto & equiv = static_cast<const EquivQueryNode &>(root); - EXPECT_EQ(2u, equiv.size()); - EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr); + EXPECT_EQ(2u, equiv.get_terms().size()); + EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv.get_terms()[0].get()) != nullptr); { - const auto & qt = static_cast<const QueryTerm &>(*equiv[0]); + const auto & qt = static_cast<const QueryTerm &>(*equiv.get_terms()[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); } - EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr); + EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv.get_terms()[1].get()) != nullptr); { - const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); + const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv.get_terms()[1]); EXPECT_EQ(2u, phrase.get_terms().size()); { const auto & qt = *phrase.get_terms()[0]; diff --git a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp index 502d2b39019..ac88515bb1b 100644 --- a/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp +++ b/searchlib/src/tests/queryeval/or_speed/or_speed_test.cpp @@ -120,9 +120,11 @@ struct OrSetup { std::vector<std::unique_ptr<TMD>> match_data; std::vector<BitVector::UP> child_hits; std::vector<bool> use_array; + bool strict_bm = true; bool strict_or = true; bool strict_children = true; bool unwrap_single_child = true; + uint32_t docid_skip = 1; OrSetup(uint32_t docid_limit_in) noexcept : docid_limit(docid_limit_in) {} size_t per_child(double target, size_t child_cnt) { size_t result = (docid_limit * target) / child_cnt; @@ -182,9 +184,9 @@ struct OrSetup { } return result; } - OrSetup &prepare_bm(size_t child_cnt, size_t hits_per_child) { + OrSetup &prepare_bm(size_t child_cnt, size_t hits_per_child, bool use_array_in) { for (size_t i = 0; i < child_cnt; ++i) { - add(hits_per_child, should_use_array(hits_per_child), false); + add(hits_per_child, use_array_in, false); } return *this; } @@ -213,10 +215,11 @@ struct OrSetup { size_t seeks = 0; BenchmarkTimer timer(budget); while (timer.has_budget()) { + uint32_t delta = docid_skip; timer.before(); seeks = 0; search.initRange(1, docid_limit); - for (uint32_t docid = 1; docid < docid_limit; ++docid) { + for (uint32_t docid = 1; docid < docid_limit; docid += delta) { ++seeks; search.seek(docid); } @@ -225,7 +228,7 @@ struct OrSetup { return std::make_pair(seeks, timer.min_time() * 1000.0); } std::pair<size_t,double> bm_search_ms(Impl impl, bool optimized) { - if (strict_or) { + if (strict_bm) { return bm_strict(impl, optimized); } else { return bm_non_strict(impl, optimized); @@ -378,6 +381,7 @@ TEST(OrSpeed, bm_array_vs_bitvector) { for (bool wrapped: {false, true}) { setup.unwrap_single_child = !wrapped; for (bool strict: {false, true}) { + setup.strict_bm = strict; setup.strict_or = strict; setup.strict_children = strict; for (bool use_array: {false, true}) { @@ -394,6 +398,46 @@ TEST(OrSpeed, bm_array_vs_bitvector) { } } +TEST(OrSpeed, bm_strict_when_not_needed) { + if (!bench_mode) { + fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n"); + return; + } + double target = 0.05; + size_t child_cnt = 200; + auto impl = Impl::HEAP; + bool optimize = false; + OrSetup setup(bench_docs); + size_t part = setup.per_child(target, child_cnt); + bool use_array = false; + setup.prepare_bm(child_cnt, part, use_array); + fprintf(stderr, "OR bench(%s, %s, children: %4zu, hits_per_child: %8zu %s)\n", + impl_str(impl), opt_str(optimize), child_cnt, part, leaf_str(use_array)); + for (bool strict_bm: {false, true}) { + setup.strict_bm = strict_bm; + for (bool strict_or: {false, true}) { + setup.strict_or = strict_or; + for (bool strict_children: {false, true}) { + setup.strict_children = strict_children; + for (uint32_t skip = 1; skip < 500'000; skip *= 4) { + setup.docid_skip = skip; + bool conflict = (strict_bm && !strict_or) || (strict_or && !strict_children) || (strict_bm && skip > 1); + if (!conflict) { + auto result = setup.bm_search_ms(impl, optimize); + auto flow_stats = FlowAdapter::stats(setup); + double in_flow = 1.0 / double(skip); // NOTE: not multiplied with strict cost + double ms_per_cost = result.second / (strict_or ? flow_stats.strict_cost : (in_flow * flow_stats.cost)); + fprintf(stderr, "loop: %s, skip: %8u, OR: %s, children: %s, " + "seeks: %8zu, time: %10.3f ms, ns per seek: %10.3f, ms per cost: %10.3f\n", + strict_str(strict_bm), skip, strict_str(strict_or), strict_str(strict_children), + result.first, result.second, ns_per(result), ms_per_cost); + } + } + } + } + } +} + TEST(OrSpeed, bm_strict_or) { if (!bench_mode) { fprintf(stdout, "[ SKIPPING ] run with 'bench' parameter to activate\n"); @@ -406,9 +450,10 @@ TEST(OrSpeed, bm_strict_or) { size_t part = setup.per_child(target, child_cnt); bool use_array = setup.should_use_array(part); if (part > 0 && (!use_array || !optimize)) { - setup.prepare_bm(child_cnt, part); + setup.prepare_bm(child_cnt, part, use_array); for (auto impl: {Impl::PLAIN, Impl::HEAP}) { for (bool strict: {false, true}) { + setup.strict_bm = strict; setup.strict_or = strict; setup.strict_children = strict; auto result = setup.bm_search_ms(impl, optimize); diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp index 4654cf435b1..dcf627b0ce2 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.cpp @@ -328,7 +328,7 @@ bool AttributeVector::isEnumeratedSaveFormat() const { vespalib::string datName(fmt("%s.dat", getBaseFileName().c_str())); - Fast_BufferedFile datFile; + Fast_BufferedFile datFile(16_Ki); vespalib::FileHeader datHeader(FileSettings::DIRECTIO_ALIGNMENT); if ( ! datFile.OpenReadOnly(datName.c_str()) ) { LOG(error, "could not open %s: %s", datFile.GetFileName(), getLastErrorString().c_str()); diff --git a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp index 907d54467c1..77c86e21b2a 100644 --- a/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp +++ b/searchlib/src/vespa/searchlib/attribute/sourceselector.cpp @@ -81,7 +81,7 @@ void SourceSelector::LoadInfo::load() { const vespalib::string fileName = _header._baseFileName + ".dat"; - Fast_BufferedFile file; + Fast_BufferedFile file(16_Ki); // XXX no checking for success file.ReadOpen(fileName.c_str()); diff --git a/searchlib/src/vespa/searchlib/common/documentsummary.cpp b/searchlib/src/vespa/searchlib/common/documentsummary.cpp index f004e5cc4cf..9341cf92b89 100644 --- a/searchlib/src/vespa/searchlib/common/documentsummary.cpp +++ b/searchlib/src/vespa/searchlib/common/documentsummary.cpp @@ -40,7 +40,7 @@ bool DocumentSummary::writeDocIdLimit(const vespalib::string &dir, uint32_t count) { vespalib::string qcntname = dir + "/docsum.qcnt"; - Fast_BufferedFile qcntfile(new FastOS_File); + Fast_BufferedFile qcntfile(4_Ki); qcntfile.WriteOpen(qcntname.c_str()); if (!qcntfile.IsOpened()) { diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp index fc7fd9c2fb7..8c47e5c193e 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp @@ -49,7 +49,7 @@ BitVectorFileWrite::open(const vespalib::string &name, uint32_t docIdLimit, Parent::open(name, docIdLimit, tuneFileWrite, fileHeaderContext); - _datFile = std::make_unique<Fast_BufferedFile>(new FastOS_File); + _datFile = std::make_unique<Fast_BufferedFile>(); if (tuneFileWrite.getWantSyncWrites()) { _datFile->EnableSyncWrites(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp index 7e3f0f5f258..cec3db35d60 100644 --- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp @@ -59,7 +59,7 @@ BitVectorIdxFileWrite::open(const vespalib::string &name, uint32_t docIdLimit, vespalib::string idxname = name + getBitVectorKeyScopeSuffix(_scope); assert( !_idxFile); - _idxFile = std::make_unique<Fast_BufferedFile>(new FastOS_File()); + _idxFile = std::make_unique<Fast_BufferedFile>(); if (tuneFileWrite.getWantSyncWrites()) { _idxFile->EnableSyncWrites(); } diff --git a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp index 1392f47525e..19741b3b166 100644 --- a/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/dictionarywordreader.cpp @@ -6,6 +6,8 @@ #include <vespa/fastlib/io/bufferedfile.h> #include <vespa/log/log.h> + +#include <memory> LOG_SETUP(".diskindex.dictionarywordreader"); namespace search::diskindex { @@ -21,17 +23,15 @@ DictionaryWordReader::DictionaryWordReader() { } - DictionaryWordReader::~DictionaryWordReader() = default; - bool DictionaryWordReader::open(const vespalib::string & dictionaryName, const vespalib::string & wordMapName, const TuneFileSeqRead &tuneFileRead) { - _old2newwordfile.reset(new Fast_BufferedFile(new FastOS_File)); - _dictFile.reset(new PageDict4FileSeqRead); + _old2newwordfile = std::make_unique<Fast_BufferedFile>(); + _dictFile = std::make_unique<PageDict4FileSeqRead>(); if (!_dictFile->open(dictionaryName, tuneFileRead)) { LOG(error, "Could not open dictionary %s: %s", dictionaryName.c_str(), getLastErrorString().c_str()); diff --git a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp index bf295acec75..105151d8132 100644 --- a/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/diskindex.cpp @@ -121,26 +121,20 @@ DiskIndex::openField(const vespalib::string &fieldDir, DiskPostingFileReal::getSubIdentifier()) { dynamicK = false; } else { - LOG(warning, - "Could not detect format for posocc file read %s", - postingName.c_str()); + LOG(warning, "Could not detect format for posocc file read %s", postingName.c_str()); } } - pFile.reset(dynamicK ? - new DiskPostingFileDynamicKReal() : - new DiskPostingFileReal()); + pFile.reset(dynamicK + ? new DiskPostingFileDynamicKReal() + : new DiskPostingFileReal()); if (!pFile->open(postingName, tuneFileSearch._read)) { - LOG(warning, - "Could not open posting list file '%s'", - postingName.c_str()); + LOG(warning, "Could not open posting list file '%s'", postingName.c_str()); return false; } bDict.reset(new BitVectorDictionary()); if (!bDict->open(fieldDir, tuneFileSearch._read, BitVectorKeyScope::PERFIELD_WORDS)) { - LOG(warning, - "Could not open bit vector dictionary in '%s'", - fieldDir.c_str()); + LOG(warning, "Could not open bit vector dictionary in '%s'", fieldDir.c_str()); return false; } _postingFiles.push_back(pFile); @@ -158,8 +152,7 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch) return false; } for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) { - vespalib::string fieldDir = - _indexDir + "/" + itr.getName() + "/"; + vespalib::string fieldDir = _indexDir + "/" + itr.getName() + "/"; if (!openField(fieldDir, tuneFileSearch)) { return false; } @@ -169,8 +162,7 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch) } bool -DiskIndex::setup(const TuneFileSearch &tuneFileSearch, - const DiskIndex &old) +DiskIndex::setup(const TuneFileSearch &tuneFileSearch, const DiskIndex &old) { if (tuneFileSearch != old._tuneFileSearch) { return setup(tuneFileSearch); @@ -180,8 +172,7 @@ DiskIndex::setup(const TuneFileSearch &tuneFileSearch, } const Schema &oldSchema = old._schema; for (SchemaUtil::IndexIterator itr(_schema); itr.isValid(); ++itr) { - vespalib::string fieldDir = - _indexDir + "/" + itr.getName() + "/"; + vespalib::string fieldDir = _indexDir + "/" + itr.getName() + "/"; SchemaUtil::IndexSettings settings = itr.getIndexSettings(); if (settings.hasError()) { return false; @@ -336,12 +327,11 @@ DiskIndex::LookupResult G_nothing; class LookupCache { public: - LookupCache(DiskIndex & diskIndex, const std::vector<uint32_t> & fieldIds) : - _diskIndex(diskIndex), - _fieldIds(fieldIds), - _cache() - { - } + LookupCache(DiskIndex & diskIndex, const std::vector<uint32_t> & fieldIds) + : _diskIndex(diskIndex), + _fieldIds(fieldIds), + _cache() + { } const DiskIndex::LookupResult & lookup(const vespalib::string & word, uint32_t fieldId) { auto it = _cache.find(word); @@ -473,7 +463,7 @@ DiskIndex::get_field_length_info(const vespalib::string& field_name) const if (fieldId != Schema::UNKNOWN_FIELD_ID) { return _postingFiles[fieldId]->get_field_length_info(); } else { - return FieldLengthInfo(); + return {}; } } diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp index 5cf80d06c87..ae1e5594320 100644 --- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.cpp @@ -180,10 +180,10 @@ FileHandle::close() FieldHandle::FieldHandle(const Schema &schema, uint32_t fieldId, IndexBuilder &builder, uint32_t docIdLimit, uint64_t numWordIds, const IFieldLengthInspector & field_length_inspector, const TuneFileSeqWrite &tuneFileWrite, const FileHeaderContext &fileHeaderContext) - : _schema(schema), - _builder(builder), - _file(), - _fieldId(fieldId) + : _schema(schema), + _builder(builder), + _file(), + _fieldId(fieldId) { std::filesystem::create_directory(std::filesystem::path(getDir())); _file.open(getDir(), SchemaUtil::IndexIterator(_schema, getIndexId()), docIdLimit, numWordIds, @@ -251,7 +251,6 @@ IndexBuilder::IndexBuilder(const Schema &schema, vespalib::stringref prefix, uin uint64_t numWordIds, const index::IFieldLengthInspector &field_length_inspector, const TuneFileIndexing &tuneFileIndexing, const search::common::FileHeaderContext &fileHeaderContext) : index::IndexBuilder(schema), - _schema(schema), _fields(extractFields(schema)), _prefix(prefix), _docIdLimit(docIdLimit), diff --git a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h index 4ef6ab4a813..3417f595faa 100644 --- a/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h +++ b/searchlib/src/vespa/searchlib/diskindex/indexbuilder.h @@ -30,7 +30,6 @@ public: std::unique_ptr<index::FieldIndexBuilder> startField(uint32_t fieldId) override; vespalib::string appendToPrefix(vespalib::stringref name) const; private: - const index::Schema &_schema; std::vector<int> _fields; const vespalib::string _prefix; const uint32_t _docIdLimit; diff --git a/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp index 83033b137f8..bda0a0594b7 100644 --- a/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp +++ b/searchlib/src/vespa/searchlib/diskindex/wordnummapper.cpp @@ -18,7 +18,7 @@ WordNumMapping::readMappingFile(const vespalib::string &name, const TuneFileSeqRead &tuneFileRead) { // Open word mapping file - Fast_BufferedFile old2newwordfile(new FastOS_File); + Fast_BufferedFile old2newwordfile; if (tuneFileRead.getWantDirectIO()) { old2newwordfile.EnableDirectIO(); } diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt index 63d52cbdf9f..a2f0c8fd136 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt @@ -2,7 +2,9 @@ vespa_add_library(searchlib_query_streaming OBJECT SOURCES dot_product_term.cpp + equiv_query_node.cpp fuzzy_term.cpp + hit.cpp hit_iterator_pack.cpp in_term.cpp multi_term.cpp diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp new file mode 100644 index 00000000000..939afec0463 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.cpp @@ -0,0 +1,102 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "equiv_query_node.h" +#include "phrase_query_node.h" +#include "queryterm.hpp" + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; +using search::fef::ITermFieldData; + +namespace search::streaming { + +namespace { + +class HitWithFieldLength : public Hit +{ + uint32_t _field_length; +public: + HitWithFieldLength(const Hit& hit, uint32_t field_length) noexcept + : Hit(hit), + _field_length(field_length) + { + } + uint32_t get_field_length() const noexcept { return _field_length; } +}; + +template <typename HitType> +void merge_hits_from_children(std::vector<HitType>& hl, const MultiTerm& mt) +{ + HitList sub_hl_store; + for (auto& subterm : mt.get_terms()) { + auto *phrase = dynamic_cast<PhraseQueryNode*>(subterm.get()); + QueryTerm& fl_term = (phrase == nullptr) ? *subterm : *phrase->get_terms().front(); + auto& sub_hl = subterm->evaluateHits(sub_hl_store); + for (auto& h : sub_hl) { + if constexpr (std::is_same_v<Hit,HitType>) { + hl.emplace_back(h); + } else { + hl.emplace_back(h, extract_field_length(fl_term, h.field_id())); + } + } + } + std::sort(hl.begin(), hl.end()); + auto last = std::unique(hl.begin(), hl.end(), [](auto& lhs, auto &rhs) noexcept { return lhs.at_same_pos(rhs); }); + hl.erase(last, hl.end()); +} + +} + +EquivQueryNode::EquivQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, uint32_t num_terms) + : MultiTerm(std::move(result_base), "", num_terms) +{ +} + +EquivQueryNode::~EquivQueryNode() = default; + +bool +EquivQueryNode::evaluate() const +{ + for (auto& subterm : get_terms()) { + if (subterm->evaluate()) { + return true; + } + } + return false; +} + +const HitList & +EquivQueryNode::evaluateHits(HitList & hl) const +{ + hl.clear(); + merge_hits_from_children(hl, *this); + return hl; +} + +void +EquivQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) +{ + std::vector<HitWithFieldLength> hit_list; + merge_hits_from_children(hit_list, *this); + unpack_match_data_helper(docid, td, match_data, hit_list, *this); +} + +EquivQueryNode* +EquivQueryNode::as_equiv_query_node() noexcept +{ + return this; +} + +const EquivQueryNode* +EquivQueryNode::as_equiv_query_node() const noexcept +{ + return this; +} + +std::vector<std::unique_ptr<QueryTerm>> +EquivQueryNode::steal_terms() +{ + return std::move(_terms); +} + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h new file mode 100644 index 00000000000..b5cdb31274f --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/equiv_query_node.h @@ -0,0 +1,25 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_term.h" + +namespace search::streaming { + +/** + N-ary "EQUIV" operator that merges terms from nodes below. +*/ +class EquivQueryNode : public MultiTerm +{ +public: + EquivQueryNode(std::unique_ptr<QueryNodeResultBase> result_base, uint32_t num_terms); + ~EquivQueryNode() override; + bool evaluate() const override; + const HitList & evaluateHits(HitList & hl) const override; + void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override; + EquivQueryNode* as_equiv_query_node() noexcept override; + const EquivQueryNode* as_equiv_query_node() const noexcept override; + std::vector<std::unique_ptr<QueryTerm>> steal_terms(); +}; + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.cpp b/searchlib/src/vespa/searchlib/query/streaming/hit.cpp new file mode 100644 index 00000000000..c05fda77476 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/hit.cpp @@ -0,0 +1,17 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "hit.h" +#include <ostream> + +namespace search::streaming { + +std::ostream& +operator<<(std::ostream& os, const Hit& hit) +{ + os << "{" << hit.field_id() << "," << hit.element_id() << "," << + hit.element_weight() << "," << hit.element_length() << "," << + hit.position() << "}"; + return os; +} + +} diff --git a/searchlib/src/vespa/searchlib/query/streaming/hit.h b/searchlib/src/vespa/searchlib/query/streaming/hit.h index 168c09a91ec..fc24c21f722 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/hit.h +++ b/searchlib/src/vespa/searchlib/query/streaming/hit.h @@ -2,6 +2,7 @@ #pragma once #include <cstdint> +#include <iosfwd> #include <vector> namespace search::streaming { @@ -27,8 +28,37 @@ public: uint32_t element_length() const { return _element_length; } uint32_t position() const { return _position; } void set_element_length(uint32_t value) { _element_length = value; } + bool operator<(const Hit& rhs) const noexcept { + if (_field_id != rhs._field_id) { + return _field_id < rhs._field_id; + } + if (_element_id != rhs._element_id) { + return _element_id < rhs._element_id; + } + if (_position != rhs._position) { + return _position < rhs._position; + } + if (_element_weight != rhs._element_weight) { + return _element_weight > rhs._element_weight; + } + return _element_length < rhs._element_length; + } + bool at_same_pos(const Hit& rhs) const noexcept { + return (_field_id == rhs._field_id) && + (_element_id == rhs._element_id) && + (_position == rhs._position); + } + bool operator==(const Hit& rhs) const noexcept { + return (_field_id == rhs._field_id) && + (_element_id == rhs._element_id) && + (_position == rhs._position) && + (_element_weight == rhs._element_weight) && + (_element_length == rhs._element_length); + } }; +std::ostream& operator<<(std::ostream& os, const Hit& hit); + using HitList = std::vector<Hit>; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp index 9cd8d41d33d..b090ca13225 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/phrase_query_node.cpp @@ -69,7 +69,9 @@ PhraseQueryNode::evaluateHits(HitList & hl) const void PhraseQueryNode::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) { - unpack_match_data_helper(docid, td, match_data, *get_terms().front()); + HitList list; + const HitList & hit_list = evaluateHits(list); + unpack_match_data_helper(docid, td, match_data, hit_list, *get_terms().front()); } } diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp index 77424fb2d62..94d9acd02cd 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp @@ -94,7 +94,6 @@ QueryConnector::create(ParseItem::ItemType type) case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>(); case search::ParseItem::ITEM_OR: case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>(); - case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>(); case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>(); case search::ParseItem::ITEM_SAME_ELEMENT: return std::make_unique<SameElementQueryNode>(); case search::ParseItem::ITEM_NEAR: return std::make_unique<NearQueryNode>(); @@ -158,12 +157,6 @@ RankWithQueryNode::evaluate() const { return firstOk; } -bool -EquivQueryNode::evaluate() const -{ - return OrQueryNode::evaluate(); -} - Query::Query() = default; Query::Query(const QueryNodeResultFactory & factory, vespalib::stringref queryRep) diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.h b/searchlib/src/vespa/searchlib/query/streaming/query.h index e91a2f91dc5..a993a9a8a8a 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/query.h +++ b/searchlib/src/vespa/searchlib/query/streaming/query.h @@ -103,20 +103,6 @@ public: bool evaluate() const override; }; - -/** - N-ary "EQUIV" operator that merges terms from nodes below. -*/ -class EquivQueryNode : public OrQueryNode -{ -public: - EquivQueryNode() noexcept : OrQueryNode("EQUIV") { } - bool evaluate() const override; - bool isFlattenable(ParseItem::ItemType type) const override { - return (type == ParseItem::ITEM_EQUIV); - } -}; - /** Query packages the query tree. The usage pattern is like this. Construct the tree with the correct tree description. diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 0b277dbe221..dd3b1f84ad9 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -9,6 +9,7 @@ #include "same_element_query_node.h" #include <vespa/searchlib/parsequery/stackdumpiterator.h> #include <vespa/searchlib/query/streaming/dot_product_term.h> +#include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/searchlib/query/streaming/in_term.h> #include <vespa/searchlib/query/streaming/wand_term.h> #include <vespa/searchlib/query/streaming/weighted_set_term.h> @@ -44,7 +45,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_AND: case ParseItem::ITEM_OR: case ParseItem::ITEM_WEAK_AND: - case ParseItem::ITEM_EQUIV: case ParseItem::ITEM_NOT: case ParseItem::ITEM_SAME_ELEMENT: case ParseItem::ITEM_NEAR: @@ -142,10 +142,10 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor auto dotPos = ssTerm.find('.'); phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode)); phrase->add_term(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode)); - auto orqn = std::make_unique<EquivQueryNode>(); - orqn->addChild(std::move(qt)); - orqn->addChild(std::move(phrase)); - qn = std::move(orqn); + auto eqn = std::make_unique<EquivQueryNode>(factory.create(), 2); + eqn->add_term(std::move(qt)); + eqn->add_term(std::move(phrase)); + qn = std::move(eqn); } else { qn = std::move(qt); } @@ -171,6 +171,9 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor case ParseItem::ITEM_PHRASE: qn = build_phrase_term(factory, queryRep); break; + case ParseItem::ITEM_EQUIV: + qn = build_equiv_term(factory, queryRep, allowRewrite); + break; default: skip_unknown(queryRep); break; @@ -282,6 +285,33 @@ QueryNode::build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryS return phrase; } +std::unique_ptr<QueryNode> +QueryNode::build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite) +{ + auto eqn = std::make_unique<EquivQueryNode>(factory.create(), queryRep.getArity()); + auto arity = queryRep.getArity(); + eqn->setWeight(queryRep.GetWeight()); + eqn->setUniqueId(queryRep.getUniqueId()); + for (size_t i = 0; i < arity; ++i) { + queryRep.next(); + auto qn = Build(eqn.get(), factory, queryRep, allow_rewrite); + auto nested_eqn = dynamic_cast<EquivQueryNode*>(qn.get()); + if (nested_eqn != nullptr) { + auto stolen_terms = nested_eqn->steal_terms(); + for (auto& term : stolen_terms) { + eqn->add_term(std::move(term)); + } + continue; + } + auto qtp = dynamic_cast<QueryTerm*>(qn.get()); + assert(qtp != nullptr); + qn.release(); + std::unique_ptr<QueryTerm> qt(qtp); + eqn->add_term(std::move(qt)); + } + return eqn; +} + void QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep) { diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h index 4c7d9e88930..fff3bb15d10 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h @@ -34,6 +34,7 @@ class QueryNode static std::unique_ptr<QueryNode> build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_weighted_set_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); static std::unique_ptr<QueryNode> build_phrase_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep); + static std::unique_ptr<QueryNode> build_equiv_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep, bool allow_rewrite); static void skip_unknown(SimpleQueryStackDumpIterator& queryRep); public: using UP = std::unique_ptr<QueryNode>; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index f01f815e673..0d0f5a7c4ad 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -1,6 +1,6 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "queryterm.h" +#include "queryterm.hpp" #include <vespa/searchlib/fef/itermdata.h> #include <vespa/searchlib/fef/matchdata.h> #include <vespa/vespalib/objects/visit.h> @@ -113,89 +113,12 @@ QueryTerm::set_element_length(uint32_t hitlist_idx, uint32_t element_length) _hitList[hitlist_idx].set_element_length(element_length); } -namespace { - -uint16_t -cap_16_bits(uint32_t value) -{ - return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); -} - -uint32_t -extract_field_length(const QueryTerm& term, uint32_t field_id) -{ - return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; -} - -void -set_interleaved_features(TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs) -{ - tmd.setFieldLength(cap_16_bits(field_length)); - tmd.setNumOccs(cap_16_bits(num_occs)); -} - -} - -void -QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const -{ - HitList list; - const HitList & hitList = evaluateHits(list); - - if (!hitList.empty()) { // only unpack if we have a hit - LOG(debug, "Unpack match data for query term '%s:%s'", - index().c_str(), getTerm()); - - uint32_t lastFieldId = -1; - TermFieldMatchData *tmd = nullptr; - uint32_t num_occs = 0; - - // optimize for hitlist giving all hits for a single field in one chunk - for (const Hit & hit : hitList) { - uint32_t fieldId = hit.field_id(); - if (fieldId != lastFieldId) { - if (tmd != nullptr) { - if (tmd->needs_interleaved_features()) { - set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs); - } - // reset to notfound/unknown values - tmd = nullptr; - } - num_occs = 0; - - // setup for new field that had a hit - const ITermFieldData *tfd = td.lookupField(fieldId); - if (tfd != nullptr) { - tmd = match_data.resolveTermField(tfd->getHandle()); - tmd->setFieldId(fieldId); - // reset field match data, but only once per docId - if (tmd->getDocId() != docid) { - tmd->reset(docid); - } - } - lastFieldId = fieldId; - } - ++num_occs; - if (tmd != nullptr) { - TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), - hit.element_weight(), hit.element_length()); - tmd->appendPosition(pos); - LOG(debug, "Append elemId(%u),position(%u), weight(%d), tfmd.weight(%d)", - pos.getElementId(), pos.getPosition(), pos.getElementWeight(), tmd->getWeight()); - } - } - if (tmd != nullptr) { - if (tmd->needs_interleaved_features()) { - set_interleaved_features(*tmd, extract_field_length(fl_term, lastFieldId), num_occs); - } - } - } -} - void QueryTerm::unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) { - unpack_match_data_helper(docid, td, match_data, *this); + HitList list; + const HitList & hit_list = evaluateHits(list); + unpack_match_data_helper(docid, td, match_data, hit_list, *this); } NearestNeighborQueryNode* @@ -222,4 +145,16 @@ QueryTerm::as_fuzzy_term() noexcept return nullptr; } +EquivQueryNode* +QueryTerm::as_equiv_query_node() noexcept +{ + return nullptr; +} + +const EquivQueryNode* +QueryTerm::as_equiv_query_node() const noexcept +{ + return nullptr; +} + } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 2eaecb86854..2cb4f2d2ebb 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -17,6 +17,7 @@ class MatchData; } namespace search::streaming { +class EquivQueryNode; class FuzzyTerm; class NearestNeighborQueryNode; class MultiTerm; @@ -100,9 +101,12 @@ public: virtual MultiTerm* as_multi_term() noexcept; virtual RegexpTerm* as_regexp_term() noexcept; virtual FuzzyTerm* as_fuzzy_term() noexcept; + virtual EquivQueryNode* as_equiv_query_node() noexcept; + virtual const EquivQueryNode* as_equiv_query_node() const noexcept; virtual void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data); protected: - void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const QueryTerm& fl_term) const; + template <typename HitListType> + static void unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term); using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>; string _index; EncodingBitMap _encoding; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp new file mode 100644 index 00000000000..dd6eff1f22b --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp @@ -0,0 +1,94 @@ +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "queryterm.h" +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <algorithm> +#include <limits> + + +namespace search::streaming { + +namespace { + +uint16_t +cap_16_bits(uint32_t value) +{ + return std::min(value, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max())); +} + +uint32_t +extract_field_length(const QueryTerm& term, uint32_t field_id) +{ + return (field_id < term.getFieldInfoSize()) ? term.getFieldInfo(field_id).getFieldLength() : search::fef::FieldPositionsIterator::UNKNOWN_LENGTH; +} + +void +set_interleaved_features(search::fef::TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs) +{ + tmd.setFieldLength(cap_16_bits(field_length)); + tmd.setNumOccs(cap_16_bits(num_occs)); +} + +} + +template <typename HitListType> +void +QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term) +{ + (void) fl_term; + if (!hit_list.empty()) { // only unpack if we have a hit + + uint32_t last_field_id = -1; + uint32_t last_field_length = 0; + search::fef::TermFieldMatchData *tmd = nullptr; + uint32_t num_occs = 0; + + // optimize for hitlist giving all hits for a single field in one chunk + for (const auto& hit : hit_list) { + uint32_t field_id = hit.field_id(); + if (field_id != last_field_id) { + if (tmd != nullptr) { + if (tmd->needs_interleaved_features()) { + set_interleaved_features(*tmd, last_field_length, num_occs); + } + // reset to notfound/unknown values + tmd = nullptr; + } + num_occs = 0; + + // setup for new field that had a hit + const search::fef::ITermFieldData *tfd = td.lookupField(field_id); + if (tfd != nullptr) { + tmd = match_data.resolveTermField(tfd->getHandle()); + tmd->setFieldId(field_id); + // reset field match data, but only once per docId + if (tmd->getDocId() != docid) { + tmd->reset(docid); + } + } + last_field_id = field_id; + if constexpr (std::is_same_v<HitList, HitListType>) { + last_field_length = extract_field_length(fl_term, field_id); + } else { + last_field_length = hit.get_field_length(); + } + } + ++num_occs; + if (tmd != nullptr) { + search::fef::TermFieldMatchDataPosition pos(hit.element_id(), hit.position(), + hit.element_weight(), hit.element_length()); + tmd->appendPosition(pos); + } + } + if (tmd != nullptr) { + if (tmd->needs_interleaved_features()) { + set_interleaved_features(*tmd, last_field_length, num_occs); + } + } + } +} + +} diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h index 510351a4843..439eff680ec 100644 --- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h +++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h @@ -265,12 +265,12 @@ public: // seen by optimize. When the calculate_flow_stats function is // called on a complex leaf, it can call the update_flow_stats // function directly (the function that is normally called by - // optimize) on interal blueprints to make these values available + // optimize) on internal blueprints to make these values available // before using them to calculate its own flow stats. // // 'estimate': relative estimate in the range [0,1] - // 'cost': per-document cost of non-strict evaluation - // 'strict_cost': per-document cost of strict evaluation + // 'cost': cost of non-strict evaluation: multiply by non-strict in-flow + // 'strict_cost': cost of strict evaluation: assuming strict in-flow of 1.0 double estimate() const noexcept { return _flow_stats.estimate; } double cost() const noexcept { return _flow_stats.cost; } double strict_cost() const noexcept { return _flow_stats.strict_cost; } diff --git a/searchlib/src/vespa/searchlib/queryeval/flow.h b/searchlib/src/vespa/searchlib/queryeval/flow.h index 426aa077db2..1dc6e6aef55 100644 --- a/searchlib/src/vespa/searchlib/queryeval/flow.h +++ b/searchlib/src/vespa/searchlib/queryeval/flow.h @@ -91,8 +91,11 @@ template <typename ADAPTER, typename T, typename F> double ordered_cost_of(ADAPTER adapter, const T &children, F flow) { double cost = 0.0; for (const auto &child: children) { - double child_cost = flow.strict() ? adapter.strict_cost(child) : adapter.cost(child); - cost += flow.flow() * child_cost; + if (flow.strict()) { + cost += adapter.strict_cost(child); + } else { + cost += flow.flow() * adapter.cost(child); + } flow.add(adapter.estimate(child)); } return cost; diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index 3449df57513..a54d2adee78 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -4,7 +4,7 @@ #include "rankprocessor.h" #include <vespa/searchlib/fef/handle.h> #include <vespa/searchlib/fef/simpletermfielddata.h> -#include <vespa/searchlib/query/streaming/multi_term.h> +#include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h> #include <vespa/vsm/vsm/fieldsearchspec.h> #include <algorithm> @@ -56,6 +56,51 @@ getFeature(const RankProgram &rankProgram) { } void +RankProcessor::resolve_fields_from_children(QueryTermData& qtd, MultiTerm& mt) +{ + vespalib::hash_set<uint32_t> field_ids; + for (auto& subterm : mt.get_terms()) { + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(subterm->index()); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + if (view != nullptr) { + for (auto field_id : *view) { + field_ids.insert(field_id); + } + } else { + LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", + getIndexName(subterm->index(), expandedIndexName).c_str()); + } + } + std::vector<uint32_t> sorted_field_ids; + sorted_field_ids.reserve(field_ids.size()); + for (auto field_id : field_ids) { + sorted_field_ids.emplace_back(field_id); + } + std::sort(sorted_field_ids.begin(), sorted_field_ids.end()); + for (auto field_id : sorted_field_ids) { + qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); + } +} + +void +RankProcessor::resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term) +{ + vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term.index()); + const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); + if (view != nullptr) { + for (auto field_id : *view) { + qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); + } + } else { + LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", + getIndexName(term.index(), expandedIndexName).c_str()); + } + LOG(debug, "Setup query term '%s:%s'", + getIndexName(term.index(), expandedIndexName).c_str(), + term.getTerm()); +} + +void RankProcessor::initQueryEnvironment() { QueryWrapper::TermList & terms = _query.getTermList(); @@ -75,21 +120,12 @@ RankProcessor::initQueryEnvironment() if (nn_term != nullptr) { qtd.getTermData().set_query_tensor_name(nn_term->get_query_tensor_name()); } - - vespalib::string expandedIndexName = vsm::FieldSearchSpecMap::stripNonFields(term->index()); - const RankManager::View *view = _rankManagerSnapshot->getView(expandedIndexName); - if (view != nullptr) { - for (auto field_id : *view) { - qtd.getTermData().addField(field_id).setHandle(_mdLayout.allocTermField(field_id)); - } + auto* eqn = term->as_equiv_query_node(); + if (eqn != nullptr) { + resolve_fields_from_children(qtd, *eqn); } else { - LOG(warning, "Could not find a view for index '%s'. Ranking no fields.", - getIndexName(term->index(), expandedIndexName).c_str()); + resolve_fields_from_term(qtd, *term); } - - LOG(debug, "Setup query term '%s:%s'", - getIndexName(term->index(), expandedIndexName).c_str(), - term->getTerm()); _queryEnv.addTerm(&qtd.getTermData()); } _rankSetup.prepareSharedState(_queryEnv, _queryEnv.getObjectStore()); diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 5651917ce7a..bec70beca77 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -16,6 +16,8 @@ namespace streaming { +class QueryTermData; + /** * This class is associated with a query and a rank profile and * is used to calculate rank and feature set for matched documents. @@ -43,6 +45,8 @@ private: HitCollector::UP _hitCollector; std::unique_ptr<RankProgram> _match_features_program; + void resolve_fields_from_children(QueryTermData& qtd, search::streaming::MultiTerm& mt); + void resolve_fields_from_term(QueryTermData& qtd, search::streaming::QueryTerm& term); void initQueryEnvironment(); void initHitCollector(size_t wantedHitCount); void setupRankProgram(search::fef::RankProgram &program); diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp index c75ab7fccd3..72807bc6c34 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.cpp @@ -3,7 +3,7 @@ #include <vespa/vsm/vsm/fieldsearchspec.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/weightedsetfieldvalue.h> -#include <vespa/searchlib/query/streaming/multi_term.h> +#include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/vespalib/stllike/hash_set.h> #include <cassert> @@ -190,6 +190,39 @@ FieldSearcher::init() } void +FieldIdTSearcherMap::prepare_term(const DocumentTypeIndexFieldMapT& difm, QueryTerm* qt, FieldIdT fid, vespalib::hash_set<const void*>& seen, QueryTermList& onlyInIndex) +{ + auto equiv = qt->as_equiv_query_node(); + if (equiv != nullptr) { + for (auto& subterm : equiv->get_terms()) { + prepare_term(difm, subterm.get(), fid, seen, onlyInIndex); + } + return; + } + for (const auto& doc_type_elem : difm) { + const IndexFieldMapT & fim = doc_type_elem.second; + auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index())); + if (found != fim.end()) { + const FieldIdTList & index = found->second; + if ((find(index.begin(), index.end(), fid) != index.end()) && !seen.contains(qt)) { + seen.insert(qt); + auto multi_term = qt->as_multi_term(); + if (multi_term != nullptr) { + for (auto& subterm : multi_term->get_terms()) { + onlyInIndex.emplace_back(subterm.get()); + } + } else { + onlyInIndex.emplace_back(qt); + } + } + } else { + LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", + qt->index().c_str()); + } + } +} + +void FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, const SharedSearcherBuf& searcherBuf, Query& query, const vsm::FieldPathMapT& field_paths, search::fef::IQueryEnvironment& query_env) @@ -202,27 +235,7 @@ FieldIdTSearcherMap::prepare(const DocumentTypeIndexFieldMapT& difm, const Share vespalib::hash_set<const void*> seen; FieldIdT fid = searcher->field(); for (auto qt : qtl) { - for (const auto& doc_type_elem : difm) { - const IndexFieldMapT & fim = doc_type_elem.second; - auto found = fim.find(FieldSearchSpecMap::stripNonFields(qt->index())); - if (found != fim.end()) { - const FieldIdTList & index = found->second; - if ((find(index.begin(), index.end(), fid) != index.end()) && !seen.contains(qt)) { - seen.insert(qt); - auto multi_term = qt->as_multi_term(); - if (multi_term != nullptr) { - for (auto& subterm : multi_term->get_terms()) { - onlyInIndex.emplace_back(subterm.get()); - } - } else { - onlyInIndex.emplace_back(qt); - } - } - } else { - LOG(debug, "Could not find the requested index=%s in the index config map. Query does not fit search definition.", - qt->index().c_str()); - } - } + prepare_term(difm, qt, fid, seen, onlyInIndex); } /// Should perhaps do a unique on onlyInIndex searcher->prepare(onlyInIndex, searcherBuf, field_paths, query_env); diff --git a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h index 6f3ec3e1e73..042e47ef164 100644 --- a/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h +++ b/streamingvisitors/src/vespa/vsm/searcher/fieldsearcher.h @@ -5,6 +5,7 @@ #include <vespa/searchlib/query/streaming/query.h> #include <vespa/vsm/common/document.h> #include <vespa/vsm/common/storagedocument.h> +#include <vespa/vespalib/stllike/hash_set.h> #include <vespa/vespalib/util/array.h> #include <utility> @@ -122,6 +123,7 @@ using FieldIdTSearcherMapT = std::vector<FieldSearcherContainer>; class FieldIdTSearcherMap : public FieldIdTSearcherMapT { + void prepare_term(const DocumentTypeIndexFieldMapT& difm, search::streaming::QueryTerm* qt, FieldIdT fid, vespalib::hash_set<const void*>& seen, search::streaming::QueryTermList& onlyInIndex); public: void prepare(const DocumentTypeIndexFieldMapT& difm, const SharedSearcherBuf& searcherBuf, search::streaming::Query& query, const vsm::FieldPathMapT& field_paths, diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index 3ae4794e33f..c596b46a774 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "fieldsearchspec.h" +#include <vespa/searchlib/query/streaming/equiv_query_node.h> #include <vespa/vespalib/stllike/asciistream.h> #include <vespa/vsm/searcher/boolfieldsearcher.h> #include <vespa/vsm/searcher/floatfieldsearcher.h> @@ -222,7 +223,14 @@ FieldSearchSpecMap::buildFieldsInQuery(const Query & query) const query.getLeaves(qtl); for (const auto & term : qtl) { - addFieldsFromIndex(term->index(), fieldsInQuery); + auto equiv = term->as_equiv_query_node(); + if (equiv != nullptr) { + for (const auto& subterm : equiv->get_terms()) { + addFieldsFromIndex(subterm->index(), fieldsInQuery); + } + } else { + addFieldsFromIndex(term->index(), fieldsInQuery); + } } return fieldsInQuery; } diff --git a/vespalib/src/vespa/fastlib/io/bufferedfile.cpp b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp index 0fa62aca295..4d46427ed33 100644 --- a/vespalib/src/vespa/fastlib/io/bufferedfile.cpp +++ b/vespalib/src/vespa/fastlib/io/bufferedfile.cpp @@ -202,7 +202,7 @@ Fast_BufferedFile::ReadLine(char *line, size_t buflen) p = line; ep = line + buflen - 1; - while (1) { + while (true) { while (_bufi < _bufe && *_bufi != '\n' && p < ep) *p++ = *_bufi++; if (p >= ep) { @@ -258,7 +258,7 @@ Fast_BufferedFile::Read(void *dst, size_t dstlen) { char * p = static_cast<char *>(dst); char * pe = p + dstlen; - while (1) { + while (true) { int64_t sz = std::min(_bufe - _bufi, pe - p); memcpy(p, _bufi, sz); p += sz; @@ -334,11 +334,6 @@ Fast_BufferedFile::WriteOpen(const char *name) _openFlags = FASTOS_FILE_OPEN_WRITE; } -Fast_BufferedFile::Fast_BufferedFile(FastOS_FileInterface *file) : - Fast_BufferedFile(file, DEFAULT_BUF_SIZE) -{ -} - Fast_BufferedFile::Fast_BufferedFile() : Fast_BufferedFile(DEFAULT_BUF_SIZE) { diff --git a/vespalib/src/vespa/fastlib/io/bufferedfile.h b/vespalib/src/vespa/fastlib/io/bufferedfile.h index 8a22ec89bd7..16153b79171 100644 --- a/vespalib/src/vespa/fastlib/io/bufferedfile.h +++ b/vespalib/src/vespa/fastlib/io/bufferedfile.h @@ -28,30 +28,40 @@ private: char * buf() { return static_cast<char *>(_buf.get()); } const char * buf() const { return static_cast<const char *>(_buf.get()); } -protected: /** The file instance used for low-level file access. */ std::unique_ptr<FastOS_FileInterface> _file; -public: - /** - * Create buffered file. - * @param file file instance that should be used for low-level - * file access. If this is NULL, an instance of - * FastOS_File will be created. NOTE: the file - * instance given here will be deleted by - * the destructor. - **/ Fast_BufferedFile(FastOS_FileInterface *file, size_t bufferSize); - Fast_BufferedFile(FastOS_FileInterface *file); + /** + * Reset the internal start and end pointers to the + * head of the buffer, thus "emptying" it. + */ + void ResetBuf(); + /** + * Write the buffer to the file. Caution: Uses obsolete + * FastOS_FileInterface::WriteBuf. + * Allocates a 32kB buffer if not previously allocated. + */ + void flushWriteBuf(); + /** + * Read from the file into the buffer. Allocates a 32kB + * buffer if not previously allocated. Fills the buffer, + * or reads as much as possible if the (rest of) the file + * is smaller than the buffer. + * Caution: If the amount read is smaller than the expected + * amount, the method will abort. + */ + void fillReadBuf(); +public: Fast_BufferedFile(); - Fast_BufferedFile(size_t bufferSize); + explicit Fast_BufferedFile(size_t bufferSize); Fast_BufferedFile(const Fast_BufferedFile &) = delete; Fast_BufferedFile & operator = (const Fast_BufferedFile &) = delete; /** * Delete the file instance used for low-level file access. **/ - virtual ~Fast_BufferedFile(); + ~Fast_BufferedFile() override; /** * Open an existing file for reading. * @@ -71,26 +81,7 @@ public: * @param name The name of the file to open. */ void WriteOpen(const char *name); - /** - * Reset the internal start and end pointers to the - * head of the buffer, thus "emptying" it. - */ - void ResetBuf(); - /** - * Write the buffer to the file. Caution: Uses obsolete - * FastOS_FileInterface::WriteBuf. - * Allocates a 32kB buffer if not previously allocated. - */ - void flushWriteBuf(); - /** - * Read from the file into the buffer. Allocates a 32kB - * buffer if not previously allocated. Fills the buffer, - * or reads as much as possible if the (rest of) the file - * is smaller than the buffer. - * Caution: If the amount read is smaller than the expected - * amount, the method will abort. - */ - void fillReadBuf(); + /** * Read the next line of the buffered file into a buffer, * reading from the file as necessary. diff --git a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp index a663c6a601b..e6a00f4b86d 100644 --- a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp +++ b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.cpp @@ -75,7 +75,7 @@ PtrAndSize MmapFileAllocator::alloc(size_t sz) const { if (sz == 0) { - return PtrAndSize(); // empty allocation + return {}; // empty allocation } static constexpr size_t alignment = 128; sz = (sz + alignment - 1) & -alignment; // round sz to a multiple of alignment @@ -107,7 +107,7 @@ MmapFileAllocator::alloc_large(size_t sz) const retval = madvise(buf, sz, MADV_DONTDUMP); assert(retval == 0); #endif - return PtrAndSize(buf, sz); + return {buf, sz}; } void* diff --git a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h index 96a69f50e43..f568a14572a 100644 --- a/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h +++ b/vespalib/src/vespa/vespalib/util/mmap_file_allocator.h @@ -24,15 +24,11 @@ class MmapFileAllocator : public MemoryAllocator { struct SizeAndOffset { size_t size; uint64_t offset; - SizeAndOffset() - : SizeAndOffset(0u, 0u) - { - } - SizeAndOffset(size_t size_in, uint64_t offset_in) + SizeAndOffset() noexcept : SizeAndOffset(0u, 0u) { } + SizeAndOffset(size_t size_in, uint64_t offset_in) noexcept : size(size_in), offset(offset_in) - { - } + { } }; using Allocations = hash_map<void *, SizeAndOffset>; const vespalib::string _dir_name; @@ -55,9 +51,9 @@ class MmapFileAllocator : public MemoryAllocator { public: static constexpr uint32_t default_small_limit = 128_Ki; static constexpr uint32_t default_premmap_size = 1_Mi; - MmapFileAllocator(const vespalib::string& dir_name); + explicit MmapFileAllocator(const vespalib::string& dir_name); MmapFileAllocator(const vespalib::string& dir_name, uint32_t small_limit, uint32_t premmap_size); - ~MmapFileAllocator(); + ~MmapFileAllocator() override; PtrAndSize alloc(size_t sz) const override; void free(PtrAndSize alloc) const noexcept override; size_t resize_inplace(PtrAndSize, size_t) const override; |