summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-01-15 10:23:18 +0100
committerGitHub <noreply@github.com>2024-01-15 10:23:18 +0100
commit29a807d35ac5d9e76ea1b8d653bb25b0e4e2dc73 (patch)
treed55fddad443566300bd4a7fdd3ef1118a8460700 /searchlib
parent48b1bae2a6cdf58a237aa7be59632a06aba86861 (diff)
parent252fbeed13b8622fbc813620dc3b4e45abc6bbe2 (diff)
Merge branch 'master' into balder/sliced-parallell-or
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/CMakeLists.txt5
-rw-r--r--searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java36
-rw-r--r--searchlib/src/tests/attribute/bitvector/bitvector_test.cpp4
-rw-r--r--searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp225
-rw-r--r--searchlib/src/tests/attribute/direct_posting_store/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt10
-rw-r--r--searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp297
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp226
-rw-r--r--searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt10
-rw-r--r--searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt10
-rw-r--r--searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp (renamed from searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp)50
-rw-r--r--searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp6
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp6
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp2
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp28
-rw-r--r--searchlib/src/tests/common/bitvector/bitvector_test.cpp26
-rw-r--r--searchlib/src/tests/features/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/beta/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/beta/beta_features_test.cpp2
-rw-r--r--searchlib/src/tests/features/bm25/bm25_test.cpp2
-rw-r--r--searchlib/src/tests/features/element_completeness/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/element_completeness/element_completeness_test.cpp2
-rw-r--r--searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp2
-rw-r--r--searchlib/src/tests/features/euclidean_distance/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp4
-rw-r--r--searchlib/src/tests/features/featurebenchmark.cpp2
-rw-r--r--searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp6
-rw-r--r--searchlib/src/tests/features/prod_features_test.h2
-rw-r--r--searchlib/src/tests/features/tensor/tensor_test.cpp16
-rw-r--r--searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp2
-rw-r--r--searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp14
-rw-r--r--searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp6
-rw-r--r--searchlib/src/tests/nativerank/CMakeLists.txt1
-rw-r--r--searchlib/src/tests/nativerank/nativerank_test.cpp2
-rw-r--r--searchlib/src/tests/query/streaming_query_test.cpp205
-rw-r--r--searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp27
-rw-r--r--searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp137
-rw-r--r--searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp2
-rw-r--r--searchlib/src/tests/queryeval/flow/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp117
-rw-r--r--searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp232
-rw-r--r--searchlib/src/tests/queryeval/queryeval_test.cpp284
-rw-r--r--searchlib/src/tests/queryeval/same_element/same_element_test.cpp2
-rw-r--r--searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp254
-rw-r--r--searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp63
-rw-r--r--searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp119
-rw-r--r--searchlib/src/tests/sortspec/multilevelsort_test.cpp17
-rw-r--r--searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/attribute/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp63
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp41
-rw-r--r--searchlib/src/vespa/searchlib/attribute/in_term_search.h15
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp (renamed from searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp)75
-rw-r--r--searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h (renamed from searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h)12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h7
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h61
-rw-r--r--searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp36
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h8
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h11
-rw-r--r--searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp6
-rw-r--r--searchlib/src/vespa/searchlib/common/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.cpp20
-rw-r--r--searchlib/src/vespa/searchlib/common/bitvector.h7
-rw-r--r--searchlib/src/vespa/searchlib/common/fileheadercontext.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/common/fileheadertags.cpp16
-rw-r--r--searchlib/src/vespa/searchlib/common/fileheadertags.h17
-rw-r--r--searchlib/src/vespa/searchlib/common/sortspec.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp50
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp71
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h2
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.cpp24
-rw-r--r--searchlib/src/vespa/searchlib/fef/indexproperties.h28
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/fef/ranksetup.h8
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/ftlib.cpp282
-rw-r--r--searchlib/src/vespa/searchlib/fef/test/ftlib.h57
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.cpp231
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.h59
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt1
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp33
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h6
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/in_term.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/in_term.h3
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp17
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/multi_term.h7
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/query.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/query.h33
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.cpp93
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.h8
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h18
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp109
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.h12
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp44
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/wand_term.h22
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.cpp26
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/blueprint.h28
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp (renamed from searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp)2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h (renamed from searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h)13
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/dot_product_search.h3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/executeinfo.h34
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp87
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h28
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp63
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h12
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp8
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h2
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h3
-rw-r--r--searchlib/src/vespa/searchlib/test/CMakeLists.txt2
-rw-r--r--searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h8
-rw-r--r--searchlib/src/vespa/searchlib/test/ft_test_app.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/test/ft_test_app.h13
-rw-r--r--searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp286
-rw-r--r--searchlib/src/vespa/searchlib/test/ft_test_app_base.h61
129 files changed, 2987 insertions, 1940 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 219439a1224..5628db99171 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -77,8 +77,7 @@ vespa_define_module(
src/tests/attribute/compaction
src/tests/attribute/dfa_fuzzy_matcher
src/tests/attribute/direct_multi_term_blueprint
- src/tests/attribute/document_weight_iterator
- src/tests/attribute/document_weight_or_filter_search
+ src/tests/attribute/direct_posting_store
src/tests/attribute/enum_attribute_compaction
src/tests/attribute/enum_comparator
src/tests/attribute/enumeratedsave
@@ -87,6 +86,7 @@ vespa_define_module(
src/tests/attribute/guard
src/tests/attribute/imported_attribute_vector
src/tests/attribute/imported_search_context
+ src/tests/attribute/multi_term_or_filter_search
src/tests/attribute/multi_value_mapping
src/tests/attribute/multi_value_read_view
src/tests/attribute/posting_list_merger
@@ -196,6 +196,7 @@ vespa_define_module(
src/tests/queryeval/equiv
src/tests/queryeval/fake_searchable
src/tests/queryeval/filter_search
+ src/tests/queryeval/flow
src/tests/queryeval/getnodeweight
src/tests/queryeval/global_filter
src/tests/queryeval/matching_elements_search
diff --git a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java
index fce0485f41a..60617687f44 100644
--- a/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java
+++ b/searchlib/src/main/java/com/yahoo/searchlib/gbdt/XmlHelper.java
@@ -7,6 +7,7 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
+import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
@@ -15,21 +16,21 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
+import java.util.logging.Level;
+import java.util.logging.Logger;
/**
* @author Simon Thoresen Hult
*/
abstract class XmlHelper {
-
- private static final Charset UTF8 = Charset.forName("UTF-8");
-
public static Element parseXml(String xml)
throws ParserConfigurationException, IOException, SAXException
{
- return parseXmlStream(new ByteArrayInputStream(xml.getBytes(UTF8)));
+ return parseXmlStream(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
}
public static Element parseXmlFile(String fileName)
@@ -41,22 +42,27 @@ abstract class XmlHelper {
public static Element parseXmlStream(InputStream in)
throws ParserConfigurationException, IOException, SAXException
{
- DocumentBuilderFactory factory = createDocumentBuilderFactory();
- DocumentBuilder builder = factory.newDocumentBuilder();
+ DocumentBuilder builder = createDocumentBuilderFactory().newDocumentBuilder();
Document doc = builder.parse(in);
return doc.getDocumentElement();
}
- private static DocumentBuilderFactory createDocumentBuilderFactory() throws ParserConfigurationException {
- DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
- factory.setNamespaceAware(true);
- factory.setXIncludeAware(false);
+ private static DocumentBuilderFactory createDocumentBuilderFactory() {
+ try {
+ DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
+ factory.setNamespaceAware(true);
+ factory.setXIncludeAware(false);
+ factory.setExpandEntityReferences(false);
+ factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
- // XXE prevention
- factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
- factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
- factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
- return factory;
+ // XXE prevention
+ factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
+ factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+ factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+ return factory;
+ } catch (ParserConfigurationException e) {
+ throw new RuntimeException("Failed to initialize XML parser", e);
+ }
}
public static String getAttributeText(Node node, String name) {
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
index 181c0fdf110..f612bdda87f 100644
--- a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
@@ -7,7 +7,7 @@
#include <vespa/searchlib/common/bitvectoriterator.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/parsequery/parse.h>
-#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h>
#include <vespa/searchlib/queryeval/executeinfo.h>
#include <vespa/searchlib/test/searchiteratorverifier.h>
#include <vespa/searchlib/util/randomgenerator.h>
@@ -432,7 +432,7 @@ BitVectorTest::test(BasicType bt, CollectionType ct, const vespalib::string &pre
const auto* dww = v->as_docid_with_weight_posting_store();
if (dww != nullptr) {
auto lres = dww->lookup(getSearchStr<VectorType>(), dww->get_dictionary_snapshot());
- using DWSI = search::queryeval::DocumentWeightSearchIterator;
+ using DWSI = search::queryeval::DocidWithWeightSearchIterator;
TermFieldMatchData md;
auto dwsi = std::make_unique<DWSI>(md, *dww, lres);
if (!filter) {
diff --git a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
index f2341d0968e..899ddaa3cc0 100644
--- a/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/direct_multi_term_blueprint/direct_multi_term_blueprint_test.cpp
@@ -3,7 +3,9 @@
#include <vespa/searchlib/attribute/direct_multi_term_blueprint.h>
#include <vespa/searchlib/attribute/i_docid_posting_store.h>
#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
+#include <vespa/searchlib/attribute/in_term_search.h>
#include <vespa/searchlib/attribute/integerbase.h>
+#include <vespa/searchlib/attribute/stringbase.h>
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/queryeval/orsearch.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
@@ -19,13 +21,22 @@ using namespace search::queryeval;
using namespace search;
using testing::StartsWith;
-struct IntegerKey : public IDirectPostingStore::LookupKey {
+using LookupKey = IDirectPostingStore::LookupKey;
+
+struct IntegerKey : public LookupKey {
int64_t _value;
IntegerKey(int64_t value_in) : _value(value_in) {}
vespalib::stringref asString() const override { abort(); }
bool asInteger(int64_t& value) const override { value = _value; return true; }
};
+struct StringKey : public LookupKey {
+ vespalib::string _value;
+ StringKey(int64_t value_in) : _value(std::to_string(value_in)) {}
+ vespalib::stringref asString() const override { return _value; }
+ bool asInteger(int64_t&) const override { abort(); }
+};
+
const vespalib::string field_name = "test";
constexpr uint32_t field_id = 3;
uint32_t doc_id_limit = 500;
@@ -50,112 +61,153 @@ concat(const Docids& a, const Docids& b)
return res;
}
+template <typename AttributeType, typename DataType>
+void
+populate_attribute(AttributeType& attr, const std::vector<DataType>& values)
+{
+ // Values 0 and 1 have btree (short) posting lists.
+ attr.update(10, values[0]);
+ attr.update(30, values[1]);
+ attr.update(31, values[1]);
+
+ // Values 2 and 3 have bitvector posting lists.
+ // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors())
+ for (auto docid : range(100, 128)) {
+ attr.update(docid, values[2]);
+ }
+ for (auto docid : range(300, 128)) {
+ attr.update(docid, values[3]);
+ }
+ attr.commit(true);
+}
+
std::shared_ptr<AttributeVector>
-make_attribute(bool field_is_filter, CollectionType col_type)
+make_attribute(CollectionType col_type, BasicType type, bool field_is_filter)
{
- Config cfg(BasicType::INT64, col_type);
+ Config cfg(type, col_type);
cfg.setFastSearch(true);
if (field_is_filter) {
cfg.setIsFilter(field_is_filter);
}
uint32_t num_docs = doc_id_limit - 1;
auto attr = test::AttributeBuilder(field_name, cfg).docs(num_docs).get();
- IntegerAttribute& real = dynamic_cast<IntegerAttribute&>(*attr);
-
- // Values 1 and 3 have btree (short) posting lists with weights.
- real.update(10, 1);
- real.update(30, 3);
- real.update(31, 3);
-
- // Values 100 and 300 have bitvector posting lists.
- // We need at least 128 documents to get bitvector posting list (see PostingStoreBase2::resizeBitVectors())
- for (auto docid : range(100, 128)) {
- real.update(docid, 100);
- }
- for (auto docid : range(300, 128)) {
- real.update(docid, 300);
+ if (type == BasicType::STRING) {
+ populate_attribute<StringAttribute, vespalib::string>(dynamic_cast<StringAttribute&>(*attr),
+ {"1", "3", "100", "300"});
+ } else {
+ populate_attribute<IntegerAttribute, int64_t>(dynamic_cast<IntegerAttribute&>(*attr),
+ {1, 3, 100, 300});
}
- attr->commit(true);
return attr;
}
void
-expect_has_btree_iterator(const IDirectPostingStore& store, int64_t term_value)
+expect_has_btree_iterator(const IDirectPostingStore& store, const LookupKey& key)
{
auto snapshot = store.get_dictionary_snapshot();
- auto res = store.lookup(IntegerKey(term_value), snapshot);
+ auto res = store.lookup(key, snapshot);
EXPECT_TRUE(store.has_btree_iterator(res.posting_idx));
}
void
-expect_has_bitvector_iterator(const IDirectPostingStore& store, int64_t term_value)
+expect_has_bitvector_iterator(const IDirectPostingStore& store, const LookupKey& key)
{
auto snapshot = store.get_dictionary_snapshot();
- auto res = store.lookup(IntegerKey(term_value), snapshot);
+ auto res = store.lookup(key, snapshot);
EXPECT_TRUE(store.has_bitvector(res.posting_idx));
}
+template <typename LookupKeyType>
void
validate_posting_lists(const IDirectPostingStore& store)
{
- expect_has_btree_iterator(store, 1);
- expect_has_btree_iterator(store, 3);
+ expect_has_btree_iterator(store, LookupKeyType(1));
+ expect_has_btree_iterator(store, LookupKeyType(3));
if (store.has_always_btree_iterator()) {
- expect_has_btree_iterator(store, 100);
- expect_has_btree_iterator(store, 300);
+ expect_has_btree_iterator(store, LookupKeyType(100));
+ expect_has_btree_iterator(store, LookupKeyType(300));
}
- expect_has_bitvector_iterator(store, 100);
- expect_has_bitvector_iterator(store, 300);
+ expect_has_bitvector_iterator(store, LookupKeyType(100));
+ expect_has_bitvector_iterator(store, LookupKeyType(300));
}
+enum OperatorType {
+ In,
+ WSet
+};
+
struct TestParam {
+ OperatorType op_type;
CollectionType col_type;
- TestParam(CollectionType col_type_in) : col_type(col_type_in) {}
+ BasicType type;
+ TestParam(OperatorType op_type_in, CollectionType col_type_in, BasicType type_in)
+ : op_type(op_type_in), col_type(col_type_in), type(type_in) {}
~TestParam() = default;
};
std::ostream& operator<<(std::ostream& os, const TestParam& param)
{
- os << param.col_type.asString();
+ os << (param.op_type == OperatorType::In ? "in_" : "wset_") << param.col_type.asString() << "_" << param.type.asString();
return os;
}
+using SingleInBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>;
+using MultiInBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>;
+using SingleWSetBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>;
+using MultiWSetBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>;
+
class DirectMultiTermBlueprintTest : public ::testing::TestWithParam<TestParam> {
public:
- using SingleValueBlueprintType = DirectMultiTermBlueprint<IDocidPostingStore, WeightedSetTermSearch>;
- using MultiValueBlueprintType = DirectMultiTermBlueprint<IDocidWithWeightPostingStore, WeightedSetTermSearch>;
std::shared_ptr<AttributeVector> attr;
- std::shared_ptr<SingleValueBlueprintType> single_blueprint;
- std::shared_ptr<MultiValueBlueprintType> multi_blueprint;
- queryeval::ComplexLeafBlueprint* blueprint;
+ bool in_operator;
+ bool single_type;
+ bool integer_type;
+ std::shared_ptr<ComplexLeafBlueprint> blueprint;
Blueprint::HitEstimate estimate;
fef::TermFieldMatchData tfmd;
fef::TermFieldMatchDataArray tfmda;
DirectMultiTermBlueprintTest()
: attr(),
- single_blueprint(),
- multi_blueprint(),
+ in_operator(true),
+ single_type(true),
+ integer_type(true),
blueprint(),
tfmd(),
tfmda()
{
tfmda.add(&tfmd);
}
+ ~DirectMultiTermBlueprintTest() {}
void setup(bool field_is_filter, bool need_term_field_match_data) {
- attr = make_attribute(field_is_filter, GetParam().col_type);
+ attr = make_attribute(GetParam().col_type, GetParam().type, field_is_filter);
+ in_operator = GetParam().op_type == OperatorType::In;
+ single_type = GetParam().col_type == CollectionType::SINGLE;
+ integer_type = GetParam().type != BasicType::STRING;
FieldSpec spec(field_name, field_id, fef::TermFieldHandle(), field_is_filter);
- if (GetParam().col_type == CollectionType::SINGLE) {
- const auto* store = attr->as_docid_posting_store();
- ASSERT_TRUE(store);
- validate_posting_lists(*store);
- single_blueprint = std::make_shared<SingleValueBlueprintType>(spec, *attr, *store, 2);
- blueprint = single_blueprint.get();
+ const IDirectPostingStore* store;
+ if (single_type) {
+ auto real_store = attr->as_docid_posting_store();
+ ASSERT_TRUE(real_store);
+ if (in_operator) {
+ blueprint = std::make_shared<SingleInBlueprintType>(spec, *attr, *real_store, 2);
+ } else {
+ blueprint = std::make_shared<SingleWSetBlueprintType>(spec, *attr, *real_store, 2);
+ }
+ store = real_store;
+ } else {
+ auto real_store = attr->as_docid_with_weight_posting_store();
+ ASSERT_TRUE(real_store);
+ if (in_operator) {
+ blueprint = std::make_shared<MultiInBlueprintType>(spec, *attr, *real_store, 2);
+ } else {
+ blueprint = std::make_shared<MultiWSetBlueprintType>(spec, *attr, *real_store, 2);
+ }
+ store = real_store;
+ }
+ if (integer_type) {
+ validate_posting_lists<IntegerKey>(*store);
} else {
- const auto* store = attr->as_docid_with_weight_posting_store();
- ASSERT_TRUE(store);
- validate_posting_lists(*store);
- multi_blueprint = std::make_shared<MultiValueBlueprintType>(spec, *attr, *store, 2);
- blueprint = multi_blueprint.get();
+ validate_posting_lists<StringKey>(*store);
}
blueprint->setDocIdLimit(doc_id_limit);
if (need_term_field_match_data) {
@@ -164,16 +216,35 @@ public:
tfmd.tagAsNotNeeded();
}
}
+ template <typename BlueprintType>
+ void add_term_helper(BlueprintType& b, int64_t term_value) {
+ if (integer_type) {
+ b.addTerm(IntegerKey(term_value), 1, estimate);
+ } else {
+ b.addTerm(StringKey(term_value), 1, estimate);
+ }
+ }
void add_term(int64_t term_value) {
- if (single_blueprint) {
- single_blueprint->addTerm(IntegerKey(term_value), 1, estimate);
+ if (single_type) {
+ if (in_operator) {
+ add_term_helper(dynamic_cast<SingleInBlueprintType&>(*blueprint), term_value);
+ } else {
+ add_term_helper(dynamic_cast<SingleWSetBlueprintType&>(*blueprint), term_value);
+ }
} else {
- multi_blueprint->addTerm(IntegerKey(term_value), 1, estimate);
+ if (in_operator) {
+ add_term_helper(dynamic_cast<MultiInBlueprintType&>(*blueprint), term_value);
+ } else {
+ add_term_helper(dynamic_cast<MultiWSetBlueprintType&>(*blueprint), term_value);
+ }
}
}
std::unique_ptr<SearchIterator> create_leaf_search() const {
return blueprint->createLeafSearch(tfmda, true);
}
+ vespalib::string multi_term_iterator() const {
+ return in_operator ? "search::attribute::MultiTermOrFilterSearchImpl" : "search::queryeval::WeightedSetTermSearchImpl";
+ }
};
void
@@ -201,30 +272,54 @@ expect_or_child(SearchIterator& itr, size_t child, const vespalib::string& exp_c
INSTANTIATE_TEST_SUITE_P(DefaultInstantiation,
DirectMultiTermBlueprintTest,
- testing::Values(CollectionType::SINGLE, CollectionType::WSET),
+ testing::Values(TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::INT64),
+ TestParam(OperatorType::In, CollectionType::SINGLE, BasicType::STRING),
+ TestParam(OperatorType::In, CollectionType::WSET, BasicType::INT64),
+ TestParam(OperatorType::In, CollectionType::WSET, BasicType::STRING),
+ TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::INT64),
+ TestParam(OperatorType::WSet, CollectionType::SINGLE, BasicType::STRING),
+ TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::INT64),
+ TestParam(OperatorType::WSet, CollectionType::WSET, BasicType::STRING)),
testing::PrintToStringParamName());
-TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_for_none_filter_field)
-{
+TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_for_none_filter_field) {
setup(false, true);
add_term(1);
add_term(3);
auto itr = create_leaf_search();
- EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl"));
+ EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator()));
expect_hits({10, 30, 31}, *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, weight_iterators_used_instead_of_bitvectors_for_none_filter_field)
+TEST_P(DirectMultiTermBlueprintTest, bitvectors_used_instead_of_btree_iterators_for_none_filter_field)
+{
+ setup(false, true);
+ if (!in_operator) {
+ return;
+ }
+ add_term(1);
+ add_term(100);
+ auto itr = create_leaf_search();
+ expect_or_iterator(*itr, 2);
+ expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
+ expect_or_child(*itr, 1, multi_term_iterator());
+ expect_hits(concat({10}, range(100, 128)), *itr);
+}
+
+TEST_P(DirectMultiTermBlueprintTest, btree_iterators_used_instead_of_bitvectors_for_none_filter_field)
{
setup(false, true);
+ if (in_operator) {
+ return;
+ }
add_term(1);
add_term(100);
auto itr = create_leaf_search();
- EXPECT_THAT(itr->asString(), StartsWith("search::queryeval::WeightedSetTermSearchImpl"));
+ EXPECT_THAT(itr->asString(), StartsWith(multi_term_iterator()));
expect_hits(concat({10}, range(100, 128)), *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_filter_field)
+TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_btree_iterators_used_for_filter_field)
{
setup(true, true);
add_term(1);
@@ -235,7 +330,7 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_weight_iterators_used_for_fi
expect_or_iterator(*itr, 3);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
- expect_or_child(*itr, 2, "search::queryeval::WeightedSetTermSearchImpl");
+ expect_or_child(*itr, 2, multi_term_iterator());
expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
}
@@ -251,17 +346,17 @@ TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field)
expect_hits(concat(range(100, 128), range(300, 128)), *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, filter_iterator_used_for_filter_field_and_ranking_not_needed)
+TEST_P(DirectMultiTermBlueprintTest, or_filter_iterator_used_for_filter_field_when_ranking_not_needed)
{
setup(true, false);
add_term(1);
add_term(3);
auto itr = create_leaf_search();
- EXPECT_THAT(itr->asString(), StartsWith("search::attribute::DocumentWeightOrFilterSearchImpl"));
+ EXPECT_THAT(itr->asString(), StartsWith("search::attribute::MultiTermOrFilterSearchImpl"));
expect_hits({10, 30, 31}, *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_filter_field_and_ranking_not_needed)
+TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_or_filter_iterator_used_for_filter_field_when_ranking_not_needed)
{
setup(true, false);
add_term(1);
@@ -272,11 +367,11 @@ TEST_P(DirectMultiTermBlueprintTest, bitvectors_and_filter_iterator_used_for_fil
expect_or_iterator(*itr, 3);
expect_or_child(*itr, 0, "search::BitVectorIteratorStrictT");
expect_or_child(*itr, 1, "search::BitVectorIteratorStrictT");
- expect_or_child(*itr, 2, "search::attribute::DocumentWeightOrFilterSearchImpl");
+ expect_or_child(*itr, 2, "search::attribute::MultiTermOrFilterSearchImpl");
expect_hits(concat({10, 30, 31}, concat(range(100, 128), range(300, 128))), *itr);
}
-TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_and_ranking_not_needed)
+TEST_P(DirectMultiTermBlueprintTest, only_bitvectors_used_for_filter_field_when_ranking_not_needed)
{
setup(true, false);
add_term(100);
diff --git a/searchlib/src/tests/attribute/direct_posting_store/.gitignore b/searchlib/src/tests/attribute/direct_posting_store/.gitignore
new file mode 100644
index 00000000000..5516bc721c7
--- /dev/null
+++ b/searchlib/src/tests/attribute/direct_posting_store/.gitignore
@@ -0,0 +1 @@
+searchlib_direct_posting_store_test_app
diff --git a/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt
new file mode 100644
index 00000000000..3c8e76bc9b2
--- /dev/null
+++ b/searchlib/src/tests/attribute/direct_posting_store/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from direct_posting_store_test.cpp; its DEPENDS list names
+# searchlib, searchlib_test and GTest::GTest.
+vespa_add_executable(searchlib_direct_posting_store_test_app TEST
+ SOURCES
+ direct_posting_store_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+ GTest::GTest
+)
+# Registers the executable as a test whose command is the executable itself.
+vespa_add_test(NAME searchlib_direct_posting_store_test_app COMMAND searchlib_direct_posting_store_test_app)
diff --git a/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
new file mode 100644
index 00000000000..c1e12580559
--- /dev/null
+++ b/searchlib/src/tests/attribute/direct_posting_store/direct_posting_store_test.cpp
@@ -0,0 +1,297 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attribute_read_guard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/i_docid_posting_store.h>
+#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h>
+#define ENABLE_GTEST_MIGRATION
+#include <vespa/searchlib/test/searchiteratorverifier.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/test/insertion_operators.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("direct_posting_store_test");
+
+using namespace search;
+using namespace search::attribute;
+
+// Creates an attribute named "my_attribute" with the given basic type and collection
+// type; fast-search is switched on or off according to fast_search.
+AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) {
+    Config attr_config(type, collection);
+    attr_config.setFastSearch(fast_search);
+    return AttributeFactory::createAttribute("my_attribute", attr_config);
+}
+
+// Adds `limit` documents to the attribute, commits, and asserts that the docid
+// reported by the last addDoc() call equals limit - 1.
+//
+// docid is zero-initialised and the check is skipped when limit == 0, so the
+// assertion never reads an indeterminate value and never compares against a
+// wrapped-around (limit - 1).
+void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) {
+    AttributeVector::DocId docid = 0;
+    for (size_t i = 0; i < limit; ++i) {
+        attr_ptr->addDoc(docid);
+    }
+    attr_ptr->commit();
+    if (limit > 0) {
+        ASSERT_EQ((limit - 1), docid);
+    }
+}
+
+// Replaces the content of `docid` with `key` and commits the attribute.
+// Single-value attributes are written with update() (the weight is unused);
+// all other collection types are written with append(), which also takes the weight.
+template <typename ATTR, typename KEY>
+void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) {
+    attr->clearDoc(docid);
+    const bool single_value = (attr->getCollectionType() == CollectionType::SINGLE);
+    if (!single_value) {
+        attr->append(docid, key, weight);
+    } else {
+        attr->update(docid, key);
+    }
+    attr->commit();
+}
+
+// Stores the integer value 111 in docs 1, 5 and 7, with weights 20, 5 and 10.
+void populate_long(AttributeVector::SP attr_ptr) {
+    auto& int_attr = static_cast<IntegerAttribute&>(*attr_ptr);
+    set_doc(&int_attr, 1, int64_t(111), 20);
+    set_doc(&int_attr, 5, int64_t(111), 5);
+    set_doc(&int_attr, 7, int64_t(111), 10);
+}
+
+// Stores the string value "foo" in docs 1, 5 and 7, with weights 20, 5 and 10.
+void populate_string(AttributeVector::SP attr_ptr) {
+    auto& string_attr = static_cast<StringAttribute&>(*attr_ptr);
+    set_doc(&string_attr, 1, "foo", 20);
+    set_doc(&string_attr, 5, "foo", 5);
+    set_doc(&string_attr, 7, "foo", 10);
+}
+
+// Parameters for one DirectPostingStoreTest instantiation: the attribute's collection
+// type and basic type, plus one term used for valid lookups and one for invalid lookups.
+struct TestParam {
+    CollectionType col_type;
+    BasicType type;
+    const char* valid_term;
+    const char* invalid_term;
+
+    TestParam(CollectionType col_type_in, BasicType type_in,
+              const char* valid_term_in, const char* invalid_term_in)
+        : col_type(col_type_in),
+          type(type_in),
+          valid_term(valid_term_in),
+          invalid_term(invalid_term_in)
+    {
+    }
+    ~TestParam() {}
+};
+
+// Streams a TestParam as "<collection type>_<basic type>".
+std::ostream& operator<<(std::ostream& os, const TestParam& param)
+{
+    return os << param.col_type.asString() << "_" << param.type.asString();
+}
+
+// Fixture parameterised on TestParam: creates a fast-search attribute of the requested
+// type, resolves the matching posting store interface, adds documents and populates
+// the attribute with either string or integer values.
+struct DirectPostingStoreTest : public ::testing::TestWithParam<TestParam> {
+    AttributeVector::SP attr;
+    bool has_weight;
+    const IDirectPostingStore* api;
+
+    // Returns the weighted posting store interface when has_weight is set,
+    // otherwise the docid-only one.
+    const IDirectPostingStore* extract_api() {
+        if (!has_weight) {
+            return attr->as_docid_posting_store();
+        }
+        return attr->as_docid_with_weight_posting_store();
+    }
+
+    // Members are initialised in declaration order, so attr and has_weight are set
+    // before extract_api() reads them.
+    DirectPostingStoreTest()
+        : attr(make_attribute(GetParam().type, GetParam().col_type, true)),
+          has_weight(GetParam().col_type != CollectionType::SINGLE),
+          api(extract_api())
+    {
+        assert(api != nullptr);
+        add_docs(attr);
+        const bool is_string = (GetParam().type == BasicType::STRING);
+        if (is_string) {
+            populate_string(attr);
+        } else {
+            populate_long(attr);
+        }
+    }
+    ~DirectPostingStoreTest() {}
+};
+
+// Expects that an attribute created with these settings returns a non-null pointer
+// from as_docid_posting_store().
+void expect_docid_posting_store(BasicType type, CollectionType col_type, bool fast_search) {
+ EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_posting_store() != nullptr);
+}
+
+// Expects that an attribute created with these settings returns nullptr from
+// as_docid_posting_store().
+void expect_not_docid_posting_store(BasicType type, CollectionType col_type, bool fast_search) {
+ EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_posting_store() == nullptr);
+}
+
+// Expects that an attribute created with these settings returns a non-null pointer
+// from as_docid_with_weight_posting_store().
+void expect_docid_with_weight_posting_store(BasicType type, CollectionType col_type, bool fast_search) {
+ EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_with_weight_posting_store() != nullptr);
+}
+
+// Expects that an attribute created with these settings returns nullptr from
+// as_docid_with_weight_posting_store().
+void expect_not_docid_with_weight_posting_store(BasicType type, CollectionType col_type, bool fast_search) {
+ EXPECT_TRUE(make_attribute(type, col_type, fast_search)->as_docid_with_weight_posting_store() == nullptr);
+}
+
+// Single-value fast-search attributes of type INT8, INT16, INT32, INT64 and STRING
+// are expected to expose the docid posting store interface.
+TEST(DirectPostingStoreApiTest, attributes_support_IDocidPostingStore_interface) {
+ expect_docid_posting_store(BasicType::INT8, CollectionType::SINGLE, true);
+ expect_docid_posting_store(BasicType::INT16, CollectionType::SINGLE, true);
+ expect_docid_posting_store(BasicType::INT32, CollectionType::SINGLE, true);
+ expect_docid_posting_store(BasicType::INT64, CollectionType::SINGLE, true);
+ expect_docid_posting_store(BasicType::STRING, CollectionType::SINGLE, true);
+}
+
+// Single-value BOOL, FLOAT and DOUBLE attributes (even with fast-search), and
+// INT64/STRING attributes without fast-search, are expected NOT to expose the
+// docid posting store interface.
+TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidPostingStore_interface) {
+ expect_not_docid_posting_store(BasicType::BOOL, CollectionType::SINGLE, true);
+ expect_not_docid_posting_store(BasicType::FLOAT, CollectionType::SINGLE, true);
+ expect_not_docid_posting_store(BasicType::DOUBLE, CollectionType::SINGLE, true);
+ expect_not_docid_posting_store(BasicType::INT64, CollectionType::SINGLE, false);
+ expect_not_docid_posting_store(BasicType::STRING, CollectionType::SINGLE, false);
+}
+
+// Fast-search weighted-set attributes of type INT64 and STRING are expected to expose
+// the docid-with-weight posting store interface.
+TEST(DirectPostingStoreApiTest, attributes_support_IDocidWithWeightPostingStore_interface) {
+ expect_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, true);
+ expect_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, true);
+}
+
+// Combinations expected NOT to expose the docid-with-weight posting store interface:
+// INT64/STRING without fast-search (any collection type), INT64/STRING single-value or
+// array with fast-search, and fast-search weighted sets of type INT32 or DOUBLE.
+TEST(DirectPostingStoreApiTest, attributes_do_not_support_IDocidWithWeightPostingStore_interface) {
+ expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, false);
+ expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, false);
+ expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::WSET, false);
+ expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::SINGLE, true);
+ expect_not_docid_with_weight_posting_store(BasicType::INT64, CollectionType::ARRAY, true);
+ expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, false);
+ expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, false);
+ expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::WSET, false);
+ expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::SINGLE, true);
+ expect_not_docid_with_weight_posting_store(BasicType::STRING, CollectionType::ARRAY, true);
+ expect_not_docid_with_weight_posting_store(BasicType::INT32, CollectionType::WSET, true);
+ expect_not_docid_with_weight_posting_store(BasicType::DOUBLE, CollectionType::WSET, true);
+}
+
+// Checks a lookup result for a term that is present: the posting index must be valid
+// and the posting size 3. The expected min/max weights are 5/20 when has_weight is
+// true and 1/1 otherwise.
+void verify_valid_lookup(IDirectPostingStore::LookupResult result, bool has_weight) {
+ EXPECT_TRUE(result.posting_idx.valid());
+ EXPECT_EQ(3u, result.posting_size);
+ EXPECT_EQ(has_weight ? 5 : 1, result.min_weight);
+ EXPECT_EQ(has_weight ? 20 : 1, result.max_weight);
+}
+
+// Checks a lookup result for a term that is absent: invalid posting index, posting
+// size 0 and min/max weights of 0.
+void verify_invalid_lookup(IDirectPostingStore::LookupResult result) {
+ EXPECT_FALSE(result.posting_idx.valid());
+ EXPECT_EQ(0u, result.posting_size);
+ EXPECT_EQ(0, result.min_weight);
+ EXPECT_EQ(0, result.max_weight);
+}
+
+// Runs DirectPostingStoreTest for single-value and weighted-set attributes of type
+// INT64 (terms "111"/"222") and STRING (terms "foo"/"bar"). Test names are derived
+// from the parameter's stream output via PrintToStringParamName().
+INSTANTIATE_TEST_SUITE_P(DefaultInstantiation,
+ DirectPostingStoreTest,
+ testing::Values(TestParam(CollectionType::SINGLE, BasicType::INT64, "111", "222"),
+ TestParam(CollectionType::WSET, BasicType::INT64, "111", "222"),
+ TestParam(CollectionType::SINGLE, BasicType::STRING, "foo", "bar"),
+ TestParam(CollectionType::WSET, BasicType::STRING, "foo", "bar")),
+ testing::PrintToStringParamName());
+
+// Looks up the parameter's valid and invalid terms against a fresh dictionary snapshot
+// and checks the results with verify_valid_lookup() / verify_invalid_lookup().
+TEST_P(DirectPostingStoreTest, lookup_works_correctly) {
+ verify_valid_lookup(api->lookup(GetParam().valid_term, api->get_dictionary_snapshot()), has_weight);
+ verify_invalid_lookup(api->lookup(GetParam().invalid_term, api->get_dictionary_snapshot()));
+}
+
+// Looks up `term`, creates posting list iterators for it through the concrete
+// DirectPostingStoreType, and steps the single resulting iterator through the expected
+// docids 1, 5 and 7. When has_weight is true the weights 20, 5 and 10 are checked too.
+// The dynamic_cast to a reference throws std::bad_cast if `api` is not a
+// DirectPostingStoreType.
+template <typename DirectPostingStoreType, bool has_weight>
+void verify_posting(const IDirectPostingStore& api, const vespalib::string& term) {
+ auto result = api.lookup(term, api.get_dictionary_snapshot());
+ ASSERT_TRUE(result.posting_idx.valid());
+ std::vector<typename DirectPostingStoreType::IteratorType> itr_store;
+ auto& real = dynamic_cast<const DirectPostingStoreType&>(api);
+ real.create(result.posting_idx, itr_store);
+ ASSERT_EQ(1u, itr_store.size());
+ {
+ auto& itr = itr_store[0];
+ // Skip ahead if the iterator starts on a key below docid 1.
+ if (itr.valid() && itr.getKey() < 1) {
+ itr.linearSeek(1);
+ }
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(1u, itr.getKey()); // docid
+ if constexpr (has_weight) {
+ EXPECT_EQ(20, itr.getData()); // weight
+ }
+ itr.linearSeek(2);
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(5u, itr.getKey()); // docid
+ if constexpr (has_weight) {
+ EXPECT_EQ(5, itr.getData()); // weight
+ }
+ itr.linearSeek(6);
+ ASSERT_TRUE(itr.valid());
+ EXPECT_EQ(7u, itr.getKey()); // docid
+ if constexpr (has_weight) {
+ EXPECT_EQ(10, itr.getData()); // weight
+ }
+ // Seeking past the last expected docid must exhaust the iterator.
+ itr.linearSeek(8);
+ EXPECT_FALSE(itr.valid());
+ }
+}
+
+// Verifies the posting list iterator for the valid term, using the posting store type
+// that matches whether the attribute carries weights.
+TEST_P(DirectPostingStoreTest, iterators_are_created_correctly) {
+    const char* term = GetParam().valid_term;
+    if (!has_weight) {
+        verify_posting<IDocidPostingStore, false>(*api, term);
+    } else {
+        verify_posting<IDocidWithWeightPostingStore, true>(*api, term);
+    }
+}
+
+// Verifies collect_folded() for the parameter's valid term.
+// String attributes: "bar" and "FOO" are added first, and the callback is expected to
+// collect {"FOO", "foo"}. Integer attributes: 112 is added, and only 111 is expected.
+TEST_P(DirectPostingStoreTest, collect_folded_works)
+{
+    if (GetParam().type == BasicType::STRING) {
+        auto* sa = static_cast<StringAttribute*>(attr.get());
+        set_doc(sa, 2, "bar", 30);
+        attr->commit();
+        set_doc(sa, 3, "FOO", 30);
+        attr->commit();
+        auto snapshot = api->get_dictionary_snapshot();
+        auto lookup = api->lookup(GetParam().valid_term, snapshot);
+        std::vector<vespalib::string> folded;
+        // Resolve each reported enum index back to its stored value.
+        std::function<void(vespalib::datastore::EntryRef)> save_folded =
+            [&folded, sa](vespalib::datastore::EntryRef enum_idx) {
+                folded.emplace_back(sa->getFromEnum(enum_idx.ref()));
+            };
+        api->collect_folded(lookup.enum_idx, snapshot, save_folded);
+        std::vector<vespalib::string> expected_folded{"FOO", "foo"};
+        EXPECT_EQ(expected_folded, folded);
+    } else {
+        auto* ia = dynamic_cast<IntegerAttributeTemplate<int64_t>*>(attr.get());
+        // dynamic_cast yields nullptr on a type mismatch; fail the test here instead of
+        // dereferencing a null pointer below.
+        ASSERT_TRUE(ia != nullptr);
+        set_doc(ia, 2, int64_t(112), 30);
+        attr->commit();
+        auto snapshot = api->get_dictionary_snapshot();
+        auto lookup = api->lookup(GetParam().valid_term, snapshot);
+        std::vector<int64_t> folded;
+        // Resolve each reported enum index back to its stored value.
+        std::function<void(vespalib::datastore::EntryRef)> save_folded =
+            [&folded, ia](vespalib::datastore::EntryRef enum_idx) {
+                folded.emplace_back(ia->getFromEnum(enum_idx.ref()));
+            };
+        api->collect_folded(lookup.enum_idx, snapshot, save_folded);
+        std::vector<int64_t> expected_folded{int64_t(111)};
+        EXPECT_EQ(expected_folded, folded);
+    }
+}
+
+// SearchIteratorVerifier implementation whose create() builds a
+// DocidWithWeightSearchIterator over the posting list for term "123" in _attr.
+class Verifier : public search::test::SearchIteratorVerifier {
+public:
+    Verifier();
+    ~Verifier();
+    // The strict flag is ignored.
+    SearchIterator::UP create(bool /*strict*/) const override {
+        const auto* api = _attr->as_docid_with_weight_posting_store();
+        assert(api != nullptr);
+        auto dict_entry = api->lookup("123", api->get_dictionary_snapshot());
+        assert(dict_entry.posting_idx.valid());
+        return std::make_unique<queryeval::DocidWithWeightSearchIterator>(_tfmd, *api, dict_entry);
+    }
+private:
+    // mutable because the const create() passes it to the iterator it constructs.
+    mutable fef::TermFieldMatchData _tfmd;
+    AttributeVector::SP _attr;
+};
+
+// Creates a fast-search weighted-set INT64 attribute, adds getDocIdLimit() documents
+// and stores value 123 with weight 1 in every docid returned by getExpectedDocIds().
+Verifier::Verifier()
+    : _attr(make_attribute(BasicType::INT64, CollectionType::WSET, true))
+{
+    add_docs(_attr, getDocIdLimit());
+    auto& int_attr = static_cast<IntegerAttribute&>(*_attr);
+    for (auto expected_docid : getExpectedDocIds()) {
+        set_doc(&int_attr, expected_docid, int64_t(123), 1);
+    }
+}
+// Out-of-line destructor; nothing beyond member destruction is required.
+Verifier::~Verifier() = default;
+
+// Runs the SearchIteratorVerifier checks against the iterator built by Verifier::create().
+TEST(VerifierTest, verify_document_weight_search_iterator) {
+    Verifier().verify();
+}
+
+// NOTE(review): presumably expands to the program's main() that runs all registered
+// tests (macro from vespalib's gtest wrapper header) — confirm against that header.
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore
deleted file mode 100644
index 08cae9a48df..00000000000
--- a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-searchlib_document_weight_iterator_test_app
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt
deleted file mode 100644
index 4cb480068e3..00000000000
--- a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_document_weight_iterator_test_app TEST
- SOURCES
- document_weight_iterator_test.cpp
- DEPENDS
- searchlib
- searchlib_test
-)
-vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app)
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp
deleted file mode 100644
index 28416d09d6f..00000000000
--- a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp
+++ /dev/null
@@ -1,226 +0,0 @@
-// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#include <vespa/searchcommon/attribute/config.h>
-#include <vespa/searchlib/attribute/attribute.h>
-#include <vespa/searchlib/attribute/attribute_read_guard.h>
-#include <vespa/searchlib/attribute/attributefactory.h>
-#include <vespa/searchlib/attribute/attributeguard.h>
-#include <vespa/searchlib/attribute/attributememorysavetarget.h>
-#include <vespa/searchlib/attribute/i_docid_with_weight_posting_store.h>
-#include <vespa/searchlib/index/dummyfileheadercontext.h>
-#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
-#include <vespa/searchlib/test/searchiteratorverifier.h>
-#include <vespa/searchlib/util/randomgenerator.h>
-#include <vespa/vespalib/test/insertion_operators.h>
-#include <vespa/vespalib/testkit/test_kit.h>
-
-#include <vespa/log/log.h>
-LOG_SETUP("document_weight_iterator_test");
-
-using namespace search;
-using namespace search::attribute;
-
-AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) {
- Config cfg(type, collection);
- cfg.setFastSearch(fast_search);
- return AttributeFactory::createAttribute("my_attribute", cfg);
-}
-
-void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) {
- AttributeVector::DocId docid;
- for (size_t i = 0; i < limit; ++i) {
- attr_ptr->addDoc(docid);
- }
- attr_ptr->commit();
- ASSERT_EQUAL((limit - 1), docid);
-}
-
-template <typename ATTR, typename KEY>
-void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) {
- attr->clearDoc(docid);
- attr->append(docid, key, weight);
- attr->commit();
-}
-
-void populate_long(AttributeVector::SP attr_ptr) {
- IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get());
- set_doc(attr, 1, int64_t(111), 20);
- set_doc(attr, 5, int64_t(111), 5);
- set_doc(attr, 7, int64_t(111), 10);
-}
-
-void populate_string(AttributeVector::SP attr_ptr) {
- StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get());
- set_doc(attr, 1, "foo", 20);
- set_doc(attr, 5, "foo", 5);
- set_doc(attr, 7, "foo", 10);
-}
-
-struct LongFixture {
- AttributeVector::SP attr;
- const IDocidWithWeightPostingStore *api;
- LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)),
- api(attr->as_docid_with_weight_posting_store())
- {
- ASSERT_TRUE(api != nullptr);
- add_docs(attr);
- populate_long(attr);
- }
-};
-
-struct StringFixture {
- AttributeVector::SP attr;
- const IDocidWithWeightPostingStore *api;
- StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)),
- api(attr->as_docid_with_weight_posting_store())
- {
- ASSERT_TRUE(api != nullptr);
- add_docs(attr);
- populate_string(attr);
- }
-};
-
-TEST("require that appropriate attributes support the document weight attribute interface") {
- EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr);
- EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->as_docid_with_weight_posting_store() != nullptr);
-}
-
-TEST("require that inappropriate attributes do not support the document weight attribute interface") {
- EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->as_docid_with_weight_posting_store() == nullptr);
- EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->as_docid_with_weight_posting_store() == nullptr);
-}
-
-void verify_valid_lookup(IDirectPostingStore::LookupResult result) {
- EXPECT_TRUE(result.posting_idx.valid());
- EXPECT_EQUAL(3u, result.posting_size);
- EXPECT_EQUAL(5, result.min_weight);
- EXPECT_EQUAL(20, result.max_weight);
-}
-
-void verify_invalid_lookup(IDirectPostingStore::LookupResult result) {
- EXPECT_FALSE(result.posting_idx.valid());
- EXPECT_EQUAL(0u, result.posting_size);
- EXPECT_EQUAL(0, result.min_weight);
- EXPECT_EQUAL(0, result.max_weight);
-}
-
-TEST_F("require that integer lookup works correctly", LongFixture) {
- verify_valid_lookup(f1.api->lookup("111", f1.api->get_dictionary_snapshot()));
- verify_invalid_lookup(f1.api->lookup("222", f1.api->get_dictionary_snapshot()));
-}
-
-TEST_F("require string lookup works correctly", StringFixture) {
- verify_valid_lookup(f1.api->lookup("foo", f1.api->get_dictionary_snapshot()));
- verify_invalid_lookup(f1.api->lookup("bar", f1.api->get_dictionary_snapshot()));
-}
-
-void verify_posting(const IDocidWithWeightPostingStore &api, const char *term) {
- auto result = api.lookup(term, api.get_dictionary_snapshot());
- ASSERT_TRUE(result.posting_idx.valid());
- std::vector<DocidWithWeightIterator> itr_store;
- api.create(result.posting_idx, itr_store);
- ASSERT_EQUAL(1u, itr_store.size());
- {
- DocidWithWeightIterator &itr = itr_store[0];
- if (itr.valid() && itr.getKey() < 1) {
- itr.linearSeek(1);
- }
- ASSERT_TRUE(itr.valid());
- EXPECT_EQUAL(1u, itr.getKey()); // docid
- EXPECT_EQUAL(20, itr.getData()); // weight
- itr.linearSeek(2);
- ASSERT_TRUE(itr.valid());
- EXPECT_EQUAL(5u, itr.getKey()); // docid
- EXPECT_EQUAL(5, itr.getData()); // weight
- itr.linearSeek(6);
- ASSERT_TRUE(itr.valid());
- EXPECT_EQUAL(7u, itr.getKey()); // docid
- EXPECT_EQUAL(10, itr.getData()); // weight
- itr.linearSeek(8);
- EXPECT_FALSE(itr.valid());
- }
-}
-
-TEST_F("require that integer iterators are created correctly", LongFixture) {
- verify_posting(*f1.api, "111");
-}
-
-TEST_F("require that string iterators are created correctly", StringFixture) {
- verify_posting(*f1.api, "foo");
-}
-
-TEST_F("require that collect_folded works for string", StringFixture)
-{
- StringAttribute *attr = static_cast<StringAttribute *>(f1.attr.get());
- set_doc(attr, 2, "bar", 30);
- attr->commit();
- set_doc(attr, 3, "FOO", 30);
- attr->commit();
- auto dictionary_snapshot = f1.api->get_dictionary_snapshot();
- auto lookup1 = f1.api->lookup("foo", dictionary_snapshot);
- std::vector<vespalib::string> folded;
- std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,attr](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(attr->getFromEnum(enum_idx.ref())); };
- f1.api->collect_folded(lookup1.enum_idx, dictionary_snapshot, save_folded);
- std::vector<vespalib::string> expected_folded{"FOO", "foo"};
- EXPECT_EQUAL(expected_folded, folded);
-}
-
-TEST_F("require that collect_folded works for integers", LongFixture)
-{
- IntegerAttributeTemplate<int64_t> *attr = dynamic_cast<IntegerAttributeTemplate<int64_t> *>(f1.attr.get());
- set_doc(attr, 2, int64_t(112), 30);
- attr->commit();
- auto dictionary_snapshot = f1.api->get_dictionary_snapshot();
- auto lookup1 = f1.api->lookup("111", dictionary_snapshot);
- std::vector<int64_t> folded;
- std::function<void(vespalib::datastore::EntryRef)> save_folded = [&folded,attr](vespalib::datastore::EntryRef enum_idx) { folded.emplace_back(attr->getFromEnum(enum_idx.ref())); };
- f1.api->collect_folded(lookup1.enum_idx, dictionary_snapshot, save_folded);
- std::vector<int64_t> expected_folded{int64_t(111)};
- EXPECT_EQUAL(expected_folded, folded);
-}
-
-class Verifier : public search::test::SearchIteratorVerifier {
-public:
- Verifier();
- ~Verifier();
- SearchIterator::UP create(bool strict) const override {
- (void) strict;
- const auto* api = _attr->as_docid_with_weight_posting_store();
- ASSERT_TRUE(api != nullptr);
- auto dict_entry = api->lookup("123", api->get_dictionary_snapshot());
- ASSERT_TRUE(dict_entry.posting_idx.valid());
- return std::make_unique<queryeval::DocumentWeightSearchIterator>(_tfmd, *api, dict_entry);
- }
-private:
- mutable fef::TermFieldMatchData _tfmd;
- AttributeVector::SP _attr;
-};
-
-Verifier::Verifier()
- : _attr(make_attribute(BasicType::INT64, CollectionType::WSET, true))
-{
- add_docs(_attr, getDocIdLimit());
- auto docids = getExpectedDocIds();
- IntegerAttribute *int_attr = static_cast<IntegerAttribute *>(_attr.get());
- for (auto docid: docids) {
- set_doc(int_attr, docid, int64_t(123), 1);
- }
-}
-Verifier::~Verifier() {}
-
-TEST("verify document weight search iterator") {
- Verifier verifier;
- verifier.verify();
-}
-
-TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt
deleted file mode 100644
index b2f86a9ddec..00000000000
--- a/searchlib/src/tests/attribute/document_weight_or_filter_search/CMakeLists.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchlib_document_weight_or_filter_search_test_app TEST
- SOURCES
- document_weight_or_filter_search_test.cpp
- DEPENDS
- searchlib
- searchlib_test
- GTest::GTest
-)
-vespa_add_test(NAME searchlib_document_weight_or_filter_search_test_app COMMAND searchlib_document_weight_or_filter_search_test_app)
diff --git a/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt
new file mode 100644
index 00000000000..4ec5d849ad3
--- /dev/null
+++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from multi_term_or_filter_search_test.cpp; its DEPENDS list
+# names searchlib, searchlib_test and GTest::GTest.
+vespa_add_executable(searchlib_multi_term_or_filter_search_test_app TEST
+ SOURCES
+ multi_term_or_filter_search_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+ GTest::GTest
+)
+# Registers the executable as a test whose command is the executable itself.
+vespa_add_test(NAME searchlib_multi_term_or_filter_search_test_app COMMAND searchlib_multi_term_or_filter_search_test_app)
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp
index ae4812b5437..552a128c518 100644
--- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
+++ b/searchlib/src/tests/attribute/multi_term_or_filter_search/multi_term_or_filter_search_test.cpp
@@ -1,30 +1,34 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/searchlib/attribute/i_direct_posting_store.h>
-#include <vespa/searchlib/attribute/document_weight_or_filter_search.h>
-#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/attribute/multi_term_or_filter_search.h>
#include <vespa/searchlib/common/bitvector.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/vespalib/gtest/gtest.h>
#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/searchiteratorverifier.h>
using PostingList = search::attribute::PostingListTraits<int32_t>::PostingStoreBase;
using Iterator = search::attribute::PostingListTraits<int32_t>::const_iterator;
using KeyData = PostingList::KeyDataType;
+
using search::BitVector;
-using search::attribute::DocumentWeightOrFilterSearch;
+using search::attribute::MultiTermOrFilterSearch;
+using search::fef::TermFieldMatchData;
using search::queryeval::SearchIterator;
using vespalib::datastore::EntryRef;
-class DocumentWeightOrFilterSearchTest : public ::testing::Test {
+class MultiTermOrFilterSearchTest : public ::testing::Test {
PostingList _postings;
+ mutable TermFieldMatchData _tfmd;
vespalib::GenerationHandler _gens;
std::vector<EntryRef> _trees;
uint32_t _range_start;
uint32_t _range_end;
public:
- DocumentWeightOrFilterSearchTest();
- ~DocumentWeightOrFilterSearchTest() override;
+ MultiTermOrFilterSearchTest();
+ ~MultiTermOrFilterSearchTest() override;
void inc_generation();
size_t num_trees() const { return _trees.size(); }
Iterator get_tree(size_t idx) const {
@@ -62,7 +66,7 @@ public:
for (size_t i = 0; i < num_trees(); ++i) {
iterators.emplace_back(get_tree(i));
}
- auto result = DocumentWeightOrFilterSearch::create(std::move(iterators));
+ auto result = MultiTermOrFilterSearch::create(std::move(iterators), _tfmd);
result->initRange(_range_start, _range_end);
return result;
};
@@ -73,6 +77,8 @@ public:
while (doc_id < _range_end) {
if (iterator.seek(doc_id)) {
result.emplace_back(doc_id);
+ iterator.unpack(doc_id);
+ EXPECT_EQ(doc_id, _tfmd.getDocId());
++doc_id;
} else {
doc_id = std::max(doc_id + 1, iterator.getDocId());
@@ -121,7 +127,7 @@ public:
}
};
-DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest()
+MultiTermOrFilterSearchTest::MultiTermOrFilterSearchTest()
: _postings(true),
_gens(),
_range_start(1),
@@ -129,7 +135,7 @@ DocumentWeightOrFilterSearchTest::DocumentWeightOrFilterSearchTest()
{
}
-DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest()
+MultiTermOrFilterSearchTest::~MultiTermOrFilterSearchTest()
{
for (auto& tree : _trees) {
_postings.clear(tree);
@@ -140,7 +146,7 @@ DocumentWeightOrFilterSearchTest::~DocumentWeightOrFilterSearchTest()
}
void
-DocumentWeightOrFilterSearchTest::inc_generation()
+MultiTermOrFilterSearchTest::inc_generation()
{
_postings.freeze();
_postings.assign_generation(_gens.getCurrentGeneration());
@@ -148,19 +154,19 @@ DocumentWeightOrFilterSearchTest::inc_generation()
_postings.reclaim_memory(_gens.get_oldest_used_generation());
}
-TEST_F(DocumentWeightOrFilterSearchTest, daat_or)
+TEST_F(MultiTermOrFilterSearchTest, daat_or)
{
make_sample_data();
expect_result(eval_daat(*make_iterator()), { 3, 10, 11, 14, 17, 20 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits)
+TEST_F(MultiTermOrFilterSearchTest, taat_get_hits)
{
make_sample_data();
expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 3, 10, 11, 14, 17, 20 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into)
+TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into)
{
make_sample_data();
auto bv = tobv({13, 14});
@@ -168,7 +174,7 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into)
expect_result(frombv(*bv), { 3, 10, 11, 13, 14, 17, 20 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into)
+TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into)
{
make_sample_data();
auto bv = tobv({13, 14});
@@ -176,21 +182,21 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into)
expect_result(frombv(*bv), { 14 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, daat_or_ranged)
+TEST_F(MultiTermOrFilterSearchTest, daat_or_ranged)
{
make_sample_data();
set_range(4, 15);
expect_result(eval_daat(*make_iterator()), {10, 11, 14 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, taat_get_hits_ranged)
+TEST_F(MultiTermOrFilterSearchTest, taat_get_hits_ranged)
{
make_sample_data();
set_range(4, 15);
expect_result(frombv(*make_iterator()->get_hits(get_range_start())), { 10, 11, 14 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged)
+TEST_F(MultiTermOrFilterSearchTest, taat_or_hits_into_ranged)
{
make_sample_data();
set_range(4, 15);
@@ -199,7 +205,7 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_or_hits_into_ranged)
expect_result(frombv(*bv), { 10, 11, 13, 14 });
}
-TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged)
+TEST_F(MultiTermOrFilterSearchTest, taat_and_hits_into_ranged)
{
make_sample_data();
set_range(4, 15);
@@ -211,9 +217,9 @@ TEST_F(DocumentWeightOrFilterSearchTest, taat_and_hits_into_ranged)
namespace {
class Verifier : public search::test::SearchIteratorVerifier {
- DocumentWeightOrFilterSearchTest &_test;
+ MultiTermOrFilterSearchTest &_test;
public:
- Verifier(DocumentWeightOrFilterSearchTest &test, int num_trees)
+ Verifier(MultiTermOrFilterSearchTest &test, int num_trees)
: _test(test)
{
std::vector<std::vector<uint32_t>> trees(num_trees);
@@ -239,7 +245,7 @@ public:
};
-TEST_F(DocumentWeightOrFilterSearchTest, iterator_conformance)
+TEST_F(MultiTermOrFilterSearchTest, iterator_conformance)
{
{
Verifier verifier(*this, 1);
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
index 8831bd1ec75..ecc03ac54c5 100644
--- a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
+++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
@@ -488,11 +488,11 @@ TEST("require that direct attribute iterators work") {
EXPECT_TRUE(result.has_minmax);
EXPECT_EQUAL(100, result.min_weight);
EXPECT_EQUAL(1000, result.max_weight);
- EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") != vespalib::string::npos);
+ EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") != vespalib::string::npos);
} else {
EXPECT_EQUAL(num_docs, result.est_hits);
EXPECT_FALSE(result.has_minmax);
- EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos);
+ EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos);
}
ASSERT_EQUAL(3u, result.hits.size());
EXPECT_FALSE(result.est_empty);
@@ -513,7 +513,7 @@ TEST("require that single weighted set turns filter on filter fields") {
SimpleStringTerm node("foo", "", 0, Weight(1));
Result result = do_search(attribute_manager, node, strict);
EXPECT_EQUAL(3u, result.est_hits);
- EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") == vespalib::string::npos);
+ EXPECT_TRUE(result.iterator_dump.find("DocidWithWeightSearchIterator") == vespalib::string::npos);
EXPECT_TRUE(result.iterator_dump.find("FilterAttributePostingListIteratorT") != vespalib::string::npos);
ASSERT_EQUAL(3u, result.hits.size());
EXPECT_FALSE(result.est_empty);
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 6e334fffa75..741a86b0beb 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -468,7 +468,7 @@ template <typename V, typename T>
ResultSetPtr
SearchContextTest::performSearch(const V & vec, const T & term)
{
- return performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD);
+ return performSearch(queryeval::ExecuteInfo::TRUE, vec, term, TermType::WORD);
}
template <typename V, typename T>
@@ -503,7 +503,7 @@ void
SearchContextTest::performSearch(const V & vec, const vespalib::string & term,
const DocSet & expected, TermType termType)
{
- performSearch(search::queryeval::ExecuteInfo::TRUE, vec, term, expected, termType);
+ performSearch(queryeval::ExecuteInfo::TRUE, vec, term, expected, termType);
}
void
@@ -1113,7 +1113,7 @@ SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::st
{
for (size_t num_threads : {1,3}) {
vespalib::SimpleThreadBundle thread_bundle(num_threads);
- auto executeInfo = search::queryeval::ExecuteInfo::create(true, 1.0, nullptr, thread_bundle, true, true);
+ auto executeInfo = queryeval::ExecuteInfo::create(true, 1.0, vespalib::Doom::never(), thread_bundle);
performSearch(executeInfo, vec, term, expected, TermType::WORD);
}
}
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 1beb2b1e501..1bfb9fb41f9 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -1,5 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/enumstore.h>
#include <vespa/searchlib/attribute/singlestringattribute.h>
#include <vespa/searchlib/attribute/singlestringpostattribute.h>
@@ -8,7 +9,6 @@
#include <vespa/searchlib/attribute/enumstore.hpp>
#include <vespa/searchlib/attribute/single_string_enum_search_context.h>
-#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
#include <vespa/log/log.h>
LOG_SETUP("stringattribute_test");
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
index 81862b74eb2..b1b2235165f 100644
--- a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -7,7 +7,6 @@
#include <vespa/searchlib/tensor/dense_tensor_attribute.h>
#include <vespa/searchlib/tensor/direct_tensor_attribute.h>
#include <vespa/searchlib/tensor/doc_vector_access.h>
-#include <vespa/searchlib/tensor/distance_functions.h>
#include <vespa/searchlib/tensor/hnsw_index.h>
#include <vespa/searchlib/tensor/mips_distance_transform.h>
#include <vespa/searchlib/tensor/nearest_neighbor_index.h>
@@ -25,7 +24,6 @@
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/util/mmap_file_allocator_factory.h>
#include <vespa/searchlib/util/bufferwriter.h>
-#include <vespa/vespalib/util/fake_doom.h>
#include <vespa/vespalib/util/threadstackexecutor.h>
#include <vespa/document/base/exceptions.h>
#include <vespa/eval/eval/fast_value.h>
@@ -132,7 +130,7 @@ private:
int _index_value;
public:
- MockIndexSaver(int index_value) : _index_value(index_value) {}
+ explicit MockIndexSaver(int index_value) noexcept : _index_value(index_value) {}
void save(search::BufferWriter& writer) const override {
writer.write(&_index_value, sizeof(int));
writer.flush();
@@ -158,7 +156,7 @@ public:
class MockPrepareResult : public PrepareResult {
public:
uint32_t docid;
- MockPrepareResult(uint32_t docid_in) : docid(docid_in) {}
+ explicit MockPrepareResult(uint32_t docid_in) noexcept : docid(docid_in) {}
};
class MockNearestNeighborIndex : public NearestNeighborIndex {
@@ -177,7 +175,7 @@ private:
int _index_value;
public:
- MockNearestNeighborIndex(const DocVectorAccess& vectors)
+ explicit MockNearestNeighborIndex(const DocVectorAccess& vectors)
: _vectors(vectors),
_adds(),
_removes(),
@@ -279,11 +277,11 @@ public:
}
vespalib::MemoryUsage update_stat(const CompactionStrategy&) override {
++_memory_usage_cnt;
- return vespalib::MemoryUsage();
+ return {};
}
vespalib::MemoryUsage memory_usage() const override {
++_memory_usage_cnt;
- return vespalib::MemoryUsage();
+ return {};
}
void populate_address_space_usage(AddressSpaceUsage&) const override {}
void get_state(const vespalib::slime::Inserter&) const override {}
@@ -293,7 +291,7 @@ public:
if (_index_value != 0) {
return std::make_unique<MockIndexSaver>(_index_value);
}
- return std::unique_ptr<NearestNeighborIndexSaver>();
+ return {};
}
std::unique_ptr<NearestNeighborIndexLoader> make_loader(FastOS_FileInterface& file, const vespalib::GenericHeader& header) override {
(void) header;
@@ -310,7 +308,7 @@ public:
(void) explore_k;
(void) doom;
(void) distance_threshold;
- return std::vector<Neighbor>();
+ return {};
}
std::vector<Neighbor> find_top_k_with_filter(uint32_t k,
const search::tensor::BoundDistanceFunction &df,
@@ -324,7 +322,7 @@ public:
(void) filter;
(void) doom;
(void) distance_threshold;
- return std::vector<Neighbor>();
+ return {};
}
search::tensor::DistanceFunctionFactory &distance_function_factory() const override {
@@ -427,7 +425,7 @@ struct Fixture {
FixtureTraits _traits;
vespalib::string _mmap_allocator_base_dir;
- Fixture(const vespalib::string &typeSpec, FixtureTraits traits = FixtureTraits());
+ explicit Fixture(const vespalib::string &typeSpec, FixtureTraits traits = FixtureTraits());
~Fixture();
@@ -589,7 +587,7 @@ struct Fixture {
}
TensorSpec expEmptyDenseTensor() const {
- return TensorSpec(denseSpec);
+ return {denseSpec};
}
vespalib::string expEmptyDenseTensorSpec() const {
@@ -1296,12 +1294,10 @@ template <typename ParentT>
class NearestNeighborBlueprintFixtureBase : public ParentT {
private:
std::unique_ptr<Value> _query_tensor;
- vespalib::FakeDoom _no_doom;
public:
NearestNeighborBlueprintFixtureBase()
- : _query_tensor(),
- _no_doom()
+ : _query_tensor()
{
this->set_tensor(1, vec_2d(1, 1));
this->set_tensor(2, vec_2d(2, 2));
@@ -1329,7 +1325,7 @@ public:
std::make_unique<DistanceCalculator>(this->as_dense_tensor(),
create_query_tensor(vec_2d(17, 42))),
3, approximate, 5, 100100.25,
- global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, _no_doom.get_doom());
+ global_filter_lower_limit, 1.0, target_hits_max_adjustment_factor, vespalib::Doom::never());
EXPECT_EQUAL(11u, bp->getState().estimate().estHits);
EXPECT_EQUAL(100100.25 * 100100.25, bp->get_distance_threshold());
return bp;
diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
index 2ac9fb738f8..758f44cdba2 100644
--- a/searchlib/src/tests/common/bitvector/bitvector_test.cpp
+++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp
@@ -646,45 +646,45 @@ TEST("requireThatGrowWorks")
EXPECT_EQUAL(4u, v.writer().countTrueBits());
EXPECT_EQUAL(200u, v.reader().size());
- EXPECT_EQUAL(1023u, v.writer().capacity());
+ EXPECT_EQUAL(2047u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader()));
EXPECT_EQUAL(4u, v.writer().countTrueBits());
- EXPECT_TRUE(v.reserve(1024));
+ EXPECT_TRUE(v.reserve(2048u));
EXPECT_EQUAL(200u, v.reader().size());
- EXPECT_EQUAL(2047u, v.writer().capacity());
+ EXPECT_EQUAL(4095u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader()));
EXPECT_EQUAL(4u, v.writer().countTrueBits());
EXPECT_FALSE(v.extend(202));
EXPECT_EQUAL(202u, v.reader().size());
- EXPECT_EQUAL(2047u, v.writer().capacity());
+ EXPECT_EQUAL(4095u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader()));
EXPECT_EQUAL(4u, v.writer().countTrueBits());
EXPECT_FALSE(v.shrink(200));
EXPECT_EQUAL(200u, v.reader().size());
- EXPECT_EQUAL(2047u, v.writer().capacity());
+ EXPECT_EQUAL(4095u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader()));
EXPECT_EQUAL(4u, v.writer().countTrueBits());
- EXPECT_FALSE(v.reserve(2047));
+ EXPECT_FALSE(v.reserve(4095u));
EXPECT_EQUAL(200u, v.reader().size());
- EXPECT_EQUAL(2047u, v.writer().capacity());
+ EXPECT_EQUAL(4095u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader()));
EXPECT_EQUAL(4u, v.writer().countTrueBits());
EXPECT_FALSE(v.shrink(202));
EXPECT_EQUAL(202u, v.reader().size());
- EXPECT_EQUAL(2047u, v.writer().capacity());
+ EXPECT_EQUAL(4095u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71,103]", v.reader()));
EXPECT_EQUAL(4u, v.writer().countTrueBits());
EXPECT_FALSE(v.shrink(100));
EXPECT_EQUAL(100u, v.reader().size());
- EXPECT_EQUAL(2047u, v.writer().capacity());
+ EXPECT_EQUAL(4095u, v.writer().capacity());
EXPECT_TRUE(assertBV("[7,39,71]", v.reader()));
EXPECT_EQUAL(3u, v.writer().countTrueBits());
v.writer().invalidateCachedCount();
- EXPECT_TRUE(v.reserve(3100));
+ EXPECT_TRUE(v.reserve(5100u));
EXPECT_EQUAL(100u, v.reader().size());
- EXPECT_EQUAL(4095u, v.writer().capacity());
+ EXPECT_EQUAL(6143u, v.writer().capacity());
EXPECT_EQUAL(3u, v.writer().countTrueBits());
g.assign_generation(1);
@@ -701,9 +701,9 @@ TEST("require that growable bit vectors keeps memory allocator")
EXPECT_EQUAL(AllocStats(1, 0), stats);
v.writer().resize(1); // DO NOT TRY THIS AT HOME
EXPECT_EQUAL(AllocStats(2, 1), stats);
- v.reserve(2000);
+ v.reserve(2048);
EXPECT_EQUAL(AllocStats(3, 1), stats);
- v.extend(4000);
+ v.extend(5000);
EXPECT_EQUAL(AllocStats(4, 1), stats);
v.shrink(200);
EXPECT_EQUAL(AllocStats(4, 1), stats);
diff --git a/searchlib/src/tests/features/CMakeLists.txt b/searchlib/src/tests/features/CMakeLists.txt
index 9d2ed02f5dd..ea2410734b5 100644
--- a/searchlib/src/tests/features/CMakeLists.txt
+++ b/searchlib/src/tests/features/CMakeLists.txt
@@ -17,5 +17,6 @@ vespa_add_executable(searchlib_featurebenchmark_app
featurebenchmark.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_featurebenchmark_app COMMAND searchlib_featurebenchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/features/beta/CMakeLists.txt b/searchlib/src/tests/features/beta/CMakeLists.txt
index 543982c549c..db45f02d898 100644
--- a/searchlib/src/tests/features/beta/CMakeLists.txt
+++ b/searchlib/src/tests/features/beta/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_executable(searchlib_beta_features_test_app TEST
beta_features_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(
NAME searchlib_beta_features_test_app
diff --git a/searchlib/src/tests/features/beta/beta_features_test.cpp b/searchlib/src/tests/features/beta/beta_features_test.cpp
index e0f57a6cad1..8413cfde47f 100644
--- a/searchlib/src/tests/features/beta/beta_features_test.cpp
+++ b/searchlib/src/tests/features/beta/beta_features_test.cpp
@@ -14,7 +14,7 @@
#include <vespa/searchlib/features/utils.h>
#include <vespa/searchlib/fef/test/plugin/setup.h>
#include <vespa/vespalib/util/rand48.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/vespalib/util/stringfmt.h>
using namespace search::features;
diff --git a/searchlib/src/tests/features/bm25/bm25_test.cpp b/searchlib/src/tests/features/bm25/bm25_test.cpp
index 8abd3d104b9..233c0ec09f3 100644
--- a/searchlib/src/tests/features/bm25/bm25_test.cpp
+++ b/searchlib/src/tests/features/bm25/bm25_test.cpp
@@ -4,9 +4,9 @@
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/fef/blueprintfactory.h>
#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/vespalib/gtest/gtest.h>
using namespace search::features;
diff --git a/searchlib/src/tests/features/element_completeness/CMakeLists.txt b/searchlib/src/tests/features/element_completeness/CMakeLists.txt
index 327bb691819..046b061b884 100644
--- a/searchlib/src/tests/features/element_completeness/CMakeLists.txt
+++ b/searchlib/src/tests/features/element_completeness/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_element_completeness_test_app TEST
element_completeness_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_element_completeness_test_app COMMAND searchlib_element_completeness_test_app)
diff --git a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp
index 3b2a5035d1a..ff210035502 100644
--- a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp
+++ b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp
@@ -5,8 +5,8 @@
#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
#include <vespa/searchlib/fef/test/queryenvironment.h>
#include <vespa/searchlib/features/element_completeness_feature.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/vespalib/util/stringfmt.h>
using namespace search::fef;
diff --git a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt
index 921e4bab04e..748556b0fcd 100644
--- a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt
+++ b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_element_similarity_feature_test_app TEST
element_similarity_feature_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_element_similarity_feature_test_app COMMAND searchlib_element_similarity_feature_test_app)
diff --git a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp
index 3aedb3c51ed..1eda660d2ec 100644
--- a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp
+++ b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp
@@ -5,7 +5,7 @@
#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
#include <vespa/searchlib/fef/test/queryenvironment.h>
#include <vespa/searchlib/features/element_similarity_feature.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <initializer_list>
#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
#include <vespa/vespalib/util/stringfmt.h>
diff --git a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt
index 6af6a9095ac..df55b8f834c 100644
--- a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt
+++ b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_euclidean_distance_test_app TEST
euclidean_distance_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_euclidean_distance_test_app COMMAND searchlib_euclidean_distance_test_app)
diff --git a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp
index d327253731d..3bc61a77c55 100644
--- a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp
+++ b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp
@@ -1,16 +1,16 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/searchlib/attribute/integerbase.h>
#include <vespa/searchlib/attribute/floatbase.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/features/euclidean_distance_feature.h>
#include <vespa/searchlib/fef/fef.h>
-#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/test/ft_test_app.h>
using search::feature_t;
diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp
index e151b47a0c9..9c3d4943d65 100644
--- a/searchlib/src/tests/features/featurebenchmark.cpp
+++ b/searchlib/src/tests/features/featurebenchmark.cpp
@@ -11,7 +11,7 @@
#include <vespa/searchlib/fef/functiontablefactory.h>
#include <vespa/searchlib/fef/test/plugin/setup.h>
#include <vespa/vespalib/util/stringfmt.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/searchcommon/attribute/config.h>
#include <fstream>
diff --git a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt
index 217af473987..e7fc3126e2f 100644
--- a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt
+++ b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_internal_max_reduce_prod_join_feature_test_app TE
internal_max_reduce_prod_join_feature_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_internal_max_reduce_prod_join_feature_test_app COMMAND searchlib_internal_max_reduce_prod_join_feature_test_app)
diff --git a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp
index 852827244bc..7611296c641 100644
--- a/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp
+++ b/searchlib/src/tests/features/internal_max_reduce_prod_join_feature/internal_max_reduce_prod_join_feature_test.cpp
@@ -2,11 +2,11 @@
#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/attribute/attribute.h>
#include <vespa/searchlib/attribute/attributefactory.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/features/internal_max_reduce_prod_join_feature.h>
-#include <vespa/searchlib/attribute/attribute.h>
-#include <vespa/searchcommon/attribute/config.h>
+#include <vespa/searchlib/test/ft_test_app.h>
using search::feature_t;
using namespace search::fef;
diff --git a/searchlib/src/tests/features/prod_features_test.h b/searchlib/src/tests/features/prod_features_test.h
index 94c4e496dd2..aeadf23be80 100644
--- a/searchlib/src/tests/features/prod_features_test.h
+++ b/searchlib/src/tests/features/prod_features_test.h
@@ -4,7 +4,7 @@
#include <vespa/searchlib/features/distancetopathfeature.h>
#include <vespa/searchlib/features/termdistancefeature.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/searchlib/test/ft_test_app.h>
class Test : public FtTestApp
{
diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp
index 96a53d98865..fe4464bad72 100644
--- a/searchlib/src/tests/features/tensor/tensor_test.cpp
+++ b/searchlib/src/tests/features/tensor/tensor_test.cpp
@@ -1,21 +1,21 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/value_codec.h>
+#include <vespa/eval/eval/test/value_compare.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/fef/fef.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
#include <vespa/searchlib/tensor/tensor_attribute.h>
#include <vespa/searchlib/tensor/direct_tensor_attribute.h>
-#include <vespa/searchcommon/attribute/config.h>
-#include <vespa/eval/eval/function.h>
-#include <vespa/eval/eval/simple_value.h>
-#include <vespa/eval/eval/tensor_spec.h>
-#include <vespa/eval/eval/value.h>
-#include <vespa/eval/eval/value_codec.h>
-#include <vespa/eval/eval/test/value_compare.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/vespalib/objects/nbostream.h>
using search::feature_t;
diff --git a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt
index 186ecf38c9e..3ecceffd422 100644
--- a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt
+++ b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_tensor_from_labels_test_app TEST
tensor_from_labels_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_tensor_from_labels_test_app COMMAND searchlib_tensor_from_labels_test_app)
diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp
index 20cfa4d84c8..f241398539a 100644
--- a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp
+++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp
@@ -8,8 +8,8 @@
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/tensor_from_labels_feature.h>
#include <vespa/searchlib/fef/fef.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/eval/eval/function.h>
#include <vespa/eval/eval/simple_value.h>
diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt
index bf93e8923b5..b5322c1a64c 100644
--- a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt
+++ b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_tensor_from_weighted_set_test_app TEST
tensor_from_weighted_set_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_tensor_from_weighted_set_test_app COMMAND searchlib_tensor_from_weighted_set_test_app)
diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp
index db734387288..9c8f231051e 100644
--- a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp
+++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp
@@ -1,6 +1,12 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/eval/eval/function.h>
+#include <vespa/eval/eval/simple_value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/test/value_compare.h>
+#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/searchlib/attribute/integerbase.h>
@@ -8,14 +14,8 @@
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/features/tensor_from_weighted_set_feature.h>
#include <vespa/searchlib/fef/fef.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
-#include <vespa/searchcommon/attribute/config.h>
-#include <vespa/eval/eval/function.h>
-#include <vespa/eval/eval/simple_value.h>
-#include <vespa/eval/eval/tensor_spec.h>
-#include <vespa/eval/eval/value.h>
-#include <vespa/eval/eval/test/value_compare.h>
+#include <vespa/searchlib/test/ft_test_app.h>
using search::feature_t;
using namespace search::fef;
diff --git a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt
index 363619ce4fb..cfa715af516 100644
--- a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt
+++ b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt
@@ -4,5 +4,6 @@ vespa_add_executable(searchlib_text_similarity_feature_test_app TEST
text_similarity_feature_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(NAME searchlib_text_similarity_feature_test_app COMMAND searchlib_text_similarity_feature_test_app)
diff --git a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp
index 03734b15d64..cf0660282f2 100644
--- a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp
+++ b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp
@@ -1,14 +1,14 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchlib/features/setup.h>
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
#include <vespa/searchlib/fef/test/queryenvironment.h>
#include <vespa/searchlib/features/text_similarity_feature.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
-#include <initializer_list>
-#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/vespalib/util/stringfmt.h>
+#include <initializer_list>
using namespace search::fef;
using namespace search::fef::test;
diff --git a/searchlib/src/tests/nativerank/CMakeLists.txt b/searchlib/src/tests/nativerank/CMakeLists.txt
index 20fdc0c1245..2a46dd54904 100644
--- a/searchlib/src/tests/nativerank/CMakeLists.txt
+++ b/searchlib/src/tests/nativerank/CMakeLists.txt
@@ -4,6 +4,7 @@ vespa_add_executable(searchlib_nativerank_test_app TEST
nativerank_test.cpp
DEPENDS
searchlib
+ searchlib_test
)
vespa_add_test(
NAME searchlib_nativerank_test_app
diff --git a/searchlib/src/tests/nativerank/nativerank_test.cpp b/searchlib/src/tests/nativerank/nativerank_test.cpp
index bc9c579a597..69234071a34 100644
--- a/searchlib/src/tests/nativerank/nativerank_test.cpp
+++ b/searchlib/src/tests/nativerank/nativerank_test.cpp
@@ -10,8 +10,8 @@
#include <vespa/searchlib/fef/functiontablefactory.h>
#include <vespa/searchlib/fef/test/plugin/setup.h>
#include <vespa/vespalib/util/stringfmt.h>
-#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/searchlib/test/ft_test_app.h>
#include <vespa/log/log.h>
LOG_SETUP("nativerank_test");
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp
index 306456518b7..7c4b7555158 100644
--- a/searchlib/src/tests/query/streaming_query_test.cpp
+++ b/searchlib/src/tests/query/streaming_query_test.cpp
@@ -6,6 +6,7 @@
#include <vespa/searchlib/query/streaming/in_term.h>
#include <vespa/searchlib/query/streaming/query.h>
#include <vespa/searchlib/query/streaming/nearest_neighbor_query_node.h>
+#include <vespa/searchlib/query/streaming/wand_term.h>
#include <vespa/searchlib/query/tree/querybuilder.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/query/tree/stackdumpcreator.h>
@@ -27,6 +28,7 @@ void assertHit(const Hit & h, size_t expWordpos, size_t expContext, int32_t weig
EXPECT_EQ(h.weight(), weight);
}
+
TEST(StreamingQueryTest, test_query_language)
{
QueryNodeResultFactory factory;
@@ -38,7 +40,7 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, 7);
EXPECT_EQ(ib, 7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, 7);
EXPECT_EQ(db, 7);
}
@@ -48,15 +50,24 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -7);
EXPECT_EQ(ib, -7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, -7);
EXPECT_EQ(db, -7);
}
+ {
+ QueryTerm q(factory.create(), "+7", "index", TermType::WORD);
+ EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
+ EXPECT_EQ(ia, 7);
+ EXPECT_EQ(ib, 7);
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, 7);
+ EXPECT_EQ(db, 7);
+ }
{
QueryTerm q(factory.create(), "7.5", "index", TermType::WORD);
EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib));
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, 7.5);
EXPECT_EQ(db, 7.5);
}
@@ -64,7 +75,7 @@ TEST(StreamingQueryTest, test_query_language)
{
QueryTerm q(factory.create(), "-7.5", "index", TermType::WORD);
EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib));
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, -7.5);
EXPECT_EQ(db, -7.5);
}
@@ -74,8 +85,8 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, 6);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, -std::numeric_limits<double>::max());
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, -std::numeric_limits<double>::infinity());
EXPECT_LT(db, 7);
EXPECT_GT(db, 6.99);
}
@@ -85,8 +96,8 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, 7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, -std::numeric_limits<double>::max());
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, -std::numeric_limits<double>::infinity());
EXPECT_EQ(db, 7);
}
@@ -95,10 +106,10 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, 8);
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_GT(da, 7);
EXPECT_LT(da, 7.01);
- EXPECT_EQ(db, std::numeric_limits<double>::max());
+ EXPECT_EQ(db, std::numeric_limits<double>::infinity());
}
{
@@ -106,9 +117,9 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, 7);
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, 7);
- EXPECT_EQ(db, std::numeric_limits<double>::max());
+ EXPECT_EQ(db, std::numeric_limits<double>::infinity());
}
{
@@ -116,7 +127,7 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -7);
EXPECT_EQ(ib, 7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, -7);
EXPECT_EQ(db, 7);
}
@@ -126,7 +137,7 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression.
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, -7.1);
EXPECT_EQ(db, 7.1);
}
@@ -136,7 +147,7 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression.
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, 500.0);
EXPECT_EQ(db, std::numeric_limits<double>::max());
}
@@ -147,8 +158,8 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -6);
EXPECT_EQ(ib, 7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, std::nextafterf(minusSeven, seven));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, std::nextafter(minusSeven, seven));
EXPECT_EQ(db, seven);
}
@@ -157,9 +168,9 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -6);
EXPECT_EQ(ib, 6);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, std::nextafterf(minusSeven, seven));
- EXPECT_EQ(db, std::nextafterf(seven, minusSeven));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, std::nextafter(minusSeven, seven));
+ EXPECT_EQ(db, std::nextafter(seven, minusSeven));
}
{
@@ -174,9 +185,9 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -7);
EXPECT_EQ(ib, 6);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, minusSeven);
- EXPECT_EQ(db, std::nextafterf(seven, minusSeven));
+ EXPECT_EQ(db, std::nextafter(seven, minusSeven));
}
{
@@ -184,8 +195,8 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, -8);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, -std::numeric_limits<double>::max());
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, -std::numeric_limits<double>::infinity());
EXPECT_LT(db, -7);
EXPECT_GT(db, -7.01);
}
@@ -195,8 +206,8 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, -7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, -std::numeric_limits<double>::max());
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, -std::numeric_limits<double>::infinity());
EXPECT_EQ(db, -7);
}
@@ -205,8 +216,8 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, std::numeric_limits<int64_t>::min());
EXPECT_EQ(ib, -7);
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
- EXPECT_EQ(da, -std::numeric_limits<double>::max());
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
+ EXPECT_EQ(da, -std::numeric_limits<double>::infinity());
EXPECT_EQ(db, -7);
}
@@ -215,10 +226,10 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -6);
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_GT(da, -7);
EXPECT_LT(da, -6.99);
- EXPECT_EQ(db, std::numeric_limits<double>::max());
+ EXPECT_EQ(db, std::numeric_limits<double>::infinity());
}
{
@@ -226,9 +237,9 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -7);
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, -7);
- EXPECT_EQ(db, std::numeric_limits<double>::max());
+ EXPECT_EQ(db, std::numeric_limits<double>::infinity());
}
{
@@ -236,15 +247,15 @@ TEST(StreamingQueryTest, test_query_language)
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib));
EXPECT_EQ(ia, -7);
EXPECT_EQ(ib, std::numeric_limits<int64_t>::max());
- EXPECT_TRUE(q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(q.getAsFloatTerm(da, db));
EXPECT_EQ(da, -7);
- EXPECT_EQ(db, std::numeric_limits<double>::max());
+ EXPECT_EQ(db, std::numeric_limits<double>::infinity());
}
{
QueryTerm q(factory.create(), "a", "index", TermType::WORD);
EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib));
- EXPECT_TRUE(!q.getAsDoubleTerm(da, db));
+ EXPECT_TRUE(!q.getAsFloatTerm(da, db));
}
{
@@ -287,7 +298,10 @@ TEST(StreamingQueryTest, test_query_language)
class AllowRewrite : public QueryNodeResultFactory // test factory: permits float term rewrite for a single index name
{
public:
- virtual bool getRewriteFloatTerms() const override { return true; }
+ explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {}
+ bool allow_float_terms_rewrite(vespalib::stringref index) const noexcept override { return index == _allowedIndex; } // true only for the index given at construction
+private:
+ vespalib::string _allowedIndex; // index name compared against in allow_float_terms_rewrite()
};
const char TERM_UNIQ = static_cast<char>(ParseItem::ITEM_TERM) | static_cast<char>(ParseItem::IF_UNIQUEID);
@@ -297,12 +311,12 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed)
const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(6u, stackDump.size());
- AllowRewrite allowRewrite;
+ AllowRewrite allowRewrite("c");
const Query q(allowRewrite, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr);
- const QueryTerm & qt = static_cast<const QueryTerm &>(root);
+ const auto & qt = static_cast<const QueryTerm &>(root);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
@@ -313,12 +327,12 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default)
const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(9u, stackDump.size());
- QueryNodeResultFactory empty;
+ AllowRewrite empty("nix");
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr);
- const QueryTerm & qt = static_cast<const QueryTerm &>(root);
+ const auto & qt = static_cast<const QueryTerm &>(root);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
@@ -329,34 +343,34 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too)
const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'};
vespalib::stringref stackDump(term, sizeof(term));
EXPECT_EQ(9u, stackDump.size());
- AllowRewrite empty;
+ AllowRewrite empty("c");
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr);
- const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(root);
+ const auto & equiv = static_cast<const EquivQueryNode &>(root);
EXPECT_EQ(2u, equiv.size());
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr);
{
- const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]);
+ const auto & qt = static_cast<const QueryTerm &>(*equiv[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm());
EXPECT_EQ(3u, qt.uniqueId());
}
EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr);
{
- const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
+ const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]);
EXPECT_EQ(2u, phrase.size());
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[0].get()) != nullptr);
{
- const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]);
+ const auto & qt = static_cast<const QueryTerm &>(*phrase[0]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("1"), qt.getTerm());
EXPECT_EQ(0u, qt.uniqueId());
}
EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[1].get()) != nullptr);
{
- const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]);
+ const auto & qt = static_cast<const QueryTerm &>(*phrase[1]);
EXPECT_EQ("c", qt.index());
EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm());
EXPECT_EQ(0u, qt.uniqueId());
@@ -460,7 +474,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate)
terms[1]->add(1, 5, 0, 1);
terms[2]->add(0, 5, 0, 1);
HitList hits;
- PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]);
+ auto * p = static_cast<PhraseQueryNode *>(phrases[0]);
p->evaluateHits(hits);
ASSERT_EQ(3u, hits.size());
EXPECT_EQ(hits[0].wordpos(), 2u);
@@ -522,6 +536,7 @@ void assertInt64Range(const std::string &term, bool expAdjusted, int64_t expLow,
EXPECT_EQ(expHigh, (int64_t)res.high);
}
+
TEST(StreamingQueryTest, require_that_int8_limits_are_enforced)
{
//std::numeric_limits<int8_t>::min() -> -128
@@ -607,6 +622,20 @@ TEST(StreamingQueryTest, require_that_we_can_take_floating_point_values_in_range
assertInt64Range("[1.7976931348623157E308;-1.7976931348623157E308]", false, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min());
}
+void assertIllegalRangeQueries(const QueryTermSimple & qt) { // expects qt to be an invalid range both as int64_t and as double
+ QueryTermSimple::RangeResult<int64_t> ires = qt.getRange<int64_t>();
+ EXPECT_EQ(false, ires.valid); // integer interpretation must be flagged invalid
+ QueryTermSimple::RangeResult<double> fres = qt.getRange<double>();
+ EXPECT_EQ(false, fres.valid); // floating point interpretation must be flagged invalid
+}
+
+TEST(StreamingQueryTest, require_safe_parsing_of_illegal_ranges) {
+ // The two terms below are created when naively splitting numeric terms by dot.
+ // T=A.B => T EQUIV PHRASE(A, B)
+ assertIllegalRangeQueries(QueryTermSimple("[1", TermType::WORD)); // opening bracket without a closing one
+ assertIllegalRangeQueries(QueryTermSimple(".1;2.1]", TermType::WORD)); // closing bracket without an opening one
+}
+
TEST(StreamingQueryTest, require_that_we_handle_empty_range_as_expected)
{
assertInt64Range("[1;1]", false, 1, 1);
@@ -627,11 +656,11 @@ TEST(StreamingQueryTest, require_that_ascending_range_can_be_specified_with_limi
QueryTerm ascending_query(eqnr.create(), "[;;500]", "index", TermType::WORD);
EXPECT_TRUE(ascending_query.getAsIntegerTerm(low_integer, high_integer));
- EXPECT_TRUE(ascending_query.getAsDoubleTerm(low_double, high_double));
+ EXPECT_TRUE(ascending_query.getAsFloatTerm(low_double, high_double));
EXPECT_EQ(std::numeric_limits<int64_t>::min(), low_integer);
EXPECT_EQ(std::numeric_limits<int64_t>::max(), high_integer);
- EXPECT_EQ(-std::numeric_limits<double>::max(), low_double);
- EXPECT_EQ(std::numeric_limits<double>::max(), high_double);
+ EXPECT_EQ(-std::numeric_limits<double>::infinity(), low_double);
+ EXPECT_EQ(std::numeric_limits<double>::infinity(), high_double);
EXPECT_EQ(500, ascending_query.getRangeLimit());
}
@@ -646,11 +675,11 @@ TEST(StreamingQueryTest, require_that_descending_range_can_be_specified_with_lim
QueryTerm descending_query(eqnr.create(), "[;;-500]", "index", TermType::WORD);
EXPECT_TRUE(descending_query.getAsIntegerTerm(low_integer, high_integer));
- EXPECT_TRUE(descending_query.getAsDoubleTerm(low_double, high_double));
+ EXPECT_TRUE(descending_query.getAsFloatTerm(low_double, high_double));
EXPECT_EQ(std::numeric_limits<int64_t>::min(), low_integer);
EXPECT_EQ(std::numeric_limits<int64_t>::max(), high_integer);
- EXPECT_EQ(-std::numeric_limits<double>::max(), low_double);
- EXPECT_EQ(std::numeric_limits<double>::max(), high_double);
+ EXPECT_EQ(-std::numeric_limits<double>::infinity(), low_double);
+ EXPECT_EQ(std::numeric_limits<double>::infinity(), high_double);
EXPECT_EQ(-500, descending_query.getRangeLimit());
}
@@ -735,7 +764,7 @@ TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par
TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query)
{
- QueryTermSimple term("<form><iframe+&#09;&#10;&#11;+src=\\\"javascript&#58;alert(1)\\\"&#11;&#10;&#09;;>", TermType::WORD);
+ QueryTermSimple term(R"(<form><iframe+&#09;&#10;&#11;+src=\"javascript&#58;alert(1)\"&#11;&#10;&#09;;>)", TermType::WORD);
EXPECT_FALSE(term.isValid());
}
@@ -744,7 +773,7 @@ TEST(StreamingQueryTest, a_unhandled_sameElement_stack)
const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393";
vespalib::stringref stackDump(stack);
EXPECT_EQ(85u, stackDump.size());
- AllowRewrite empty;
+ AllowRewrite empty("");
const Query q(empty, stackDump);
EXPECT_TRUE(q.valid());
const QueryNode & root = q.getRoot();
@@ -778,7 +807,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate)
vespalib::string stackDump = StackDumpCreator::create(*node);
QueryNodeResultFactory empty;
Query q(empty, stackDump);
- SameElementQueryNode * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot());
+ auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot());
EXPECT_TRUE(sameElem != nullptr);
EXPECT_EQ("field", sameElem->getIndex());
EXPECT_EQ(3u, sameElem->size());
@@ -878,7 +907,7 @@ TEST(StreamingQueryTest, test_in_term)
{
auto term_vector = std::make_unique<StringTermVector>(1);
term_vector->addTerm("7");
- search::streaming::InTerm term({}, "index", std::move(term_vector));
+ search::streaming::InTerm term({}, "index", std::move(term_vector), Normalizing::NONE);
SimpleTermData td;
td.addField(10);
td.addField(11);
@@ -929,6 +958,68 @@ TEST(StreamingQueryTest, dot_product_term)
EXPECT_EQ(-17 * 27 + 9 * 2, tmd1->getRawScore());
}
+namespace {
+
+constexpr double exp_wand_score_field_12 = 13 * 27 + 4 * 2; // 13 and 4: last add() argument for field 12 below; 27 and 2: the term weights
+constexpr double exp_wand_score_field_11 = 17 * 27 + 9 * 2; // 17 and 9: last add() argument for field 11 below; 27 and 2: the term weights
+
+void
+check_wand_term(double limit, const vespalib::string& label) // limit becomes the WandTerm score threshold; label is passed to SCOPED_TRACE
+{
+ SCOPED_TRACE(label);
+ search::streaming::WandTerm term({}, "index", 2);
+ term.add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "7", "", QueryTermSimple::Type::WORD));
+ term.get_terms().back()->setWeight(Weight(27));
+ term.add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), "9", "", QueryTermSimple::Type::WORD));
+ term.get_terms().back()->setWeight(Weight(2));
+ EXPECT_EQ(2, term.get_terms().size());
+ term.set_score_threshold(limit);
+ SimpleTermData td;
+ /*
+ * Search in fields 10, 11 and 12 (cf. fieldset in schema).
+ * Fields 11 and 12 have content for doc containing the keys.
+ * Fields 10 and 12 have valid handles and can be used for ranking.
+ * Field 11 does not have a valid handle, thus no associated match data.
+ */
+ td.addField(10);
+ td.addField(11);
+ td.addField(12);
+ td.lookupField(10)->setHandle(0);
+ td.lookupField(12)->setHandle(1);
+ EXPECT_FALSE(term.evaluate()); // nothing has been added to the sub-terms yet
+ auto& q0 = *term.get_terms()[0];
+ q0.add(0, 11, 0, 17);
+ q0.add(0, 12, 0, 13);
+ auto& q1 = *term.get_terms()[1];
+ q1.add(0, 11, 0, 9);
+ q1.add(0, 12, 0, 4);
+ EXPECT_EQ(limit < exp_wand_score_field_11, term.evaluate()); // expected true only while limit is below the field 11 score
+ MatchData md(MatchData::params().numTermFields(2));
+ term.unpack_match_data(23, td, md);
+ auto tmd0 = md.resolveTermField(0);
+ EXPECT_NE(23, tmd0->getDocId()); // handle 0 was assigned to field 10, which receives no add() calls
+ auto tmd1 = md.resolveTermField(1);
+ if (limit < exp_wand_score_field_12) { // handle 1 was assigned to field 12
+ EXPECT_EQ(23, tmd1->getDocId());
+ EXPECT_EQ(exp_wand_score_field_12, tmd1->getRawScore());
+ } else {
+ EXPECT_NE(23, tmd1->getDocId()); // field 12 score not above limit: docid 23 must not be set
+ }
+}
+
+}
+
+TEST(StreamingQueryTest, wand_term) // runs check_wand_term with 0.0 and with limits one below, at and one above each expected field score
+{
+ check_wand_term(0.0, "no limit");
+ check_wand_term(exp_wand_score_field_12 - 1, "score above limit");
+ check_wand_term(exp_wand_score_field_12, "score at limit");
+ check_wand_term(exp_wand_score_field_12 + 1, "score below limit");
+ check_wand_term(exp_wand_score_field_11 - 1, "hidden score above limit"); // field 11 is the one without a handle in check_wand_term
+ check_wand_term(exp_wand_score_field_11, "hidden score at limit");
+ check_wand_term(exp_wand_score_field_11 + 1, "hidden score below limit");
+}
+
TEST(StreamingQueryTest, control_the_size_of_query_terms)
{
EXPECT_EQ(112u, sizeof(QueryTermSimple));
diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
index 20cf2008e4b..f800e124bdc 100644
--- a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "mysearch.h"
#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/queryeval/flow.h>
#include <vespa/searchlib/queryeval/blueprint.h>
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
#include <vespa/vespalib/objects/objectdumper.h>
@@ -22,8 +23,12 @@ class MyOr : public IntermediateBlueprint
{
private:
public:
- double calculate_cost() const final { return 1.0; }
- double calculate_relative_estimate() const final { return 0.5; }
+ double calculate_cost() const final {
+ return cost_of(get_children(), OrFlow());
+ }
+ double calculate_relative_estimate() const final {
+ return estimate_of(get_children(), OrFlow());
+ }
HitEstimate combine(const std::vector<HitEstimate> &data) const override {
return max(data);
}
@@ -32,7 +37,7 @@ public:
return mixChildrenFields();
}
- void sort(Children &children) const override {
+ void sort(Children &children, bool) const override {
std::sort(children.begin(), children.end(), TieredGreaterEstimate());
}
@@ -440,7 +445,8 @@ TEST_F("testChildAndNotCollapsing", Fixture)
)
);
TEST_DO(f.check_not_equal(*sorted, *unsorted));
- unsorted = Blueprint::optimize(std::move(unsorted));
+ unsorted->setDocIdLimit(1000);
+ unsorted = Blueprint::optimize(std::move(unsorted), true);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -479,7 +485,8 @@ TEST_F("testChildAndCollapsing", Fixture)
);
TEST_DO(f.check_not_equal(*sorted, *unsorted));
- unsorted = Blueprint::optimize(std::move(unsorted));
+ unsorted->setDocIdLimit(1000);
+ unsorted = Blueprint::optimize(std::move(unsorted), true);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -517,7 +524,8 @@ TEST_F("testChildOrCollapsing", Fixture)
.add(MyLeafSpec(1).addField(2, 42).create())
);
TEST_DO(f.check_not_equal(*sorted, *unsorted));
- unsorted = Blueprint::optimize(std::move(unsorted));
+ unsorted->setDocIdLimit(1000);
+ unsorted = Blueprint::optimize(std::move(unsorted), true);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -560,7 +568,8 @@ TEST_F("testChildSorting", Fixture)
);
TEST_DO(f.check_not_equal(*sorted, *unsorted));
- unsorted = Blueprint::optimize(std::move(unsorted));
+ unsorted->setDocIdLimit(1000);
+ unsorted = Blueprint::optimize(std::move(unsorted), true);
TEST_DO(f.check_equal(*sorted, *unsorted));
}
@@ -646,6 +655,7 @@ getExpectedBlueprint()
" tree_size: 2\n"
" allow_termwise_eval: false\n"
" }\n"
+ " cost: 1\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
" children: std::vector {\n"
@@ -666,6 +676,7 @@ getExpectedBlueprint()
" tree_size: 1\n"
" allow_termwise_eval: true\n"
" }\n"
+ " cost: 1\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
" }\n"
@@ -696,6 +707,7 @@ getExpectedSlimeBlueprint() {
" tree_size: 2,"
" allow_termwise_eval: false"
" },"
+ " cost: 1.0,"
" sourceId: 4294967295,"
" docid_limit: 0,"
" children: {"
@@ -721,6 +733,7 @@ getExpectedSlimeBlueprint() {
" tree_size: 1,"
" allow_termwise_eval: true"
" },"
+ " cost: 1.0,"
" sourceId: 4294967295,"
" docid_limit: 0"
" }"
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index e24e91c2f1d..ab1c004c721 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -14,6 +14,11 @@
#include <vespa/searchlib/test/diskindex/testdiskindex.h>
#include <vespa/searchlib/query/tree/simplequery.h>
#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/vespalib/util/overload.h>
+#include <vespa/vespalib/util/approx.h>
+#include <vespa/vespalib/data/simple_buffer.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/data/slime/inserter.h>
#include <filesystem>
#include <vespa/log/log.h>
@@ -24,6 +29,11 @@ using namespace search::fef;
using namespace search::query;
using search::BitVector;
using BlueprintVector = std::vector<std::unique_ptr<Blueprint>>;
+using vespalib::Slime;
+using vespalib::slime::Inspector;
+using vespalib::slime::SlimeInserter;
+using vespalib::make_string_short::fmt;
+using Path = std::vector<std::variant<size_t,vespalib::stringref>>;
struct InvalidSelector : ISourceSelector {
InvalidSelector() : ISourceSelector(Source()) {}
@@ -66,7 +76,8 @@ void check_sort_order(IntermediateBlueprint &self, BlueprintVector children, std
for (const auto & child: children) {
unordered.push_back(child.get());
}
- self.sort(children);
+ // TODO: sort by cost (requires both setDocIdLimit and optimize to be called)
+ self.sort(children, false);
for (size_t i = 0; i < children.size(); ++i) {
EXPECT_EQUAL(children[i].get(), unordered[order[i]]);
}
@@ -120,7 +131,7 @@ TEST("test AndNot Blueprint") {
template <typename BP>
void optimize(std::unique_ptr<BP> &ref) {
- auto optimized = Blueprint::optimize(std::move(ref));
+ auto optimized = Blueprint::optimize(std::move(ref), true);
ref.reset(dynamic_cast<BP*>(optimized.get()));
ASSERT_TRUE(ref);
optimized.release();
@@ -132,8 +143,8 @@ TEST("test And propagates updated histestimate") {
bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(200).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
- optimize(bp);
bp->setDocIdLimit(5000);
+ optimize(bp);
bp->fetchPostings(ExecuteInfo::TRUE);
EXPECT_EQUAL(3u, bp->childCnt());
for (uint32_t i = 0; i < bp->childCnt(); i++) {
@@ -152,8 +163,8 @@ TEST("test Or propagates updated histestimate") {
bp->addChild(ap(MyLeafSpec(2000).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(800).create<RememberExecuteInfo>()->setSourceId(2)));
bp->addChild(ap(MyLeafSpec(20).create<RememberExecuteInfo>()->setSourceId(2)));
- optimize(bp);
bp->setDocIdLimit(5000);
+ optimize(bp);
bp->fetchPostings(ExecuteInfo::TRUE);
EXPECT_EQUAL(4u, bp->childCnt());
for (uint32_t i = 0; i < bp->childCnt(); i++) {
@@ -480,13 +491,71 @@ struct SourceBlenderTestFixture {
void addChildrenForSimpleSBTest(IntermediateBlueprint & parent);
};
+vespalib::string path_to_str(const Path &path) { // renders path as a bracketed, comma-separated list
+ size_t cnt = 0;
+ vespalib::string str("[");
+ for (const auto &item: path) {
+ if (cnt++ > 0) { // comma before every item except the first
+ str.append(",");
+ }
+ std::visit(vespalib::overload{ // one handler per alternative of the Path item variant
+ [&str](size_t value)noexcept{ str.append(fmt("%zu", value)); },
+ [&str](vespalib::stringref value)noexcept{ str.append(value); }}, item);
+ }
+ str.append("]");
+ return str;
+}
+
+vespalib::string to_str(const Inspector &value) { // JSON text for value, or the <missing> marker when value is not valid
+ if (!value.valid()) {
+ return "<missing>";
+ }
+ vespalib::SimpleBuffer buf;
+ vespalib::slime::JsonFormat::encode(value, buf, true); // NOTE(review): third argument presumably selects compact output - confirm
+ return buf.get().make_string();
+}
+
+void compare(const Blueprint &bp1, const Blueprint &bp2, bool expect_eq) { // dumps both blueprints to Slime and expects are_equal() to match expect_eq
+ auto cmp_hook = [expect_eq](const auto &path, const auto &a, const auto &b) { // NOTE(review): presumably invoked by are_equal per differing value; true = accept - confirm
+ if (!path.empty() && std::holds_alternative<vespalib::stringref>(path.back())) { // only paths ending in a field name get special treatment
+ vespalib::stringref field = std::get<vespalib::stringref>(path.back());
+ if (field == "cost") { // cost fields are always accepted
+ return true;
+ }
+ if (field == "relative_estimate") {
+ double a_val = a.asDouble();
+ double b_val = b.asDouble();
+ if (a_val != 0.0 && b_val != 0.0 && vespalib::approx_equal(a_val, b_val)) { // accepted only when both are non-zero and approximately equal
+ return true;
+ }
+ }
+ }
+ if (expect_eq) { // only log details when equality was expected
+ fprintf(stderr, " mismatch at %s: %s vs %s\n", path_to_str(path).c_str(),
+ to_str(a).c_str(), to_str(b).c_str());
+ }
+ return false;
+ };
+ Slime a;
+ Slime b;
+ bp1.asSlime(SlimeInserter(a));
+ bp2.asSlime(SlimeInserter(b));
+ if (expect_eq) {
+ EXPECT_TRUE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook));
+ } else {
+ EXPECT_FALSE(vespalib::slime::are_equal(a.get(), b.get(), cmp_hook));
+ }
+}
+
void
-optimize_and_compare(Blueprint::UP top, Blueprint::UP expect) {
- EXPECT_NOT_EQUAL(expect->asString(), top->asString());
- top = Blueprint::optimize(std::move(top));
- EXPECT_EQUAL(expect->asString(), top->asString());
- expect = Blueprint::optimize(std::move(expect));
- EXPECT_EQUAL(expect->asString(), top->asString());
+optimize_and_compare(Blueprint::UP top, Blueprint::UP expect, bool sort_by_cost = true) { // sort_by_cost is forwarded to Blueprint::optimize
+ top->setDocIdLimit(1000); // both trees get the same docid limit before any comparison
+ expect->setDocIdLimit(1000);
+ TEST_DO(compare(*top, *expect, false)); // the trees must differ before optimizing
+ top = Blueprint::optimize(std::move(top), sort_by_cost);
+ TEST_DO(compare(*top, *expect, true)); // optimized top must match the hand-built expectation
+ expect = Blueprint::optimize(std::move(expect), sort_by_cost);
+ TEST_DO(compare(*expect, *top, true)); // optimizing the expectation must keep it equal to the optimized top
}
void SourceBlenderTestFixture::addChildrenForSBTest(IntermediateBlueprint & parent) {
@@ -612,11 +681,11 @@ TEST("test empty root node optimization and safeness") {
//-------------------------------------------------------------------------
auto expect_up = std::make_unique<EmptyBlueprint>();
- EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1))->asString());
- EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2))->asString());
- EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3))->asString());
- EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4))->asString());
- EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5))->asString());
+ EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top1), true)->asString());
+ EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top2), true)->asString());
+ EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top3), true)->asString());
+ EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top4), true)->asString());
+ EXPECT_EQUAL(expect_up->asString(), Blueprint::optimize(std::move(top5), true)->asString());
}
TEST("and with one empty child is optimized away") {
@@ -624,7 +693,7 @@ TEST("and with one empty child is optimized away") {
Blueprint::UP top = ap((new SourceBlenderBlueprint(*selector))->
addChild(ap(MyLeafSpec(10).create())).
addChild(addLeafs(std::make_unique<AndBlueprint>(), {{0, true}, 10, 20})));
- top = Blueprint::optimize(std::move(top));
+ top = Blueprint::optimize(std::move(top), true);
Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))->
addChild(ap(MyLeafSpec(10).create())).
addChild(std::make_unique<EmptyBlueprint>())));
@@ -716,6 +785,22 @@ TEST("AND_NOT AND AND_NOT collapsing") {
optimize_and_compare(std::move(top), std::move(expect));
}
+TEST("AND_NOT AND AND_NOT AND nested collapsing") { // nested ANDNOT/AND input is expected to optimize into one ANDNOT over one AND
+ Blueprint::UP top = make::ANDNOT()
+ .add(make::AND()
+ .add(make::ANDNOT()
+ .add(make::AND().leafs({1,2}))
+ .leafs({5,6}))
+ .add(make::ANDNOT()
+ .add(make::AND().leafs({3,4}))
+ .leafs({8,9})))
+ .leaf(7);
+ Blueprint::UP expect = make::ANDNOT()
+ .add(make::AND().leafs({1,2,3,4})) // leafs of both inner ANDs gathered in one AND
+ .leafs({9,8,7,6,5}); // every leaf from the ANDNOT levels of the input, in the order expected after optimize
+ optimize_and_compare(std::move(top), std::move(expect));
+}
+
TEST("AND_NOT AND AND_NOT collapsing into full source blender optimization") {
InvalidSelector sel;
Blueprint::UP top =
@@ -783,8 +868,8 @@ TEST("require that replaced blueprints retain source id") {
addChild(ap(MyLeafSpec(30).create()->setSourceId(55)))));
Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42)));
//-------------------------------------------------------------------------
- top1_up = Blueprint::optimize(std::move(top1_up));
- top2_up = Blueprint::optimize(std::move(top2_up));
+ top1_up = Blueprint::optimize(std::move(top1_up), true);
+ top2_up = Blueprint::optimize(std::move(top2_up), true);
EXPECT_EQUAL(expect1_up->asString(), top1_up->asString());
EXPECT_EQUAL(expect2_up->asString(), top2_up->asString());
EXPECT_EQUAL(13u, top1_up->getSourceId());
@@ -1103,8 +1188,8 @@ TEST("require that children of near are not optimized") {
auto expect_up = ap((new NearBlueprint(10))->
addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})).
addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30})));
- top_up = Blueprint::optimize(std::move(top_up));
- EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up), true);
+ TEST_DO(compare(*top_up, *expect_up, true));
}
TEST("require that children of onear are not optimized") {
@@ -1114,27 +1199,27 @@ TEST("require that children of onear are not optimized") {
auto expect_up = ap((new ONearBlueprint(10))->
addChild(addLeafs(std::make_unique<OrBlueprint>(), {20, {0, true}})).
addChild(addLeafs(std::make_unique<OrBlueprint>(), {{0, true}, 30})));
- top_up = Blueprint::optimize(std::move(top_up));
- EXPECT_EQUAL(expect_up->asString(), top_up->asString());
+ top_up = Blueprint::optimize(std::move(top_up), true);
+ TEST_DO(compare(*top_up, *expect_up, true));
}
TEST("require that ANDNOT without children is optimized to empty search") {
Blueprint::UP top_up = std::make_unique<AndNotBlueprint>();
auto expect_up = std::make_unique<EmptyBlueprint>();
- top_up = Blueprint::optimize(std::move(top_up));
+ top_up = Blueprint::optimize(std::move(top_up), true);
EXPECT_EQUAL(expect_up->asString(), top_up->asString());
}
TEST("require that highest cost tier sorts last for OR") {
Blueprint::UP top = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {30, 3}, {20, 2}, {10, 1}});
Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<OrBlueprint>(), {{50, 1}, {10, 1}, {20, 2}, {30, 3}});
- optimize_and_compare(std::move(top), std::move(expect));
+ optimize_and_compare(std::move(top), std::move(expect), false); // sort_by_cost = false
}
TEST("require that highest cost tier sorts last for AND") {
Blueprint::UP top = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {20, 3}, {30, 2}, {50, 1}});
Blueprint::UP expect = addLeafsWithCostTier(std::make_unique<AndBlueprint>(), {{10, 1}, {50, 1}, {30, 2}, {20, 3}});
- optimize_and_compare(std::move(top), std::move(expect));
+ optimize_and_compare(std::move(top), std::move(expect), false); // sort_by_cost = false
}
template<typename BP>
@@ -1251,7 +1336,7 @@ void verify_cost(make &&mk, double expect) {
.cost(1.2).leaf(300)
.cost(1.3).leaf(500);
bp->setDocIdLimit(1000);
- bp = Blueprint::optimize(std::move(bp));
+ bp = Blueprint::optimize(std::move(bp), true);
EXPECT_EQUAL(bp->cost(), expect);
}
diff --git a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
index f910ff5be1b..1180206279d 100644
--- a/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
+++ b/searchlib/src/tests/queryeval/filter_search/filter_search_test.cpp
@@ -48,7 +48,7 @@ concept ChildCollector = requires(T a, std::unique_ptr<Blueprint> bp) {
// inherit Blueprint to capture the default filter factory
struct DefaultBlueprint : Blueprint {
double calculate_relative_estimate() const override { abort(); }
- void optimize(Blueprint* &, OptimizePass) override { abort(); }
+ void optimize(Blueprint* &, OptimizePass, bool) override { abort(); }
const State &getState() const override { abort(); }
void fetchPostings(const ExecuteInfo &) override { abort(); }
void freeze() override { abort(); }
diff --git a/searchlib/src/tests/queryeval/flow/CMakeLists.txt b/searchlib/src/tests/queryeval/flow/CMakeLists.txt
new file mode 100644
index 00000000000..70658d36f21
--- /dev/null
+++ b/searchlib/src/tests/queryeval/flow/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_queryeval_flow_test_app TEST # test executable for the flow tests; single source file listed below
+ SOURCES
+ queryeval_flow_test.cpp
+ DEPENDS
+ searchlib
+ GTest::GTest
+)
+vespa_add_test(NAME searchlib_queryeval_flow_test_app COMMAND searchlib_queryeval_flow_test_app) # test name and command are both the executable name
diff --git a/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
new file mode 100644
index 00000000000..ceda30f169a
--- /dev/null
+++ b/searchlib/src/tests/queryeval/flow/queryeval_flow_test.cpp
@@ -0,0 +1,117 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchlib/queryeval/flow.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <algorithm>
+#include <cstdio>
+#include <random>
+#include <set>
+#include <vector>
+
+using search::queryeval::AndFlow;
+using search::queryeval::OrFlow;
+
+struct Item { // test-local pair of numbers (rel_est, cost) plus helpers for ordering and costing a sequence of them
+ double rel_est; // passed to flow.add() by cost_of after the item has been charged
+ double cost; // multiplied by the current flow.flow() value in cost_of
+ Item(double rel_est_in, double cost_in) noexcept
+ : rel_est(rel_est_in), cost(cost_in) {}
+ static void sort_for_and(std::vector<Item> &data) { // sorts in place by descending (1 - rel_est) / cost
+ std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept {
+ return (1.0 - a.rel_est) / a.cost > (1.0 - b.rel_est) / b.cost; // true when a should be placed before b
+ });
+ }
+ static void sort_for_or(std::vector<Item> &data) { // sorts in place by descending rel_est / cost
+ std::sort(data.begin(), data.end(), [](const Item &a, const Item &b) noexcept {
+ return a.rel_est / a.cost > b.rel_est / b.cost; // true when a should be placed before b
+ });
+ }
+ static double cost_of(const std::vector<Item> &data, auto flow) { // total cost of visiting data in the given order; flow is taken by value, so the caller's object is not modified
+ double cost = 0.0;
+ for (const Item &item: data) {
+ cost += flow.flow() * item.cost; // charge this item using the flow value seen before it is added
+ flow.add(item.rel_est); // then let the item update the flow for the items that follow
+ }
+ return cost;
+ }
+ static double cost_of_and(const std::vector<Item> &data) { return cost_of(data, AndFlow()); } // cost_of starting from a default-constructed AndFlow
+ static double cost_of_or(const std::vector<Item> &data) { return cost_of(data, OrFlow()); } // cost_of starting from a default-constructed OrFlow
+};
+
+std::vector<Item> gen_data(size_t size) { // returns `size` Items filled with pseudo-random rel_est/cost values
+ static std::mt19937 gen; // default-constructed (fixed default seed) and static: the sequence continues across calls
+ static std::uniform_real_distribution<double> rel_est(0.1, 0.9); // rel_est values are drawn from [0.1, 0.9)
+ static std::uniform_real_distribution<double> cost(1.0, 10.0); // cost values are drawn from [1.0, 10.0)
+ std::vector<Item> result;
+ result.reserve(size);
+ for (size_t i = 0; i < size; ++i) {
+ result.emplace_back(rel_est(gen), cost(gen)); // NOTE(review): argument evaluation order is unspecified, so which of the two draws happens first may differ between compilers
+ }
+ return result;
+}
+
+template <typename T, typename F>
+void each_perm(std::vector<T> &data, size_t k, F fun) { // calls fun once per ordering of the first k elements of data (Heap's algorithm); data is permuted in place and fun is copied at each recursion level
+ if (k <= 1) { // nothing left to permute: report the current ordering
+ fun(const_cast<const std::vector<T> &>(data)); // fun only gets a const view of data
+ } else {
+ each_perm(data, k-1, fun); // orderings that keep data[k-1] where it is
+ for (size_t i = 0; i < k-1; ++i) {
+ if (k & 1) { // odd k: swap the first element with the last element of the prefix
+ std::swap(data[0], data[k-1]);
+ } else { // even k: swap the i-th element with the last element of the prefix
+ std::swap(data[i], data[k-1]);
+ }
+ each_perm(data, k-1, fun); // permute the shorter prefix again with a new element in position k-1
+ }
+ }
+}
+
+template <typename T, typename F>
+void each_perm(std::vector<T> &data, F fun) { // convenience overload: permutes the whole vector by passing data.size() as k
+ each_perm(data, data.size(), fun);
+}
+
+TEST(FlowTest, perm_test) { // sanity check of each_perm itself: 5 distinct values must produce 5! = 120 distinct orderings
+ std::set<std::vector<int>> seen; // a set drops duplicates, so its size counts distinct orderings
+ std::vector<int> data = {1,2,3,4,5};
+ auto hook = [&](const std::vector<int> &perm) { // called by each_perm for every ordering
+ EXPECT_EQ(perm.size(), 5u); // unsigned literal avoids a signed/unsigned comparison against size_t
+ seen.insert(perm);
+ };
+ each_perm(data, hook);
+ EXPECT_EQ(seen.size(), 120u); // unsigned literal for the same reason as above
+}
+
+TEST(FlowTest, optimal_and_flow) { // for 256 random data sets, checks that no permutation has a lower AND cost than the sort_for_and ordering
+ for (size_t i = 0; i < 256; ++i) {
+ auto data = gen_data(7); // 7 items per data set; every permutation of them is visited below
+ Item::sort_for_and(data);
+ double min_cost = Item::cost_of_and(data); // cost of the sorted ordering, expected to be the minimum
+ double max_cost = 0.0; // highest cost seen over all permutations, only used for the printout
+ auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { // invoked once per permutation
+ double my_cost = Item::cost_of_and(my_data);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ };
+ each_perm(data, check);
+ fprintf(stderr, " and cost(%zu): min: %g, max: %g, factor: %g\n", // informational output, not asserted on
+ i, min_cost, max_cost, max_cost / min_cost);
+ }
+}
+
+TEST(FlowTest, optimal_or_flow) { // for 256 random data sets, checks that no permutation has a lower OR cost than the sort_for_or ordering
+ for (size_t i = 0; i < 256; ++i) {
+ auto data = gen_data(7); // 7 items per data set; every permutation of them is visited below
+ Item::sort_for_or(data);
+ double min_cost = Item::cost_of_or(data); // cost of the sorted ordering, expected to be the minimum
+ double max_cost = 0.0; // highest cost seen over all permutations, only used for the printout
+ auto check = [min_cost,&max_cost](const std::vector<Item> &my_data) noexcept { // invoked once per permutation
+ double my_cost = Item::cost_of_or(my_data);
+ EXPECT_LE(min_cost, my_cost);
+ max_cost = std::max(max_cost, my_cost);
+ };
+ each_perm(data, check);
+ fprintf(stderr, " or cost(%zu): min: %g, max: %g, factor: %g\n", // informational output, not asserted on
+ i, min_cost, max_cost, max_cost / min_cost);
+ }
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
index 2a59a578ec9..aa6d922f23f 100644
--- a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
+++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp
@@ -1,6 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/searchlib/query/tree/simplequery.h>
-#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h>
#include <vespa/searchlib/queryeval/fake_requestcontext.h>
#include <vespa/searchlib/queryeval/fake_searchable.h>
#include <vespa/searchlib/queryeval/simpleresult.h>
@@ -10,8 +10,9 @@
#include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h>
#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
#include <vespa/searchlib/test/document_weight_attribute_helper.h>
+#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/weightedchildrenverifiers.h>
-#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/vespalib/gtest/gtest.h>
using namespace search::query;
using namespace search::queryeval;
@@ -284,89 +285,101 @@ struct AlgoExhaustPastFixture : public FixtureBase
};
-TEST_F("require that algorithm prunes bad hits after enough good ones are obtained", AlgoSimpleFixture)
+TEST(ParallelWeakAndTest, require_that_algorithm_prunes_bad_hits_after_enough_good_ones_are_obtained)
{
+ AlgoSimpleFixture f;
FakeResult expect = FakeResult()
.doc(1).score(1 * 1 + 4 * 1)
.doc(2).score(1 * 2)
.doc(3).score(1 * 3 + 4 * 3)
.doc(5).score(1 * 5 + 4 * 5);
- EXPECT_EQUAL(expect, f.result);
+ EXPECT_EQ(expect, f.result);
}
-TEST_F("require that algorithm uses subsearches as expected", AlgoSimpleFixture) {
- EXPECT_EQUAL(SearchHistory()
- .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1)
- .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1)
- .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2)
- .unpack("PWAND", 2)
- .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3)
- .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3)
- .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5)
- .unpack("PWAND", 5).seek("A", 5).step("A", 5).unpack("A", 5)
- .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId),
- f.spec.getHistory());
+TEST(ParallelWeakAndTest, require_that_algorithm_uses_subsearches_as_expected)
+{
+ AlgoSimpleFixture f;
+ EXPECT_EQ(SearchHistory()
+ .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1)
+ .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1)
+ .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2)
+ .unpack("PWAND", 2)
+ .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3)
+ .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3)
+ .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5)
+ .unpack("PWAND", 5).seek("A", 5).step("A", 5).unpack("A", 5)
+ .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId),
+ f.spec.getHistory());
}
-TEST_F("require that algorithm considers documents in the right order", AlgoAdvancedFixture)
+TEST(ParallelWeakAndTest, require_that_algorithm_considers_documents_in_the_right_order)
{
- EXPECT_EQUAL(SimpleResult()
- .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
- .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
- .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result));
+ AlgoAdvancedFixture f;
+ EXPECT_EQ(SimpleResult()
+ .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
+ .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
+ .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result));
}
-TEST_F("require that algorithm take initial docid for subsearches into account", AlgoSubsearchFixture)
+TEST(ParallelWeakAndTest, require_that_algorithm_take_initial_docid_for_subsearches_into_account)
{
- EXPECT_EQUAL(FakeResult().doc(10).score(20), f.result);
- EXPECT_EQUAL(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 10).unpack("PWAND", 10)
- .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId),
- f.spec.getHistory());
+ AlgoSubsearchFixture f;
+ EXPECT_EQ(FakeResult().doc(10).score(20), f.result);
+ EXPECT_EQ(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 10).unpack("PWAND", 10)
+ .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId),
+ f.spec.getHistory());
}
-TEST_F("require that algorithm uses first match when two matches have same score", AlgoSameScoreFixture)
+TEST(ParallelWeakAndTest, require_that_algorithm_uses_first_match_when_two_matches_have_same_score)
{
- EXPECT_EQUAL(FakeResult().doc(1).score(100), f.result);
+ AlgoSameScoreFixture f;
+ EXPECT_EQ(FakeResult().doc(1).score(100), f.result);
}
-TEST_F("require that algorithm uses initial score threshold (all hits greater)", AlgoScoreThresholdFixture(29))
+TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_all_hits_greater)
{
- EXPECT_EQUAL(FakeResult()
- .doc(1).score(1 * 10 + 2 * 20)
- .doc(2).score(1 * 30)
- .doc(3).score(2 * 40), f.result);
+ AlgoScoreThresholdFixture f(29);
+ EXPECT_EQ(FakeResult()
+ .doc(1).score(1 * 10 + 2 * 20)
+ .doc(2).score(1 * 30)
+ .doc(3).score(2 * 40), f.result);
}
-TEST_F("require that algorithm uses initial score threshold (2 hits greater)", AlgoScoreThresholdFixture(30))
+TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_2_hits_greater)
{
- EXPECT_EQUAL(FakeResult()
- .doc(1).score(1 * 10 + 2 * 20)
- .doc(3).score(2 * 40), f.result);
+ AlgoScoreThresholdFixture f(30);
+ EXPECT_EQ(FakeResult()
+ .doc(1).score(1 * 10 + 2 * 20)
+ .doc(3).score(2 * 40), f.result);
}
-TEST_F("require that algorithm uses initial score threshold (1 hit greater)", AlgoScoreThresholdFixture(50))
+TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_1_hit_greater)
{
- EXPECT_EQUAL(FakeResult()
- .doc(3).score(2 * 40), f.result);
+ AlgoScoreThresholdFixture f(50);
+ EXPECT_EQ(FakeResult()
+ .doc(3).score(2 * 40), f.result);
}
-TEST_F("require that algorithm uses initial score threshold (0 hits greater)", AlgoScoreThresholdFixture(80))
+TEST(ParallelWeakAndTest, require_that_algorithm_uses_initial_score_threshold_case_0_hits_greater)
{
- EXPECT_EQUAL(FakeResult(), f.result);
+ AlgoScoreThresholdFixture f(80);
+ EXPECT_EQ(FakeResult(), f.result);
}
-TEST_F("require that algorithm handle large scores", AlgoLargeScoresFixture(60000L * 70000L))
+TEST(ParallelWeakAndTest, require_that_algorithm_handles_large_scores)
{
- EXPECT_EQUAL(FakeResult()
- .doc(1).score(60000L * 60000L + 70000L * 80000L)
- .doc(3).score(70000L * 90000L), f.result);
+ AlgoLargeScoresFixture f(60000L * 70000L);
+ EXPECT_EQ(FakeResult()
+ .doc(1).score(60000L * 60000L + 70000L * 80000L)
+ .doc(3).score(70000L * 90000L), f.result);
}
-TEST_F("require that algorithm steps all present terms when past is empty", AlgoExhaustPastFixture(25))
+TEST(ParallelWeakAndTest, require_that_algorithm_steps_all_present_terms_when_past_is_empty)
{
- EXPECT_EQUAL(FakeResult()
- .doc(3).score(40)
- .doc(5).score(30), f.result);
+ AlgoExhaustPastFixture f(25);
+ EXPECT_EQ(FakeResult()
+ .doc(3).score(40)
+ .doc(5).score(30), f.result);
}
struct HeapFixture
@@ -380,14 +393,15 @@ struct HeapFixture
}
};
-TEST_F("require that scores are collected in batches before adjusting heap", HeapFixture)
+TEST(ParallelWeakAndTest, require_that_scores_are_collected_in_batches_before_adjusting_heap)
{
- EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6),
- f.result);
- EXPECT_EQUAL(ScoresHistory().add(Scores().add(1).add(2))
- .add(Scores().add(3).add(4))
- .add(Scores().add(5).add(6)),
- f.spec.heap.history);
+ HeapFixture f;
+ EXPECT_EQ(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6),
+ f.result);
+ EXPECT_EQ(ScoresHistory().add(Scores().add(1).add(2))
+ .add(Scores().add(3).add(4))
+ .add(Scores().add(5).add(6)),
+ f.spec.heap.history);
}
@@ -400,13 +414,14 @@ struct SearchFixture : public FixtureBase
}
};
-TEST_F("require that dot product score is calculated", SearchFixture)
+TEST(ParallelWeakAndTest, require_that_dot_product_score_is_calculated)
{
+ SearchFixture f;
FakeResult expect = FakeResult()
.doc(1).score(1 * 10 + 2 * 20)
.doc(2).score(1 * 30)
.doc(3).score(2 * 40);
- EXPECT_EQUAL(expect, f.result);
+ EXPECT_EQ(expect, f.result);
}
@@ -452,8 +467,9 @@ struct BlueprintHitsFixture : public BlueprintFixtureBase
bool maxScoreFirst() {
SearchIterator::UP itr = iterator();
const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get());
- ASSERT_EQUAL(2u, wand->get_num_terms());
- return (wand->get_term_weight(0) == 20);
+ bool failed = false;
+ EXPECT_EQ(2u, wand->get_num_terms()) << (failed = true, "");
+ return failed ? false : (wand->get_term_weight(0) == 20);
}
};
@@ -468,8 +484,11 @@ struct ThresholdBoostFixture : public FixtureBase
SearchIterator::UP si(spec.create());
result = doSearch(*si, spec.rootMatchData);
}
+ ~ThresholdBoostFixture();
};
+ThresholdBoostFixture::~ThresholdBoostFixture() = default;
+
struct BlueprintFixture : public BlueprintFixtureBase
{
BlueprintFixture() : BlueprintFixtureBase() {
@@ -497,89 +516,99 @@ struct BlueprintAsStringFixture : public BlueprintFixtureBase
};
-TEST_F("require that hit estimate is calculated", BlueprintFixture)
+TEST(ParallelWeakAndTest, require_that_hit_estimate_is_calculated)
{
+ BlueprintFixture f;
Node::UP term = f.spec.createNode();
Blueprint::UP bp = f.blueprint(*term);
- EXPECT_EQUAL(4u, bp->getState().estimate().estHits);
+ EXPECT_EQ(4u, bp->getState().estimate().estHits);
}
-TEST_F("require that blueprint picks up docid limit", BlueprintFixture)
+TEST(ParallelWeakAndTest, require_that_blueprint_picks_up_docid_limit)
{
+ BlueprintFixture f;
Node::UP term = f.spec.createNode(57, 67, 77.7);
Blueprint::UP bp = f.blueprint(*term);
const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
- EXPECT_EQUAL(0u, pbp->get_docid_limit());
+ EXPECT_EQ(0u, pbp->get_docid_limit());
bp->setDocIdLimit(1000);
- EXPECT_EQUAL(1000u, pbp->get_docid_limit());
+ EXPECT_EQ(1000u, pbp->get_docid_limit());
}
-TEST_F("require that scores to track, score threshold and threshold boost factor is passed down from query node to blueprint", BlueprintFixture)
+TEST(ParallelWeakAndTest, require_that_scores_to_track_score_threshold_and_threshold_boost_factor_is_passed_down_from_query_node_to_blueprint)
{
+ BlueprintFixture f;
Node::UP term = f.spec.createNode(57, 67, 77.7);
Blueprint::UP bp = f.blueprint(*term);
const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get());
- EXPECT_EQUAL(57u, pbp->getScores().getScoresToTrack());
- EXPECT_EQUAL(67u, pbp->getScoreThreshold());
- EXPECT_EQUAL(77.7, pbp->getThresholdBoostFactor());
+ EXPECT_EQ(57u, pbp->getScores().getScoresToTrack());
+ EXPECT_EQ(67u, pbp->getScoreThreshold());
+ EXPECT_EQ(77.7, pbp->getThresholdBoostFactor());
}
-TEST_F("require that search iterator is correctly setup and executed", BlueprintFixture)
+TEST(ParallelWeakAndTest, require_that_search_iterator_is_correctly_setup_and_executed)
{
+ BlueprintFixture f;
FakeResult expect = FakeResult()
.doc(1).score(1 * 10 + 2 * 20)
.doc(2).score(1 * 30)
.doc(3).score(2 * 40);
- EXPECT_EQUAL(expect, f.search());
+ EXPECT_EQ(expect, f.search());
}
-TEST_F("require that initial score threshold can be specified (1 hit greater)", BlueprintFixture)
+TEST(ParallelWeakAndTest, require_that_initial_score_threshold_can_be_specified_case_1_hit_greater)
{
+ BlueprintFixture f;
Node::UP term = f.spec.createNode(3, 50);
- EXPECT_EQUAL(FakeResult()
- .doc(3).score(2 * 40), f.search(*term));
+ EXPECT_EQ(FakeResult()
+ .doc(3).score(2 * 40), f.search(*term));
}
-TEST_F("require that large scores are handled", BlueprintLargeScoresFixture)
+TEST(ParallelWeakAndTest, require_that_large_scores_are_handled)
{
+ BlueprintLargeScoresFixture f;
Node::UP term = f.spec.createNode(3, 60000L * 70000L);
- EXPECT_EQUAL(FakeResult()
- .doc(1).score(60000L * 60000L + 70000L * 80000L)
- .doc(3).score(70000L * 90000L), f.search(*term));
+ EXPECT_EQ(FakeResult()
+ .doc(1).score(60000L * 60000L + 70000L * 80000L)
+ .doc(3).score(70000L * 90000L), f.search(*term));
}
-TEST_F("require that docid limit is propagated to search iterator", BlueprintFixture())
+TEST(ParallelWeakAndTest, require_that_docid_limit_is_propagated_to_search_iterator)
{
+ BlueprintFixture f1;
f1.spec.docIdLimit = 4050;
SearchIterator::UP itr = f1.iterator();
const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get());
- EXPECT_EQUAL(4050u, wand->getMatchParams().docIdLimit);
+ EXPECT_EQ(4050u, wand->getMatchParams().docIdLimit);
}
-TEST_FFF("require that terms are sorted for maximum skipping",
- BlueprintHitsFixture(50, 50, 100),
- BlueprintHitsFixture(60, 50, 100),
- BlueprintHitsFixture(80, 50, 100))
+TEST(ParallelWeakAndTest, require_that_terms_are_sorted_for_maximum_skipping)
{
+ BlueprintHitsFixture f1(50, 50, 100);
+ BlueprintHitsFixture f2(60, 50, 100);
+ BlueprintHitsFixture f3(80, 50, 100);
EXPECT_TRUE(f1.maxScoreFirst());
EXPECT_TRUE(f2.maxScoreFirst());
EXPECT_FALSE(f3.maxScoreFirst());
}
-TEST_FF("require that threshold boosting works as expected", ThresholdBoostFixture(1.0), ThresholdBoostFixture(2.0))
-{
- EXPECT_EQUAL(FakeResult()
- .doc(1).score(1000)
- .doc(2).score(2000)
- .doc(3).score(3000)
- .doc(4).score(4200), f1.result);
- EXPECT_EQUAL(FakeResult()
- .doc(2).score(2000)
- .doc(4).score(4200), f2.result);
+TEST(ParallelWeakAndTest, require_that_threshold_boosting_works_as_expected)
+{
+ ThresholdBoostFixture f1(1.0);
+ ThresholdBoostFixture f2(2.0);
+ EXPECT_EQ(FakeResult()
+ .doc(1).score(1000)
+ .doc(2).score(2000)
+ .doc(3).score(3000)
+ .doc(4).score(4200), f1.result);
+ EXPECT_EQ(FakeResult()
+ .doc(2).score(2000)
+ .doc(4).score(4200), f2.result);
}
-TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture)
+TEST(ParallelWeakAndTest, require_that_asString_on_blueprint_works)
{
+ BlueprintAsStringFixture f;
Node::UP term = f.spec.createNode(57, 67);
Blueprint::UP bp = f.blueprint(*term);
vespalib::string expStr = "search::queryeval::ParallelWeakAndBlueprint {\n"
@@ -599,6 +628,7 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture)
" tree_size: 2\n"
" allow_termwise_eval: false\n"
" }\n"
+ " cost: 1\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
" _weights: std::vector {\n"
@@ -622,12 +652,13 @@ TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture)
" tree_size: 1\n"
" allow_termwise_eval: true\n"
" }\n"
+ " cost: 1\n"
" sourceId: 4294967295\n"
" docid_limit: 0\n"
" }\n"
" }\n"
"}\n";
- EXPECT_EQUAL(expStr, bp->asString());
+ EXPECT_EQ(expStr, bp->asString());
}
using MatchParams = ParallelWeakAndSearch::MatchParams;
@@ -659,7 +690,7 @@ SearchIterator::UP create_wand(bool use_dww,
assert(childrenMatchData->getNumTermFields() == dict_entries.size());
wand::Terms terms;
for (size_t i = 0; i < dict_entries.size(); ++i) {
- terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]),
+ terms.push_back(wand::Term(new DocidWithWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]),
weights[i],
dict_entries[i].posting_size,
childrenMatchData->resolveTermField(handles[i])));
@@ -684,11 +715,12 @@ private:
mutable DummyHeap _dummy_heap;
};
-TEST("verify search iterator conformance") {
+TEST(ParallelWeakAndTest, verify_search_iterator_conformance)
+{
for (bool use_dww: {false, true}) {
Verifier verifier(use_dww);
verifier.verify();
}
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/queryeval_test.cpp b/searchlib/src/tests/queryeval/queryeval_test.cpp
index a403f7a7c23..3fabb45a7ff 100644
--- a/searchlib/src/tests/queryeval/queryeval_test.cpp
+++ b/searchlib/src/tests/queryeval/queryeval_test.cpp
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/regex/regex.h>
+#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/initrange.h>
#include <vespa/searchlib/queryeval/andnotsearch.h>
#include <vespa/searchlib/queryeval/andsearch.h>
@@ -19,9 +19,9 @@
#include <vespa/searchlib/query/query_term_simple.h>
#include <vespa/searchlib/attribute/singleboolattribute.h>
#include <vespa/searchcommon/common/growstrategy.h>
-#include <vespa/vespalib/test/insertion_operators.h>
#include <vespa/searchlib/fef/fef.h>
#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/log/log.h>
LOG_SETUP("query_eval_test");
@@ -87,15 +87,16 @@ std::unique_ptr<sourceselector::Iterator> selector() {
void testMultiSearch(SearchIterator & search) {
auto & ms = dynamic_cast<MultiSearch &>(search);
ms.initRange(3, 309);
- EXPECT_EQUAL(2u, ms.getDocId());
- EXPECT_EQUAL(309u, ms.getEndId());
+ EXPECT_EQ(2u, ms.getDocId());
+ EXPECT_EQ(309u, ms.getEndId());
for (const auto & child : ms.getChildren()) {
- EXPECT_EQUAL(2u, child->getDocId());
- EXPECT_EQUAL(309u, child->getEndId());
+ EXPECT_EQ(2u, child->getDocId());
+ EXPECT_EQ(309u, child->getEndId());
}
}
-TEST("test that OR.andWith is a NOOP") {
+TEST(QueryEvalTest, test_that_or_andwith_is_a_noop)
+{
TermFieldMatchData tfmd;
MultiSearch::Children ch;
ch.emplace_back(new TrueSearch(tfmd));
@@ -106,7 +107,8 @@ TEST("test that OR.andWith is a NOOP") {
EXPECT_TRUE(search->andWith(std::move(filter), 1));
}
-TEST("test that non-strict AND.andWith is a NOOP") {
+TEST(QueryEvalTest, test_that_non_strict_and_andwidth_is_a_noop)
+{
TermFieldMatchData tfmd;
MultiSearch::Children ch;
ch.emplace_back(new TrueSearch(tfmd));
@@ -117,7 +119,8 @@ TEST("test that non-strict AND.andWith is a NOOP") {
EXPECT_TRUE(filter);
}
-TEST("test that strict AND.andWith steals filter and places it correctly based on estimate") {
+TEST(QueryEvalTest, test_that_strict_and_andwidth_steals_filter_and_places_it_correctly_based_on_estimate)
+{
TermFieldMatchData tfmd;
std::vector<SearchIterator *> ch;
ch.emplace_back(new TrueSearch(tfmd));
@@ -129,19 +132,19 @@ TEST("test that strict AND.andWith steals filter and places it correctly based o
EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
const auto & andChildren = dynamic_cast<MultiSearch &>(*search).getChildren();
- EXPECT_EQUAL(3u, andChildren.size());
- EXPECT_EQUAL(ch[0], andChildren[0].get());
- EXPECT_EQUAL(filterP, andChildren[1].get());
- EXPECT_EQUAL(ch[1], andChildren[2].get());
+ EXPECT_EQ(3u, andChildren.size());
+ EXPECT_EQ(ch[0], andChildren[0].get());
+ EXPECT_EQ(filterP, andChildren[1].get());
+ EXPECT_EQ(ch[1], andChildren[2].get());
auto filter2 = std::make_unique<TrueSearch>(tfmd);
SearchIterator * filter2P = filter2.get();
EXPECT_TRUE(nullptr == search->andWith(std::move(filter2), 6).get());
- EXPECT_EQUAL(4u, andChildren.size());
- EXPECT_EQUAL(filter2P, andChildren[0].get());
- EXPECT_EQUAL(ch[0], andChildren[1].get());
- EXPECT_EQUAL(filterP, andChildren[2].get());
- EXPECT_EQUAL(ch[1], andChildren[3].get());
+ EXPECT_EQ(4u, andChildren.size());
+ EXPECT_EQ(filter2P, andChildren[0].get());
+ EXPECT_EQ(ch[0], andChildren[1].get());
+ EXPECT_EQ(filterP, andChildren[2].get());
+ EXPECT_EQ(ch[1], andChildren[3].get());
}
class NonStrictTrueSearch : public TrueSearch
@@ -151,7 +154,8 @@ public:
[[nodiscard]] Trinary is_strict() const override { return Trinary::False; }
};
-TEST("test that strict AND.andWith does not place non-strict iterator first") {
+TEST(QueryEvalTest, test_that_strict_and_andwidth_does_not_place_non_strict_iterator_first)
+{
TermFieldMatchData tfmd;
std::vector<SearchIterator *> ch;
ch.emplace_back(new TrueSearch(tfmd));
@@ -162,34 +166,38 @@ TEST("test that strict AND.andWith does not place non-strict iterator first") {
SearchIterator * filterP = filter.get();
EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 6).get());
const auto & andChildren = dynamic_cast<MultiSearch &>(*search).getChildren();
- EXPECT_EQUAL(3u, andChildren.size());
- EXPECT_EQUAL(ch[0], andChildren[0].get());
- EXPECT_EQUAL(filterP, andChildren[1].get());
- EXPECT_EQUAL(ch[1], andChildren[2].get());
+ EXPECT_EQ(3u, andChildren.size());
+ EXPECT_EQ(ch[0], andChildren[0].get());
+ EXPECT_EQ(filterP, andChildren[1].get());
+ EXPECT_EQ(ch[1], andChildren[2].get());
}
-TEST("test that strict rank search forwards to its greedy first child") {
+TEST(QueryEvalTest, test_that_strict_rank_search_forwards_to_its_greedy_first_child)
+{
TermFieldMatchData tfmd;
SearchIterator::UP search = RankSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, true);
auto filter = std::make_unique<TrueSearch>(tfmd);
EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
}
-TEST("test that non-strict rank search does NOT forward to its greedy first child") {
+TEST(QueryEvalTest, test_that_non_strict_rank_search_does_not_forward_to_its_greedy_first_child)
+{
TermFieldMatchData tfmd;
SearchIterator::UP search = RankSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, false);
auto filter = std::make_unique<TrueSearch>(tfmd);
EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get());
}
-TEST("test that strict andnot search forwards to its greedy first child") {
+TEST(QueryEvalTest, test_that_strict_andnot_search_forwards_to_its_greedy_first_child)
+{
TermFieldMatchData tfmd;
SearchIterator::UP search = AndNotSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, true);
auto filter = std::make_unique<TrueSearch>(tfmd);
EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get());
}
-TEST("test that non-strict andnot search does NOT forward to its greedy first child") {
+TEST(QueryEvalTest, test_that_non_strict_andnot_search_does_not_forward_to_its_greedy_first_child)
+{
TermFieldMatchData tfmd;
SearchIterator::UP search = AndNotSearch::create({ AndSearch::create(search2("a", "b"), true), new TrueSearch(tfmd) }, false);
auto filter = std::make_unique<TrueSearch>(tfmd);
@@ -199,13 +207,10 @@ TEST("test that non-strict andnot search does NOT forward to its greedy first ch
void expect_match(std::string input, std::string regexp) {
using vespalib::Regex;
Regex pattern = Regex::from_pattern(regexp, Regex::Options::DotMatchesNewline);
- if (! EXPECT_TRUE(pattern.partial_match(input))) {
- fprintf(stderr, "no match for pattern: >>>%s<<< in input:\n>>>\n%s\n<<<\n",
- regexp.c_str(), input.c_str());
- }
+ EXPECT_TRUE(pattern.partial_match(input)) << "no match for pattern: >>>" << regexp << "<<< in input: >>>\n" << input << "<<<";
}
-TEST("testAnd") {
+TEST(QueryEvalTest, test_and) {
SimpleResult a;
SimpleResult b;
a.addHit(5).addHit(10).addHit(16).addHit(30);
@@ -219,17 +224,17 @@ TEST("testAnd") {
SearchIterator::UP and_ab = and_b->createSearch(*md, true);
EXPECT_TRUE(dynamic_cast<const AndSearch *>(and_ab.get()) != nullptr);
- EXPECT_EQUAL(4u, dynamic_cast<AndSearch &>(*and_ab).estimate());
+ EXPECT_EQ(4u, dynamic_cast<AndSearch &>(*and_ab).estimate());
SimpleResult res;
res.search(*and_ab);
SimpleResult expect;
expect.addHit(5).addHit(30);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
SearchIterator::UP filter_ab = and_b->createFilterSearch(true, upper_bound);
SimpleResult filter_res;
filter_res.search(*filter_ab);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
std::string dump = filter_ab->asString();
expect_match(dump, "upper");
expect_match(dump, "AndSearchStrict.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper");
@@ -239,10 +244,8 @@ TEST("testAnd") {
expect_match(dump, "AndSearchNoStrict.*NoUnpack.*SimpleSearch.*lower.*SimpleSearch.*lower");
}
-TEST("mutisearch and initRange") {
-}
-
-TEST("testOr") {
+TEST(QueryEvalTest, test_or)
+{
{
SimpleResult a;
SimpleResult b;
@@ -260,12 +263,12 @@ TEST("testOr") {
res.search(*or_ab);
SimpleResult expect;
expect.addHit(5).addHit(10).addHit(17).addHit(30);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
SearchIterator::UP filter_ab = or_b->createFilterSearch(true, upper_bound);
SimpleResult filter_res;
filter_res.search(*filter_ab);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
std::string dump = filter_ab->asString();
expect_match(dump, "upper");
expect_match(dump, "OrLikeSearch.true.*NoUnpack.*SimpleSearch.*upper.*SimpleSearch.*upper");
@@ -305,34 +308,35 @@ struct MultiSearchRemoveTest {
static SearchIterator::UP remove(MultiSearch &ms, size_t idx) { return ms.remove(idx); }
};
-TEST("testMultiSearch") {
+TEST(QueryEvalTest, test_multi_search)
+{
std::vector<SearchIterator *> orig;
orig.emplace_back(new EmptySearch());
orig.emplace_back(new EmptySearch());
orig.emplace_back(new EmptySearch());
TestInsertRemoveSearch ms({orig[0], orig[1], orig[2]});
- EXPECT_EQUAL(3u, ms.getChildren().size());
- EXPECT_EQUAL(orig[0], ms.getChildren()[0].get());
- EXPECT_EQUAL(orig[1], ms.getChildren()[1].get());
- EXPECT_EQUAL(orig[2], ms.getChildren()[2].get());
- EXPECT_EQUAL(0u, ms._accumInsert);
- EXPECT_EQUAL(0u, ms._accumRemove);
-
- EXPECT_EQUAL(orig[1], MultiSearchRemoveTest::remove(ms, 1).get());
- EXPECT_EQUAL(2u, ms.getChildren().size());
- EXPECT_EQUAL(orig[0], ms.getChildren()[0].get());
- EXPECT_EQUAL(orig[2], ms.getChildren()[1].get());
- EXPECT_EQUAL(0u, ms._accumInsert);
- EXPECT_EQUAL(1u, ms._accumRemove);
+ EXPECT_EQ(3u, ms.getChildren().size());
+ EXPECT_EQ(orig[0], ms.getChildren()[0].get());
+ EXPECT_EQ(orig[1], ms.getChildren()[1].get());
+ EXPECT_EQ(orig[2], ms.getChildren()[2].get());
+ EXPECT_EQ(0u, ms._accumInsert);
+ EXPECT_EQ(0u, ms._accumRemove);
+
+ EXPECT_EQ(orig[1], MultiSearchRemoveTest::remove(ms, 1).get());
+ EXPECT_EQ(2u, ms.getChildren().size());
+ EXPECT_EQ(orig[0], ms.getChildren()[0].get());
+ EXPECT_EQ(orig[2], ms.getChildren()[1].get());
+ EXPECT_EQ(0u, ms._accumInsert);
+ EXPECT_EQ(1u, ms._accumRemove);
orig.emplace_back(new EmptySearch());
ms.insert(1, SearchIterator::UP(orig.back()));
- EXPECT_EQUAL(3u, ms.getChildren().size());
- EXPECT_EQUAL(orig[0], ms.getChildren()[0].get());
- EXPECT_EQUAL(orig[3], ms.getChildren()[1].get());
- EXPECT_EQUAL(orig[2], ms.getChildren()[2].get());
- EXPECT_EQUAL(1u, ms._accumInsert);
- EXPECT_EQUAL(1u, ms._accumRemove);
+ EXPECT_EQ(3u, ms.getChildren().size());
+ EXPECT_EQ(orig[0], ms.getChildren()[0].get());
+ EXPECT_EQ(orig[3], ms.getChildren()[1].get());
+ EXPECT_EQ(orig[2], ms.getChildren()[2].get());
+ EXPECT_EQ(1u, ms._accumInsert);
+ EXPECT_EQ(1u, ms._accumRemove);
}
class DummySingleValueBitNumericAttributeBlueprint : public SimpleLeafBlueprint
@@ -370,7 +374,8 @@ private:
};
-TEST("testAndNot") {
+TEST(QueryEvalTest, test_andnot)
+{
{
SimpleResult a;
SimpleResult b;
@@ -388,12 +393,12 @@ TEST("testAndNot") {
res.search(*andnot_ab);
SimpleResult expect;
expect.addHit(10);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
SearchIterator::UP filter_ab = andnot_b->createFilterSearch(true, upper_bound);
SimpleResult filter_res;
filter_res.search(*filter_ab);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
std::string dump = filter_ab->asString();
expect_match(dump, "upper");
expect_match(dump, "AndNotSearch.*SimpleSearch.*<strict,upper>.*SimpleSearch.*<nostrict,lower>");
@@ -420,7 +425,7 @@ TEST("testAndNot") {
SimpleResult expect;
expect.addHit(1).addHit(10);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
}
{
SimpleResult a;
@@ -446,13 +451,14 @@ TEST("testAndNot") {
SimpleResult expect;
expect.addHit(1).addHit(10);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
}
{
}
}
-TEST("testRank") {
+TEST(QueryEvalTest, test_rank)
+{
{
SimpleResult a;
SimpleResult b;
@@ -471,7 +477,7 @@ TEST("testRank") {
SimpleResult expect;
expect.addHit(5).addHit(10).addHit(16).addHit(30);
- EXPECT_EQUAL(res, expect);
+ EXPECT_EQ(res, expect);
}
}
@@ -600,7 +606,8 @@ getExpectedSlime() {
"}";
}
-TEST("testDump") {
+TEST(QueryEvalTest, test_dump)
+{
using SBChild = SourceBlenderSearch::Child;
SearchIterator::UP search = AndSearch::create( {
@@ -622,13 +629,13 @@ TEST("testDump") {
auto s = slime.toString();
vespalib::Slime expectedSlime;
vespalib::slime::JsonFormat::decode(getExpectedSlime(), expectedSlime);
- EXPECT_EQUAL(expectedSlime, slime);
+ EXPECT_EQ(expectedSlime, slime);
// fprintf(stderr, "%s", search->asString().c_str());
}
-TEST("testFieldSpec") {
- EXPECT_EQUAL(8u, sizeof(FieldSpecBase));
- EXPECT_EQUAL(72u, sizeof(FieldSpec));
+TEST(QueryEvalTest, test_field_spec) {
+ EXPECT_EQ(8u, sizeof(FieldSpecBase));
+ EXPECT_EQ(72u, sizeof(FieldSpec));
}
@@ -652,9 +659,9 @@ std::vector<size_t> fill_vector(size_t begin, size_t end) {
void verify_unpack(const UnpackInfo &unpack, const std::vector<size_t> &expect) {
std::vector<size_t> actual = vectorize(unpack);
- EXPECT_EQUAL(unpack.empty(), expect.empty());
- EXPECT_EQUAL(unpack.unpackAll(), (expect.size() == unpack_child_cnt));
- EXPECT_EQUAL(expect, actual);
+ EXPECT_EQ(unpack.empty(), expect.empty());
+ EXPECT_EQ(unpack.unpackAll(), (expect.size() == unpack_child_cnt));
+ EXPECT_EQ(expect, actual);
size_t child_idx = 0;
for (size_t next_unpack: expect) {
while (child_idx < next_unpack) {
@@ -664,19 +671,23 @@ void verify_unpack(const UnpackInfo &unpack, const std::vector<size_t> &expect)
}
}
-TEST("require that unpack info has expected memory footprint") {
- EXPECT_EQUAL(32u, sizeof(UnpackInfo));
+TEST(QueryEvalTest, require_that_unpack_info_has_expected_memory_footprint)
+{
+ EXPECT_EQ(32u, sizeof(UnpackInfo));
}
-TEST("require that unpack info starts out empty") {
+TEST(QueryEvalTest, require_that_unpack_info_starts_out_empty)
+{
verify_unpack(UnpackInfo(), {});
}
-TEST("require that unpack info force all unpacks all children") {
+TEST(QueryEvalTest, require_that_unpack_info_force_all_unpacks_all_children)
+{
verify_unpack(UnpackInfo().forceAll(), fill_vector(0, unpack_child_cnt));
}
-TEST("require that adding a large index to unpack info forces unpack all") {
+TEST(QueryEvalTest, require_that_adding_a_large_index_to_unpack_info_forces_unpack_all)
+{
UnpackInfo unpack;
unpack.add(0);
unpack.add(max_unpack_index);
@@ -685,7 +696,8 @@ TEST("require that adding a large index to unpack info forces unpack all") {
verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
}
-TEST("require that adding too many children to unpack info forces unpack all") {
+TEST(QueryEvalTest, require_that_adding_too_many_children_to_unpack_info_forces_unpack_all)
+{
UnpackInfo unpack;
std::vector<size_t> expect;
for (size_t i = 0; i < max_unpack_size; ++i) {
@@ -697,19 +709,22 @@ TEST("require that adding too many children to unpack info forces unpack all") {
verify_unpack(unpack, fill_vector(0, unpack_child_cnt));
}
-TEST("require that adding normal unpack info indexes works") {
+TEST(QueryEvalTest, require_that_adding_normal_unpack_info_indexes_works)
+{
UnpackInfo unpack;
unpack.add(3).add(5).add(7).add(14).add(50);
verify_unpack(unpack, {3,5,7,14,50});
}
-TEST("require that adding unpack info indexes out of order works") {
+TEST(QueryEvalTest, require_that_adding_unpack_info_indexes_out_of_order_works)
+{
UnpackInfo unpack;
unpack.add(5).add(3).add(7).add(50).add(14);
verify_unpack(unpack, {3,5,7,14,50});
}
-TEST("require that basic insert remove of unpack info works") {
+TEST(QueryEvalTest, require_that_basic_insert_remove_of_unpack_info_works)
+{
UnpackInfo unpack;
unpack.insert(1).insert(3);
verify_unpack(unpack, {1, 3});
@@ -729,7 +744,8 @@ TEST("require that basic insert remove of unpack info works") {
verify_unpack(unpack, {});
}
-TEST("require that inserting too many indexs into unpack info forces unpack all") {
+TEST(QueryEvalTest, require_that_inserting_too_many_indexes_into_unpack_info_forces_unpack_all)
+{
for (bool unpack_inserted: {true, false}) {
UnpackInfo unpack;
for (size_t i = 0; i < max_unpack_size; ++i) {
@@ -745,7 +761,8 @@ TEST("require that inserting too many indexs into unpack info forces unpack all"
}
}
-TEST("require that implicitly overflowing indexes during insert in unpack info forces unpack all") {
+TEST(QueryEvalTest, require_that_implicitly_overflowing_indexes_during_insert_in_unpack_info_forces_unpack_all)
+{
for (bool unpack_inserted: {true, false}) {
UnpackInfo unpack;
unpack.insert(max_unpack_index);
@@ -755,7 +772,8 @@ TEST("require that implicitly overflowing indexes during insert in unpack info f
}
}
-TEST("require that inserting a too high index into unpack info forces unpack all") {
+TEST(QueryEvalTest, require_that_inserting_a_too_high_index_into_unpack_info_forces_unpack_all)
+{
for (bool unpack_inserted: {true, false}) {
UnpackInfo unpack;
for (size_t i = 0; i < 10; ++i) {
@@ -771,7 +789,7 @@ TEST("require that inserting a too high index into unpack info forces unpack all
}
}
-TEST("require that we can insert indexes into unpack info that we do not unpack") {
+TEST(QueryEvalTest, require_that_we_can_insert_indexes_into_unpack_info_that_we_do_not_unpack) {
UnpackInfo unpack;
unpack.add(10).add(20).add(30);
verify_unpack(unpack, {10, 20, 30});
@@ -779,65 +797,85 @@ TEST("require that we can insert indexes into unpack info that we do not unpack"
verify_unpack(unpack, {11, 22, 33});
}
-TEST("testTrueSearch") {
- EXPECT_EQUAL(16u, sizeof(EmptySearch));
- EXPECT_EQUAL(24u, sizeof(TrueSearch));
+TEST(QueryEvalTest, test_true_search)
+{
+ EXPECT_EQ(16u, sizeof(EmptySearch));
+ EXPECT_EQ(24u, sizeof(TrueSearch));
TermFieldMatchData tfmd;
TrueSearch t(tfmd);
- EXPECT_EQUAL(0u, t.getDocId());
- EXPECT_EQUAL(0u, t.getEndId());
+ EXPECT_EQ(0u, t.getDocId());
+ EXPECT_EQ(0u, t.getEndId());
t.initRange(7, 10);
- EXPECT_EQUAL(6u, t.getDocId());
- EXPECT_EQUAL(10u, t.getEndId());
+ EXPECT_EQ(6u, t.getDocId());
+ EXPECT_EQ(10u, t.getEndId());
EXPECT_TRUE(t.seek(9));
- EXPECT_EQUAL(9u, t.getDocId());
+ EXPECT_EQ(9u, t.getDocId());
EXPECT_FALSE(t.isAtEnd());
EXPECT_TRUE(t.seek(10));
- EXPECT_EQUAL(10u, t.getDocId());
+ EXPECT_EQ(10u, t.getDocId());
EXPECT_TRUE(t.isAtEnd());
t.initRange(4, 14);
- EXPECT_EQUAL(3u, t.getDocId());
- EXPECT_EQUAL(14u, t.getEndId());
+ EXPECT_EQ(3u, t.getDocId());
+ EXPECT_EQ(14u, t.getEndId());
EXPECT_FALSE(t.isAtEnd());
}
-TEST("test InitRangeVerifier") {
+TEST(QueryEvalTest, test_init_range_verifier)
+{
InitRangeVerifier ir;
- EXPECT_EQUAL(207u, ir.getDocIdLimit());
- EXPECT_EQUAL(41u, ir.getExpectedDocIds().size());
+ EXPECT_EQ(207u, ir.getDocIdLimit());
+ EXPECT_EQ(41u, ir.getExpectedDocIds().size());
auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), 300);
size_t numInverted = 300 - 41 - 1;
- EXPECT_EQUAL(numInverted, inverted.size());
- EXPECT_EQUAL(2u, inverted[0]);
- EXPECT_EQUAL(299u, inverted[numInverted - 1]);
+ EXPECT_EQ(numInverted, inverted.size());
+ EXPECT_EQ(2u, inverted[0]);
+ EXPECT_EQ(299u, inverted[numInverted - 1]);
ir.verify(*ir.createIterator(ir.getExpectedDocIds(), false));
ir.verify(*ir.createIterator(ir.getExpectedDocIds(), true));
}
-TEST("Test multisearch and andsearchstrict iterators adheres to initRange") {
+TEST(QueryEvalTest, test_multisearch_and_andsearchstrict_iterators_adheres_to_init_range)
+{
InitRangeVerifier ir;
- ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false),
- ir.createFullIterator() }, false));
-
- ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), true),
- ir.createFullIterator() }, true));
+ {
+ SCOPED_TRACE("non-strict");
+ ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false),
+ ir.createFullIterator() }, false));
+ }
+ {
+ SCOPED_TRACE("strict");
+ ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), true),
+ ir.createFullIterator() }, true));
+ }
}
-TEST("Test andnotsearchstrict iterators adheres to initRange") {
+TEST(QueryEvalTest, test_andnotsearchstrict_iterators_adheres_to_init_range) {
InitRangeVerifier ir;
-
- TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false),
- ir.createEmptyIterator() }, false)));
- TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true),
- ir.createEmptyIterator() }, true)));
+
+ {
+ SCOPED_TRACE("non-strict");
+ ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false),
+ ir.createEmptyIterator() }, false));
+ }
+ {
+ SCOPED_TRACE("strict");
+ ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true),
+ ir.createEmptyIterator() }, true));
+ }
auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), ir.getDocIdLimit());
- TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator(),
- ir.createIterator(inverted, false) }, false)));
- TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator(),
- ir.createIterator(inverted, false) }, true)));
+ {
+ SCOPED_TRACE("non-strict full");
+ ir.verify( AndNotSearch::create({ir.createFullIterator(),
+ ir.createIterator(inverted, false) }, false));
+ }
+ {
+ SCOPED_TRACE("strict full");
+ ir.verify( AndNotSearch::create({ir.createFullIterator(),
+ ir.createIterator(inverted, false) }, true));
+ }
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
index d05e6c8e4f4..7c535e5d3d5 100644
--- a/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
+++ b/searchlib/src/tests/queryeval/same_element/same_element_test.cpp
@@ -46,7 +46,7 @@ std::unique_ptr<SameElementBlueprint> make_blueprint(const std::vector<FakeResul
}
Blueprint::UP finalize(Blueprint::UP bp, bool strict) {
- Blueprint::UP result = Blueprint::optimize(std::move(bp));
+ Blueprint::UP result = Blueprint::optimize(std::move(bp), true);
result->fetchPostings(ExecuteInfo::createForTest(strict));
result->freeze();
return result;
diff --git a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
index 3ca35221c50..3a10ed6df53 100644
--- a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
+++ b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp
@@ -1,5 +1,4 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/searchlib/queryeval/searchiterator.h>
#include <vespa/searchlib/queryeval/andnotsearch.h>
@@ -8,11 +7,12 @@
#include <vespa/searchlib/queryeval/termwise_search.h>
#include <vespa/searchlib/queryeval/intermediate_blueprints.h>
#include <vespa/searchlib/queryeval/termwise_blueprint_helper.h>
-#include <vespa/vespalib/test/insertion_operators.h>
+#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/searchiteratorverifier.h>
#include <vespa/searchlib/common/bitvectoriterator.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/vespalib/objects/visit.hpp>
+#include <vespa/vespalib/gtest/gtest.h>
using namespace vespalib;
using namespace search;
@@ -192,7 +192,10 @@ std::vector<uint32_t> make_expect(uint32_t begin, uint32_t end) {
return expect;
}
-void verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end) {
+void
+verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end, const vespalib::string& label)
+{
+ SCOPED_TRACE(label);
std::vector<uint32_t> actual;
search.initRange(begin, end);
for (uint32_t docid = begin; docid < end; ++docid) {
@@ -200,7 +203,7 @@ void verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_
actual.push_back(docid);
}
}
- EXPECT_EQUAL(expect, actual);
+ EXPECT_EQ(expect, actual);
}
//-----------------------------------------------------------------------------
@@ -213,95 +216,107 @@ MatchData::UP make_match_data() {
//-----------------------------------------------------------------------------
-TEST("require that pseudo term produces correct results") {
- TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6));
- TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6));
- TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6));
- TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6));
- TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4));
- TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4));
+TEST(TermwiseEvalTest, require_that_pseudo_term_produces_correct_results)
+{
+ verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6, "strict full");
+ verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6, "non-strict full");
+ verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6, "strict last");
+ verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6, "non-strict last");
+ verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4, "strict first");
+ verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4, "non-strict first");
}
-TEST("require that normal search gives expected results") {
+TEST(TermwiseEvalTest, require_that_normal_search_gives_expected_results)
+{
auto search = make_search(true);
- TEST_DO(verify(make_expect(1, 10), *search, 1, 10));
+ verify(make_expect(1, 10), *search, 1, 10, "strict normal");
}
-TEST("require that filter search gives expected results") {
+TEST(TermwiseEvalTest, require_that_filter_search_gives_expected_results)
+{
auto search = make_filter_search(true);
- TEST_DO(verify(make_expect(1, 10), *search, 1, 10));
+ verify(make_expect(1, 10), *search, 1, 10, "strict filter");
}
-TEST("require that termwise AND/OR search produces appropriate results") {
+TEST(TermwiseEvalTest, require_that_termwise_and_or_or_search_produces_appropriate_results)
+{
for (uint32_t begin: {1, 2, 5}) {
for (uint32_t end: {6, 7, 10}) {
for (bool strict_search: {true, false}) {
for (bool strict_wrapper: {true, false}) {
- TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
- begin, end, strict_search ? "true" : "false",
- strict_wrapper ? "true" : "false").c_str());
+ auto label = make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
+ begin, end, strict_search ? "true" : "false",
+ strict_wrapper ? "true" : "false");
auto search = make_termwise(make_search(strict_search), strict_wrapper);
- TEST_DO(verify(make_expect(begin, end), *search, begin, end));
+ verify(make_expect(begin, end), *search, begin, end, label);
}
}
}
}
}
-TEST("require that termwise filter search produces appropriate results") {
+TEST(TermwiseEvalTest, require_that_termwise_filter_search_produces_appropriate_results)
+{
for (uint32_t begin: {1, 2, 5}) {
for (uint32_t end: {6, 7, 10}) {
for (bool strict_search: {true, false}) {
for (bool strict_wrapper: {true, false}) {
- TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
- begin, end, strict_search ? "true" : "false",
- strict_wrapper ? "true" : "false").c_str());
+ auto label = make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s",
+ begin, end, strict_search ? "true" : "false",
+ strict_wrapper ? "true" : "false");
auto search = make_termwise(make_filter_search(strict_search), strict_wrapper);
- TEST_DO(verify(make_expect(begin, end), *search, begin, end));
+ verify(make_expect(begin, end), *search, begin, end, label);
}
}
}
}
}
-TEST("require that termwise ANDNOT with single term works") {
- TEST_DO(verify({2,3,4}, *make_termwise(ANDNOT({ TERM({1,2,3,4,5}, true) }, true), true), 2, 5));
+TEST(TermwiseEvalTest, require_that_termwise_andnot_with_single_term_works)
+{
+ verify({2,3,4}, *make_termwise(ANDNOT({ TERM({1,2,3,4,5}, true) }, true), true), 2, 5, "termwise andnot");
}
-TEST("require that pseudo term is rewindable") {
+TEST(TermwiseEvalTest, require_that_pseudo_term_is_rewindable)
+{
auto search = UP(TERM({1,2,3,4,5}, true));
- TEST_DO(verify({3,4,5}, *search, 3, 6));
- TEST_DO(verify({1,2,3,4}, *search, 1, 5));
+ verify({3,4,5}, *search, 3, 6, "pseudo term end");
+ verify({1,2,3,4}, *search, 1, 5, "pseudo term rewound to start");
}
-TEST("require that termwise wrapper is rewindable") {
+TEST(TermwiseEvalTest, require_that_termwise_wrapper_is_rewindable)
+{
auto search = make_termwise(make_search(true), true);
- TEST_DO(verify(make_expect(3, 7), *search, 3, 7));
- TEST_DO(verify(make_expect(1, 5), *search, 1, 5));
+ verify(make_expect(3, 7), *search, 3, 7, "termwise wrapper end");
+ verify(make_expect(1, 5), *search, 1, 5, "termwise wrapper rewound to start");
}
//-----------------------------------------------------------------------------
-TEST("require that leaf blueprints allow termwise evaluation by default") {
+TEST(TermwiseEvalTest, require_that_leaf_blueprints_allow_termwise_evaluation_by_default)
+{
MyBlueprint bp({});
EXPECT_TRUE(bp.getState().allow_termwise_eval());
}
-TEST("require that leaf blueprints can enable/disable termwise evaluation") {
+TEST(TermwiseEvalTest, require_that_leaf_blueprints_can_enable_and_disable_termwise_evaluation)
+{
MyBlueprint enable({}, true);
MyBlueprint disable({}, false);
EXPECT_TRUE(enable.getState().allow_termwise_eval());
EXPECT_FALSE(disable.getState().allow_termwise_eval());
}
-TEST("require that intermediate blueprints disallow termwise evaluation by default") {
+TEST(TermwiseEvalTest, require_that_intermediate_blueprints_disallow_termwise_evaluation_by_default)
+{
MyOr bp(false);
bp.addChild(UP(new MyBlueprint({}, true)));
bp.addChild(UP(new MyBlueprint({}, true)));
EXPECT_FALSE(bp.getState().allow_termwise_eval());
}
-TEST("require that intermediate blueprints can enable/disable termwise evaluation") {
+TEST(TermwiseEvalTest, require_that_intermediate_blueprints_can_enable_and_disable_termwise_evaluation)
+{
MyOr enable(true, true);
enable.addChild(UP(new MyBlueprint({}, true)));
enable.addChild(UP(new MyBlueprint({}, true)));
@@ -312,7 +327,8 @@ TEST("require that intermediate blueprints can enable/disable termwise evaluatio
EXPECT_FALSE(disable.getState().allow_termwise_eval());
}
-TEST("require that intermediate blueprints cannot be termwise unless all its children are termwise") {
+TEST(TermwiseEvalTest, require_that_intermediate_blueprints_cannot_be_termwise_unless_all_its_children_are_termwise)
+{
MyOr bp(true, true);
bp.addChild(UP(new MyBlueprint({}, true)));
bp.addChild(UP(new MyBlueprint({}, false)));
@@ -321,27 +337,30 @@ TEST("require that intermediate blueprints cannot be termwise unless all its chi
//-----------------------------------------------------------------------------
-TEST("require that leafs have tree size 1") {
+TEST(TermwiseEvalTest, require_that_leafs_have_tree_size_1)
+{
MyBlueprint bp({});
- EXPECT_EQUAL(1u, bp.getState().tree_size());
+ EXPECT_EQ(1u, bp.getState().tree_size());
}
-TEST("require that tree size is accumulated correctly by intermediate nodes") {
+TEST(TermwiseEvalTest, require_that_tree_size_is_accumulated_correctly_by_intermediate_nodes)
+{
MyOr bp(false);
- EXPECT_EQUAL(1u, bp.getState().tree_size());
+ EXPECT_EQ(1u, bp.getState().tree_size());
bp.addChild(UP(new MyBlueprint({})));
bp.addChild(UP(new MyBlueprint({})));
- EXPECT_EQUAL(3u, bp.getState().tree_size());
+ EXPECT_EQ(3u, bp.getState().tree_size());
auto child = UP(new MyOr(false));
child->addChild(UP(new MyBlueprint({})));
child->addChild(UP(new MyBlueprint({})));
bp.addChild(std::move(child));
- EXPECT_EQUAL(6u, bp.getState().tree_size());
+ EXPECT_EQ(6u, bp.getState().tree_size());
}
//-----------------------------------------------------------------------------
-TEST("require that any blueprint node can obtain the root") {
+TEST(TermwiseEvalTest, require_that_any_blueprint_node_can_obtain_the_root)
+{
MyOr bp(false);
bp.addChild(UP(new MyBlueprint({1,2,3})));
bp.addChild(UP(new MyBlueprint({1,2,3,4,5,6})));
@@ -354,35 +373,38 @@ TEST("require that any blueprint node can obtain the root") {
//-----------------------------------------------------------------------------
-TEST("require that match data keeps track of the termwise limit") {
+TEST(TermwiseEvalTest, require_that_match_data_keeps_track_of_the_termwise_limit)
+{
auto md = make_match_data();
- EXPECT_EQUAL(1.0, md->get_termwise_limit());
+ EXPECT_EQ(1.0, md->get_termwise_limit());
md->set_termwise_limit(0.03);
- EXPECT_EQUAL(0.03, md->get_termwise_limit());
+ EXPECT_EQ(0.03, md->get_termwise_limit());
}
//-----------------------------------------------------------------------------
-TEST("require that terwise test search string dump is detailed enough") {
- EXPECT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
- make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString());
+TEST(TermwiseEvalTest, require_that_terwise_test_search_string_dump_is_detailed_enough)
+{
+ EXPECT_EQ(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
+ make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString());
- EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
- make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true) }, true), true)->asString());
+ EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
+ make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true) }, true), true)->asString());
- EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
- make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, false), true)->asString());
+ EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
+ make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, false), true)->asString());
- EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
- make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), false)->asString());
+ EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
+ make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), false)->asString());
- EXPECT_NOT_EQUAL(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
- make_termwise(OR({ TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true) }, true), true)->asString());
+ EXPECT_NE(make_termwise(OR({ TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true) }, true), true)->asString(),
+ make_termwise(OR({ TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true) }, true), true)->asString());
}
//-----------------------------------------------------------------------------
-TEST("require that basic termwise evaluation works") {
+TEST(TermwiseEvalTest, require_that_basic_termwise_evaluation_works)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -391,12 +413,13 @@ TEST("require that basic termwise evaluation works") {
my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
- make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString());
+ EXPECT_EQ(my_or.createSearch(*md, strict)->asString(),
+ make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString());
}
}
-TEST("require that the hit rate must be high enough for termwise evaluation to be activated") {
+TEST(TermwiseEvalTest, require_that_the_hit_rate_must_be_high_enough_for_termwise_evaluation_to_be_activated)
+{
auto md = make_match_data();
md->set_termwise_limit(1.0); // <-
md->resolveTermField(1)->tagAsNotNeeded();
@@ -409,7 +432,8 @@ TEST("require that the hit rate must be high enough for termwise evaluation to b
}
}
-TEST("require that enough unranked termwise terms are present for termwise evaluation to be activated") {
+TEST(TermwiseEvalTest, require_that_enough_unranked_termwise_terms_are_present_for_termwise_evaluation_to_be_activated)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -423,29 +447,31 @@ TEST("require that enough unranked termwise terms are present for termwise evalu
}
}
-TEST("require that termwise evaluation can be multi-level, but not duplicated") {
+TEST(TermwiseEvalTest, require_that_termwise_evaluation_can_be_multi_level_but_not_duplicated)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
md->resolveTermField(2)->tagAsNotNeeded();
md->resolveTermField(3)->tagAsNotNeeded();
OrBlueprint my_or;
- my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
+ my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
auto child = UP(new OrBlueprint());
child->addChild(UP(new MyBlueprint({2}, true, 2)));
child->addChild(UP(new MyBlueprint({3}, true, 3)));
my_or.addChild(std::move(child));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
- make_termwise(OR({ TERM({1}, strict),
- ORz({ TERM({2}, strict), TERM({3}, strict) }, strict) },
- strict), strict)->asString());
+ EXPECT_EQ(my_or.createSearch(*md, strict)->asString(),
+ make_termwise(OR({ TERM({1}, strict),
+ ORz({ TERM({2}, strict), TERM({3}, strict) }, strict) },
+ strict), strict)->asString());
}
}
//-----------------------------------------------------------------------------
-TEST("require that OR can be completely termwise") {
+TEST(TermwiseEvalTest, require_that_or_can_be_completely_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -454,12 +480,13 @@ TEST("require that OR can be completely termwise") {
my_or.addChild(UP(new MyBlueprint({1}, true, 1)));
my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
- make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString());
+ EXPECT_EQ(my_or.createSearch(*md, strict)->asString(),
+ make_termwise(OR({ TERM({1}, strict), TERM({2}, strict) }, strict), strict)->asString());
}
}
-TEST("require that OR can be partially termwise") {
+TEST(TermwiseEvalTest, require_that_or_can_be_partially_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -469,13 +496,14 @@ TEST("require that OR can be partially termwise") {
my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
- ORs({ make_termwise(OR({ TERM({1}, strict), TERM({3}, strict) }, strict), strict),
+ EXPECT_EQ(my_or.createSearch(*md, strict)->asString(),
+ ORs({ make_termwise(OR({ TERM({1}, strict), TERM({3}, strict) }, strict), strict),
TERM({2}, strict) }, strict)->asString());
}
}
-TEST("require that OR puts termwise subquery at the right place") {
+TEST(TermwiseEvalTest, require_that_or_puts_termwise_subquery_at_the_right_place)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(2)->tagAsNotNeeded();
@@ -485,14 +513,15 @@ TEST("require that OR puts termwise subquery at the right place") {
my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
- ORs({ TERM({1}, strict),
- make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict),
- strict) }, strict)->asString());
+ EXPECT_EQ(my_or.createSearch(*md, strict)->asString(),
+ ORs({ TERM({1}, strict),
+ make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict),
+ strict) }, strict)->asString());
}
}
-TEST("require that OR can use termwise eval also when having non-termwise children") {
+TEST(TermwiseEvalTest, require_that_or_can_use_termwise_eval_also_when_having_non_termwise_children)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -503,17 +532,18 @@ TEST("require that OR can use termwise eval also when having non-termwise childr
my_or.addChild(UP(new MyBlueprint({2}, true, 2)));
my_or.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(),
- ORz({ TERM({1}, strict),
- make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict),
- strict)},
- strict)->asString());
+ EXPECT_EQ(my_or.createSearch(*md, strict)->asString(),
+ ORz({ TERM({1}, strict),
+ make_termwise(OR({ TERM({2}, strict), TERM({3}, strict) }, strict),
+ strict)},
+ strict)->asString());
}
}
//-----------------------------------------------------------------------------
-TEST("require that AND can be completely termwise") {
+TEST(TermwiseEvalTest, require_that_and_can_be_completely_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -522,12 +552,13 @@ TEST("require that AND can be completely termwise") {
my_and.addChild(UP(new MyBlueprint({1}, true, 1)));
my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
- make_termwise(AND({ TERM({1}, strict), TERM({2}, false) }, strict), strict)->asString());
+ EXPECT_EQ(my_and.createSearch(*md, strict)->asString(),
+ make_termwise(AND({ TERM({1}, strict), TERM({2}, false) }, strict), strict)->asString());
}
}
-TEST("require that AND can be partially termwise") {
+TEST(TermwiseEvalTest, require_that_and_can_be_partially_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -537,7 +568,7 @@ TEST("require that AND can be partially termwise") {
my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ EXPECT_EQ(my_and.createSearch(*md, strict)->asString(),
ANDs({ make_termwise(AND({ TERM({1}, strict), TERM({3}, false) },
strict),
strict),
@@ -545,7 +576,8 @@ TEST("require that AND can be partially termwise") {
}
}
-TEST("require that AND puts termwise subquery at the right place") {
+TEST(TermwiseEvalTest, require_that_and_puts_termwise_subquery_at_the_right_place)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(2)->tagAsNotNeeded();
@@ -555,14 +587,15 @@ TEST("require that AND puts termwise subquery at the right place") {
my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ EXPECT_EQ(my_and.createSearch(*md, strict)->asString(),
ANDs({ TERM({1}, strict),
make_termwise(AND({ TERM({2}, false), TERM({3}, false) }, false),
false) }, strict)->asString());
}
}
-TEST("require that AND can use termwise eval also when having non-termwise children") {
+TEST(TermwiseEvalTest, require_that_and_can_use_termwise_eval_also_when_having_non_termwise_children)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -573,7 +606,7 @@ TEST("require that AND can use termwise eval also when having non-termwise child
my_and.addChild(UP(new MyBlueprint({2}, true, 2)));
my_and.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(),
+ EXPECT_EQ(my_and.createSearch(*md, strict)->asString(),
ANDz({ TERM({1}, strict),
make_termwise(AND({ TERM({2}, false), TERM({3}, false) }, false),
false) }, strict)->asString());
@@ -582,7 +615,8 @@ TEST("require that AND can use termwise eval also when having non-termwise child
//-----------------------------------------------------------------------------
-TEST("require that ANDNOT can be completely termwise") {
+TEST(TermwiseEvalTest, require_that_andnot_can_be_completely_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -590,13 +624,14 @@ TEST("require that ANDNOT can be completely termwise") {
my_andnot.addChild(UP(new MyBlueprint({1}, true, 1)));
my_andnot.addChild(UP(new MyBlueprint({2}, true, 2)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+ EXPECT_EQ(my_andnot.createSearch(*md, strict)->asString(),
make_termwise(ANDNOT({ TERM({1}, strict), TERM({2}, false) },
strict), strict)->asString());
}
}
-TEST("require that ANDNOT can be partially termwise") {
+TEST(TermwiseEvalTest, require_that_andnot_can_be_partially_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
AndNotBlueprint my_andnot;
@@ -604,14 +639,15 @@ TEST("require that ANDNOT can be partially termwise") {
my_andnot.addChild(UP(new MyBlueprint({2}, true, 2)));
my_andnot.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+ EXPECT_EQ(my_andnot.createSearch(*md, strict)->asString(),
ANDNOT({ TERM({1}, strict),
make_termwise(OR({ TERM({2}, false), TERM({3}, false) }, false),
false) }, strict)->asString());
}
}
-TEST("require that ANDNOT can be partially termwise with first child being termwise") {
+TEST(TermwiseEvalTest, require_that_andnot_can_be_partially_termwise_with_first_child_being_termwise)
+{
auto md = make_match_data();
md->set_termwise_limit(0.0);
md->resolveTermField(1)->tagAsNotNeeded();
@@ -620,7 +656,7 @@ TEST("require that ANDNOT can be partially termwise with first child being termw
my_andnot.addChild(UP(new MyBlueprint({2}, false, 2)));
my_andnot.addChild(UP(new MyBlueprint({3}, true, 3)));
for (bool strict: {true, false}) {
- EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(),
+ EXPECT_EQ(my_andnot.createSearch(*md, strict)->asString(),
ANDNOT({ make_termwise(ANDNOT({ TERM({1}, strict), TERM({3}, false) }, strict),
strict),
TERM({2}, false) }, strict)->asString());
@@ -629,7 +665,8 @@ TEST("require that ANDNOT can be partially termwise with first child being termw
//-----------------------------------------------------------------------------
-TEST("require that termwise blueprint helper calculates unpack info correctly") {
+TEST(TermwiseEvalTest, require_that_termwise_blueprint_helper_calculates_unpack_info_correctly)
+{
OrBlueprint my_or;
my_or.addChild(UP(new MyBlueprint({1}, false, 1))); // termwise not allowed
my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // termwise not allowed and ranked
@@ -641,9 +678,9 @@ TEST("require that termwise blueprint helper calculates unpack info correctly")
unpack.add(1);
unpack.add(3);
TermwiseBlueprintHelper helper(my_or, std::move(dummy_searches), unpack);
- EXPECT_EQUAL(helper.get_result().size(), 3u);
- EXPECT_EQUAL(helper.get_termwise_children().size(), 2u);
- EXPECT_EQUAL(helper.first_termwise, 2u);
+ EXPECT_EQ(helper.get_result().size(), 3u);
+ EXPECT_EQ(helper.get_termwise_children().size(), 2u);
+ EXPECT_EQ(helper.first_termwise, 2u);
EXPECT_TRUE(!helper.termwise_unpack.needUnpack(0));
EXPECT_TRUE(helper.termwise_unpack.needUnpack(1));
EXPECT_TRUE(!helper.termwise_unpack.needUnpack(2));
@@ -658,11 +695,12 @@ public:
return make_termwise(createIterator(getExpectedDocIds(), strict), strict);
}
};
-TEST("test terwise adheres to search iterator requirements.") {
+TEST(TermwiseEvalTest, test_termwise_adheres_to_search_iterator_requirements)
+{
Verifier verifier;
verifier.verify();
}
//-----------------------------------------------------------------------------
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
index 1054980e4ec..9409b2b26c4 100644
--- a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
@@ -1,5 +1,4 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchlib/queryeval/fake_search.h>
#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
#include <vespa/searchlib/queryeval/simpleresult.h>
@@ -7,7 +6,9 @@
#include <vespa/searchlib/queryeval/test/eagerchild.h>
#include <vespa/searchlib/queryeval/test/leafspec.h>
#include <vespa/searchlib/queryeval/test/wandspec.h>
+#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/weightedchildrenverifiers.h>
+#include <vespa/vespalib/gtest/gtest.h>
using namespace search::fef;
using namespace search::queryeval;
@@ -60,32 +61,39 @@ struct WeightOrder {
} // namespace <unnamed>
-TEST_F("require that wand prunes bad hits after enough good ones are obtained", SimpleWandFixture) {
- EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits);
+TEST(WeakAndTest, require_that_wand_prunes_bad_hits_after_enough_good_ones_are_obtained)
+{
+ SimpleWandFixture f;
+ EXPECT_EQ(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits);
}
-TEST_F("require that wand uses subsearches as expected", SimpleWandFixture) {
- EXPECT_EQUAL(History()
- .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1)
- .unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1)
- .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2)
- .unpack("WAND", 2).unpack("foo", 2)
- .seek("WAND", 3).step("WAND", 3)
- .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3)
- .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5)
- .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5)
- .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId),
- f.spec.getHistory());
+TEST(WeakAndTest, require_that_wand_uses_subsearches_as_expected)
+{
+ SimpleWandFixture f;
+ EXPECT_EQ(History()
+ .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1)
+ .unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1)
+ .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2)
+ .unpack("WAND", 2).unpack("foo", 2)
+ .seek("WAND", 3).step("WAND", 3)
+ .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3)
+ .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5)
+ .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5)
+ .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId),
+ f.spec.getHistory());
}
-TEST_F("require that documents are considered in the right order", AdvancedWandFixture) {
- EXPECT_EQUAL(SimpleResult()
- .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
- .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
- .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits);
+TEST(WeakAndTest, require_that_documents_are_considered_in_the_right_order)
+{
+ AdvancedWandFixture f;
+ EXPECT_EQ(SimpleResult()
+ .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5)
+ .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15)
+ .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits);
}
-TEST("require that initial docid for subsearches are taken into account") {
+TEST(WeakAndTest, require_that_initial_docid_for_subsearches_are_taken_into_account)
+{
History history;
wand::Terms terms;
terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1));
@@ -93,10 +101,10 @@ TEST("require that initial docid for subsearches are taken into account") {
SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true)));
SimpleResult hits;
hits.search(*search);
- EXPECT_EQUAL(SimpleResult().addHit(10), hits);
- EXPECT_EQUAL(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10)
- .seek("WAND", 11).seek("bar", 11).step("bar", search::endDocId).step("WAND", search::endDocId),
- history);
+ EXPECT_EQ(SimpleResult().addHit(10), hits);
+ EXPECT_EQ(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10)
+ .seek("WAND", 11).seek("bar", 11).step("bar", search::endDocId).step("WAND", search::endDocId),
+ history);
}
class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier {
@@ -111,9 +119,10 @@ private:
}
};
-TEST("verify search iterator conformance") {
+TEST(WeakAndTest, verify_search_iterator_conformance)
+{
IteratorChildrenVerifier verifier;
verifier.verify();
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
index 5faead1175e..24d62f66714 100644
--- a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
+++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp
@@ -1,6 +1,5 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
#include <vespa/searchlib/query/tree/simplequery.h>
@@ -11,7 +10,9 @@
#include <vespa/searchlib/queryeval/emptysearch.h>
#include <vespa/searchlib/queryeval/fake_searchable.h>
#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#define ENABLE_GTEST_MIGRATION
#include <vespa/searchlib/test/weightedchildrenverifiers.h>
+#include <vespa/vespalib/gtest/gtest.h>
using namespace search;
using namespace search::query;
@@ -112,8 +113,9 @@ WS::WS()
term_is_not_needed(false)
{
MatchData::UP tmp = layout.createMatchData();
- ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId);
+ assert(tmp->resolveTermField(handle)->getFieldId() == fieldId);
}
+
WS::~WS() = default;
struct MockSearch : public SearchIterator {
@@ -143,8 +145,11 @@ struct MockFixture {
weights.push_back(1);
search = WeightedSetTermSearch::create(children, tfmd, false, weights, {});
}
+ ~MockFixture();
};
+MockFixture::~MockFixture() = default;
+
} // namespace <unnamed>
void run_simple(bool field_is_filter, bool term_is_not_needed, bool singleTerm)
@@ -179,42 +184,50 @@ void run_simple(bool field_is_filter, bool term_is_not_needed, bool singleTerm)
EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true));
EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false));
- EXPECT_EQUAL(expect, ws.search(index, "field", true));
- EXPECT_EQUAL(expect, ws.search(index, "field", false));
- EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
- EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+ EXPECT_EQ(expect, ws.search(index, "field", true));
+ EXPECT_EQ(expect, ws.search(index, "field", false));
+ EXPECT_EQ(expect, ws.search(index, "multi-field", true));
+ EXPECT_EQ(expect, ws.search(index, "multi-field", false));
}
-TEST("testSimple") {
- TEST_DO(run_simple(false, false, false));
+TEST(WeightedSetTermTest, test_simple)
+{
+ run_simple(false, false, false);
}
-TEST("testSimple filter field") {
- TEST_DO(run_simple(true, false, false));
+TEST(WeightedSetTermTest, test_simple_filter_field)
+{
+ run_simple(true, false, false);
}
-TEST("testSimple unranked") {
- TEST_DO(run_simple(false, true, false));
+TEST(WeightedSetTermTest, test_simple_unranked)
+{
+ run_simple(false, true, false);
}
-TEST("testSimple unranked filter filed") {
- TEST_DO(run_simple(true, true, false));
+TEST(WeightedSetTermTest, test_simple_unranked_filter_field)
+{
+ run_simple(true, true, false);
}
-TEST("testSimple single") {
- TEST_DO(run_simple(false, false, true));
+TEST(WeightedSetTermTest, test_simple_single)
+{
+ run_simple(false, false, true);
}
-TEST("testSimple single filter field") {
- TEST_DO(run_simple(true, false, true));
+TEST(WeightedSetTermTest, test_simple_single_filter_field)
+{
+ run_simple(true, false, true);
}
-TEST("testSimple single unranked") {
- TEST_DO(run_simple(false, true, true));
+TEST(WeightedSetTermTest, test_simple_single_unranked)
+{
+ run_simple(false, true, true);
}
-TEST("testSimple single unranked filter field") {
- TEST_DO(run_simple(true, true, true));
+TEST(WeightedSetTermTest, test_simple_single_unranked_filter_field)
+{
+ run_simple(true, true, true);
}
void run_multi(bool field_is_filter, bool term_is_not_needed)
@@ -240,46 +253,53 @@ void run_multi(bool field_is_filter, bool term_is_not_needed)
EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true));
EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false));
- EXPECT_EQUAL(expect, ws.search(index, "multi-field", true));
- EXPECT_EQUAL(expect, ws.search(index, "multi-field", false));
+ EXPECT_EQ(expect, ws.search(index, "multi-field", true));
+ EXPECT_EQ(expect, ws.search(index, "multi-field", false));
}
-TEST("testMulti") {
- TEST_DO(run_multi(false, false));
+TEST(WeightedSetTermTest, test_multi)
+{
+ run_multi(false, false);
}
-TEST("testMulti filter field") {
- TEST_DO(run_multi(true, false));
+TEST(WeightedSetTermTest, test_multi_filter_field)
+{
+ run_multi(true, false);
}
-TEST("testMulti unranked") {
- TEST_DO(run_multi(false, true));
+TEST(WeightedSetTermTest, test_multi_unranked)
+{
+ run_multi(false, true);
}
-TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) {
+TEST(WeightedSetTermTest, test_eager_empty_child)
+{
+ MockFixture f1(search::endDocId);
MockSearch *mock = f1.mock;
SearchIterator &search = *f1.search;
search.initFullRange();
- EXPECT_EQUAL(search.beginId(), search.getDocId());
+ EXPECT_EQ(search.beginId(), search.getDocId());
EXPECT_TRUE(!search.seek(1));
EXPECT_TRUE(search.isAtEnd());
- EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_EQ(0, mock->seekCnt);
}
-TEST_F("test Eager Matching Child", MockFixture(5)) {
+TEST(WeightedSetTermTest, test_eager_matching_child)
+{
+ MockFixture f1(5);
MockSearch *mock = f1.mock;
SearchIterator &search = *f1.search;
search.initFullRange();
- EXPECT_EQUAL(search.beginId(), search.getDocId());
+ EXPECT_EQ(search.beginId(), search.getDocId());
EXPECT_TRUE(!search.seek(3));
- EXPECT_EQUAL(5u, search.getDocId());
- EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_EQ(5u, search.getDocId());
+ EXPECT_EQ(0, mock->seekCnt);
EXPECT_TRUE(search.seek(5));
- EXPECT_EQUAL(5u, search.getDocId());
- EXPECT_EQUAL(0, mock->seekCnt);
+ EXPECT_EQ(5u, search.getDocId());
+ EXPECT_EQ(0, mock->seekCnt);
EXPECT_TRUE(!search.seek(7));
EXPECT_TRUE(search.isAtEnd());
- EXPECT_EQUAL(1, mock->seekCnt);
+ EXPECT_EQ(1, mock->seekCnt);
}
class IteratorChildrenVerifier : public search::test::IteratorChildrenVerifier {
@@ -296,12 +316,14 @@ private:
}
};
-TEST("verify search iterator conformance with search iterator children") {
+TEST(WeightedSetTermTest, verify_search_iterator_conformance_with_search_iterator_children)
+{
IteratorChildrenVerifier verifier;
verifier.verify();
}
-TEST("verify search iterator conformance with document weight iterator children") {
+TEST(WeightedSetTermTest, verify_search_iterator_conformance_with_document_weight_iterator_children)
+{
WeightIteratorChildrenVerifier verifier;
verifier.verify();
}
@@ -312,12 +334,12 @@ struct VerifyMatchData {
MyBlueprint(VerifyMatchData &vmd_in, FieldSpecBase spec_in)
: SimpleLeafBlueprint(spec_in), vmd(vmd_in) {}
[[nodiscard]] SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &tfmda, bool) const override {
- EXPECT_EQUAL(tfmda.size(), 1u);
+ EXPECT_EQ(tfmda.size(), 1u);
EXPECT_TRUE(tfmda[0] != nullptr);
if (vmd.child_tfmd == nullptr) {
vmd.child_tfmd = tfmda[0];
} else {
- EXPECT_EQUAL(vmd.child_tfmd, tfmda[0]);
+ EXPECT_EQ(vmd.child_tfmd, tfmda[0]);
}
++vmd.child_cnt;
return std::make_unique<EmptySearch>();
@@ -333,7 +355,8 @@ struct VerifyMatchData {
}
};
-TEST("require that children get a common (yet separate) term field match data") {
+TEST(WeightedSetTermTest, require_that_children_get_a_common_yet_separate_term_field_match_data)
+{
VerifyMatchData vmd;
MatchDataLayout layout;
auto top_handle = layout.allocTermField(42);
@@ -347,9 +370,9 @@ TEST("require that children get a common (yet separate) term field match data")
auto match_data = layout.createMatchData();
auto search = blueprint.createSearch(*match_data, true);
auto top_tfmd = match_data->resolveTermField(top_handle);
- EXPECT_EQUAL(vmd.child_cnt, 5u);
+ EXPECT_EQ(vmd.child_cnt, 5u);
EXPECT_TRUE(vmd.child_tfmd != nullptr);
- EXPECT_NOT_EQUAL(top_tfmd, vmd.child_tfmd);
+ EXPECT_NE(top_tfmd, vmd.child_tfmd);
}
-TEST_MAIN() { TEST_RUN_ALL(); }
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/sortspec/multilevelsort_test.cpp b/searchlib/src/tests/sortspec/multilevelsort_test.cpp
index 2d0456e13fc..f3bf363645e 100644
--- a/searchlib/src/tests/sortspec/multilevelsort_test.cpp
+++ b/searchlib/src/tests/sortspec/multilevelsort_test.cpp
@@ -8,7 +8,6 @@
#include <vespa/searchlib/attribute/attributemanager.h>
#include <vespa/searchlib/uca/ucaconverter.h>
#include <vespa/searchcommon/attribute/config.h>
-#include <vespa/vespalib/util/testclock.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <type_traits>
#include <cinttypes>
@@ -242,10 +241,8 @@ MultilevelSortTest::sortAndCheck(const std::vector<Spec> &specs, uint32_t num,
hits.emplace_back(i, getRandomValue<uint32_t>());
}
- vespalib::TestClock clock;
- vespalib::Doom doom(clock.clock(), vespalib::steady_time::max());
search::uca::UcaConverterFactory ucaFactory;
- FastS_SortSpec sorter("no-metastore", 7, doom, ucaFactory);
+ FastS_SortSpec sorter("no-metastore", 7, vespalib::Doom::never(), ucaFactory);
// init sorter with sort data
for (const auto & spec : specs) {
AttributeGuard ag;
@@ -384,10 +381,8 @@ TEST("require that all sort methods behave the same")
}
TEST("test that [docid] translates to [lid][paritionid]") {
- vespalib::TestClock clock;
- vespalib::Doom doom(clock.clock(), vespalib::steady_time::max());
search::uca::UcaConverterFactory ucaFactory;
- FastS_SortSpec asc("no-metastore", 7, doom, ucaFactory);
+ FastS_SortSpec asc("no-metastore", 7, vespalib::Doom::never(), ucaFactory);
RankedHit hits[2] = {RankedHit(91, 0.0), RankedHit(3, 2.0)};
search::AttributeManager mgr;
search::AttributeContext ac(mgr);
@@ -404,7 +399,7 @@ TEST("test that [docid] translates to [lid][paritionid]") {
EXPECT_EQUAL(6u, sr2.second);
EXPECT_EQUAL(0, memcmp(SECOND_ASC, sr2.first, 6));
- FastS_SortSpec desc("no-metastore", 7, doom, ucaFactory);
+ FastS_SortSpec desc("no-metastore", 7, vespalib::Doom::never(), ucaFactory);
desc.Init("-[docid]", ac);
desc.initWithoutSorting(hits, 2);
sr1 = desc.getSortRef(0);
@@ -416,10 +411,8 @@ TEST("test that [docid] translates to [lid][paritionid]") {
}
TEST("test that [docid] uses attribute when one exists") {
- vespalib::TestClock clock;
- vespalib::Doom doom(clock.clock(), vespalib::steady_time::max());
search::uca::UcaConverterFactory ucaFactory;
- FastS_SortSpec asc("metastore", 7, doom, ucaFactory);
+ FastS_SortSpec asc("metastore", 7, vespalib::Doom::never(), ucaFactory);
RankedHit hits[2] = {RankedHit(91, 0.0), RankedHit(3, 2.0)};
Config cfg(BasicType::INT64, CollectionType::SINGLE);
auto metastore = AttributeFactory::createAttribute("metastore", cfg);
@@ -445,7 +438,7 @@ TEST("test that [docid] uses attribute when one exists") {
EXPECT_EQUAL(8u, sr2.second);
EXPECT_EQUAL(0, memcmp(SECOND_ASC, sr2.first, 8));
- FastS_SortSpec desc("metastore", 7, doom, ucaFactory);
+ FastS_SortSpec desc("metastore", 7, vespalib::Doom::never(), ucaFactory);
desc.Init("-[docid]", ac);
desc.initWithoutSorting(hits, 2);
sr1 = desc.getSortRef(0);
diff --git a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
index 74d4600a079..da58dd749ba 100644
--- a/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
+++ b/searchlib/src/tests/tensor/hnsw_index/hnsw_index_test.cpp
@@ -106,7 +106,7 @@ public:
.set(7, {3, 5}).set(8, {0, 3}).set(9, {4, 5});
}
- ~HnswIndexTest() {}
+ ~HnswIndexTest() override {}
auto dff() {
return search::tensor::make_distance_function_factory(
@@ -135,7 +135,7 @@ public:
gen_handler.incGeneration();
index->reclaim_memory(gen_handler.get_oldest_used_generation());
}
- void set_filter(std::vector<uint32_t> docids) {
+ void set_filter(const std::vector<uint32_t>& docids) {
uint32_t sz = 10;
global_filter = GlobalFilter::create(docids, sz);
}
@@ -168,7 +168,7 @@ public:
ASSERT_EQ(exp_levels.size(), act_node.size());
EXPECT_EQ(exp_levels, act_node.levels());
}
- void expect_top_3_by_docid(const vespalib::string& label, std::vector<float> qv, std::vector<uint32_t> exp) {
+ void expect_top_3_by_docid(const vespalib::string& label, std::vector<float> qv, const std::vector<uint32_t>& exp) {
SCOPED_TRACE(label);
uint32_t k = 3;
uint32_t explore_k = 100;
@@ -794,7 +794,7 @@ class MyGlobalFilter : public GlobalFilter {
std::shared_ptr<GlobalFilter> _filter;
mutable uint32_t _max_docid;
public:
- MyGlobalFilter(std::shared_ptr<GlobalFilter> filter) noexcept
+ explicit MyGlobalFilter(std::shared_ptr<GlobalFilter> filter) noexcept
: _filter(std::move(filter)),
_max_docid(0)
{
@@ -845,7 +845,7 @@ TEST_F(HnswMultiIndexTest, duplicate_docid_is_removed)
global_filter = filter;
this->expect_top_3_by_docid("{2,2}", {2, 2}, {1, 2});
EXPECT_EQ(2, filter->max_docid());
-};
+}
TEST_F(HnswMultiIndexTest, docid_with_empty_tensor_can_be_removed)
{
@@ -904,10 +904,10 @@ TEST(LevelGeneratorTest, gives_various_levels)
}
hist[l]++;
}
- for (uint32_t l = 0; l < hist.size(); ++l) {
+ for (unsigned int l : hist) {
double expected = left * 0.75;
- EXPECT_TRUE(hist[l] < expected*1.01 + 100);
- EXPECT_TRUE(hist[l] > expected*0.99 - 100);
+ EXPECT_TRUE(l < expected*1.01 + 100);
+ EXPECT_TRUE(l > expected*0.99 - 100);
left *= 0.25;
}
EXPECT_TRUE(hist.size() < 14);
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index f80e8dbe7be..96bd07bc5a3 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -43,7 +43,6 @@ vespa_add_library(searchlib_attribute OBJECT
direct_multi_term_blueprint.cpp
distance_metric_utils.cpp
diversity.cpp
- document_weight_or_filter_search.cpp
empty_search_context.cpp
enum_store_compaction_spec.cpp
enum_store_dictionary.cpp
@@ -84,6 +83,7 @@ vespa_add_library(searchlib_attribute OBJECT
multi_numeric_search_context.cpp
multi_string_enum_hint_search_context.cpp
multi_string_enum_search_context.cpp
+ multi_term_or_filter_search.cpp
multi_value_mapping.cpp
multi_value_mapping_base.cpp
multienumattribute.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
index 3a5f79ef665..5d689f5bd81 100644
--- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp
@@ -5,9 +5,10 @@
#include "attribute_object_visitor.h"
#include "attribute_weighted_set_blueprint.h"
#include "direct_multi_term_blueprint.h"
-#include "document_weight_or_filter_search.h"
-#include "i_direct_posting_store.h"
-#include "posting_iterator_pack.h"
+#include "i_docid_posting_store.h"
+#include "i_docid_with_weight_posting_store.h"
+#include "in_term_search.h"
+#include "multi_term_or_filter_search.h"
#include "predicate_attribute.h"
#include <vespa/eval/eval/value.h>
#include <vespa/searchlib/common/location.h>
@@ -18,7 +19,7 @@
#include <vespa/searchlib/query/tree/stackdumpcreator.h>
#include <vespa/searchlib/queryeval/andsearchstrict.h>
#include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h>
-#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h>
#include <vespa/searchlib/queryeval/dot_product_blueprint.h>
#include <vespa/searchlib/queryeval/dot_product_search.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
@@ -468,7 +469,7 @@ DirectWandBlueprint::createFilterSearch(bool, FilterConstraint constraint) const
for (const IDirectPostingStore::LookupResult &r : _terms) {
_attr.create(r.posting_idx, iterators);
}
- return attribute::DocumentWeightOrFilterSearch::create(std::move(iterators));
+ return attribute::MultiTermOrFilterSearch::create(std::move(iterators));
} else {
return std::make_unique<queryeval::EmptySearch>();
}
@@ -528,7 +529,7 @@ public:
}
}
if (_attr.has_btree_iterator(_dict_entry.posting_idx)) {
- return std::make_unique<queryeval::DocumentWeightSearchIterator>(*tfmda[0], _attr, _dict_entry);
+ return std::make_unique<queryeval::DocidWithWeightSearchIterator>(*tfmda[0], _attr, _dict_entry);
} else {
return _attr.make_bitvector_iterator(_dict_entry.posting_idx, get_docid_limit(), *tfmda[0], strict);
}
@@ -574,12 +575,13 @@ class CreateBlueprintVisitor : public CreateBlueprintVisitorHelper
private:
const FieldSpec &_field;
const IAttributeVector &_attr;
- const IDocidWithWeightPostingStore *_dww;
+ const IDocidPostingStore *_dps;
+ const IDocidWithWeightPostingStore *_dwwps;
vespalib::string _scratchPad;
bool use_docid_with_weight_posting_store() const {
// TODO: Relax requirement on always having weight iterator for query operators where that makes sense.
- return (_dww != nullptr) && (_dww->has_always_btree_iterator());
+ return (_dwwps != nullptr) && (_dwwps->has_always_btree_iterator());
}
public:
@@ -588,7 +590,8 @@ public:
: CreateBlueprintVisitorHelper(searchable, field, requestContext),
_field(field),
_attr(attr),
- _dww(attr.as_docid_with_weight_posting_store()),
+ _dps(attr.as_docid_posting_store()),
+ _dwwps(attr.as_docid_with_weight_posting_store()),
_scratchPad()
{
}
@@ -598,7 +601,7 @@ public:
void visitSimpleTerm(TermNode &n) {
if (use_docid_with_weight_posting_store() && !_field.isFilter() && n.isRanked() && !Term::isPossibleRangeTerm(n.getTerm())) {
NodeAsKey key(n, _scratchPad);
- setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dww, key));
+ setResult(std::make_unique<DirectAttributeBlueprint>(_field, _attr, *_dwwps, key));
} else {
visitTerm(n);
}
@@ -663,8 +666,8 @@ public:
void visit(PredicateQuery &n) override { visitPredicate(n); }
void visit(RegExpTerm & n) override { visitTerm(n); }
- template <typename WS>
- void createDirectWeightedSet(WS *bp, MultiTerm &n);
+ template <typename BlueprintType>
+ void createDirectMultiTerm(BlueprintType *bp, MultiTerm &n);
template <typename WS>
void createShallowWeightedSet(WS *bp, MultiTerm &n, const FieldSpec &fs, bool isInteger);
@@ -677,8 +680,7 @@ public:
return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD);
}
- template <typename Node>
- void create_weighted_set_or_in(Node &n) {
+ void visit(query::WeightedSetTerm &n) override {
bool isSingleValue = !_attr.hasMultiValue();
bool isString = (_attr.isStringType() && _attr.hasEnum());
bool isInteger = _attr.isIntegerType();
@@ -693,8 +695,8 @@ public:
} else {
if (use_docid_with_weight_posting_store()) {
auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>
- (_field, _attr, *_dww, n.getNumTerms());
- createDirectWeightedSet(bp, n);
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
} else {
auto *bp = new WeightedSetTermBlueprint(_field);
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
@@ -702,15 +704,11 @@ public:
}
}
- void visit(query::WeightedSetTerm &n) override {
- create_weighted_set_or_in(n);
- }
-
void visit(query::DotProduct &n) override {
if (use_docid_with_weight_posting_store()) {
auto *bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>
- (_field, _attr, *_dww, n.getNumTerms());
- createDirectWeightedSet(bp, n);
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
} else {
auto *bp = new DotProductBlueprint(_field);
createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
@@ -719,10 +717,10 @@ public:
void visit(query::WandTerm &n) override {
if (use_docid_with_weight_posting_store()) {
- auto *bp = new DirectWandBlueprint(_field, *_dww,
+ auto *bp = new DirectWandBlueprint(_field, *_dwwps,
n.getTargetNumHits(), n.getScoreThreshold(), n.getThresholdBoostFactor(),
n.getNumTerms());
- createDirectWeightedSet(bp, n);
+ createDirectMultiTerm(bp, n);
} else {
auto *bp = new ParallelWeakAndBlueprint(_field,
n.getTargetNumHits(),
@@ -733,7 +731,18 @@ public:
}
void visit(query::InTerm &n) override {
- create_weighted_set_or_in(n);
+ if (_dps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidPostingStore, attribute::InTermSearch>
+ (_field, _attr, *_dps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
+ } else if (_dwwps != nullptr) {
+ auto* bp = new attribute::DirectMultiTermBlueprint<IDocidWithWeightPostingStore, attribute::InTermSearch>
+ (_field, _attr, *_dwwps, n.getNumTerms());
+ createDirectMultiTerm(bp, n);
+ } else {
+ auto* bp = new WeightedSetTermBlueprint(_field);
+ createShallowWeightedSet(bp, n, _field, _attr.isIntegerType());
+ }
}
void fail_nearest_neighbor_term(query::NearestNeighborTerm&n, const vespalib::string& error_msg) {
@@ -768,9 +777,9 @@ public:
void visit(query::FuzzyTerm &n) override { visitTerm(n); }
};
-template <typename WS>
+template <typename BlueprintType>
void
-CreateBlueprintVisitor::createDirectWeightedSet(WS *bp, MultiTerm &n) {
+CreateBlueprintVisitor::createDirectMultiTerm(BlueprintType *bp, MultiTerm &n) {
Blueprint::UP result(bp);
Blueprint::HitEstimate estimate;
for (uint32_t i(0); i < n.getNumTerms(); i++) {
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
index 12ae226895e..d7f9cd84d8d 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.cpp
@@ -4,12 +4,15 @@
#include "direct_multi_term_blueprint.hpp"
#include "i_docid_posting_store.h"
#include "i_docid_with_weight_posting_store.h"
+#include "in_term_search.h"
#include <vespa/searchlib/queryeval/dot_product_search.h>
#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
namespace search::attribute {
+template class DirectMultiTermBlueprint<IDocidPostingStore, InTermSearch>;
template class DirectMultiTermBlueprint<IDocidPostingStore, queryeval::WeightedSetTermSearch>;
+template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, InTermSearch>;
template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::WeightedSetTermSearch>;
template class DirectMultiTermBlueprint<IDocidWithWeightPostingStore, queryeval::DotProductSearch>;
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
index 668034ecd3d..066b70481dc 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.h
@@ -18,7 +18,7 @@ namespace search::attribute {
/**
* Blueprint used for multi-term query operators as InTerm, WeightedSetTerm or DotProduct
- * over a multi-value attribute which supports the IDocidWithWeightPostingStore interface.
+ * over an attribute which supports the IDocidPostingStore or IDocidWithWeightPostingStore interface.
*
* This uses access to low-level posting lists, which speeds up query execution.
*/
@@ -44,7 +44,9 @@ private:
std::vector<std::unique_ptr<queryeval::SearchIterator>>&& bitvectors,
bool strict) const;
- std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const;
+ template <bool filter_search, bool need_match_data>
+ std::unique_ptr<queryeval::SearchIterator> create_search_helper(const fef::TermFieldMatchDataArray& tfmda,
+ bool strict) const;
public:
DirectMultiTermBlueprint(const queryeval::FieldSpec &field, const IAttributeVector &iattr, const PostingStoreType &attr, size_t size_hint);
diff --git a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
index 5ca943a356d..f195e97fee0 100644
--- a/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/direct_multi_term_blueprint.hpp
@@ -3,7 +3,7 @@
#pragma once
#include "direct_multi_term_blueprint.h"
-#include "document_weight_or_filter_search.h"
+#include "multi_term_or_filter_search.h"
#include <vespa/searchlib/fef/termfieldmatchdata.h>
#include <vespa/searchlib/queryeval/emptysearch.h>
#include <vespa/searchlib/queryeval/filter_wrapper.h>
@@ -88,8 +88,10 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::combine_iterators(std::u
}
template <typename PostingStoreType, typename SearchType>
+template <bool filter_search, bool need_match_data>
std::unique_ptr<queryeval::SearchIterator>
-DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda, bool strict, bool is_filter_search) const
+DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(const fef::TermFieldMatchDataArray& tfmda,
+ bool strict) const
{
if (_terms.empty()) {
return std::make_unique<queryeval::EmptySearch>();
@@ -98,24 +100,30 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::create_search_helper(con
std::vector<queryeval::SearchIterator::UP> bitvectors;
const size_t num_children = _terms.size();
btree_iterators.reserve(num_children);
- bool use_bit_vector_when_available = is_filter_search || !_attr.has_always_btree_iterator();
- auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, *tfmda[0], strict);
- if (is_filter_search) {
- auto filter = !btree_iterators.empty() ? attribute::DocumentWeightOrFilterSearch::create(std::move(btree_iterators)) : std::unique_ptr<SearchIterator>();
+ auto& tfmd = *tfmda[0];
+ bool use_bit_vector_when_available = filter_search || !_attr.has_always_btree_iterator();
+ auto weights = create_iterators(btree_iterators, bitvectors, use_bit_vector_when_available, tfmd, strict);
+ if constexpr (filter_search || (!need_match_data && !SearchType::require_btree_iterators)) {
+ auto filter = !btree_iterators.empty() ?
+ (need_match_data ?
+ attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators), tfmd) :
+ attribute::MultiTermOrFilterSearch::create(std::move(btree_iterators))) :
+ std::unique_ptr<SearchIterator>();
return combine_iterators(std::move(filter), std::move(bitvectors), strict);
}
bool field_is_filter = getState().fields()[0].isFilter();
- if constexpr (std::is_same_v<SearchType, queryeval::WeightedSetTermSearch>) {
+ if constexpr (!filter_search && !SearchType::require_btree_iterators) {
auto multi_term = !btree_iterators.empty() ?
- SearchType::create(*tfmda[0], field_is_filter, std::move(weights), std::move(btree_iterators))
+ SearchType::create(tfmd, field_is_filter, std::move(weights), std::move(btree_iterators))
: std::unique_ptr<SearchIterator>();
return combine_iterators(std::move(multi_term), std::move(bitvectors), strict);
- } else {
- // In this case we should only have weight iterators.
+ } else if constexpr (SearchType::require_btree_iterators) {
+ // In this case we should only have btree iterators.
assert(btree_iterators.size() == _terms.size());
assert(weights.index() == 0);
- return SearchType::create(*tfmda[0], field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators));
+ return SearchType::create(tfmd, field_is_filter, std::get<0>(weights).get(), std::move(btree_iterators));
}
+ return std::make_unique<queryeval::EmptySearch>();
}
template <typename PostingStoreType, typename SearchType>
@@ -124,9 +132,12 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createLeafSearch(const f
{
assert(tfmda.size() == 1);
assert(getState().numFields() == 1);
- bool field_is_filter = getState().fields()[0].isFilter();
- bool is_filter_search = field_is_filter && tfmda[0]->isNotNeeded();
- return create_search_helper(tfmda, strict, is_filter_search);
+ bool need_match_data = !tfmda[0]->isNotNeeded();
+ if (need_match_data) {
+ return create_search_helper<SearchType::filter_search, true>(tfmda, strict);
+ } else {
+ return create_search_helper<SearchType::filter_search, false>(tfmda, strict);
+ }
}
template <typename PostingStoreType, typename SearchType>
@@ -135,7 +146,7 @@ DirectMultiTermBlueprint<PostingStoreType, SearchType>::createFilterSearch(bool
{
assert(getState().numFields() == 1);
auto wrapper = std::make_unique<FilterWrapper>(getState().numFields());
- wrapper->wrap(create_search_helper(wrapper->tfmda(), strict, true));
+ wrapper->wrap(create_search_helper<true, false>(wrapper->tfmda(), strict));
return wrapper;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/in_term_search.h b/searchlib/src/vespa/searchlib/attribute/in_term_search.h
new file mode 100644
index 00000000000..36776499e51
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/in_term_search.h
@@ -0,0 +1,15 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+namespace search::attribute {
+
+/**
+ * Class used as template argument in DirectMultiTermBlueprint to configure it for the InTerm query operator. It carries no state; only the two compile-time flags below are read by the blueprint.
+ */
+struct InTermSearch {
+ static constexpr bool filter_search = true; // passed as the filter_search template argument of create_search_helper(), selecting the OR-filter iterator
+ static constexpr bool require_btree_iterators = false; // terms are not all required to have btree iterators; bitvector iterators may be combined in
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp
index b910e64b665..a4cc7405bbf 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.cpp
@@ -1,6 +1,6 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "document_weight_or_filter_search.h"
+#include "multi_term_or_filter_search.h"
#include "posting_iterator_pack.h"
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/queryeval/iterator_pack.h>
@@ -12,21 +12,32 @@ using search::queryeval::SearchIteratorPack;
namespace search::attribute {
template<typename IteratorPack>
-class DocumentWeightOrFilterSearchImpl : public DocumentWeightOrFilterSearch
+class MultiTermOrFilterSearchImpl : public MultiTermOrFilterSearch
{
+ // Vector that caches the docids of the current positions of the iterators in the pack.
+ // This reduces cache misses in doSeek() and seek_all().
+ std::vector<uint32_t> _docids;
IteratorPack _children;
+ fef::TermFieldMatchData* _tfmd;
void seek_all(uint32_t docId);
public:
- explicit DocumentWeightOrFilterSearchImpl(IteratorPack&& children);
- ~DocumentWeightOrFilterSearchImpl() override;
+ explicit MultiTermOrFilterSearchImpl(IteratorPack&& children, fef::TermFieldMatchData* tfmd);
+ ~MultiTermOrFilterSearchImpl() override;
void doSeek(uint32_t docId) override;
- void doUnpack(uint32_t) override { }
+ void doUnpack(uint32_t docid) override {
+ if (_tfmd != nullptr) {
+ _tfmd->resetOnlyDocId(docid);
+ }
+ }
void initRange(uint32_t begin, uint32_t end) override {
SearchIterator::initRange(begin, end);
_children.initRange(begin, end);
+ for (uint16_t i = 0; i < _children.size(); ++i) {
+ _docids[i] = _children.get_docid(i);
+ }
}
void or_hits_into(BitVector &result, uint32_t begin_id) override {
@@ -46,35 +57,39 @@ public:
};
template<typename IteratorPack>
-DocumentWeightOrFilterSearchImpl<IteratorPack>::DocumentWeightOrFilterSearchImpl(IteratorPack&& children)
- : DocumentWeightOrFilterSearch(),
- _children(std::move(children))
+MultiTermOrFilterSearchImpl<IteratorPack>::MultiTermOrFilterSearchImpl(IteratorPack&& children, fef::TermFieldMatchData* tfmd)
+ : MultiTermOrFilterSearch(),
+ _docids(children.size(), 0),
+ _children(std::move(children)),
+ _tfmd(tfmd)
{
}
template<typename IteratorPack>
-DocumentWeightOrFilterSearchImpl<IteratorPack>::~DocumentWeightOrFilterSearchImpl() = default;
+MultiTermOrFilterSearchImpl<IteratorPack>::~MultiTermOrFilterSearchImpl() = default;
template<typename IteratorPack>
void
-DocumentWeightOrFilterSearchImpl<IteratorPack>::seek_all(uint32_t docId) {
+MultiTermOrFilterSearchImpl<IteratorPack>::seek_all(uint32_t docId) {
for (uint16_t i = 0; i < _children.size(); ++i) {
- uint32_t next = _children.get_docid(i);
+ uint32_t next = _docids[i];
if (next < docId) {
- _children.seek(i, docId);
+ next = _children.seek(i, docId);
+ _docids[i] = next;
}
}
}
template<typename IteratorPack>
void
-DocumentWeightOrFilterSearchImpl<IteratorPack>::doSeek(uint32_t docId)
+MultiTermOrFilterSearchImpl<IteratorPack>::doSeek(uint32_t docId)
{
uint32_t min_doc_id = endDocId;
for (uint16_t i = 0; i < _children.size(); ++i) {
- uint32_t next = _children.get_docid(i);
+ uint32_t next = _docids[i];
if (next < docId) {
next = _children.seek(i, docId);
+ _docids[i] = next;
}
if (next == docId) {
setDocId(next);
@@ -89,41 +104,53 @@ namespace {
template <typename IteratorType, typename IteratorPackType>
std::unique_ptr<queryeval::SearchIterator>
-create_helper(std::vector<IteratorType>&& children)
+create_helper(std::vector<IteratorType>&& children, fef::TermFieldMatchData* tfmd)
{
if (children.empty()) {
return std::make_unique<queryeval::EmptySearch>();
} else {
std::sort(children.begin(), children.end(),
[](const auto & a, const auto & b) { return a.size() > b.size(); });
- using OrFilter = DocumentWeightOrFilterSearchImpl<IteratorPackType>;
- return std::make_unique<OrFilter>(IteratorPackType(std::move(children)));
+ using OrFilter = MultiTermOrFilterSearchImpl<IteratorPackType>;
+ return std::make_unique<OrFilter>(IteratorPackType(std::move(children)), tfmd);
}
}
}
std::unique_ptr<queryeval::SearchIterator>
-DocumentWeightOrFilterSearch::create(std::vector<DocidIterator>&& children)
+MultiTermOrFilterSearch::create(std::vector<DocidIterator>&& children)
+{
+ return create_helper<DocidIterator, DocidIteratorPack>(std::move(children), nullptr);
+}
+
+std::unique_ptr<queryeval::SearchIterator>
+MultiTermOrFilterSearch::create(std::vector<DocidIterator>&& children, fef::TermFieldMatchData& tfmd)
+{
+ return create_helper<DocidIterator, DocidIteratorPack>(std::move(children), &tfmd);
+}
+
+std::unique_ptr<queryeval::SearchIterator>
+MultiTermOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children)
{
- return create_helper<DocidIterator, DocidIteratorPack>(std::move(children));
+ return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children), nullptr);
}
std::unique_ptr<queryeval::SearchIterator>
-DocumentWeightOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children)
+MultiTermOrFilterSearch::create(std::vector<DocidWithWeightIterator>&& children, fef::TermFieldMatchData& tfmd)
{
- return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children));
+ return create_helper<DocidWithWeightIterator, DocidWithWeightIteratorPack>(std::move(children), &tfmd);
}
std::unique_ptr<queryeval::SearchIterator>
-DocumentWeightOrFilterSearch::create(const std::vector<SearchIterator *>& children,
+MultiTermOrFilterSearch::create(const std::vector<SearchIterator *>& children,
std::unique_ptr<fef::MatchData> md)
{
if (children.empty()) {
return std::make_unique<queryeval::EmptySearch>();
} else {
- using OrFilter = DocumentWeightOrFilterSearchImpl<SearchIteratorPack>;
- return std::make_unique<OrFilter>(SearchIteratorPack(children, std::move(md)));
+ using OrFilter = MultiTermOrFilterSearchImpl<SearchIteratorPack>;
+ return std::make_unique<OrFilter>(SearchIteratorPack(children, std::move(md)), nullptr);
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h
index 5ed0dd16d83..1e8227c3007 100644
--- a/searchlib/src/vespa/searchlib/attribute/document_weight_or_filter_search.h
+++ b/searchlib/src/vespa/searchlib/attribute/multi_term_or_filter_search.h
@@ -7,16 +7,20 @@ namespace search::fef { class MatchData; }
namespace search::attribute {
/**
- * Filter iterator on top of document weight iterators with OR semantics used during
- * calculation of global filter for weighted set terms, wand terms and dot product terms.
+ * Filter iterator on top of low-level posting list iterators or regular search iterators with OR semantics.
+ *
+ * Used during calculation of global filter for InTerm, WeightedSetTerm, DotProduct and WandTerm,
+ * or when ranking is not needed for InTerm and WeightedSetTerm.
*/
-class DocumentWeightOrFilterSearch : public queryeval::SearchIterator
+class MultiTermOrFilterSearch : public queryeval::SearchIterator
{
protected:
- DocumentWeightOrFilterSearch() = default;
+ MultiTermOrFilterSearch() = default;
public:
static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children);
+ static std::unique_ptr<SearchIterator> create(std::vector<DocidIterator>&& children, fef::TermFieldMatchData& tfmd);
static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children);
+ static std::unique_ptr<SearchIterator> create(std::vector<DocidWithWeightIterator>&& children, fef::TermFieldMatchData& tfmd);
static std::unique_ptr<SearchIterator> create(const std::vector<SearchIterator *>& children,
std::unique_ptr<fef::MatchData> md);
};
diff --git a/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h b/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h
index 93612fde893..7f1c3e31367 100644
--- a/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/numeric_range_matcher.h
@@ -24,7 +24,8 @@ private:
int _limit;
size_t _max_per_group;
public:
- NumericRangeMatcher(const QueryTermSimple& queryTerm, bool avoidUndefinedInRange=false);
+ NumericRangeMatcher(const QueryTermSimple& queryTerm) : NumericRangeMatcher(queryTerm, false) {}
+ NumericRangeMatcher(const QueryTermSimple& queryTerm, bool avoidUndefinedInRange);
protected:
Int64Range getRange() const {
return {static_cast<int64_t>(_low), static_cast<int64_t>(_high)};
@@ -41,8 +42,8 @@ protected:
search::Range<BaseType>
cappedRange(bool isFloat)
{
- BaseType low = static_cast<BaseType>(_low);
- BaseType high = static_cast<BaseType>(_high);
+ auto low = static_cast<BaseType>(_low);
+ auto high = static_cast<BaseType>(_high);
BaseType numMin = std::numeric_limits<BaseType>::min();
BaseType numMax = std::numeric_limits<BaseType>::max();
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 0b75e8c2c0d..3e0794835ae 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -57,6 +57,13 @@ protected:
~PostingListSearchContext() override;
+ double avg_values_per_document() const noexcept {
+ return static_cast<double>(_numValues) / static_cast<double>(_docIdLimit);
+ }
+ double avg_postinglist_size() const noexcept {
+ return static_cast<double>(_numValues) / _dictSize;
+ }
+
void lookupTerm(const vespalib::datastore::EntryComparator &comp);
void lookupRange(const vespalib::datastore::EntryComparator &low, const vespalib::datastore::EntryComparator &high);
void lookupSingle();
@@ -72,8 +79,6 @@ protected:
* by looking at the posting lists in the range [lower, upper>.
*/
virtual size_t calc_estimated_hits_in_range() const = 0;
- virtual void fillArray() = 0;
- virtual void fillBitVector(vespalib::ThreadBundle & thread_bundle) = 0;
};
@@ -88,6 +93,7 @@ protected:
using AtomicEntryRef = vespalib::datastore::AtomicEntryRef;
using EntryRef = vespalib::datastore::EntryRef;
using FrozenView = typename PostingStore::BTreeType::FrozenView;
+ using ExecuteInfo = queryeval::ExecuteInfo;
const PostingStore& _posting_store;
/*
@@ -101,10 +107,10 @@ protected:
~PostingListSearchContextT() override;
void lookupSingle();
- void fillArray() override;
- void fillBitVector(vespalib::ThreadBundle & thread_bundle) override;
+ virtual void fillArray();
+ virtual void fillBitVector(const ExecuteInfo &);
- void fetchPostings(const queryeval::ExecuteInfo & strict) override;
+ void fetchPostings(const ExecuteInfo & strict) override;
// this will be called instead of the fetchPostings function in some cases
void diversify(bool forward, size_t wanted_hits, const IAttributeVector &diversity_attr,
size_t max_per_group, size_t cutoff_groups, bool cutoff_strict);
@@ -131,6 +137,7 @@ protected:
using DictionaryConstIterator = Dictionary::ConstIterator;
using EntryRef = vespalib::datastore::EntryRef;
using PostingStore = typename Parent::PostingStore;
+ using ExecuteInfo = queryeval::ExecuteInfo;
using Parent::_docIdLimit;
using Parent::_lowerDictItr;
using Parent::_merger;
@@ -154,7 +161,7 @@ protected:
template <bool fill_array>
void fill_array_or_bitvector();
void fillArray() override;
- void fillBitVector(vespalib::ThreadBundle & thread_bundle) override;
+ void fillBitVector(const ExecuteInfo &) override;
};
@@ -177,6 +184,7 @@ class StringPostingSearchContext
: public PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT>
{
private:
+ using ExecuteInfo = queryeval::ExecuteInfo;
using Parent = PostingSearchContext<BaseSC, PostingListFoldedSearchContextT<DataT>, AttrT>;
using RegexpUtil = vespalib::RegexpUtil;
using Parent::_enumStore;
@@ -186,7 +194,7 @@ private:
bool use_single_dictionary_entry(PostingListSearchContext::DictionaryConstIterator it) const {
return use_dictionary_entry(it);
}
- bool use_posting_lists_when_non_strict(const queryeval::ExecuteInfo& info) const override;
+ bool use_posting_lists_when_non_strict(const ExecuteInfo& info) const override;
public:
StringPostingSearchContext(BaseSC&& base_sc, bool useBitVector, const AttrT &toBeSearched);
};
@@ -196,6 +204,7 @@ class NumericPostingSearchContext
: public PostingSearchContext<BaseSC, PostingListSearchContextT<DataT>, AttrT>
{
private:
+ using ExecuteInfo = queryeval::ExecuteInfo;
using Parent = PostingSearchContext<BaseSC, PostingListSearchContextT<DataT>, AttrT>;
using BaseType = typename AttrT::T;
using Params = attribute::SearchContextParams;
@@ -215,7 +224,7 @@ private:
? limit
: estimate;
}
- void fetchPostings(const queryeval::ExecuteInfo & execInfo) override {
+ void fetchPostings(const ExecuteInfo & execInfo) override {
if (params().diversityAttribute() != nullptr) {
bool forward = (this->getRangeLimit() > 0);
size_t wanted_hits = std::abs(this->getRangeLimit());
@@ -227,7 +236,7 @@ private:
}
}
- bool use_posting_lists_when_non_strict(const queryeval::ExecuteInfo& info) const override;
+ bool use_posting_lists_when_non_strict(const ExecuteInfo& info) const override;
size_t calc_estimated_hits_in_range() const override;
public:
@@ -343,14 +352,11 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_st
// Based on this we see that LMC = 5 * PLMC.
// The same relationship is found with the test case range_hits_ratio=[200].
- if ( ! info.create_postinglist_when_non_strict()) return false;
-
- constexpr float lookup_match_constant = 5.0;
- constexpr float posting_list_merge_constant = 1.0;
+ constexpr double lookup_match_constant = 5.0;
+ constexpr double posting_list_merge_constant = 1.0;
uint32_t exp_doc_hits = this->_docIdLimit * info.hit_rate();
- float avg_values_per_document = static_cast<float>(this->_numValues) / static_cast<float>(this->_docIdLimit);
- float lookup_match_cost = exp_doc_hits * avg_values_per_document * lookup_match_constant;
+ float lookup_match_cost = exp_doc_hits * this->avg_values_per_document() * lookup_match_constant;
float posting_list_cost = this->estimated_hits_in_range() * posting_list_merge_constant;
return posting_list_cost < lookup_match_cost;
}
@@ -362,14 +368,25 @@ NumericPostingSearchContext<BaseSC, AttrT, DataT>::calc_estimated_hits_in_range(
size_t exact_sum = 0;
size_t estimated_sum = 0;
- auto it = this->_lowerDictItr;
- for (uint32_t count = 0; (it != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it, ++count) {
- exact_sum += this->_posting_store.frozenSize(it.getData().load_acquire());
+ // Sample lower range
+ auto it_forward = this->_lowerDictItr;
+ for (uint32_t count = 0; (it_forward != this->_upperDictItr) && (count < this->max_posting_lists_to_count); ++it_forward, ++count) {
+ exact_sum += this->_posting_store.frozenSize(it_forward.getData().load_acquire());
}
- if (it != this->_upperDictItr) {
- uint32_t remaining_posting_lists = this->_upperDictItr - it;
- float hits_per_posting_list = static_cast<float>(exact_sum) / static_cast<float>(this->max_posting_lists_to_count);
- estimated_sum = remaining_posting_lists * hits_per_posting_list;
+ if (it_forward != this->_upperDictItr) {
+ //Sample upper range
+ auto it_backward = this->_upperDictItr;
+ for (uint32_t count = 0; (it_backward != it_forward) && (count < this->max_posting_lists_to_count);++count) {
+ --it_backward;
+ exact_sum += this->_posting_store.frozenSize(it_backward.getData().load_acquire());
+ }
+ if (it_forward != it_backward) {
+ // Estimate the rest
+ uint32_t remaining_posting_lists = it_backward - it_forward;
+ double measured_hits_per_posting_list = static_cast<double>(exact_sum) / (this->max_posting_lists_to_count * 2);
+ // Let the measured rate and the global rate count equally, to reduce the effect of outliers.
+ estimated_sum = remaining_posting_lists * (measured_hits_per_posting_list + this->avg_postinglist_size())/2;
+ }
}
return exact_sum + estimated_sum;
}
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
index 65ed15a866f..f937d567588 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.hpp
@@ -68,11 +68,14 @@ PostingListSearchContextT<DataT>::fillArray()
template <typename DataT>
struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable {
- FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, uint32_t limit)
- : FillPart(posting_store, from, count, nullptr, limit)
+ FillPart(const vespalib::Doom & doom, const PostingStore& posting_store, const DictionaryConstIterator & from,
+ size_t count, uint32_t limit)
+ : FillPart(doom, posting_store, from, count, nullptr, limit)
{ }
- FillPart(const PostingStore& posting_store, const DictionaryConstIterator & from, size_t count, BitVector * bv, uint32_t limit)
- : _posting_store(posting_store),
+ FillPart(const vespalib::Doom & doom, const PostingStore& posting_store, const DictionaryConstIterator & from,
+ size_t count, BitVector * bv, uint32_t limit)
+ : _doom(doom),
+ _posting_store(posting_store),
_bv(bv),
_docIdLimit(limit),
_from(from),
@@ -86,7 +89,8 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable {
_owned_bv = BitVector::create(_docIdLimit);
_bv = _owned_bv.get();
}
- for (;_from != _to;++_from) {
+ //TODO Add && !_doom.soft_doom() to loop
+ for ( ;_from != _to; ++_from) {
addToBitVector(PostingListTraverser<PostingStore>(_posting_store, _from.getData().load_acquire()));
}
}
@@ -95,6 +99,7 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable {
if (__builtin_expect(key < _docIdLimit, true)) { _bv->setBit(key); }
});
}
+ const vespalib::Doom _doom;
const PostingStore &_posting_store;
BitVector *_bv;
uint32_t _docIdLimit;
@@ -105,8 +110,9 @@ struct PostingListSearchContextT<DataT>::FillPart : public vespalib::Runnable {
template <typename DataT>
void
-PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_bundle)
+PostingListSearchContextT<DataT>::fillBitVector(const ExecuteInfo & exec_info)
{
+ vespalib::ThreadBundle & thread_bundle = exec_info.thread_bundle();
size_t num_iter = _upperDictItr - _lowerDictItr;
size_t num_threads = std::min(thread_bundle.size(), num_iter);
@@ -115,10 +121,10 @@ PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_
std::vector<FillPart> parts;
parts.reserve(num_threads);
BitVector * master = _merger.getBitVector();
- parts.emplace_back(_posting_store, _lowerDictItr, per_thread + (rest_docs > 0), master, _merger.getDocIdLimit());
+ parts.emplace_back(exec_info.doom(), _posting_store, _lowerDictItr, per_thread + (rest_docs > 0), master, _merger.getDocIdLimit());
for (size_t i(1); i < num_threads; i++) {
size_t num_this_thread = per_thread + (i < rest_docs);
- parts.emplace_back(_posting_store, parts[i-1]._to, num_this_thread, _merger.getDocIdLimit());
+ parts.emplace_back(exec_info.doom(), _posting_store, parts[i-1]._to, num_this_thread, _merger.getDocIdLimit());
}
thread_bundle.run(parts);
std::vector<BitVector *> vectors;
@@ -131,7 +137,7 @@ PostingListSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_
template <typename DataT>
void
-PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & execInfo)
+PostingListSearchContextT<DataT>::fetchPostings(const ExecuteInfo & exec_info)
{
// The following constant is derived after running parts of
// the range search performance test with 10M documents on an Apple M1 Pro with 32 GB memory.
@@ -162,7 +168,7 @@ PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & e
// The threshold for when to use array merging is therefore 0.0025 (0.08 / 32).
constexpr float threshold_for_using_array = 0.0025;
if (!_merger.merge_done() && _uniqueValues >= 2u && this->_dictionary.get_has_btree_dictionary()) {
- if (execInfo.is_strict() || use_posting_lists_when_non_strict(execInfo)) {
+ if (exec_info.is_strict() || use_posting_lists_when_non_strict(exec_info)) {
size_t sum = estimated_hits_in_range();
//TODO Honour soft_doom and forward it to merge code
if (sum < (_docIdLimit * threshold_for_using_array)) {
@@ -170,7 +176,7 @@ PostingListSearchContextT<DataT>::fetchPostings(const queryeval::ExecuteInfo & e
fillArray();
} else {
_merger.allocBitVector();
- fillBitVector(execInfo.thread_bundle());
+ fillBitVector(exec_info);
}
_merger.merge();
}
@@ -223,7 +229,7 @@ createPostingIterator(fef::TermFieldMatchData *matchData, bool strict)
}
const BitVector *bv(_merger.getBitVector());
assert(bv != nullptr);
- return search::BitVectorIterator::create(bv, bv->size(), *matchData, strict);
+ return BitVectorIterator::create(bv, bv->size(), *matchData, strict);
}
if (_uniqueValues == 1) {
if (_bv != nullptr && (!_pidx.valid() || _useBitVector || matchData->isNotNeeded())) {
@@ -421,9 +427,9 @@ PostingListFoldedSearchContextT<DataT>::fillArray()
template <typename DataT>
void
-PostingListFoldedSearchContextT<DataT>::fillBitVector(vespalib::ThreadBundle & thread_bundle)
+PostingListFoldedSearchContextT<DataT>::fillBitVector(const ExecuteInfo & exec_info)
{
- (void) thread_bundle;
+ (void) exec_info;
fill_array_or_bitvector<false>();
}
@@ -487,7 +493,7 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::use_dictionary_entry(PostingLi
template <typename BaseSC, typename AttrT, typename DataT>
bool
-StringPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_strict(const queryeval::ExecuteInfo& info) const
+StringPostingSearchContext<BaseSC, AttrT, DataT>::use_posting_lists_when_non_strict(const ExecuteInfo& info) const
{
if (this->isFuzzy()) {
uint32_t exp_doc_hits = this->_docIdLimit * info.hit_rate();
diff --git a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
index 482dc90f6cd..749fa48565b 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/singlenumericpostattribute.h
@@ -52,16 +52,16 @@ private:
using ValueModifier = typename B::BaseClass::ValueModifier;
using generation_t = typename SingleValueNumericEnumAttribute<B>::generation_t;
- using DirectPostingStoreAdapterType = attribute::NumericDirectPostingStoreAdapter<IDocidPostingStore,
- PostingStore, EnumStore>;
- DirectPostingStoreAdapterType _posting_store_adapter;
-
using PostingParent::_posting_store;
using PostingParent::clearAllPostings;
using PostingParent::handle_load_posting_lists;
using PostingParent::handle_load_posting_lists_and_update_enum_store;
using PostingParent::forwardedOnAddDoc;
+ using DirectPostingStoreAdapterType = attribute::NumericDirectPostingStoreAdapter<IDocidPostingStore,
+ PostingStore, EnumStore>;
+ DirectPostingStoreAdapterType _posting_store_adapter;
+
void freezeEnumDictionary() override;
void mergeMemoryStats(vespalib::MemoryUsage & total) override;
void applyUpdateValueChange(const Change & c, EnumStore & enumStore,
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h
index 543cfdd90ec..5a5b599244f 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h
@@ -2,8 +2,10 @@
#pragma once
-#include "singlestringattribute.h"
+#include "i_docid_posting_store.h"
#include "postinglistattribute.h"
+#include "singlestringattribute.h"
+#include "string_direct_posting_store_adapter.h"
namespace search {
@@ -48,12 +50,17 @@ private:
using PostingParent::handle_load_posting_lists;
using PostingParent::handle_load_posting_lists_and_update_enum_store;
using PostingParent::forwardedOnAddDoc;
+
public:
using PostingStore = typename PostingParent::PostingStore;
using Dictionary = EnumPostingTree;
using PostingParent::get_posting_store;
private:
+ using DirectPostingStoreAdapterType = attribute::StringDirectPostingStoreAdapter<IDocidPostingStore,
+ PostingStore, EnumStore>;
+ DirectPostingStoreAdapterType _posting_store_adapter;
+
void freezeEnumDictionary() override;
void mergeMemoryStats(vespalib::MemoryUsage & total) override;
void applyUpdateValueChange(const Change & c,
@@ -77,6 +84,8 @@ public:
std::unique_ptr<attribute::SearchContext>
getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override;
+ const IDocidPostingStore* as_docid_posting_store() const override { return &_posting_store_adapter; }
+
bool onAddDoc(DocId doc) override {
return forwardedOnAddDoc(doc, this->_enumIndices.size(), this->_enumIndices.capacity());
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
index 85b0c095d76..5822ab2c786 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp
@@ -4,6 +4,7 @@
#include "singlestringpostattribute.h"
#include "single_string_enum_search_context.h"
+#include "string_direct_posting_store_adapter.hpp"
#include <vespa/searchcommon/attribute/config.h>
#include <vespa/searchlib/query/query_term_ucs4.h>
@@ -13,7 +14,8 @@ template <typename B>
SingleValueStringPostingAttributeT<B>::SingleValueStringPostingAttributeT(const vespalib::string & name,
const AttributeVector::Config & c) :
SingleValueStringAttributeT<B>(name, c),
- PostingParent(*this, this->getEnumStore())
+ PostingParent(*this, this->getEnumStore()),
+ _posting_store_adapter(this->get_posting_store(), this->_enumStore, this->getIsFilter())
{
}
@@ -152,5 +154,5 @@ SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm,
*this);
}
-} // namespace search
+}
diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
index 970937e18ec..3270c170327 100644
--- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
@@ -10,6 +10,7 @@ vespa_add_library(searchlib_common OBJECT
documentlocations.cpp
documentsummary.cpp
fileheadercontext.cpp
+ fileheadertags.cpp
flush_token.cpp
geo_gcd.cpp
geo_location.cpp
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.cpp b/searchlib/src/vespa/searchlib/common/bitvector.cpp
index 898a21d860e..cee5801beb9 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.cpp
+++ b/searchlib/src/vespa/searchlib/common/bitvector.cpp
@@ -96,7 +96,7 @@ BitVector::allocatePaddedAndAligned(Index start, Index end, Index capacity, cons
{
assert(capacity >= end);
uint32_t words = numActiveWords(start, capacity);
- words += (-words & 15); // Pad to 64 byte alignment
+ words += (-words & (getAlignment()/sizeof(Word) - 1)); // Pad to required alignment
const size_t sz(words * sizeof(Word));
Alloc alloc = (init_alloc != nullptr) ? init_alloc->create(sz) : Alloc::alloc(sz, MMAP_LIMIT);
assert(alloc.size()/sizeof(Word) >= words);
@@ -445,7 +445,6 @@ void
MMappedBitVector::read(Index numberOfElements, FastOS_FileInterface &file,
int64_t offset, Index doccount)
{
- assert((offset & (getAlignment() - 1)) == 0);
void *mapptr = file.MemoryMapPtr(offset);
assert(mapptr != nullptr);
if (mapptr != nullptr) {
@@ -478,10 +477,21 @@ operator>>(nbostream &in, AllocatedBitVector &bv)
in >> size >> cachedHits >> fileBytes;
assert(size <= std::numeric_limits<BitVector::Index>::max());
assert(cachedHits <= size || ! bv.isValidCount(cachedHits));
- if (bv.size() != size)
+ if (bv.size() != size) {
bv.resize(size);
- assert(bv.getFileBytes() == fileBytes);
- in.read(bv.getStart(), bv.getFileBytes());
+ }
+ size_t expected_file_bytes = bv.getFileBytes();
+ size_t read_size = fileBytes;
+ size_t skip_size = 0;
+ if (expected_file_bytes < fileBytes) {
+ read_size = expected_file_bytes;
+ skip_size = fileBytes - expected_file_bytes;
+ }
+ in.read(bv.getStart(), read_size);
+ if (skip_size != 0) {
+ std::vector<char> dummy(skip_size);
+ in.read(dummy.data(), skip_size);
+ }
assert(bv.testBit(size));
bv.setTrueBits(cachedHits);
return in;
diff --git a/searchlib/src/vespa/searchlib/common/bitvector.h b/searchlib/src/vespa/searchlib/common/bitvector.h
index 0312e37a33c..4fac4b7897c 100644
--- a/searchlib/src/vespa/searchlib/common/bitvector.h
+++ b/searchlib/src/vespa/searchlib/common/bitvector.h
@@ -288,6 +288,9 @@ public:
* TODO: Extend to handle both AND/OR
*/
static void parallellOr(vespalib::ThreadBundle & thread_bundle, vespalib::ConstArrayRef<BitVector *> vectors);
+
+ static Index numWords(Index bits) noexcept { return wordNum(bits + 1 + (WordLen - 1)); }
+ static Index numBytes(Index bits) noexcept { return numWords(bits) * sizeof(Word); }
protected:
using Alloc = vespalib::alloc::Alloc;
VESPA_DLL_LOCAL BitVector(void * buf, Index start, Index end) noexcept;
@@ -299,10 +302,8 @@ protected:
VESPA_DLL_LOCAL void clearIntervalNoInvalidation(Range range);
bool isValidCount() const noexcept { return isValidCount(_numTrueBits.load(std::memory_order_relaxed)); }
static bool isValidCount(Index v) noexcept { return v != invalidCount(); }
- static Index numWords(Index bits) noexcept { return wordNum(bits + 1 + (WordLen - 1)); }
- static Index numBytes(Index bits) noexcept { return numWords(bits) * sizeof(Word); }
size_t numWords() const noexcept { return numWords(size()); }
- static constexpr size_t getAlignment() noexcept { return 0x40u; }
+ static constexpr size_t getAlignment() noexcept { return 0x100u; }
static size_t numActiveBytes(Index start, Index end) noexcept { return numActiveWords(start, end) * sizeof(Word); }
static Alloc allocatePaddedAndAligned(Index sz) {
return allocatePaddedAndAligned(0, sz);
diff --git a/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp
index 77246e2b202..067d7aeaae9 100644
--- a/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp
+++ b/searchlib/src/vespa/searchlib/common/fileheadercontext.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "fileheadercontext.h"
+#include "fileheadertags.h"
#include <vespa/vespalib/data/fileheader.h>
#include <chrono>
@@ -9,6 +10,7 @@ using namespace std::chrono;
namespace search::common {
using vespalib::GenericHeader;
+using namespace tags;
FileHeaderContext::FileHeaderContext() = default;
@@ -18,17 +20,17 @@ void
FileHeaderContext::addCreateAndFreezeTime(GenericHeader &header)
{
using Tag = GenericHeader::Tag;
- header.putTag(Tag("createTime", duration_cast<microseconds>(system_clock::now().time_since_epoch()).count()));
- header.putTag(Tag("freezeTime", 0));
+ header.putTag(Tag(CREATE_TIME, duration_cast<microseconds>(system_clock::now().time_since_epoch()).count()));
+ header.putTag(Tag(FREEZE_TIME, 0));
}
void
FileHeaderContext::setFreezeTime(GenericHeader &header)
{
using Tag = GenericHeader::Tag;
- if (header.hasTag("freezeTime") &&
- header.getTag("freezeTime").getType() == Tag::TYPE_INTEGER) {
- header.putTag(Tag("freezeTime", duration_cast<microseconds>(system_clock::now().time_since_epoch()).count()));
+ if (header.hasTag(FREEZE_TIME) &&
+ header.getTag(FREEZE_TIME).getType() == Tag::TYPE_INTEGER) {
+ header.putTag(Tag(FREEZE_TIME, duration_cast<microseconds>(system_clock::now().time_since_epoch()).count()));
}
}
diff --git a/searchlib/src/vespa/searchlib/common/fileheadertags.cpp b/searchlib/src/vespa/searchlib/common/fileheadertags.cpp
new file mode 100644
index 00000000000..c6c15f08918
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/fileheadertags.cpp
@@ -0,0 +1,16 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "fileheadertags.h"
+
+namespace search::tags {
+// Tag names persisted in many file headers: immutable by design, never change the string values.
+const vespalib::string FREEZE_TIME("freezeTime");
+const vespalib::string CREATE_TIME("createTime");
+const vespalib::string FROZEN("frozen");
+const vespalib::string DOCID_LIMIT("docIdLimit");
+const vespalib::string FILE_BIT_SIZE("fileBitSize");
+const vespalib::string DESC("desc");
+const vespalib::string ENTRY_SIZE("entrySize");
+const vespalib::string NUM_KEYS("numKeys");
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/fileheadertags.h b/searchlib/src/vespa/searchlib/common/fileheadertags.h
new file mode 100644
index 00000000000..c7e7385160e
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/fileheadertags.h
@@ -0,0 +1,17 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search::tags {
+// Read-only names of the tags stored in file headers; the definitions live in fileheadertags.cpp.
+extern const vespalib::string FREEZE_TIME;
+extern const vespalib::string CREATE_TIME;
+extern const vespalib::string FROZEN;
+extern const vespalib::string DOCID_LIMIT;
+extern const vespalib::string FILE_BIT_SIZE;
+extern const vespalib::string DESC;
+extern const vespalib::string ENTRY_SIZE;
+extern const vespalib::string NUM_KEYS;
+
+}
diff --git a/searchlib/src/vespa/searchlib/common/sortspec.cpp b/searchlib/src/vespa/searchlib/common/sortspec.cpp
index 04bc87f1000..40e2616367f 100644
--- a/searchlib/src/vespa/searchlib/common/sortspec.cpp
+++ b/searchlib/src/vespa/searchlib/common/sortspec.cpp
@@ -30,7 +30,7 @@ LowercaseConverter::onConvert(const ConstBufferRef & src) const
vespalib::Utf8Writer w(_buffer);
while (r.hasMore()) {
ucs4_t c = r.getChar(0xFFFD);
- c = Fast_NormalizeWordFolder::ToFold(c);
+ c = Fast_NormalizeWordFolder::lowercase_and_fold(c);
w.putChar(c);
}
return {_buffer.begin(), _buffer.size()};
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
index f2a7ec4d88b..5f001b20dda 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectordictionary.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "bitvectordictionary.h"
+#include <vespa/searchlib/common/fileheadertags.h>
#include <vespa/vespalib/data/fileheader.h>
#include <vespa/fastos/file.h>
#include <cassert>
@@ -10,44 +11,48 @@ LOG_SETUP(".diskindex.bitvectordictionary");
namespace search::diskindex {
+using namespace tags;
+
BitVectorDictionary::BitVectorDictionary()
- : _docIdLimit(),
+ : _docIdLimit(0u),
_entries(),
- _vectorSize(),
+ _vectorSize(0u),
_datFile(),
_datHeaderLen(0u)
{ }
-
BitVectorDictionary::~BitVectorDictionary() = default;
-
bool
BitVectorDictionary::open(const vespalib::string &pathPrefix,
const TuneFileRandRead &tuneFileRead,
BitVectorKeyScope scope)
{
- vespalib::string booloccIdxName = pathPrefix + "boolocc" +
- getBitVectorKeyScopeSuffix(scope);
- vespalib::string booloccDatName = pathPrefix + "boolocc.bdat";
{
+ vespalib::string booloccIdxName = pathPrefix + "boolocc" + getBitVectorKeyScopeSuffix(scope);
FastOS_File idxFile;
idxFile.OpenReadOnly(booloccIdxName.c_str());
if (!idxFile.IsOpened()) {
- LOG(warning, "Could not open bitvector idx file '%s'",
- booloccIdxName.c_str());
+ LOG(warning, "Could not open bitvector idx file '%s'", booloccIdxName.c_str());
return false;
}
vespalib::FileHeader idxHeader;
uint32_t idxHeaderLen = idxHeader.readFile(idxFile);
idxFile.SetPosition(idxHeaderLen);
- assert(idxHeader.hasTag("frozen"));
- assert(idxHeader.hasTag("docIdLimit"));
- assert(idxHeader.hasTag("numKeys"));
- assert(idxHeader.getTag("frozen").asInteger() != 0);
- _docIdLimit = idxHeader.getTag("docIdLimit").asInteger();
- uint32_t numEntries = idxHeader.getTag("numKeys").asInteger();
+ assert(idxHeader.hasTag(FROZEN));
+ assert(idxHeader.hasTag(DOCID_LIMIT));
+ assert(idxHeader.hasTag(NUM_KEYS));
+ assert(idxHeader.getTag(FROZEN).asInteger() != 0);
+ _docIdLimit = idxHeader.getTag(DOCID_LIMIT).asInteger();
+ uint32_t numEntries = idxHeader.getTag(NUM_KEYS).asInteger();
+ if (idxHeader.hasTag(ENTRY_SIZE)) {
+ _vectorSize = idxHeader.getTag(ENTRY_SIZE).asInteger();
+ } else {
+ constexpr size_t LEGACY_ALIGNMENT = 0x40;
+ BitVector::Index bytes = BitVector::numBytes(_docIdLimit);
+ _vectorSize = bytes + (-bytes & (LEGACY_ALIGNMENT - 1));
+ }
_entries.resize(numEntries);
size_t bufSize = sizeof(WordSingleKey) * numEntries;
@@ -58,7 +63,7 @@ BitVectorDictionary::open(const vespalib::string &pathPrefix,
}
}
- _vectorSize = BitVector::getFileBytes(_docIdLimit);
+ vespalib::string booloccDatName = pathPrefix + "boolocc.bdat";
_datFile = std::make_unique<FastOS_File>();
_datFile->setFAdviseOptions(tuneFileRead.getAdvise());
@@ -69,14 +74,12 @@ BitVectorDictionary::open(const vespalib::string &pathPrefix,
}
_datFile->OpenReadOnly(booloccDatName.c_str());
if (!_datFile->IsOpened()) {
- LOG(warning, "Could not open bitvector dat file '%s'",
- booloccDatName.c_str());
+ LOG(warning, "Could not open bitvector dat file '%s'", booloccDatName.c_str());
return false;
}
vespalib::FileHeader datHeader(64);
_datHeaderLen = datHeader.readFile(*_datFile);
- assert(_datFile->getSize() >=
- static_cast<int64_t>(_vectorSize * _entries.size() + _datHeaderLen));
+ assert(_datFile->getSize() >= static_cast<int64_t>(_vectorSize * _entries.size() + _datHeaderLen));
return true;
}
@@ -88,12 +91,11 @@ BitVectorDictionary::lookup(uint64_t wordNum)
key._wordNum = wordNum;
auto itr = std::lower_bound(_entries.begin(), _entries.end(), key);
if (itr == _entries.end() || key < *itr) {
- return BitVector::UP();
+ return {};
}
int64_t pos = &*itr - &_entries[0];
- return BitVector::create(_docIdLimit, *_datFile,
- ((int64_t) _vectorSize) * pos + _datHeaderLen,
- itr->_numDocs);
+ int64_t offset = ((int64_t) _vectorSize) * pos + _datHeaderLen;
+ return BitVector::create(_docIdLimit, *_datFile, offset, itr->_numDocs);
}
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
index e78b740c837..fc7fd9c2fb7 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectorfile.cpp
@@ -3,6 +3,7 @@
#include "bitvectorfile.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/common/fileheadertags.h>
#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/fileheader.h>
@@ -14,12 +15,12 @@ namespace search::diskindex {
using search::index::BitVectorWordSingleKey;
using search::common::FileHeaderContext;
+using namespace tags;
namespace {
void
-readHeader(vespalib::FileHeader &h,
- const vespalib::string &name)
+readHeader(vespalib::FileHeader &h, const vespalib::string &name)
{
Fast_BufferedFile file(32_Ki);
file.ReadOpenExisting(name.c_str());
@@ -35,13 +36,10 @@ BitVectorFileWrite::BitVectorFileWrite(BitVectorKeyScope scope)
{
}
-
BitVectorFileWrite::~BitVectorFileWrite() = default;
-
void
-BitVectorFileWrite::open(const vespalib::string &name,
- uint32_t docIdLimit,
+BitVectorFileWrite::open(const vespalib::string &name, uint32_t docIdLimit,
const TuneFileSeqWrite &tuneFileWrite,
const FileHeaderContext &fileHeaderContext)
{
@@ -65,18 +63,10 @@ BitVectorFileWrite::open(const vespalib::string &name,
makeDatHeader(fileHeaderContext);
}
- int64_t pos;
- size_t bitmapbytes;
-
- bitmapbytes = BitVector::getFileBytes(_docIdLimit);
-
- pos = static_cast<int64_t>(_numKeys) *
- static_cast<int64_t>(bitmapbytes) + _datHeaderLen;
-
- int64_t olddatsize = _datFile->getSize();
- assert(olddatsize >= pos);
- (void) olddatsize;
+ size_t bitmapbytes = BitVector::getFileBytes(_docIdLimit);
+ int64_t pos = static_cast<int64_t>(_numKeys) * static_cast<int64_t>(bitmapbytes) + _datHeaderLen;
+ assert(_datFile->getSize() >= pos);
_datFile->SetSize(pos);
assert(pos == _datFile->getPosition());
@@ -89,11 +79,12 @@ BitVectorFileWrite::makeDatHeader(const FileHeaderContext &fileHeaderContext)
vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
using Tag = vespalib::GenericHeader::Tag;
fileHeaderContext.addTags(h, _datFile->GetFileName());
- h.putTag(Tag("docIdLimit", _docIdLimit));
- h.putTag(Tag("numKeys", _numKeys));
- h.putTag(Tag("frozen", 0));
- h.putTag(Tag("fileBitSize", 0));
- h.putTag(Tag("desc", "Bitvector data file"));
+ h.putTag(Tag(ENTRY_SIZE, (int64_t) BitVector::getFileBytes(_docIdLimit)));
+ h.putTag(Tag(DOCID_LIMIT, _docIdLimit));
+ h.putTag(Tag(NUM_KEYS, _numKeys));
+ h.putTag(Tag(FROZEN, 0));
+ h.putTag(Tag(FILE_BIT_SIZE, 0));
+ h.putTag(Tag(DESC, "Bitvector data file"));
_datFile->SetPosition(0);
_datHeaderLen = h.writeFile(*_datFile);
_datFile->Flush();
@@ -107,9 +98,9 @@ BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize)
using Tag = vespalib::GenericHeader::Tag;
readHeader(h, _datFile->GetFileName());
FileHeaderContext::setFreezeTime(h);
- h.putTag(Tag("numKeys", _numKeys));
- h.putTag(Tag("frozen", 1));
- h.putTag(Tag("fileBitSize", fileBitSize));
+ h.putTag(Tag(NUM_KEYS, _numKeys));
+ h.putTag(Tag(FROZEN, 1));
+ h.putTag(Tag(FILE_BIT_SIZE, fileBitSize));
bool sync_ok = _datFile->Sync();
assert(sync_ok);
assert(h.getSize() == _datHeaderLen);
@@ -121,14 +112,12 @@ BitVectorFileWrite::updateDatHeader(uint64_t fileBitSize)
void
-BitVectorFileWrite::addWordSingle(uint64_t wordNum,
- const BitVector &bitVector)
+BitVectorFileWrite::addWordSingle(uint64_t wordNum, const BitVector &bitVector)
{
assert(bitVector.size() == _docIdLimit);
bitVector.invalidateCachedCount();
Parent::addWordSingle(wordNum, bitVector.countTrueBits());
- _datFile->WriteBuf(bitVector.getStart(),
- bitVector.getFileBytes());
+ _datFile->WriteBuf(bitVector.getStart(), bitVector.getFileBytes());
}
@@ -153,21 +142,17 @@ BitVectorFileWrite::sync()
void
BitVectorFileWrite::close()
{
- size_t bitmapbytes = BitVector::getFileBytes(_docIdLimit);
-
- if (_datFile != nullptr) {
- if (_datFile->IsOpened()) {
- uint64_t pos = _datFile->getPosition();
- assert(pos == static_cast<uint64_t>(_numKeys) *
- static_cast<uint64_t>(bitmapbytes) + _datHeaderLen);
- (void) bitmapbytes;
- _datFile->alignEndForDirectIO();
- updateDatHeader(pos * 8);
- bool close_ok = _datFile->Close();
- assert(close_ok);
- }
- _datFile.reset();
+ if (_datFile && _datFile->IsOpened()) {
+ size_t bitmapbytes = BitVector::getFileBytes(_docIdLimit);
+ uint64_t pos = _datFile->getPosition();
+ assert(pos == static_cast<uint64_t>(_numKeys) * static_cast<uint64_t>(bitmapbytes) + _datHeaderLen);
+ (void) bitmapbytes;
+ _datFile->alignEndForDirectIO();
+ updateDatHeader(pos * 8);
+ bool close_ok = _datFile->Close();
+ assert(close_ok);
}
+ _datFile.reset();
Parent::close();
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
index ec436205578..7e3f0f5f258 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.cpp
@@ -2,6 +2,7 @@
#include "bitvectoridxfile.h"
#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/common/fileheadertags.h>
#include <vespa/searchlib/index/bitvectorkeys.h>
#include <vespa/searchlib/util/file_settings.h>
#include <vespa/vespalib/data/fileheader.h>
@@ -13,6 +14,7 @@ namespace search::diskindex {
using search::index::BitVectorWordSingleKey;
using search::common::FileHeaderContext;
+using namespace tags;
namespace {
@@ -45,10 +47,9 @@ BitVectorIdxFileWrite::idxSize() const
}
void
-BitVectorIdxFileWrite::open(const vespalib::string &name,
- uint32_t docIdLimit,
- const TuneFileSeqWrite &tuneFileWrite,
- const FileHeaderContext &fileHeaderContext)
+BitVectorIdxFileWrite::open(const vespalib::string &name, uint32_t docIdLimit,
+ const TuneFileSeqWrite &tuneFileWrite,
+ const FileHeaderContext &fileHeaderContext)
{
if (_numKeys != 0) {
assert(docIdLimit == _docIdLimit);
@@ -90,13 +91,14 @@ BitVectorIdxFileWrite::makeIdxHeader(const FileHeaderContext &fileHeaderContext)
vespalib::FileHeader h(FileSettings::DIRECTIO_ALIGNMENT);
using Tag = vespalib::GenericHeader::Tag;
fileHeaderContext.addTags(h, _idxFile->GetFileName());
- h.putTag(Tag("docIdLimit", _docIdLimit));
- h.putTag(Tag("numKeys", _numKeys));
- h.putTag(Tag("frozen", 0));
+ h.putTag(Tag(ENTRY_SIZE, (int64_t) BitVector::getFileBytes(_docIdLimit)));
+ h.putTag(Tag(DOCID_LIMIT, _docIdLimit));
+ h.putTag(Tag(NUM_KEYS, _numKeys));
+ h.putTag(Tag(FROZEN, 0));
if (_scope != BitVectorKeyScope::SHARED_WORDS) {
- h.putTag(Tag("fileBitSize", 0));
+ h.putTag(Tag(FILE_BIT_SIZE, 0));
}
- h.putTag(Tag("desc", "Bitvector dictionary file, single words"));
+ h.putTag(Tag(DESC, "Bitvector dictionary file, single words"));
_idxFile->SetPosition(0);
_idxHeaderLen = h.writeFile(*_idxFile);
_idxFile->Flush();
@@ -109,10 +111,10 @@ BitVectorIdxFileWrite::updateIdxHeader(uint64_t fileBitSize)
using Tag = vespalib::GenericHeader::Tag;
readHeader(h, _idxFile->GetFileName());
FileHeaderContext::setFreezeTime(h);
- h.putTag(Tag("numKeys", _numKeys));
- h.putTag(Tag("frozen", 1));
+ h.putTag(Tag(NUM_KEYS, _numKeys));
+ h.putTag(Tag(FROZEN, 1));
if (_scope != BitVectorKeyScope::SHARED_WORDS) {
- h.putTag(Tag("fileBitSize", fileBitSize));
+ h.putTag(Tag(FILE_BIT_SIZE, fileBitSize));
}
bool sync_ok = _idxFile->Sync();
assert(sync_ok);
@@ -160,17 +162,15 @@ BitVectorIdxFileWrite::sync()
void
BitVectorIdxFileWrite::close()
{
- if (_idxFile) {
- if (_idxFile->IsOpened()) {
- uint64_t pos = _idxFile->getPosition();
- assert(pos == idxSize());
- _idxFile->alignEndForDirectIO();
- updateIdxHeader(pos * 8);
- bool close_ok = _idxFile->Close();
- assert(close_ok);
- }
- _idxFile.reset();
+ if (_idxFile && _idxFile->IsOpened()) {
+ uint64_t pos = _idxFile->getPosition();
+ assert(pos == idxSize());
+ _idxFile->alignEndForDirectIO();
+ updateIdxHeader(pos * 8);
+ bool close_ok = _idxFile->Close();
+ assert(close_ok);
}
+ _idxFile.reset();
}
}
diff --git a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h
index 533f5620ea2..9f1e5ce0f80 100644
--- a/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h
+++ b/searchlib/src/vespa/searchlib/diskindex/bitvectoridxfile.h
@@ -2,10 +2,10 @@
#pragma once
+#include "bitvectorkeyscope.h"
#include <vespa/searchlib/common/bitvector.h>
#include <vespa/searchlib/common/tunefileinfo.h>
#include <vespa/vespalib/stllike/string.h>
-#include "bitvectorkeyscope.h"
class Fast_BufferedFile;
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
index cd9dbff99cb..4637ad5a4e8 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.cpp
@@ -455,27 +455,15 @@ FuzzyAlgorithm::lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm
return vespalib::fuzzy_matching_algorithm_from_string(value, default_value);
}
-const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive");
-const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false);
-bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) {
- return lookupBool(props, NAME, fallback);
-}
-
-const vespalib::string CreatePostingListWhenNonStrict::NAME("vespa.matching.create_postinglist_when_non_strict");
-const bool CreatePostingListWhenNonStrict::DEFAULT_VALUE(true);
-bool CreatePostingListWhenNonStrict::check(const Properties &props, bool fallback) {
+const vespalib::string SortBlueprintsByCost::NAME("vespa.matching.sort_blueprints_by_cost");
+const bool SortBlueprintsByCost::DEFAULT_VALUE(false);
+bool SortBlueprintsByCost::check(const Properties &props, bool fallback) {
return lookupBool(props, NAME, fallback);
}
-const vespalib::string UseEstimateForFetchPostings::NAME("vespa.matching.use_estimate_for_fetch_postings");
-const bool UseEstimateForFetchPostings::DEFAULT_VALUE(false);
-bool UseEstimateForFetchPostings::check(const Properties &props, bool fallback) {
- return lookupBool(props, NAME, fallback);
-}
-
-const vespalib::string UseThreadBundleForFetchPostings::NAME("vespa.matching.use_thread_bundle_for_fetch_postings");
-const bool UseThreadBundleForFetchPostings::DEFAULT_VALUE(false);
-bool UseThreadBundleForFetchPostings::check(const Properties &props, bool fallback) {
+const vespalib::string AlwaysMarkPhraseExpensive::NAME("vespa.matching.always_mark_phrase_expensive");
+const bool AlwaysMarkPhraseExpensive::DEFAULT_VALUE(false);
+bool AlwaysMarkPhraseExpensive::check(const Properties &props, bool fallback) {
return lookupBool(props, NAME, fallback);
}
diff --git a/searchlib/src/vespa/searchlib/fef/indexproperties.h b/searchlib/src/vespa/searchlib/fef/indexproperties.h
index 0183fdf1a13..db8de8209a9 100644
--- a/searchlib/src/vespa/searchlib/fef/indexproperties.h
+++ b/searchlib/src/vespa/searchlib/fef/indexproperties.h
@@ -336,12 +336,10 @@ namespace matching {
static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props);
static vespalib::FuzzyMatchingAlgorithm lookup(const Properties& props, vespalib::FuzzyMatchingAlgorithm default_value);
};
-
/**
- * When enabled, the unpacking part of the phrase iterator will be tagged as expensive
- * under all intermediate iterators, not only AND.
+ * Sort blueprints based on relative cost estimate rather than est_hits
**/
- struct AlwaysMarkPhraseExpensive {
+ struct SortBlueprintsByCost {
static const vespalib::string NAME;
static const bool DEFAULT_VALUE;
static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); }
@@ -349,26 +347,10 @@ namespace matching {
};
/**
- * When enabled posting lists can be created on the fly even if iterator is not strict.
- **/
- struct CreatePostingListWhenNonStrict {
- static const vespalib::string NAME;
- static const bool DEFAULT_VALUE;
- static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); }
- static bool check(const Properties &props, bool fallback);
- };
-
- /**
- * When enabled posting lists can be created on the fly even if iterator is not strict.
+ * When enabled, the unpacking part of the phrase iterator will be tagged as expensive
+ * under all intermediate iterators, not only AND.
**/
- struct UseEstimateForFetchPostings {
- static const vespalib::string NAME;
- static const bool DEFAULT_VALUE;
- static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); }
- static bool check(const Properties &props, bool fallback);
- };
-
- struct UseThreadBundleForFetchPostings {
+ struct AlwaysMarkPhraseExpensive {
static const vespalib::string NAME;
static const bool DEFAULT_VALUE;
static bool check(const Properties &props) { return check(props, DEFAULT_VALUE); }
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
index 5c28f1814d5..aadc5300ede 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.cpp
@@ -56,14 +56,12 @@ RankSetup::RankSetup(const BlueprintFactory &factory, const IIndexEnvironment &i
_dumpFeatures(),
_warnings(),
_feature_rename_map(),
+ _sort_blueprints_by_cost(false),
_ignoreDefaultRankFeatures(false),
_compiled(false),
_compileError(false),
_degradationAscendingOrder(false),
_always_mark_phrase_expensive(false),
- _create_postinglist_when_non_strict(true),
- _use_estimate_for_fetch_postings(false),
- _use_thread_bundle_for_fetch_postings(false),
_diversityAttribute(),
_diversityMinGroups(1),
_diversityCutoffFactor(10.0),
@@ -137,10 +135,8 @@ RankSetup::configure()
_mutateOnSummary._attribute = mutate::on_summary::Attribute::lookup(_indexEnv.getProperties());
_mutateOnSummary._operation = mutate::on_summary::Operation::lookup(_indexEnv.getProperties());
_mutateAllowQueryOverride = mutate::AllowQueryOverride::check(_indexEnv.getProperties());
+ _sort_blueprints_by_cost = matching::SortBlueprintsByCost::check(_indexEnv.getProperties());
_always_mark_phrase_expensive = matching::AlwaysMarkPhraseExpensive::check(_indexEnv.getProperties());
- _create_postinglist_when_non_strict = matching::CreatePostingListWhenNonStrict::check(_indexEnv.getProperties());
- _use_estimate_for_fetch_postings = matching::UseEstimateForFetchPostings::check(_indexEnv.getProperties());
- _use_thread_bundle_for_fetch_postings = matching::UseThreadBundleForFetchPostings::check(_indexEnv.getProperties());
}
void
diff --git a/searchlib/src/vespa/searchlib/fef/ranksetup.h b/searchlib/src/vespa/searchlib/fef/ranksetup.h
index 04659955490..d8b977a0331 100644
--- a/searchlib/src/vespa/searchlib/fef/ranksetup.h
+++ b/searchlib/src/vespa/searchlib/fef/ranksetup.h
@@ -65,14 +65,12 @@ private:
std::vector<vespalib::string> _dumpFeatures;
Warnings _warnings;
StringStringMap _feature_rename_map;
+ bool _sort_blueprints_by_cost;
bool _ignoreDefaultRankFeatures;
bool _compiled;
bool _compileError;
bool _degradationAscendingOrder;
bool _always_mark_phrase_expensive;
- bool _create_postinglist_when_non_strict;
- bool _use_estimate_for_fetch_postings;
- bool _use_thread_bundle_for_fetch_postings;
vespalib::string _diversityAttribute;
uint32_t _diversityMinGroups;
double _diversityCutoffFactor;
@@ -225,9 +223,6 @@ public:
return _degradationAscendingOrder;
}
bool always_mark_phrase_expensive() const noexcept { return _always_mark_phrase_expensive; }
- bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; }
- bool use_estimate_for_fetch_postings() const noexcept { return _use_estimate_for_fetch_postings; }
- bool use_thread_bundle_for_fetch_postings() const noexcept { return _use_thread_bundle_for_fetch_postings; }
/** get number of hits to collect during graceful degradation in match phase */
uint32_t getDegradationMaxHits() const {
return _degradationMaxHits;
@@ -465,6 +460,7 @@ public:
const MutateOperation & getMutateOnSummary() const { return _mutateOnSummary; }
bool allowMutateQueryOverride() const { return _mutateAllowQueryOverride; }
+ bool sort_blueprints_by_cost() const noexcept { return _sort_blueprints_by_cost; }
};
}
diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp
index 774e17d015a..b3b42a179d8 100644
--- a/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp
+++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.cpp
@@ -1,14 +1,9 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "ftlib.h"
-#include "dummy_dependency_handler.h"
#include <vespa/searchlib/features/utils.h>
-#include <vespa/vespalib/util/stringfmt.h>
#include <vespa/vespalib/text/stringtokenizer.h>
-#include <vespa/log/log.h>
-LOG_SETUP(".ftlib");
-
using namespace search::features;
using namespace search::fef;
using namespace search::fef::test;
@@ -110,280 +105,3 @@ FtUtil::toRankResult(const vespalib::string & baseName, const vespalib::string &
}
FtIndex::~FtIndex() = default;
-
-//---------------------------------------------------------------------------------------------------------------------
-// FtTestApp
-//---------------------------------------------------------------------------------------------------------------------
-void
-FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params)
-{
- search::fef::test::IndexEnvironment ie;
- FT_SETUP_FAIL(prototype, ie, params);
-}
-
-void
-FtTestApp::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
- const StringList &params)
-{
- FT_LOG(prototype, env, params);
- search::fef::Blueprint::UP bp = prototype.createInstance();
- DummyDependencyHandler deps(*bp);
- EXPECT_TRUE(!bp->setup(env, params));
-}
-
-void
-FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params,
- const StringList &expectedIn, const StringList &expectedOut)
-{
- search::fef::test::IndexEnvironment ie;
- FT_SETUP_OK(prototype, ie, params, expectedIn, expectedOut);
-}
-
-void
-FtTestApp::FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
- const StringList &params, const StringList &expectedIn, const StringList &expectedOut)
-{
- FT_LOG(prototype, env, params);
- search::fef::Blueprint::UP bp = prototype.createInstance();
- DummyDependencyHandler deps(*bp);
- ASSERT_TRUE(bp->setup(env, params));
- FT_EQUAL(expectedIn, deps.input, "In, ");
- FT_EQUAL(expectedOut, deps.output, "Out,");
-}
-
-void
-FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName)
-{
- StringList empty;
- FT_DUMP(factory, baseName, empty);
-}
-
-void
-FtTestApp::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
- search::fef::test::IndexEnvironment &env)
-{
- StringList empty;
- FT_DUMP(factory, baseName, env, empty);
-}
-
-void
-FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
- const StringList &expected)
-{
- search::fef::test::IndexEnvironment ie;
- FT_DUMP(factory, baseName, ie, expected);
-}
-
-void
-FtTestApp::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
- search::fef::test::IndexEnvironment &env,
- const StringList &expected)
-{
- FtDumpFeatureVisitor dfv;
- search::fef::Blueprint::SP bp = factory.createBlueprint(baseName);
- if ( ! bp) {
- LOG(error, "Blueprint '%s' does not exist in factory, did you forget to add it?", baseName.c_str());
- ASSERT_TRUE(bp);
- }
- bp->visitDumpFeatures(env, dfv);
- FT_EQUAL(expected, dfv.features(), "Dump");
-}
-
-void
-FtTestApp::FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual,
- const vespalib::string &prefix)
-{
- FT_LOG(prefix + " expected", expected);
- FT_LOG(prefix + " actual ", actual);
- EXPECT_EQUAL(expected.size(), actual.size());
- ASSERT_TRUE(expected.size() == actual.size());
- for (uint32_t i = 0; i < expected.size(); ++i) {
- EXPECT_EQUAL(expected[i], actual[i]);
- ASSERT_TRUE(expected[i] == actual[i]);
- }
-}
-
-void
-FtTestApp::FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
- const StringList &params)
-{
- LOG(info, "Testing blueprint '%s'.", prototype.getBaseName().c_str());
- std::vector<vespalib::string> arr;
- for (const auto & it : env.getFields()) {
- arr.push_back(it.name());
- }
- FT_LOG("Environment ", arr);
- FT_LOG("Parameters ", params);
-}
-
-void
-FtTestApp::FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr)
-{
- vespalib::string str = prefix + " = [ ";
- for (uint32_t i = 0; i < arr.size(); ++i) {
- str.append("'").append(arr[i]).append("'");
- if (i < arr.size() - 1) {
- str.append(", ");
- }
- }
- str.append(" ]");
- LOG(info, "%s", str.c_str());
-}
-
-void
-FtTestApp::FT_SETUP(FtFeatureTest &test, const vespalib::string &query, const StringMap &index,
- uint32_t docId)
-{
- LOG(info, "Setup test for query '%s'.", query.c_str());
-
- // Add all query terms.
- FtQueryEnvironment &queryEnv = test.getQueryEnv();
- for (uint32_t i = 0; i < query.size(); ++i) {
- queryEnv.getBuilder().addAllFields();
- }
- ASSERT_TRUE(test.setup());
-
- // Add all occurences.
- search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
- for (auto it = index.begin();it != index.end(); ++it) {
- ASSERT_TRUE(mdb->setFieldLength(it->first, it->second.size()));
- for (uint32_t i = 0; i < it->second.size(); ++i) {
- size_t pos = query.find_first_of(it->second[i]);
- if (pos != vespalib::string::npos) {
- LOG(debug, "Occurence of '%c' added to field '%s' at position %d.", query[pos], it->first.c_str(), i);
- ASSERT_TRUE(mdb->addOccurence(it->first, pos, i));
- }
- }
- }
- ASSERT_TRUE(mdb->apply(docId));
-}
-
-void
-FtTestApp::FT_SETUP(FtFeatureTest & test, const std::vector<FtQueryTerm> & query, const StringVectorMap & index,
- uint32_t docId)
-{
- setupQueryEnv(test.getQueryEnv(), query);
- ASSERT_TRUE(test.setup());
-
- search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
-
- // Add all occurences.
- for (auto itr = index.begin(); itr != index.end(); ++itr) {
- ASSERT_TRUE(mdb->setFieldLength(itr->first, itr->second.size()));
- for (uint32_t i = 0; i < itr->second.size(); ++i) {
- auto fitr = query.begin();
- for (;;) {
- fitr = std::find(fitr, query.end(), FtQueryTerm(itr->second[i]));
- if (fitr != query.end()) {
- uint32_t termId = fitr - query.begin();
- LOG(debug, "Occurence of '%s' added to field '%s' at position %u.", fitr->term.c_str(), itr->first.c_str(), i);
- ASSERT_TRUE(mdb->addOccurence(itr->first, termId, i));
- ++fitr;
- } else {
- break;
- }
- }
- }
- }
- ASSERT_TRUE(mdb->apply(docId));
-}
-
-void
-FtTestApp::FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId)
-{
- setupQueryEnv(test.getQueryEnv(), query);
- ASSERT_TRUE(test.setup());
- search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
-
- // Add all occurences.
- for (auto itr = index.index.begin(); itr != index.index.end(); ++itr) {
- const FtIndex::Field &field = itr->second;
- for (size_t e = 0; e < field.size(); ++e) {
- const FtIndex::Element &element = field[e];
- ASSERT_TRUE(mdb->addElement(itr->first, element.weight, element.tokens.size()));
- for (size_t t = 0; t < element.tokens.size(); ++t) {
- const vespalib::string &token = element.tokens[t];
- for (size_t q = 0; q < query.size(); ++q) {
- if (query[q].term == token) {
- ASSERT_TRUE(mdb->addOccurence(itr->first, q, t, e));
- }
- }
- }
- }
- }
- ASSERT_TRUE(mdb->apply(docId));
-}
-
-void
-FtTestApp::setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query)
-{
- // Add all query terms.
- for (uint32_t i = 0; i < query.size(); ++i) {
- queryEnv.getBuilder().addAllFields();
- queryEnv.getTerms()[i].setPhraseLength(1);
- queryEnv.getTerms()[i].setUniqueId(i);
- queryEnv.getTerms()[i].setWeight(query[i].termWeight);
- if (i > 0) {
- vespalib::string from = vespalib::make_string("vespa.term.%u.connexity", i);
- vespalib::string to = vespalib::make_string("%u", i - 1);
- vespalib::string connexity = vespalib::make_string("%f", query[i].connexity);
- queryEnv.getProperties().add(from, to);
- queryEnv.getProperties().add(from, connexity);
- }
- vespalib::string term = vespalib::make_string("vespa.term.%u.significance", i);
- vespalib::string significance = vespalib::make_string("%f", query[i].significance);
- queryEnv.getProperties().add(term, significance);
- LOG(debug, "Add term node: '%s'", query[i].term.c_str());
- }
-}
-
-void
-FtTestApp::setupFieldMatch(FtFeatureTest & ft, const vespalib::string & indexName,
- const vespalib::string & query, const vespalib::string & field,
- const fieldmatch::Params * params, uint32_t totalTermWeight, feature_t totalSignificance,
- uint32_t docId)
-{
- ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, FieldInfo::CollectionType::SINGLE, indexName);
-
- if (params != nullptr) {
- Properties & p = ft.getIndexEnv().getProperties();
- p.add("fieldMatch(" + indexName + ").proximityLimit", vespalib::make_string("%u", params->getProximityLimit()));
- p.add("fieldMatch(" + indexName + ").maxAlternativeSegmentations", vespalib::make_string("%u", params->getMaxAlternativeSegmentations()));
- p.add("fieldMatch(" + indexName + ").maxOccurrences", vespalib::make_string("%u", params->getMaxOccurrences()));
- p.add("fieldMatch(" + indexName + ").proximityCompletenessImportance", vespalib::make_string("%f", params->getProximityCompletenessImportance()));
- p.add("fieldMatch(" + indexName + ").relatednessImportance", vespalib::make_string("%f", params->getRelatednessImportance()));
- p.add("fieldMatch(" + indexName + ").earlinessImportance", vespalib::make_string("%f", params->getEarlinessImportance()));
- p.add("fieldMatch(" + indexName + ").segmentProximityImportance", vespalib::make_string("%f", params->getSegmentProximityImportance()));
- p.add("fieldMatch(" + indexName + ").occurrenceImportance", vespalib::make_string("%f", params->getOccurrenceImportance()));
- p.add("fieldMatch(" + indexName + ").fieldCompletenessImportance", vespalib::make_string("%f", params->getFieldCompletenessImportance()));
- for (double it : params->getProximityTable()) {
- p.add("fieldMatch(" + indexName + ").proximityTable", vespalib::make_string("%f", it));
- }
- }
-
- if (totalTermWeight > 0) {
- ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermWeight",
- vespalib::make_string("%u", totalTermWeight));
- }
-
- if (totalSignificance > 0.0f) {
- ft.getQueryEnv().getProperties().add("fieldMatch(" + indexName + ").totalTermSignificance",
- vespalib::make_string("%f", totalSignificance));
- }
-
- std::map<vespalib::string, std::vector<vespalib::string> > index;
- index[indexName] = FtUtil::tokenize(field);
- FT_SETUP(ft, FtUtil::toQuery(query), index, docId);
-}
-
-
-RankResult
-FtTestApp::toRankResult(const vespalib::string & baseName,
- const vespalib::string & result,
- const vespalib::string & separator)
-{
- return FtUtil::toRankResult(baseName, result, separator);
-}
-
-
-
diff --git a/searchlib/src/vespa/searchlib/fef/test/ftlib.h b/searchlib/src/vespa/searchlib/fef/test/ftlib.h
index be52b407369..2281151f9cf 100644
--- a/searchlib/src/vespa/searchlib/fef/test/ftlib.h
+++ b/searchlib/src/vespa/searchlib/fef/test/ftlib.h
@@ -8,10 +8,8 @@
#include "queryenvironment.h"
#include "queryenvironmentbuilder.h"
#include "rankresult.h"
-#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/common/feature.h>
#include <vespa/searchlib/query/weight.h>
-#include <vespa/searchlib/features/fieldmatch/params.h>
#include <vespa/searchlib/fef/fef.h>
using search::feature_t;
@@ -183,58 +181,3 @@ struct FtIndex {
return *this;
}
};
-
-//---------------------------------------------------------------------------------------------------------------------
-// FtTestApp
-//---------------------------------------------------------------------------------------------------------------------
-struct FtTestApp : public vespalib::TestApp {
- using string = vespalib::string;
- static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params);
- static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
- const StringList &params);
- static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params,
- const StringList &expectedIn, const StringList &expectedOut);
- static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
- const StringList &params, const StringList &expectedIn, const StringList &expectedOut);
-
- static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName);
- static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
- search::fef::test::IndexEnvironment &env);
- static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
- const StringList &expected);
- static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
- search::fef::test::IndexEnvironment &env,
- const StringList &expected);
-
- static void FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual,
- const vespalib::string & prefix = "");
-
- static void FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, const StringList &params);
- static void FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr);
-
-
- static void FT_SETUP(FtFeatureTest & test, const vespalib::string & query, const StringMap & index, uint32_t docId);
- static void FT_SETUP(FtFeatureTest & test, const FtQuery & query, const StringVectorMap & index, uint32_t docId);
-
- static void FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId);
-
- static void setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query);
- static void setupFieldMatch(FtFeatureTest & test, const vespalib::string & indexName,
- const vespalib::string & query, const vespalib::string & field,
- const search::features::fieldmatch::Params * params,
- uint32_t totalTermWeight, feature_t totalSignificance,
- uint32_t docId);
-
- static search::fef::test::RankResult toRankResult(const vespalib::string & baseName,
- const vespalib::string & result,
- const vespalib::string & separator = " ");
-
- template <typename T>
- static bool assertCreateInstance(const T & prototype, const vespalib::string & baseName) {
- search::fef::Blueprint::UP bp = prototype.createInstance();
- if (!EXPECT_TRUE(dynamic_cast<T*>(bp.get()) != NULL)) return false;
- if (!EXPECT_EQUAL(bp->getBaseName(), baseName)) return false;
- return true;
- }
-};
-
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
index b7a1719fb5f..ab3bd512d1d 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
@@ -2,20 +2,83 @@
#include "query_term_simple.h"
#include "base.h"
+#include <vespa/vespalib/locale/c.h>
#include <vespa/vespalib/objects/visit.h>
#include <vespa/vespalib/util/classname.h>
-#include <vespa/vespalib/locale/c.h>
#include <cmath>
#include <limits>
+#include <charconv>
namespace {
template <typename N>
-bool isValidInteger(int64_t value)
+constexpr bool isValidInteger(int64_t value) noexcept
{
- return value >= std::numeric_limits<N>::min() && value <= std::numeric_limits<N>::max();
+ return (value >= std::numeric_limits<N>::min()) &&
+ (value <= std::numeric_limits<N>::max());
+}
+
+constexpr bool isRepresentableByInt64(double d) noexcept {
+ return (d > double(std::numeric_limits<int64_t>::min())) &&
+ (d < double(std::numeric_limits<int64_t>::max()));
}
+bool isFullRange(vespalib::stringref s) noexcept {
+ const size_t sz(s.size());
+ return (sz >= 3u) &&
+ (s[0] == '<' || s[0] == '[') &&
+ (s[sz-1] == '>' || s[sz-1] == ']');
+}
+
+struct IntDecoder {
+ static int64_t fromstr(const char * q, const char * qend, const char ** end) noexcept {
+ int64_t v(0);
+ for (;q < qend && (isspace(*q) || (*q == '+')); q++);
+ std::from_chars_result err = std::from_chars(q, qend, v, 10);
+ if (err.ec == std::errc::result_out_of_range) {
+ v = (*q == '-') ? std::numeric_limits<int64_t>::min() : std::numeric_limits<int64_t>::max();
+ }
+ *end = err.ptr;
+ return v;
+ }
+ static int64_t nearestDownwd(int64_t n, int64_t min) noexcept { return (n > min ? n - 1 : n); }
+ static int64_t nearestUpward(int64_t n, int64_t max) noexcept { return (n < max ? n + 1 : n); }
+};
+
+template <typename T>
+struct FloatDecoder {
+ static T fromstr(const char * q, const char * qend, const char ** end) noexcept {
+ T v(0);
+#if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION < 180000
+ vespalib::string tmp(q, qend - q);
+ char* tmp_end = nullptr;
+ const char *tmp_cstring = tmp.c_str();
+ if constexpr (std::is_same_v<T, float>) {
+ v = vespalib::locale::c::strtof_au(tmp_cstring, &tmp_end);
+ } else {
+ v = vespalib::locale::c::strtod_au(tmp_cstring, &tmp_end);
+ }
+ if (end != nullptr) {
+ *end = (tmp_end != nullptr) ? (q + (tmp_end - tmp_cstring)) : nullptr;
+ }
+#else
+ for (;q < qend && (isspace(*q) || (*q == '+')); q++);
+ std::from_chars_result err = std::from_chars(q, qend, v);
+ if (err.ec == std::errc::result_out_of_range) {
+ v = (*q == '-') ? -std::numeric_limits<T>::infinity() : std::numeric_limits<T>::infinity();
+ }
+ *end = err.ptr;
+#endif
+ return v;
+ }
+ static T nearestDownwd(T n, T min) noexcept {
+ return std::nextafter(n, min);
+ }
+ static T nearestUpward(T n, T max) noexcept {
+ return std::nextafter(n, max);
+ }
+};
+
}
namespace search {
@@ -29,15 +92,15 @@ QueryTermSimple::visitMembers(vespalib::ObjectVisitor & visitor) const
template <typename N>
QueryTermSimple::RangeResult<N>
-QueryTermSimple::getFloatRange() const
+QueryTermSimple::getFloatRange() const noexcept
{
- double lowRaw, highRaw;
- bool valid = getAsDoubleTerm(lowRaw, highRaw);
+ N lowRaw, highRaw;
+ bool valid = getAsFloatTerm(lowRaw, highRaw);
RangeResult<N> res;
res.valid = valid;
if (!valid) {
- res.low = std::numeric_limits<N>::max();
- res.high = - std::numeric_limits<N>::max();
+ res.low = std::numeric_limits<N>::infinity();
+ res.high = -std::numeric_limits<N>::infinity();
res.adjusted = true;
} else {
res.low = lowRaw;
@@ -46,25 +109,16 @@ QueryTermSimple::getFloatRange() const
return res;
}
-namespace {
-
-bool isRepresentableByInt64(double d) {
- return (d > double(std::numeric_limits<int64_t>::min()))
- && (d < double(std::numeric_limits<int64_t>::max()));
-}
-
-}
-
bool
-QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const
+QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const noexcept
{
bool valid = getAsIntegerTerm(low, high);
if ( ! valid ) {
double l(0), h(0);
- valid = getAsDoubleTerm(l, h);
+ valid = getAsFloatTerm(l, h);
if (valid) {
if ((l == h) && isRepresentableByInt64(l)) {
- low = high = std::round(l);
+ low = high = static_cast<int64_t>(std::round(l));
} else {
if (l > double(std::numeric_limits<int64_t>::min())) {
if (l < double(std::numeric_limits<int64_t>::max())) {
@@ -88,7 +142,7 @@ QueryTermSimple::getRangeInternal(int64_t & low, int64_t & high) const
template <typename N>
QueryTermSimple::RangeResult<N>
-QueryTermSimple::getIntegerRange() const
+QueryTermSimple::getIntegerRange() const noexcept
{
int64_t lowRaw, highRaw;
bool valid = getRangeInternal(lowRaw, highRaw);
@@ -121,83 +175,72 @@ QueryTermSimple::getIntegerRange() const
template <>
QueryTermSimple::RangeResult<float>
-QueryTermSimple::getRange() const
+QueryTermSimple::getRange() const noexcept
{
return getFloatRange<float>();
}
template <>
QueryTermSimple::RangeResult<double>
-QueryTermSimple::getRange() const
+QueryTermSimple::getRange() const noexcept
{
return getFloatRange<double>();
}
template <>
QueryTermSimple::RangeResult<int8_t>
-QueryTermSimple::getRange() const
+QueryTermSimple::getRange() const noexcept
{
return getIntegerRange<int8_t>();
}
template <>
QueryTermSimple::RangeResult<int16_t>
-QueryTermSimple::getRange() const
+QueryTermSimple::getRange() const noexcept
{
return getIntegerRange<int16_t>();
}
template <>
QueryTermSimple::RangeResult<int32_t>
-QueryTermSimple::getRange() const
+QueryTermSimple::getRange() const noexcept
{
return getIntegerRange<int32_t>();
}
template <>
QueryTermSimple::RangeResult<int64_t>
-QueryTermSimple::getRange() const
+QueryTermSimple::getRange() const noexcept
{
return getIntegerRange<int64_t>();
}
-template <int B>
-struct IntDecoder {
- static int64_t fromstr(const char * v, char ** end) { return strtoll(v, end, B); }
- static int64_t nearestDownwd(int64_t n, int64_t min) { return (n > min ? n - 1 : n); }
- static int64_t nearestUpward(int64_t n, int64_t max) { return (n < max ? n + 1 : n); }
-};
-
-struct DoubleDecoder {
- static double fromstr(const char * v, char ** end) { return vespalib::locale::c::strtod(v, end); }
- static double nearestDownwd(double n, double min) { return std::nextafterf(n, min); }
- static double nearestUpward(double n, double max) { return std::nextafterf(n, max); }
-};
-
-bool QueryTermSimple::getAsIntegerTerm(int64_t & lower, int64_t & upper) const
+bool QueryTermSimple::getAsIntegerTerm(int64_t & lower, int64_t & upper) const noexcept
{
lower = std::numeric_limits<int64_t>::min();
upper = std::numeric_limits<int64_t>::max();
- return getAsNumericTerm(lower, upper, IntDecoder<10>());
+ return getAsNumericTerm(lower, upper, IntDecoder());
+}
+
+bool QueryTermSimple::getAsFloatTerm(double & lower, double & upper) const noexcept
+{
+ lower = -std::numeric_limits<double>::infinity();
+ upper = std::numeric_limits<double>::infinity();
+ return getAsNumericTerm(lower, upper, FloatDecoder<double>());
}
-bool QueryTermSimple::getAsDoubleTerm(double & lower, double & upper) const
+bool QueryTermSimple::getAsFloatTerm(float & lower, float & upper) const noexcept
{
- lower = - std::numeric_limits<double>::max();
- upper = std::numeric_limits<double>::max();
- return getAsNumericTerm(lower, upper, DoubleDecoder());
+ lower = -std::numeric_limits<float>::infinity();
+ upper = std::numeric_limits<float>::infinity();
+ return getAsNumericTerm(lower, upper, FloatDecoder<float>());
}
QueryTermSimple::~QueryTermSimple() = default;
namespace {
-bool isFullRange(vespalib::stringref s) {
- const size_t sz(s.size());
- return (sz >= 3u) &&
- (s[0] == '<' || s[0] == '[') &&
- (s[sz-1] == '>' || s[sz-1] == ']');
-}
+
}
@@ -232,7 +275,7 @@ QueryTermSimple::QueryTermSimple(const string & term_, Type type)
}
_valid = (numParts >= 2) && (numParts < NELEMS(parts));
if (_valid && numParts > 2) {
- _rangeLimit = strtol(parts[2].data(), nullptr, 0);
+ _rangeLimit = static_cast<int32_t>(strtol(parts[2].data(), nullptr, 0));
if (numParts > 3) {
_valid = (numParts >= 5);
if (_valid) {
@@ -257,48 +300,56 @@ QueryTermSimple::QueryTermSimple(const string & term_, Type type)
template <typename T, typename D>
bool
-QueryTermSimple::getAsNumericTerm(T & lower, T & upper, D d) const
+QueryTermSimple::getAsNumericTerm(T & lower, T & upper, D d) const noexcept
{
- bool valid(empty());
+ if (empty()) return false;
+
size_t sz(_term.size());
- if (sz) {
- char *err(nullptr);
- T low(lower);
- T high(upper);
- const char * q = _term.c_str();
- const char first(q[0]);
- const char last(q[sz-1]);
- q += ((first == '<') || (first == '>') || (first == '[')) ? 1 : 0;
- T ll = d.fromstr(q, &err);
- valid = isValid() && ((*err == 0) || (*err == ';'));
- if (valid) {
- if (first == '<' && (*err == 0)) {
- high = d.nearestDownwd(ll, lower);
- } else if (first == '>' && (*err == 0)) {
- low = d.nearestUpward(ll, upper);
- } else if ((first == '[') || (first == '<')) {
- if (q != err) {
- low = (first == '[') ? ll : d.nearestUpward(ll, upper);
- }
- q = err + 1;
- T hh = d.fromstr(q, &err);
- bool hasUpperLimit(q != err);
- if (*err == ';') {
- err = const_cast<char *>(_term.end() - 1);
- }
- valid = (*err == last) && ((last == ']') || (last == '>'));
- if (hasUpperLimit) {
- high = (last == ']') ? hh : d.nearestDownwd(hh, lower);
- }
- } else {
- low = high = ll;
- }
+ const char *err(nullptr);
+ T low(lower);
+ T high(upper);
+ const char * q = _term.c_str();
+ const char * qend = q + sz;
+ const char first(q[0]);
+ const char last(q[sz-1]);
+ bool isRange = (first == '<') || (first == '>') || (first == '[');
+ q += isRange ? 1 : 0;
+ T ll = d.fromstr(q, qend, &err);
+ bool valid = isValid() && ((*err == 0) || (*err == ';'));
+ if (!valid) return false;
+
+ if (*err == 0) {
+ if (first == '<') {
+ high = d.nearestDownwd(ll, lower);
+ } else if (first == '>') {
+ low = d.nearestUpward(ll, upper);
+ } else {
+ low = high = ll;
+ valid = ! isRange;
}
- if (valid) {
- lower = low;
- upper = high;
+ } else {
+ if ((first == '[') || (first == '<')) {
+ if (q != err) {
+ low = (first == '[') ? ll : d.nearestUpward(ll, upper);
+ }
+ q = err + 1;
+ T hh = d.fromstr(q, qend, &err);
+ bool hasUpperLimit(q != err);
+ if (*err == ';') {
+ err = const_cast<char *>(_term.end() - 1);
+ }
+ valid = (*err == last) && ((last == ']') || (last == '>'));
+ if (hasUpperLimit) {
+ high = (last == ']') ? hh : d.nearestDownwd(hh, lower);
+ }
+ } else {
+ valid = false;
}
}
+ if (valid) {
+ lower = low;
+ upper = high;
+ }
return valid;
}
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h
index 2b64e3812ab..87bf7c26b80 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.h
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h
@@ -33,8 +33,8 @@ public:
N high;
bool valid; // Whether parsing of the range was successful
bool adjusted; // Whether the low and high was adjusted according to min and max limits of the given type.
- RangeResult() : low(), high(), valid(true), adjusted(false) {}
- bool isEqual() const { return low == high; }
+ RangeResult() noexcept : low(), high(), valid(true), adjusted(false) {}
+ bool isEqual() const noexcept { return low == high; }
};
QueryTermSimple(const QueryTermSimple &) = delete;
@@ -47,39 +47,40 @@ public:
* Extracts the content of this query term as a range with low and high values.
*/
template <typename N>
- RangeResult<N> getRange() const;
- int getRangeLimit() const { return _rangeLimit; }
- size_t getMaxPerGroup() const { return _maxPerGroup; }
- size_t getDiversityCutoffGroups() const { return _diversityCutoffGroups; }
- bool getDiversityCutoffStrict() const { return _diversityCutoffStrict; }
- vespalib::stringref getDiversityAttribute() const { return _diversityAttribute; }
- size_t getFuzzyMaxEditDistance() const { return _fuzzyMaxEditDistance; }
- size_t getFuzzyPrefixLength() const { return _fuzzyPrefixLength; }
- bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const;
- bool getAsDoubleTerm(double & lower, double & upper) const;
- const char * getTerm() const { return _term.c_str(); }
- bool isPrefix() const { return (_type == Type::PREFIXTERM); }
- bool isSubstring() const { return (_type == Type::SUBSTRINGTERM); }
- bool isExactstring() const { return (_type == Type::EXACTSTRINGTERM); }
- bool isSuffix() const { return (_type == Type::SUFFIXTERM); }
- bool isWord() const { return (_type == Type::WORD); }
- bool isRegex() const { return (_type == Type::REGEXP); }
- bool isGeoLoc() const { return (_type == Type::GEO_LOCATION); }
- bool isFuzzy() const { return (_type == Type::FUZZYTERM); }
+ RangeResult<N> getRange() const noexcept;
+ int getRangeLimit() const noexcept { return _rangeLimit; }
+ size_t getMaxPerGroup() const noexcept { return _maxPerGroup; }
+ size_t getDiversityCutoffGroups() const noexcept { return _diversityCutoffGroups; }
+ bool getDiversityCutoffStrict() const noexcept { return _diversityCutoffStrict; }
+ vespalib::stringref getDiversityAttribute() const noexcept { return _diversityAttribute; }
+ size_t getFuzzyMaxEditDistance() const noexcept { return _fuzzyMaxEditDistance; }
+ size_t getFuzzyPrefixLength() const noexcept { return _fuzzyPrefixLength; }
+ bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const noexcept;
+ bool getAsFloatTerm(double & lower, double & upper) const noexcept;
+ bool getAsFloatTerm(float & lower, float & upper) const noexcept;
+ const char * getTerm() const noexcept { return _term.c_str(); }
+ bool isPrefix() const noexcept { return (_type == Type::PREFIXTERM); }
+ bool isSubstring() const noexcept { return (_type == Type::SUBSTRINGTERM); }
+ bool isExactstring() const noexcept { return (_type == Type::EXACTSTRINGTERM); }
+ bool isSuffix() const noexcept { return (_type == Type::SUFFIXTERM); }
+ bool isWord() const noexcept { return (_type == Type::WORD); }
+ bool isRegex() const noexcept { return (_type == Type::REGEXP); }
+ bool isGeoLoc() const noexcept { return (_type == Type::GEO_LOCATION); }
+ bool isFuzzy() const noexcept { return (_type == Type::FUZZYTERM); }
bool is_nearest_neighbor() const noexcept { return (_type == Type::NEAREST_NEIGHBOR); }
- bool empty() const { return _term.empty(); }
+ bool empty() const noexcept { return _term.empty(); }
virtual void visitMembers(vespalib::ObjectVisitor &visitor) const;
vespalib::string getClassName() const;
- bool isValid() const { return _valid; }
- const string & getTermString() const { return _term; }
+ bool isValid() const noexcept { return _valid; }
+ const string & getTermString() const noexcept { return _term; }
private:
- bool getRangeInternal(int64_t & low, int64_t & high) const;
+ bool getRangeInternal(int64_t & low, int64_t & high) const noexcept;
template <typename N>
- RangeResult<N> getIntegerRange() const;
+ RangeResult<N> getIntegerRange() const noexcept;
template <typename N>
- RangeResult<N> getFloatRange() const;
- int _rangeLimit;
+ RangeResult<N> getFloatRange() const noexcept;
+ int32_t _rangeLimit;
uint32_t _maxPerGroup;
uint32_t _diversityCutoffGroups;
Type _type;
@@ -88,7 +89,7 @@ private:
string _term;
stringref _diversityAttribute;
template <typename T, typename D>
- bool getAsNumericTerm(T & lower, T & upper, D d) const;
+ bool getAsNumericTerm(T & lower, T & upper, D d) const noexcept;
protected:
uint32_t _fuzzyMaxEditDistance; // set in QueryTerm
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
index 0813292a9da..9b53407aff5 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
@@ -9,5 +9,6 @@ vespa_add_library(searchlib_query_streaming OBJECT
querynode.cpp
querynoderesultbase.cpp
queryterm.cpp
+ wand_term.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp
index d2c1ba872f5..1871bda564d 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.cpp
@@ -11,36 +11,51 @@ using search::fef::MatchData;
namespace search::streaming {
DotProductTerm::DotProductTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms)
- : MultiTerm(std::move(result_base), index, Type::WORD, num_terms)
+ : MultiTerm(std::move(result_base), index, num_terms)
{
}
DotProductTerm::~DotProductTerm() = default;
void
-DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data)
+DotProductTerm::build_scores(Scores& scores) const
{
- vespalib::hash_map<uint32_t,double> scores;
HitList hl_store;
for (const auto& term : _terms) {
auto& hl = term->evaluateHits(hl_store);
for (auto& hit : hl) {
- scores[hit.context()] += term->weight().percent() * hit.weight();
+ scores[hit.context()] += ((int64_t)term->weight().percent()) * hit.weight();
}
}
+}
+
+void
+DotProductTerm::unpack_scores(Scores& scores, std::optional<double> score_threshold, uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data)
+{ // For each field of td that has an entry in scores, stores the score as raw score in the field's term match data.
auto num_fields = td.numFields();
for (uint32_t field_idx = 0; field_idx < num_fields; ++field_idx) {
auto& tfd = td.field(field_idx);
auto field_id = tfd.getFieldId();
if (scores.contains(field_id)) {
- auto handle = tfd.getHandle();
- if (handle != fef::IllegalHandle) {
- auto tmd = match_data.resolveTermField(tfd.getHandle());
- tmd->setFieldId(field_id);
- tmd->setRawScore(docid, scores[field_id]);
+ auto score = scores[field_id];
+ if (!score_threshold.has_value() || score_threshold.value() < score) { // no threshold given, or score strictly above it
+ auto handle = tfd.getHandle();
+ if (handle != fef::IllegalHandle) { // fields with an illegal handle are skipped
+ auto tmd = match_data.resolveTermField(tfd.getHandle());
+ tmd->setFieldId(field_id);
+ tmd->setRawScore(docid, score);
+ }
}
}
}
}
+/**
+ * Computes the per-field scores for the current hits and unpacks them into
+ * match_data for docid. No score threshold is applied (std::nullopt is
+ * passed on), so every field with a score is a candidate for unpacking.
+ */
+void
+DotProductTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data)
+{
+    Scores field_scores;
+    build_scores(field_scores);
+    const std::optional<double> no_threshold = std::nullopt;
+    unpack_scores(field_scores, no_threshold, docid, td, match_data);
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h
index 77cac693781..3702bd4721c 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/dot_product_term.h
@@ -3,6 +3,8 @@
#pragma once
#include "multi_term.h"
+#include <vespa/vespalib/stllike/hash_map.h>
+#include <optional>
namespace search::streaming {
@@ -10,6 +12,10 @@ namespace search::streaming {
* A dot product query term for streaming search.
*/
class DotProductTerm : public MultiTerm {
+protected:
+ using Scores = vespalib::hash_map<uint32_t,double>;
+ void build_scores(Scores& scores) const;
+ void unpack_scores(Scores& scores, std::optional<double> score_threshold, uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data);
public:
DotProductTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms);
~DotProductTerm() override;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp
index 36303d4e991..3e75f4a5114 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.cpp
@@ -12,8 +12,9 @@ using search::query::TermVector;
namespace search::streaming {
-InTerm::InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, std::unique_ptr<TermVector> terms)
- : MultiTerm(std::move(result_base), index, Type::WORD, std::move(terms))
+InTerm::InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index,
+ std::unique_ptr<TermVector> terms, Normalizing normalize_mode)
+ : MultiTerm(std::move(result_base), index, std::move(terms), normalize_mode)
{
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/in_term.h b/searchlib/src/vespa/searchlib/query/streaming/in_term.h
index 7d03ed989c7..7b388b3f6e6 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/in_term.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/in_term.h
@@ -11,7 +11,8 @@ namespace search::streaming {
*/
class InTerm : public MultiTerm {
public:
- InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, std::unique_ptr<query::TermVector> terms);
+ InTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index,
+ std::unique_ptr<query::TermVector> terms, Normalizing normalize_mode);
~InTerm() override;
void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override;
};
diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp
index ad5857b8c41..dd34b9b7e73 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.cpp
@@ -9,19 +9,20 @@ using search::query::TermVector;
namespace search::streaming {
-MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, uint32_t num_terms)
- : QueryTerm(std::move(result_base), "", index, type),
+MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms)
+ : QueryTerm(std::move(result_base), "", index, Type::WORD, Normalizing::NONE),
_terms()
{
_terms.reserve(num_terms);
}
-MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, std::unique_ptr<TermVector> terms)
- : MultiTerm(std::move(result_base), index, type, terms->size())
+MultiTerm::MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index,
+ std::unique_ptr<TermVector> terms, Normalizing normalizing)
+ : MultiTerm(std::move(result_base), index, terms->size())
{
auto num_terms = terms->size();
for (uint32_t i = 0; i < num_terms; ++i) {
- add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), terms->getAsString(i).first, "", QueryTermSimple::Type::WORD));
+ add_term(std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), terms->getAsString(i).first, "", Type::WORD, normalizing));
}
}
@@ -33,12 +34,6 @@ MultiTerm::add_term(std::unique_ptr<QueryTerm> term)
_terms.emplace_back(std::move(term));
}
-MultiTerm*
-MultiTerm::as_multi_term() noexcept
-{
- return this;
-}
-
void
MultiTerm::reset()
{
diff --git a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h
index 4c3f1ea5b5a..3bb69e29693 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/multi_term.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/multi_term.h
@@ -24,11 +24,12 @@ class MultiTerm : public QueryTerm {
protected:
std::vector<std::unique_ptr<QueryTerm>> _terms;
public:
- MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, uint32_t num_terms);
- MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, Type type, std::unique_ptr<query::TermVector> terms);
+ MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms);
+ MultiTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index,
+ std::unique_ptr<query::TermVector> terms, Normalizing normalizing);
~MultiTerm() override;
void add_term(std::unique_ptr<QueryTerm> term);
- MultiTerm* as_multi_term() noexcept override;
+ MultiTerm* as_multi_term() noexcept override { return this; }
void reset() override;
bool evaluate() const override;
const HitList& evaluateHits(HitList& hl) const override;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp
index f710218297d..1317d1c0651 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/nearest_neighbor_query_node.cpp
@@ -9,7 +9,7 @@ NearestNeighborQueryNode::NearestNeighborQueryNode(std::unique_ptr<QueryNodeResu
const string& query_tensor_name, const string& field_name,
uint32_t target_hits, double distance_threshold,
int32_t unique_id, search::query::Weight weight)
- : QueryTerm(std::move(resultBase), query_tensor_name, field_name, Type::NEAREST_NEIGHBOR),
+ : QueryTerm(std::move(resultBase), query_tensor_name, field_name, Type::NEAREST_NEIGHBOR, Normalizing::NONE),
_target_hits(target_hits),
_distance_threshold(distance_threshold),
_distance(),
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.cpp b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
index d2eee5d345f..3079ec31e8f 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.cpp
@@ -12,7 +12,7 @@ QueryConnector::visitMembers(vespalib::ObjectVisitor &visitor) const
visit(visitor, "Operator", _opName);
}
-QueryConnector::QueryConnector(const char * opName)
+QueryConnector::QueryConnector(const char * opName) noexcept
: QueryNode(),
_opName(opName),
_index(),
@@ -31,7 +31,7 @@ const HitList &
QueryConnector::evaluateHits(HitList & hl) const
{
if (evaluate()) {
- hl.push_back(Hit(1, 0, 0, 1));
+ hl.emplace_back(1, 0, 0, 1);
}
return hl;
}
@@ -105,10 +105,10 @@ QueryConnector::create(ParseItem::ItemType type)
{
switch (type) {
case search::ParseItem::ITEM_AND: return std::make_unique<AndQueryNode>();
- case search::ParseItem::ITEM_OR: return std::make_unique<OrQueryNode>();
+ case search::ParseItem::ITEM_OR:
case search::ParseItem::ITEM_WEAK_AND: return std::make_unique<OrQueryNode>();
+ case search::ParseItem::ITEM_WEIGHTED_SET:
case search::ParseItem::ITEM_EQUIV: return std::make_unique<EquivQueryNode>();
- case search::ParseItem::ITEM_WEIGHTED_SET: return std::make_unique<EquivQueryNode>();
case search::ParseItem::ITEM_WAND: return std::make_unique<OrQueryNode>();
case search::ParseItem::ITEM_NOT: return std::make_unique<AndNotQueryNode>();
case search::ParseItem::ITEM_PHRASE: return std::make_unique<PhraseQueryNode>();
@@ -340,7 +340,7 @@ Query::Query(const QueryNodeResultFactory & factory, vespalib::stringref queryRe
bool
Query::evaluate() const {
- return valid() ? _root->evaluate() : false;
+ return valid() && _root->evaluate();
}
bool
diff --git a/searchlib/src/vespa/searchlib/query/streaming/query.h b/searchlib/src/vespa/searchlib/query/streaming/query.h
index 42c3b94002c..8befa2fe7fa 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/query.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/query.h
@@ -13,8 +13,8 @@ namespace search::streaming {
class QueryConnector : public QueryNode
{
public:
- QueryConnector(const char * opName);
- ~QueryConnector();
+ explicit QueryConnector(const char * opName) noexcept;
+ ~QueryConnector() override;
const HitList & evaluateHits(HitList & hl) const override;
void reset() override;
void getLeaves(QueryTermList & tl) override;
@@ -44,7 +44,7 @@ private:
class TrueNode : public QueryConnector
{
public:
- TrueNode() : QueryConnector("AND") { }
+ TrueNode() noexcept : QueryConnector("AND") { }
bool evaluate() const override;
};
@@ -52,7 +52,7 @@ public:
class FalseNode : public QueryConnector
{
public:
- FalseNode() : QueryConnector("AND") { }
+ FalseNode() noexcept : QueryConnector("AND") { }
bool evaluate() const override;
};
@@ -62,8 +62,8 @@ public:
class AndQueryNode : public QueryConnector
{
public:
- AndQueryNode() : QueryConnector("AND") { }
- AndQueryNode(const char * opName) : QueryConnector(opName) { }
+ AndQueryNode() noexcept : QueryConnector("AND") { }
+ explicit AndQueryNode(const char * opName) noexcept : QueryConnector(opName) { }
bool evaluate() const override;
bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_AND; }
};
@@ -74,7 +74,7 @@ public:
class AndNotQueryNode : public QueryConnector
{
public:
- AndNotQueryNode() : QueryConnector("ANDNOT") { }
+ AndNotQueryNode() noexcept : QueryConnector("ANDNOT") { }
bool evaluate() const override;
bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_NOT; }
};
@@ -85,13 +85,11 @@ public:
class OrQueryNode : public QueryConnector
{
public:
- OrQueryNode() : QueryConnector("OR") { }
- OrQueryNode(const char * opName) : QueryConnector(opName) { }
+ OrQueryNode() noexcept : QueryConnector("OR") { }
+ explicit OrQueryNode(const char * opName) noexcept : QueryConnector(opName) { }
bool evaluate() const override;
bool isFlattenable(ParseItem::ItemType type) const override {
return (type == ParseItem::ITEM_OR) ||
- (type == ParseItem::ITEM_DOT_PRODUCT) ||
- (type == ParseItem::ITEM_WAND) ||
(type == ParseItem::ITEM_WEAK_AND);
}
};
@@ -102,7 +100,7 @@ public:
class EquivQueryNode : public OrQueryNode
{
public:
- EquivQueryNode() : OrQueryNode("EQUIV") { }
+ EquivQueryNode() noexcept : OrQueryNode("EQUIV") { }
bool evaluate() const override;
bool isFlattenable(ParseItem::ItemType type) const override {
return (type == ParseItem::ITEM_EQUIV) ||
@@ -117,7 +115,7 @@ public:
class PhraseQueryNode : public AndQueryNode
{
public:
- PhraseQueryNode() : AndQueryNode("PHRASE"), _fieldInfo(32) { }
+ PhraseQueryNode() noexcept : AndQueryNode("PHRASE"), _fieldInfo(32) { }
bool evaluate() const override;
const HitList & evaluateHits(HitList & hl) const override;
void getPhrases(QueryNodeRefList & tl) override;
@@ -138,7 +136,7 @@ private:
class SameElementQueryNode : public AndQueryNode
{
public:
- SameElementQueryNode() : AndQueryNode("SAME_ELEMENT") { }
+ SameElementQueryNode() noexcept : AndQueryNode("SAME_ELEMENT") { }
bool evaluate() const override;
const HitList & evaluateHits(HitList & hl) const override;
bool isFlattenable(ParseItem::ItemType type) const override { return type == ParseItem::ITEM_NOT; }
@@ -151,8 +149,8 @@ public:
class NearQueryNode : public AndQueryNode
{
public:
- NearQueryNode() : AndQueryNode("NEAR"), _distance(0) { }
- NearQueryNode(const char * opName) : AndQueryNode(opName), _distance(0) { }
+ NearQueryNode() noexcept : AndQueryNode("NEAR"), _distance(0) { }
+ explicit NearQueryNode(const char * opName) noexcept : AndQueryNode(opName), _distance(0) { }
bool evaluate() const override;
void distance(size_t dist) { _distance = dist; }
size_t distance() const { return _distance; }
@@ -169,8 +167,7 @@ private:
class ONearQueryNode : public NearQueryNode
{
public:
- ONearQueryNode() : NearQueryNode("ONEAR") { }
- ~ONearQueryNode() { }
+ ONearQueryNode() noexcept : NearQueryNode("ONEAR") { }
bool evaluate() const override;
};
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index db0fbd5b98e..c24f41d16cf 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -5,6 +5,7 @@
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
#include <vespa/searchlib/query/streaming/dot_product_term.h>
#include <vespa/searchlib/query/streaming/in_term.h>
+#include <vespa/searchlib/query/streaming/wand_term.h>
#include <vespa/searchlib/query/tree/term_vector.h>
#include <charconv>
#include <vespa/log/log.h>
@@ -13,12 +14,18 @@ LOG_SETUP(".vsm.querynode");
namespace search::streaming {
namespace {
- vespalib::stringref DEFAULT("default");
- bool disableRewrite(const QueryNode * qn) {
- return dynamic_cast<const NearQueryNode *> (qn) ||
- dynamic_cast<const PhraseQueryNode *> (qn) ||
- dynamic_cast<const SameElementQueryNode *>(qn);
- }
+
+vespalib::stringref DEFAULT("default");
+// Returns true when qn is (or derives from) NearQueryNode, PhraseQueryNode or
+// SameElementQueryNode, as determined by dynamic_cast. A null qn yields false.
+bool disableRewrite(const QueryNode * qn) {
+    if (dynamic_cast<const NearQueryNode *>(qn) != nullptr) {
+        return true;
+    }
+    if (dynamic_cast<const PhraseQueryNode *>(qn) != nullptr) {
+        return true;
+    }
+    return dynamic_cast<const SameElementQueryNode *>(qn) != nullptr;
+}
+
+// Returns true when the term's encoding is flagged as float but not as a
+// base-10 integer, and the term text contains a '.' character.
+bool possibleFloat(const QueryTerm & qt, const QueryTerm::string & term) {
+    if (qt.encoding().isBase10Integer()) {
+        return false;
+    }
+    if (!qt.encoding().isFloat()) {
+        return false;
+    }
+    return term.find('.') != QueryTerm::string::npos;
+}
+
}
QueryNode::UP
@@ -34,7 +41,6 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
case ParseItem::ITEM_WEAK_AND:
case ParseItem::ITEM_EQUIV:
case ParseItem::ITEM_WEIGHTED_SET:
- case ParseItem::ITEM_WAND:
case ParseItem::ITEM_NOT:
case ParseItem::ITEM_PHRASE:
case ParseItem::ITEM_SAME_ELEMENT:
@@ -43,16 +49,14 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
{
qn = QueryConnector::create(type);
if (qn) {
- QueryConnector * qc = dynamic_cast<QueryConnector *> (qn.get());
- NearQueryNode * nqn = dynamic_cast<NearQueryNode *> (qc);
+ auto * qc = dynamic_cast<QueryConnector *> (qn.get());
+ auto * nqn = dynamic_cast<NearQueryNode *> (qc);
if (nqn) {
nqn->distance(queryRep.getNearDistance());
}
if ((type == ParseItem::ITEM_WEAK_AND) ||
(type == ParseItem::ITEM_WEIGHTED_SET) ||
- (type == ParseItem::ITEM_DOT_PRODUCT) ||
- (type == ParseItem::ITEM_SAME_ELEMENT) ||
- (type == ParseItem::ITEM_WAND))
+ (type == ParseItem::ITEM_SAME_ELEMENT))
{
qn->setIndex(queryRep.getIndexName());
}
@@ -75,10 +79,8 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
break;
case ParseItem::ITEM_GEO_LOCATION_TERM:
// just keep the string representation here; parsed in vsm::GeoPosFieldSearcher
- qn = std::make_unique<QueryTerm>(factory.create(),
- queryRep.getTerm(),
- queryRep.getIndexName(),
- QueryTerm::Type::GEO_LOCATION);
+ qn = std::make_unique<QueryTerm>(factory.create(), queryRep.getTerm(), queryRep.getIndexName(),
+ QueryTerm::Type::GEO_LOCATION, Normalizing::NONE);
break;
case ParseItem::ITEM_NEAREST_NEIGHBOR:
qn = build_nearest_neighbor_query_node(factory, queryRep);
@@ -143,28 +145,25 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
// But it will do for now as only correct sddocname queries are sent down.
qn = std::make_unique<TrueNode>();
} else {
- auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm);
+ Normalizing normalize_mode = factory.normalizing_mode(ssIndex);
+ auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode);
qt->setWeight(queryRep.GetWeight());
qt->setUniqueId(queryRep.getUniqueId());
if (qt->isFuzzy()) {
qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance());
qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength());
}
- if (qt->encoding().isBase10Integer() ||
- ! qt->encoding().isFloat() ||
- ! factory.getRewriteFloatTerms() ||
- ! allowRewrite ||
- (ssTerm.find('.') == vespalib::string::npos))
- {
- qn = std::move(qt);
- } else {
+ if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.allow_float_terms_rewrite(ssIndex)) {
auto phrase = std::make_unique<PhraseQueryNode>();
- phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, ssTerm.find('.')), ssIndex, TermType::WORD));
- phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(ssTerm.find('.') + 1), ssIndex, TermType::WORD));
+ auto dotPos = ssTerm.find('.');
+ phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD, normalize_mode));
+ phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(dotPos + 1), ssIndex, TermType::WORD, normalize_mode));
auto orqn = std::make_unique<EquivQueryNode>();
orqn->addChild(std::move(qt));
orqn->addChild(std::move(phrase));
qn = std::move(orqn);
+ } else {
+ qn = std::move(qt);
}
}
}
@@ -181,12 +180,18 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
}
break;
case ParseItem::ITEM_STRING_IN:
+ qn = std::make_unique<InTerm>(factory.create(), queryRep.getIndexName(), queryRep.get_terms(),
+ factory.normalizing_mode(queryRep.getIndexName()));
+ break;
case ParseItem::ITEM_NUMERIC_IN:
- qn = std::make_unique<InTerm>(factory.create(), queryRep.getIndexName(), queryRep.get_terms());
+ qn = std::make_unique<InTerm>(factory.create(), queryRep.getIndexName(), queryRep.get_terms(), Normalizing::NONE);
break;
case ParseItem::ITEM_DOT_PRODUCT:
qn = build_dot_product_term(factory, queryRep);
break;
+ case ParseItem::ITEM_WAND:
+ qn = build_wand_term(factory, queryRep);
+ break;
default:
skip_unknown(queryRep);
break;
@@ -208,17 +213,12 @@ QueryNode::build_nearest_neighbor_query_node(const QueryNodeResultFactory& facto
auto weight = query_rep.GetWeight();
uint32_t target_hits = query_rep.getTargetHits();
double distance_threshold = query_rep.getDistanceThreshold();
- return std::make_unique<NearestNeighborQueryNode>(factory.create(),
- query_tensor_name,
- field_name,
- target_hits,
- distance_threshold,
- unique_id,
- weight);
+ return std::make_unique<NearestNeighborQueryNode>(factory.create(), query_tensor_name, field_name,
+ target_hits, distance_threshold, unique_id, weight);
}
void
-QueryNode::populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep)
+QueryNode::populate_multi_term(Normalizing string_normalize_mode, MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep)
{
char buf[24];
vespalib::string subterm;
@@ -227,13 +227,15 @@ QueryNode::populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& quer
std::unique_ptr<QueryTerm> term;
switch (queryRep.getType()) {
case ParseItem::ITEM_PURE_WEIGHTED_STRING:
- term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), queryRep.getTerm(), "", QueryTermSimple::Type::WORD);
+ term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), queryRep.getTerm(), "",
+ QueryTermSimple::Type::WORD, string_normalize_mode);
break;
case ParseItem::ITEM_PURE_WEIGHTED_LONG:
{
auto res = std::to_chars(buf, buf + sizeof(buf), queryRep.getIntergerTerm(), 10);
subterm.assign(buf, res.ptr - buf);
- term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), subterm, "", QueryTermSimple::Type::WORD);
+ term = std::make_unique<QueryTerm>(std::unique_ptr<QueryNodeResultBase>(), subterm, "",
+ QueryTermSimple::Type::WORD, Normalizing::NONE);
}
break;
default:
@@ -250,13 +252,24 @@ QueryNode::populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& quer
std::unique_ptr<QueryNode>
QueryNode::build_dot_product_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep)
{
- auto dp =std::make_unique<DotProductTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity());
+ auto dp = std::make_unique<DotProductTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity());
dp->setWeight(queryRep.GetWeight());
dp->setUniqueId(queryRep.getUniqueId());
- populate_multi_term(*dp, queryRep);
+ populate_multi_term(factory.normalizing_mode(dp->index()), *dp, queryRep);
return dp;
}
+/**
+ * Creates a WandTerm for the item queryRep currently points at.
+ * Index name, arity, weight, unique id and score threshold are read from
+ * queryRep; the sub-terms are then added by populate_multi_term(), using the
+ * factory's normalizing mode for the term's index for string sub-terms.
+ */
+std::unique_ptr<QueryNode>
+QueryNode::build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep)
+{
+    auto term = std::make_unique<WandTerm>(factory.create(), queryRep.getIndexName(), queryRep.getArity());
+    term->setWeight(queryRep.GetWeight());
+    term->setUniqueId(queryRep.getUniqueId());
+    term->set_score_threshold(queryRep.getScoreThreshold());
+    const Normalizing string_mode = factory.normalizing_mode(term->index());
+    populate_multi_term(string_mode, *term, queryRep);
+    return term;
+}
+
void
QueryNode::skip_unknown(SimpleQueryStackDumpIterator& queryRep)
{
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.h b/searchlib/src/vespa/searchlib/query/streaming/querynode.h
index 09c44d951d3..a0561b2e52e 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.h
@@ -2,8 +2,7 @@
#pragma once
#include "hit.h"
-#include <vespa/vespalib/stllike/string.h>
-#include <memory>
+#include "querynoderesultbase.h"
namespace search { class SimpleQueryStackDumpIterator; }
@@ -30,13 +29,14 @@ using ConstQueryTermList = std::vector<const QueryTerm *>;
class QueryNode
{
static std::unique_ptr<QueryNode> build_nearest_neighbor_query_node(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
- static void populate_multi_term(MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep);
+ static void populate_multi_term(Normalizing string_normalize_mode, MultiTerm& mt, SimpleQueryStackDumpIterator& queryRep);
static std::unique_ptr<QueryNode> build_dot_product_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
+ static std::unique_ptr<QueryNode> build_wand_term(const QueryNodeResultFactory& factory, SimpleQueryStackDumpIterator& queryRep);
static void skip_unknown(SimpleQueryStackDumpIterator& queryRep);
public:
using UP = std::unique_ptr<QueryNode>;
- virtual ~QueryNode() { }
+ virtual ~QueryNode() = default;
/// This evalutes if the subtree starting here evaluates to true.
virtual bool evaluate() const = 0;
/// This return the hitList for this subtree. Does only give meaning in a
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h
index 62fc32a4575..74f872ad187 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
+#include <vespa/vespalib/stllike/string.h>
#include <memory>
namespace search::streaming {
@@ -17,11 +18,24 @@ public:
virtual QueryNodeResultBase * clone() const = 0;
};
+enum class Normalizing { // how a query term string is normalized when a QueryTerm is constructed
+ NONE, // term is kept as given
+ LOWERCASE, // term is lowercased only
+ LOWERCASE_AND_FOLD // term is lowercased and folded (may be downgraded depending on term type, see requireFold() in queryterm.cpp)
+};
+
class QueryNodeResultFactory {
public:
virtual ~QueryNodeResultFactory() = default;
- virtual bool getRewriteFloatTerms() const { return false; }
- virtual std::unique_ptr<QueryNodeResultBase> create() const { return std::unique_ptr<QueryNodeResultBase>(); }
+ virtual bool allow_float_terms_rewrite(vespalib::stringref index) const noexcept {
+ (void) index;
+ return false;
+ }
+ virtual Normalizing normalizing_mode(vespalib::stringref index) const noexcept {
+ (void) index;
+ return Normalizing::NONE;
+ }
+ virtual std::unique_ptr<QueryNodeResultBase> create() const { return {}; }
};
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index 9c45427d07d..3950a179d67 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "queryterm.h"
+#include <vespa/fastlib/text/normwordfolder.h>
#include <vespa/vespalib/objects/visit.h>
#include <cmath>
@@ -9,12 +10,13 @@ namespace {
class CharInfo {
public:
CharInfo();
- uint8_t get(uint8_t c) const { return _charInfo[c]; }
+ uint8_t get(uint8_t c) const noexcept { return _charInfo[c]; }
private:
uint8_t _charInfo[256];
};
CharInfo::CharInfo()
+ : _charInfo()
{
// XXX: Should refactor to reduce number of magic constants.
memset(_charInfo, 0x01, 128); // All 7 bits are ascii7bit
@@ -33,7 +35,7 @@ CharInfo::CharInfo()
_charInfo[uint8_t('E')] = 0x05;
}
-static CharInfo _G_charTable;
+CharInfo G_charTable;
}
@@ -53,29 +55,102 @@ QueryTerm::visitMembers(vespalib::ObjectVisitor & visitor) const
visit(visitor, "uniqueid", _uniqueId);
}
-QueryTerm::QueryTerm(std::unique_ptr<QueryNodeResultBase> org, const string & termS, const string & indexS, Type type) :
- QueryTermUCS4(termS, type),
- _index(indexS),
- _encoding(0x01),
- _result(org.release()),
- _hitList(),
- _weight(100),
- _uniqueId(0),
- _fieldInfo()
+namespace {
+
+using Type = QueryTermSimple::Type;
+
+/**
+ * Decides the effective normalization for a term of the given type.
+ * NONE and LOWERCASE are passed through unchanged. When LOWERCASE_AND_FOLD is
+ * requested, exact string terms are only lowercased; word, substring, prefix
+ * and suffix terms are lowercased and folded; every other type gets NONE.
+ */
+Normalizing
+requireFold(Type type, Normalizing normalizing) {
+    if ((normalizing == Normalizing::NONE) || (normalizing == Normalizing::LOWERCASE)) {
+        return normalizing;
+    }
+    switch (type) {
+    case Type::EXACTSTRINGTERM:
+        return Normalizing::LOWERCASE;
+    case Type::WORD:
+    case Type::SUBSTRINGTERM:
+    case Type::PREFIXTERM:
+    case Type::SUFFIXTERM:
+        return Normalizing::LOWERCASE_AND_FOLD;
+    default:
+        return Normalizing::NONE;
+    }
+}
+
+/**
+ * Returns a lowercased and folded copy of the UTF-8 string s.
+ * Bytes below 0x80 go through lowercase_and_fold_ascii(). Other characters are
+ * decoded to UCS-4 and either substituted with their replacement string, when
+ * Fast_NormalizeWordFolder provides one, or folded and re-encoded as UTF-8.
+ */
+vespalib::string
+fold(vespalib::stringref s) {
+    const auto * pos = reinterpret_cast<const unsigned char *>(s.data());
+    const unsigned char * const last = pos + s.size();
+    vespalib::string result;
+    while (pos < last) {
+        if (*pos < 0x80) {
+            result.append(Fast_NormalizeWordFolder::lowercase_and_fold_ascii(*pos++));
+            continue;
+        }
+        // Presumably advances pos past the decoded character; nothing else moves it in this branch.
+        uint32_t decoded = Fast_UnicodeUtil::GetUTF8CharNonAscii(pos);
+        const char * replacement = Fast_NormalizeWordFolder::ReplacementString(decoded);
+        if (replacement != nullptr) {
+            result.append(replacement, strlen(replacement));
+            continue;
+        }
+        decoded = Fast_NormalizeWordFolder::lowercase_and_fold(decoded);
+        char utf8[6];
+        const char * utf8_end = Fast_UnicodeUtil::utf8cput(utf8, decoded);
+        result.append(utf8, utf8_end - utf8);
+    }
+    return result;
+}
+
+/**
+ * Returns a lowercased copy of the UTF-8 string s.
+ * Bytes below 0x80 go through lowercase_ascii(); other characters are decoded
+ * to UCS-4, lowercased and re-encoded as UTF-8.
+ */
+vespalib::string
+lowercase(vespalib::stringref s) {
+    const auto * pos = reinterpret_cast<const unsigned char *>(s.data());
+    const unsigned char * const last = pos + s.size();
+    vespalib::string lowered;
+    while (pos < last) {
+        if (*pos < 0x80) {
+            lowered.append(static_cast<char>(Fast_NormalizeWordFolder::lowercase_ascii(*pos++)));
+            continue;
+        }
+        // Presumably advances pos past the decoded character; nothing else moves it in this branch.
+        const uint32_t lowered_char = Fast_NormalizeWordFolder::lowercase(Fast_UnicodeUtil::GetUTF8CharNonAscii(pos));
+        char utf8[6];
+        const char * utf8_end = Fast_UnicodeUtil::utf8cput(utf8, lowered_char);
+        lowered.append(utf8, utf8_end - utf8);
+    }
+    return lowered;
+}
+
+/**
+ * Normalizes s according to the effective mode computed by requireFold():
+ * lowercase(), fold(), or the string unchanged.
+ */
+vespalib::string
+optional_fold(vespalib::stringref s, Type type, Normalizing normalizing) {
+    const Normalizing effective = requireFold(type, normalizing);
+    if (effective == Normalizing::LOWERCASE) {
+        return lowercase(s);
+    }
+    if (effective == Normalizing::LOWERCASE_AND_FOLD) {
+        return fold(s);
+    }
+    // Normalizing::NONE, or any value outside the enumerators: keep the term as is.
+    return s;
+}
+
+}
+
+QueryTerm::QueryTerm(std::unique_ptr<QueryNodeResultBase> org, stringref termS, const string & indexS,
+ Type type, Normalizing normalizing)
+ : QueryTermUCS4(optional_fold(termS, type, normalizing), type), // the term handed to the base class is already normalized
+ _index(indexS),
+ _encoding(0x01), // 0x01 is the Ascii7Bit flag of EncodingBitMap; kept as is when the term is empty
+ _result(org.release()),
+ _hitList(),
+ _weight(100),
+ _uniqueId(0),
+ _fieldInfo()
{
- if (!termS.empty()) {
+ if (!empty()) {
uint8_t enc(0xff);
- for (size_t i(0), m(termS.size()); i < m; i++) {
- enc &= _G_charTable.get(termS[i]);
+ for (char c : getTermString()) { // presumably iterates the stored (normalized) term rather than the raw termS - TODO confirm
+ enc &= G_charTable.get(c); // AND of per-character flags: a flag survives only if every character has it
}
- _encoding = enc;
+ _encoding = EncodingBitMap(enc); // explicit construction is required now that the EncodingBitMap constructor is explicit
}
}
void QueryTerm::getPhrases(QueryNodeRefList & tl) { (void) tl; }
void QueryTerm::getPhrases(ConstQueryNodeRefList & tl) const { (void) tl; }
-void QueryTerm::getLeaves(QueryTermList & tl) { tl.push_back(this); }
-void QueryTerm::getLeaves(ConstQueryTermList & tl) const { tl.push_back(this); }
+void QueryTerm::getLeaves(QueryTermList & tl) { tl.push_back(this); }
+void QueryTerm::getLeaves(ConstQueryTermList & tl) const { tl.push_back(this); }
bool QueryTerm::evaluate() const { return !_hitList.empty(); }
void QueryTerm::reset() { _hitList.clear(); }
const HitList & QueryTerm::evaluateHits(HitList &) const { return _hitList; }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 6e91437b1f9..743998a630e 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -27,13 +27,10 @@ public:
class EncodingBitMap
{
public:
- EncodingBitMap(uint8_t bm=0) : _enc(bm) { }
+ explicit EncodingBitMap(uint8_t bm) : _enc(bm) { }
bool isFloat() const { return _enc & Float; }
bool isBase10Integer() const { return _enc & Base10Integer; }
bool isAscii7Bit() const { return _enc & Ascii7Bit; }
- void setBase10Integer(bool v) { if (v) _enc |= Base10Integer; else _enc &= ~Base10Integer; }
- void setAscii7Bit(bool v) { if (v) _enc |= Ascii7Bit; else _enc &= ~Ascii7Bit; }
- void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; }
private:
enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 };
uint8_t _enc;
@@ -54,7 +51,12 @@ public:
uint32_t _hitCount;
uint32_t _fieldLength;
};
- QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, const string & term, const string & index, Type type);
+ QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, stringref term, const string & index, Type type)
+ : QueryTerm(std::move(resultBase), term, index, type, (type == Type::EXACTSTRINGTERM)
+ ? Normalizing::LOWERCASE
+ : Normalizing::LOWERCASE_AND_FOLD)
+ {}
+ QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, stringref term, const string & index, Type type, Normalizing normalizing);
QueryTerm(const QueryTerm &) = delete;
QueryTerm & operator = (const QueryTerm &) = delete;
QueryTerm(QueryTerm &&) = delete;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp
new file mode 100644
index 00000000000..a561adf5b42
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.cpp
@@ -0,0 +1,44 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "wand_term.h"
+#include <vespa/searchlib/fef/itermdata.h>
+#include <vespa/searchlib/fef/matchdata.h>
+
+using search::fef::ITermData;
+using search::fef::MatchData;
+
+namespace search::streaming {
+
+WandTerm::WandTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string & index, uint32_t num_terms)
+ : DotProductTerm(std::move(result_base), index, num_terms),
+ _score_threshold(0.0)
+{
+}
+
+WandTerm::~WandTerm() = default;
+
+bool
+WandTerm::evaluate() const
+{
+ if (_score_threshold <= 0.0) {
+ return DotProductTerm::evaluate();
+ }
+ Scores scores;
+ build_scores(scores);
+ for (auto &field_and_score : scores) {
+ if (field_and_score.second > _score_threshold) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+WandTerm::unpack_match_data(uint32_t docid, const ITermData& td, MatchData& match_data)
+{
+ Scores scores;
+ build_scores(scores);
+ unpack_scores(scores, _score_threshold, docid, td, match_data);
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/wand_term.h b/searchlib/src/vespa/searchlib/query/streaming/wand_term.h
new file mode 100644
index 00000000000..1b342834216
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/wand_term.h
@@ -0,0 +1,22 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "dot_product_term.h"
+
+namespace search::streaming {
+
+/*
+ * A wand query term for streaming search.
+ */
+class WandTerm : public DotProductTerm {
+ double _score_threshold;
+public:
+ WandTerm(std::unique_ptr<QueryNodeResultBase> result_base, const string& index, uint32_t num_terms);
+ ~WandTerm() override;
+ void set_score_threshold(double value) { _score_threshold = value; }
+ bool evaluate() const override;
+ void unpack_match_data(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data) override;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
index 5e6d31d3761..51fe2d12637 100644
--- a/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/queryeval/CMakeLists.txt
@@ -7,7 +7,7 @@ vespa_add_library(searchlib_queryeval OBJECT
booleanmatchiteratorwrapper.cpp
children_iterators.cpp
create_blueprint_visitor_helper.cpp
- document_weight_search_iterator.cpp
+ docid_with_weight_search_iterator.cpp
dot_product_blueprint.cpp
dot_product_search.cpp
elementiterator.cpp
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
index b71a579e097..6ca072d6dc7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.cpp
@@ -130,15 +130,15 @@ Blueprint::Blueprint() noexcept
Blueprint::~Blueprint() = default;
Blueprint::UP
-Blueprint::optimize(Blueprint::UP bp) {
+Blueprint::optimize(Blueprint::UP bp, bool sort_by_cost) {
Blueprint *root = bp.release();
- root->optimize(root, OptimizePass::FIRST);
- root->optimize(root, OptimizePass::LAST);
+ root->optimize(root, OptimizePass::FIRST, sort_by_cost);
+ root->optimize(root, OptimizePass::LAST, sort_by_cost);
return Blueprint::UP(root);
}
void
-Blueprint::optimize_self(OptimizePass)
+Blueprint::optimize_self(OptimizePass, bool)
{
}
@@ -358,6 +358,7 @@ Blueprint::visitMembers(vespalib::ObjectVisitor &visitor) const
visitor.visitInt("tree_size", state.tree_size());
visitor.visitBool("allow_termwise_eval", state.allow_termwise_eval());
visitor.closeStruct();
+ visitor.visitFloat("cost", _cost);
visitor.visitInt("sourceId", _sourceId);
visitor.visitInt("docid_limit", _docid_limit);
}
@@ -526,10 +527,9 @@ IntermediateBlueprint::calculateState() const
}
double
-IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const
+IntermediateBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const
{
(void) child;
- (void) use_estimate;
return hit_rate;
}
@@ -548,19 +548,19 @@ IntermediateBlueprint::should_do_termwise_eval(const UnpackInfo &unpack, double
}
void
-IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass)
+IntermediateBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost)
{
assert(self == this);
if (should_optimize_children()) {
for (auto &child : _children) {
auto *child_ptr = child.release();
- child_ptr->optimize(child_ptr, pass);
+ child_ptr->optimize(child_ptr, pass, sort_by_cost);
child.reset(child_ptr);
}
}
- optimize_self(pass);
+ optimize_self(pass, sort_by_cost);
if (pass == OptimizePass::LAST) {
- sort(_children);
+ sort(_children, sort_by_cost);
set_cost(calculate_cost());
}
maybe_eliminate_self(self, get_replacement());
@@ -634,7 +634,7 @@ IntermediateBlueprint::fetchPostings(const ExecuteInfo &execInfo)
for (size_t i = 0; i < _children.size(); ++i) {
Blueprint & child = *_children[i];
child.fetchPostings(ExecuteInfo::create(execInfo.is_strict() && inheritStrict(i), nextHitRate, execInfo));
- nextHitRate = computeNextHitRate(child, nextHitRate, execInfo.use_estimate_for_fetch_postings());
+ nextHitRate = computeNextHitRate(child, nextHitRate);
}
}
@@ -758,10 +758,10 @@ LeafBlueprint::getRange(vespalib::string &, vespalib::string &) const {
}
void
-LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass)
+LeafBlueprint::optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost)
{
assert(self == this);
- optimize_self(pass);
+ optimize_self(pass, sort_by_cost);
maybe_eliminate_self(self, get_replacement());
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/blueprint.h b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
index 66d55015f62..a78dd092f5a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/blueprint.h
@@ -172,6 +172,20 @@ public:
// lower limit for docid_limit: max child estimate
static HitEstimate sat_sum(const std::vector<HitEstimate> &data, uint32_t docid_limit);
+ // sort children to minimize total cost of OR flow
+ struct MinimalOrCost {
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return a->estimate() / a->cost() > b->estimate() / b->cost();
+ }
+ };
+
+ // sort children to minimize total cost of AND flow
+ struct MinimalAndCost {
+ bool operator () (const auto &a, const auto &b) const noexcept {
+ return (1.0 - a->estimate()) / a->cost() > (1.0 - b->estimate()) / b->cost();
+ }
+ };
+
// utility to get the greater estimate to sort first, higher tiers last
struct TieredGreaterEstimate {
bool operator () (const auto &a, const auto &b) const noexcept {
@@ -246,9 +260,9 @@ public:
virtual void setDocIdLimit(uint32_t limit) noexcept { _docid_limit = limit; }
uint32_t get_docid_limit() const noexcept { return _docid_limit; }
- static Blueprint::UP optimize(Blueprint::UP bp);
- virtual void optimize(Blueprint* &self, OptimizePass pass) = 0;
- virtual void optimize_self(OptimizePass pass);
+ static Blueprint::UP optimize(Blueprint::UP bp, bool sort_by_cost);
+ virtual void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) = 0;
+ virtual void optimize_self(OptimizePass pass, bool sort_by_cost);
virtual Blueprint::UP get_replacement();
virtual bool should_optimize_children() const { return true; }
@@ -354,7 +368,7 @@ private:
bool infer_want_global_filter() const;
size_t count_termwise_nodes(const UnpackInfo &unpack) const;
- virtual double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const;
+ virtual double computeNextHitRate(const Blueprint & child, double hit_rate) const;
protected:
// returns an empty collection if children have empty or
@@ -376,7 +390,7 @@ public:
void setDocIdLimit(uint32_t limit) noexcept final;
- void optimize(Blueprint* &self, OptimizePass pass) final;
+ void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final;
void set_global_filter(const GlobalFilter &global_filter, double estimated_hit_ratio) override;
IndexList find(const IPredicate & check) const;
@@ -393,7 +407,7 @@ public:
virtual double calculate_cost() const = 0;
virtual HitEstimate combine(const std::vector<HitEstimate> &data) const = 0;
virtual FieldSpecBaseList exposeFields() const = 0;
- virtual void sort(Children &children) const = 0;
+ virtual void sort(Children &children, bool sort_by_cost) const = 0;
virtual bool inheritStrict(size_t i) const = 0;
virtual SearchIteratorUP
createIntermediateSearch(MultiSearch::Children subSearches,
@@ -413,7 +427,7 @@ class LeafBlueprint : public Blueprint
private:
State _state;
protected:
- void optimize(Blueprint* &self, OptimizePass pass) final;
+ void optimize(Blueprint* &self, OptimizePass pass, bool sort_by_cost) final;
void setEstimate(HitEstimate est) {
_state.estimate(est);
_state.relative_estimate(calculate_relative_estimate());
diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp
index 6b0bd3ec7fc..85bd751df27 100644
--- a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.cpp
@@ -1,3 +1,3 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include "document_weight_search_iterator.h"
+#include "docid_with_weight_search_iterator.h"
diff --git a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h
index 448f1c8f2b4..8201c6a78b8 100644
--- a/searchlib/src/vespa/searchlib/queryeval/document_weight_search_iterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/docid_with_weight_search_iterator.h
@@ -8,7 +8,12 @@
namespace search::queryeval {
-class DocumentWeightSearchIterator : public SearchIterator
+/**
+ * SearchIterator implementation over a low-level posting list with {docid, weight} tuples.
+ *
+ * This is used by the parallel weak AND search iterator.
+ */
+class DocidWithWeightSearchIterator : public SearchIterator
{
private:
fef::TermFieldMatchData &_tfmd;
@@ -17,9 +22,9 @@ private:
queryeval::MinMaxPostingInfo _postingInfo;
public:
- DocumentWeightSearchIterator(fef::TermFieldMatchData &tfmd,
- const IDocidWithWeightPostingStore &attr,
- IDirectPostingStore::LookupResult dict_entry)
+ DocidWithWeightSearchIterator(fef::TermFieldMatchData &tfmd,
+ const IDocidWithWeightPostingStore &attr,
+ IDirectPostingStore::LookupResult dict_entry)
: _tfmd(tfmd),
_matchPosition(_tfmd.populate_fixed()),
_iterator(attr.create(dict_entry.posting_idx)),
diff --git a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
index 2f1a4386a95..e49fcbcb5bc 100644
--- a/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/dot_product_search.h
@@ -25,6 +25,9 @@ protected:
DotProductSearch() {}
public:
+ static constexpr bool filter_search = false;
+ static constexpr bool require_btree_iterators = true;
+
// TODO: use MultiSearch::Children to pass ownership
static SearchIterator::UP create(const std::vector<SearchIterator*> &children,
search::fef::TermFieldMatchData &tmd,
diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp
index 858cb92331a..c9ec6edb225 100644
--- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.cpp
@@ -2,9 +2,19 @@
#include "executeinfo.h"
+using vespalib::Doom;
namespace search::queryeval {
-const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, nullptr, vespalib::ThreadBundle::trivial(), true, true);
-const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, nullptr, vespalib::ThreadBundle::trivial(), true, true);
+const ExecuteInfo ExecuteInfo::TRUE(true, 1.0, Doom::never(), vespalib::ThreadBundle::trivial());
+const ExecuteInfo ExecuteInfo::FALSE(false, 1.0, Doom::never(), vespalib::ThreadBundle::trivial());
+
+ExecuteInfo::ExecuteInfo() noexcept
+ : ExecuteInfo(false, 1.0, Doom::never(), vespalib::ThreadBundle::trivial())
+{ }
+
+ExecuteInfo
+ExecuteInfo::createForTest(bool strict, double hitRate) noexcept {
+ return createForTest(strict, hitRate, Doom::never());
+}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h
index 3300a2aea4d..fa2c69e0400 100644
--- a/searchlib/src/vespa/searchlib/queryeval/executeinfo.h
+++ b/searchlib/src/vespa/searchlib/queryeval/executeinfo.h
@@ -13,12 +13,10 @@ namespace search::queryeval {
*/
class ExecuteInfo {
public:
- ExecuteInfo() noexcept : ExecuteInfo(false, 1.0, nullptr, vespalib::ThreadBundle::trivial(), true, true) { }
+ ExecuteInfo() noexcept;
bool is_strict() const noexcept { return _strict; }
- bool create_postinglist_when_non_strict() const noexcept { return _create_postinglist_when_non_strict; }
- bool use_estimate_for_fetch_postings() const noexcept { return _use_estimate_for_fetch_postings; }
double hit_rate() const noexcept { return _hitRate; }
- bool soft_doom() const noexcept { return _doom && _doom->soft_doom(); }
+ const vespalib::Doom & doom() const noexcept { return _doom; }
vespalib::ThreadBundle & thread_bundle() const noexcept { return _thread_bundle; }
static const ExecuteInfo TRUE;
@@ -27,39 +25,33 @@ public:
return create(strict, org._hitRate, org);
}
static ExecuteInfo create(bool strict, double hitRate, const ExecuteInfo & org) noexcept {
- return {strict, hitRate, org._doom, org.thread_bundle(), org.create_postinglist_when_non_strict(), org.use_estimate_for_fetch_postings()};
+ return {strict, hitRate, org._doom, org.thread_bundle()};
}
- static ExecuteInfo create(bool strict, double hitRate, const vespalib::Doom * doom, vespalib::ThreadBundle & thread_bundle_in,
- bool postinglist_when_non_strict, bool use_estimate_for_fetch_postings) noexcept
+ static ExecuteInfo create(bool strict, double hitRate, const vespalib::Doom & doom,
+ vespalib::ThreadBundle & thread_bundle_in) noexcept
{
- return {strict, hitRate, doom, thread_bundle_in, postinglist_when_non_strict, use_estimate_for_fetch_postings};
+ return {strict, hitRate, doom, thread_bundle_in};
}
static ExecuteInfo createForTest(bool strict) noexcept {
return createForTest(strict, 1.0);
}
- static ExecuteInfo createForTest(bool strict, double hitRate) noexcept {
- return createForTest(strict, hitRate, nullptr);
- }
- static ExecuteInfo createForTest(bool strict, double hitRate, const vespalib::Doom * doom) noexcept {
- return create(strict, hitRate, doom, vespalib::ThreadBundle::trivial(), true, true);
+ static ExecuteInfo createForTest(bool strict, double hitRate) noexcept;
+ static ExecuteInfo createForTest(bool strict, double hitRate, const vespalib::Doom & doom) noexcept {
+ return create(strict, hitRate, doom, vespalib::ThreadBundle::trivial());
}
private:
- ExecuteInfo(bool strict, double hitRate_in, const vespalib::Doom * doom, vespalib::ThreadBundle & thread_bundle_in,
- bool postinglist_when_non_strict, bool use_estimate_for_fetch_postings) noexcept
+ ExecuteInfo(bool strict, double hitRate_in, const vespalib::Doom & doom,
+ vespalib::ThreadBundle & thread_bundle_in) noexcept
: _doom(doom),
_thread_bundle(thread_bundle_in),
_hitRate(hitRate_in),
- _strict(strict),
- _create_postinglist_when_non_strict(postinglist_when_non_strict),
- _use_estimate_for_fetch_postings(use_estimate_for_fetch_postings)
+ _strict(strict)
{ }
- const vespalib::Doom * _doom;
+ const vespalib::Doom _doom;
vespalib::ThreadBundle & _thread_bundle;
double _hitRate;
bool _strict;
- bool _create_postinglist_when_non_strict;
- bool _use_estimate_for_fetch_postings;
};
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp
index ae5a7583c8c..f15f3d0e84c 100644
--- a/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/fake_requestcontext.cpp
@@ -3,7 +3,6 @@
#include "fake_requestcontext.h"
#include <vespa/vespalib/util/testclock.h>
-
namespace search::queryeval {
FakeRequestContext::FakeRequestContext()
@@ -13,7 +12,7 @@ FakeRequestContext::FakeRequestContext()
FakeRequestContext::FakeRequestContext(attribute::IAttributeContext * context, vespalib::steady_time softDoom, vespalib::steady_time hardDoom)
: _clock(std::make_unique<vespalib::TestClock>()),
- _doom(_clock->clock(), softDoom, hardDoom, false),
+ _doom(_clock->nowRef(), softDoom, hardDoom, false),
_attributeContext(context),
_query_tensor_name(),
_query_tensor(),
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
index c43335a6fdf..bebc1f433f7 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.cpp
@@ -33,7 +33,7 @@ size_t lookup_create_source(std::vector<std::unique_ptr<CombineType> > &sources,
}
template <typename CombineType>
-void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) {
+void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx, bool sort_by_cost) {
std::vector<size_t> source_blenders;
const SourceBlenderBlueprint * reference = nullptr;
for (size_t i = begin_idx; i < self.childCnt(); ++i) {
@@ -63,7 +63,7 @@ void optimize_source_blenders(IntermediateBlueprint &self, size_t begin_idx) {
top->addChild(std::move(sources.back()));
sources.pop_back();
}
- blender_up = Blueprint::optimize(std::move(blender_up));
+ blender_up = Blueprint::optimize(std::move(blender_up), sort_by_cost);
self.addChild(std::move(blender_up));
}
}
@@ -114,7 +114,7 @@ AndNotBlueprint::exposeFields() const
}
void
-AndNotBlueprint::optimize_self(OptimizePass pass)
+AndNotBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost)
{
if (childCnt() == 0) {
return;
@@ -133,7 +133,14 @@ AndNotBlueprint::optimize_self(OptimizePass pass)
while (grand_child->childCnt() > 1) {
addChild(grand_child->removeLastChild());
}
- child->addChild(grand_child->removeChild(0));
+ auto orphan = grand_child->removeChild(0);
+ if (auto *orphan_and = orphan->asAnd()) {
+ while (orphan_and->childCnt() > 0) {
+ child->addChild(orphan_and->removeLastChild());
+ }
+ } else {
+ child->addChild(std::move(orphan));
+ }
child->removeChild(i--);
}
}
@@ -145,7 +152,7 @@ AndNotBlueprint::optimize_self(OptimizePass pass)
}
}
if (pass == OptimizePass::LAST) {
- optimize_source_blenders<OrBlueprint>(*this, 1);
+ optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost);
}
}
@@ -159,10 +166,14 @@ AndNotBlueprint::get_replacement()
}
void
-AndNotBlueprint::sort(Children &children) const
+AndNotBlueprint::sort(Children &children, bool sort_by_cost) const
{
if (children.size() > 2) {
- std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate());
+ if (sort_by_cost) {
+ std::sort(children.begin() + 1, children.end(), MinimalOrCost());
+ } else {
+ std::sort(children.begin() + 1, children.end(), TieredGreaterEstimate());
+ }
}
}
@@ -224,7 +235,7 @@ AndBlueprint::exposeFields() const
}
void
-AndBlueprint::optimize_self(OptimizePass pass)
+AndBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 0; i < childCnt(); ++i) {
@@ -237,7 +248,7 @@ AndBlueprint::optimize_self(OptimizePass pass)
}
}
if (pass == OptimizePass::LAST) {
- optimize_source_blenders<AndBlueprint>(*this, 0);
+ optimize_source_blenders<AndBlueprint>(*this, 0, sort_by_cost);
}
}
@@ -251,9 +262,13 @@ AndBlueprint::get_replacement()
}
void
-AndBlueprint::sort(Children &children) const
+AndBlueprint::sort(Children &children, bool sort_by_cost) const
{
- std::sort(children.begin(), children.end(), TieredLessEstimate());
+ if (sort_by_cost) {
+ std::sort(children.begin(), children.end(), MinimalAndCost());
+ } else {
+ std::sort(children.begin(), children.end(), TieredLessEstimate());
+ }
}
bool
@@ -293,21 +308,13 @@ AndBlueprint::createFilterSearch(bool strict, FilterConstraint constraint) const
}
double
-AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const {
- double estimate = use_estimate ? child.estimate() : child.hit_ratio();
- return hit_rate * estimate;
+AndBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const {
+ return hit_rate * child.estimate();
}
double
-OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const {
- // Avoid dropping hitRate to zero when meeting a conservatively high hitrate in a child.
- // Happens at least when using non fast-search attributes, and with AND nodes.
- constexpr double MIN_INVERSE_HIT_RATIO = 0.10;
- double estimate = use_estimate ? child.estimate() : child.hit_ratio();
- double inverse_child_estimate = 1.0 - estimate;
- return (use_estimate || (inverse_child_estimate > MIN_INVERSE_HIT_RATIO))
- ? hit_rate * inverse_child_estimate
- : hit_rate;
+OrBlueprint::computeNextHitRate(const Blueprint & child, double hit_rate) const {
+ return hit_rate * (1.0 - child.estimate());
}
//-----------------------------------------------------------------------------
@@ -337,7 +344,7 @@ OrBlueprint::exposeFields() const
}
void
-OrBlueprint::optimize_self(OptimizePass pass)
+OrBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 0; (childCnt() > 1) && (i < childCnt()); ++i) {
@@ -352,7 +359,7 @@ OrBlueprint::optimize_self(OptimizePass pass)
}
}
if (pass == OptimizePass::LAST) {
- optimize_source_blenders<OrBlueprint>(*this, 0);
+ optimize_source_blenders<OrBlueprint>(*this, 0, sort_by_cost);
}
}
@@ -366,9 +373,13 @@ OrBlueprint::get_replacement()
}
void
-OrBlueprint::sort(Children &children) const
+OrBlueprint::sort(Children &children, bool sort_by_cost) const
{
- std::sort(children.begin(), children.end(), TieredGreaterEstimate());
+ if (sort_by_cost) {
+ std::sort(children.begin(), children.end(), MinimalOrCost());
+ } else {
+ std::sort(children.begin(), children.end(), TieredGreaterEstimate());
+ }
}
bool
@@ -445,7 +456,7 @@ WeakAndBlueprint::exposeFields() const
}
void
-WeakAndBlueprint::sort(Children &) const
+WeakAndBlueprint::sort(Children &, bool) const
{
// order needs to stay the same as _weights
}
@@ -509,9 +520,13 @@ NearBlueprint::exposeFields() const
}
void
-NearBlueprint::sort(Children &children) const
+NearBlueprint::sort(Children &children, bool sort_by_cost) const
{
- std::sort(children.begin(), children.end(), TieredLessEstimate());
+ if (sort_by_cost) {
+ std::sort(children.begin(), children.end(), MinimalAndCost());
+ } else {
+ std::sort(children.begin(), children.end(), TieredLessEstimate());
+ }
}
bool
@@ -572,10 +587,9 @@ ONearBlueprint::exposeFields() const
}
void
-ONearBlueprint::sort(Children &children) const
+ONearBlueprint::sort(Children &, bool) const
{
// ordered near cannot sort children here
- (void)children;
}
bool
@@ -641,7 +655,7 @@ RankBlueprint::exposeFields() const
}
void
-RankBlueprint::optimize_self(OptimizePass pass)
+RankBlueprint::optimize_self(OptimizePass pass, bool sort_by_cost)
{
if (pass == OptimizePass::FIRST) {
for (size_t i = 1; i < childCnt(); ++i) {
@@ -651,7 +665,7 @@ RankBlueprint::optimize_self(OptimizePass pass)
}
}
if (pass == OptimizePass::LAST) {
- optimize_source_blenders<OrBlueprint>(*this, 1);
+ optimize_source_blenders<OrBlueprint>(*this, 1, sort_by_cost);
}
}
@@ -665,9 +679,8 @@ RankBlueprint::get_replacement()
}
void
-RankBlueprint::sort(Children &children) const
+RankBlueprint::sort(Children &, bool) const
{
- (void)children;
}
bool
@@ -744,7 +757,7 @@ SourceBlenderBlueprint::exposeFields() const
}
void
-SourceBlenderBlueprint::sort(Children &) const
+SourceBlenderBlueprint::sort(Children &, bool) const
{
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
index 14672c2a5cd..620280e979b 100644
--- a/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
+++ b/searchlib/src/vespa/searchlib/queryeval/intermediate_blueprints.h
@@ -19,10 +19,10 @@ public:
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
- void optimize_self(OptimizePass pass) override;
+ void optimize_self(OptimizePass pass, bool sort_by_cost) override;
AndNotBlueprint * asAndNot() noexcept final { return this; }
Blueprint::UP get_replacement() override;
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
SearchIterator::UP
createIntermediateSearch(MultiSearch::Children subSearches,
@@ -47,10 +47,10 @@ public:
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
- void optimize_self(OptimizePass pass) override;
+ void optimize_self(OptimizePass pass, bool sort_by_cost) override;
AndBlueprint * asAnd() noexcept final { return this; }
Blueprint::UP get_replacement() override;
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
SearchIterator::UP
createIntermediateSearch(MultiSearch::Children subSearches,
@@ -58,7 +58,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override;
+ double computeNextHitRate(const Blueprint & child, double hit_rate) const override;
};
//-----------------------------------------------------------------------------
@@ -73,10 +73,10 @@ public:
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
- void optimize_self(OptimizePass pass) override;
+ void optimize_self(OptimizePass pass, bool sort_by_cost) override;
OrBlueprint * asOr() noexcept final { return this; }
Blueprint::UP get_replacement() override;
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
SearchIterator::UP
createIntermediateSearch(MultiSearch::Children subSearches,
@@ -84,7 +84,7 @@ public:
SearchIterator::UP
createFilterSearch(bool strict, FilterConstraint constraint) const override;
private:
- double computeNextHitRate(const Blueprint & child, double hit_rate, bool use_estimate) const override;
+ double computeNextHitRate(const Blueprint & child, double hit_rate) const override;
uint8_t calculate_cost_tier() const override;
};
@@ -101,7 +101,7 @@ public:
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
bool always_needs_unpack() const override;
WeakAndBlueprint * asWeakAnd() noexcept final { return this; }
@@ -133,7 +133,7 @@ public:
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
bool should_optimize_children() const override { return false; }
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override;
SearchIterator::UP
@@ -157,7 +157,7 @@ public:
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
bool should_optimize_children() const override { return false; }
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
SearchIteratorUP createSearch(fef::MatchData &md, bool strict) const override;
SearchIterator::UP
@@ -177,9 +177,9 @@ public:
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
- void optimize_self(OptimizePass pass) override;
+ void optimize_self(OptimizePass pass, bool sort_by_cost) override;
Blueprint::UP get_replacement() override;
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
bool isRank() const noexcept final { return true; }
SearchIterator::UP
@@ -206,7 +206,7 @@ public:
double calculate_relative_estimate() const final;
HitEstimate combine(const std::vector<HitEstimate> &data) const override;
FieldSpecBaseList exposeFields() const override;
- void sort(Children &children) const override;
+ void sort(Children &children, bool sort_by_cost) const override;
bool inheritStrict(size_t i) const override;
SearchIterator::UP
createIntermediateSearch(MultiSearch::Children subSearches,
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
index fdf4ec950dd..e90156868fb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.cpp
@@ -4,7 +4,6 @@
#include "andsearch.h"
#include "andnotsearch.h"
#include "sourceblendersearch.h"
-#include <vespa/searchlib/common/bitvectoriterator.h>
#include <vespa/vespalib/hwaccelrated/iaccelrated.h>
namespace search::queryeval {
@@ -18,17 +17,17 @@ namespace {
struct And {
using Word = BitWord::Word;
void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept {
- accel.and64(offset, src, dest);
+ accel.and128(offset, src, dest);
}
- static bool isAnd() noexcept { return true; }
+ static constexpr bool isAnd() noexcept { return true; }
};
struct Or {
using Word = BitWord::Word;
void operator () (const IAccelrated & accel, size_t offset, const std::vector<Meta> & src, void *dest) noexcept {
- accel.or64(offset, src, dest);
+ accel.or128(offset, src, dest);
}
- static bool isAnd() noexcept { return false; }
+ static constexpr bool isAnd() noexcept { return false; }
};
}
@@ -56,43 +55,47 @@ MultiBitVector<Update>::MultiBitVector(size_t reserved)
_accel(IAccelrated::getAccelerator()),
_lastWords()
{
- static_assert(sizeof(_lastWords) == 64, "Lastwords should have 64 byte size");
- static_assert(NumWordsInBatch == 8, "Batch size should be 8 words.");
+ static_assert(sizeof(_lastWords) == 128, "Lastwords should have 128 byte size");
+ static_assert(NumWordsInBatch == 16, "Batch size should be 16 words.");
memset(_lastWords, 0, sizeof(_lastWords));
}
template<typename Update>
bool
-MultiBitVector<Update>::updateLastValue(uint32_t docId) noexcept
+MultiBitVector<Update>::updateLastValueCold(uint32_t docId) noexcept
{
- if (docId >= _lastMaxDocIdLimit) {
- if (__builtin_expect(isAtEnd(docId), false)) {
- return true;
- }
- const uint32_t index(BitWord::wordNum(docId));
- if (docId >= _lastMaxDocIdLimitRequireFetch) {
- uint32_t baseIndex = index & ~(NumWordsInBatch - 1);
- _update(_accel, baseIndex*sizeof(Word), _bvs, _lastWords);
- _lastMaxDocIdLimitRequireFetch = (baseIndex + NumWordsInBatch) * BitWord::WordLen;
- }
- _lastValue = _lastWords[index % NumWordsInBatch];
- _lastMaxDocIdLimit = (index + 1) * BitWord::WordLen;
+ if (__builtin_expect(isAtEnd(docId), false)) {
+ return true;
+ }
+ const uint32_t index(BitWord::wordNum(docId));
+ if (docId >= _lastMaxDocIdLimitRequireFetch) {
+ fetchChunk(index);
}
+ _lastValue = _lastWords[index % NumWordsInBatch];
+ _lastMaxDocIdLimit = (index + 1) * BitWord::WordLen;
return false;
}
+// Refills _lastWords with the batch of NumWordsInBatch words that contains word 'index'
+// and records the first docId that lies beyond that batch.
template<typename Update>
+void
+MultiBitVector<Update>::fetchChunk(uint32_t index) noexcept
+{
+    // Round down to the start of the batch (NumWordsInBatch is asserted to be 16).
+    const uint32_t batchStart = index & ~(NumWordsInBatch - 1);
+    _update(_accel, batchStart*sizeof(Word), _bvs, _lastWords);
+    _lastMaxDocIdLimitRequireFetch = (batchStart + NumWordsInBatch) * BitWord::WordLen;
+}
+
+// Strict seek: starting at docId, advances one word at a time until a non-zero word is
+// found or the end is reached. Returns the docId of the lowest set bit in that word,
+// or _numDocs when the end was reached.
+template<typename Update>
uint32_t
MultiBitVector<Update>::strictSeek(uint32_t docId) noexcept
{
bool atEnd;
+ // The first word is masked with BitWord::checkTab(docId) (presumably clearing the bits
+ // below docId - confirm against BitWord); later words are used unmasked.
for (atEnd = updateLastValue(docId), _lastValue = _lastValue & BitWord::checkTab(docId);
- (_lastValue == 0) && __builtin_expect(! atEnd, true);
+ __builtin_expect(_lastValue == 0, Update::isAnd()) && __builtin_expect(! atEnd, true); // And is likely to have few bits, while Or has many.
atEnd = updateLastValue(_lastMaxDocIdLimit));
- if (__builtin_expect(!atEnd, true)) {
- return _lastMaxDocIdLimit - BitWord::WordLen + vespalib::Optimized::lsbIdx(_lastValue);
- }
- return _numDocs;
+ // _lastMaxDocIdLimit - WordLen is the first docId of the current word.
+ return (__builtin_expect(!atEnd, true))
+ ? _lastMaxDocIdLimit - BitWord::WordLen + vespalib::Optimized::lsbIdx(_lastValue)
+ : _numDocs;
}
template<typename Update>
@@ -100,12 +103,8 @@ bool
MultiBitVector<Update>::seek(uint32_t docId) noexcept
{
+ // Non-strict seek: true iff docId is not at the end and the current word has a bit in
+ // common with BitWord::mask(docId).
bool atEnd = updateLastValue(docId);
- if (__builtin_expect( ! atEnd, true)) {
- if (_lastValue & BitWord::mask(docId)) {
- return true;
- }
- }
- return false;
+ // Reduce the masked word to a bool before passing it to __builtin_expect: the builtin
+ // takes 'long' arguments, so a raw word could lose its high bits on targets where
+ // long is narrower than Word.
+ return __builtin_expect( ! atEnd, true) &&
+ __builtin_expect((_lastValue & BitWord::mask(docId)) != 0, false);
}
namespace {
@@ -160,7 +159,7 @@ template<typename Update>
void
MultiBitVectorIterator<Update>::doSeek(uint32_t docId)
{
+ // Non-strict seek: the iterator is only moved to docId when it is a hit, and the hit
+ // branch is hinted as the unlikely one.
- if (_mbv.seek(docId)) {
+ if (_mbv.seek(docId)) [[unlikely]] {
setDocId(docId);
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
index 2b4f90544ac..0ecf9d85b92 100644
--- a/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
+++ b/searchlib/src/vespa/searchlib/queryeval/multibitvectoriterator.h
@@ -37,12 +37,20 @@ public:
bool seek(uint32_t docId) noexcept;
bool acceptExtraFilter() const noexcept { return Update::isAnd(); }
private:
- bool updateLastValue(uint32_t docId) noexcept;
+    // Inline fast path: nothing to do while docId is still below _lastMaxDocIdLimit;
+    // otherwise the out-of-line updateLastValueCold() decides the result.
+    bool updateLastValue(uint32_t docId) noexcept {
+        return (docId >= _lastMaxDocIdLimit) && updateLastValueCold(docId);
+    }
+ VESPA_DLL_LOCAL bool updateLastValueCold(uint32_t docId) noexcept __attribute__((noinline));
+ VESPA_DLL_LOCAL void fetchChunk(uint32_t docId) noexcept __attribute__((noinline));
+
using IAccelrated = vespalib::hwaccelrated::IAccelrated;
Update _update;
const IAccelrated & _accel;
- alignas(64) Word _lastWords[8];
+ alignas(64) Word _lastWords[16];
static constexpr size_t NumWordsInBatch = sizeof(_lastWords) / sizeof(Word);
};
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
index f0c75173671..500e9fe4dbb 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.cpp
@@ -45,7 +45,7 @@ SameElementBlueprint::addTerm(Blueprint::UP term)
}
void
-SameElementBlueprint::optimize_self(OptimizePass pass)
+SameElementBlueprint::optimize_self(OptimizePass pass, bool)
{
if (pass == OptimizePass::LAST) {
std::sort(_terms.begin(), _terms.end(),
@@ -60,13 +60,11 @@ SameElementBlueprint::fetchPostings(const ExecuteInfo &execInfo)
{
if (_terms.empty()) return;
_terms[0]->fetchPostings(execInfo);
- double estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->hit_ratio() : _terms[0]->estimate();
- double hit_rate = execInfo.hit_rate() * estimate;
+ double hit_rate = execInfo.hit_rate() * _terms[0]->estimate();
for (size_t i = 1; i < _terms.size(); ++i) {
Blueprint & term = *_terms[i];
term.fetchPostings(ExecuteInfo::create(false, hit_rate, execInfo));
- estimate = execInfo.use_estimate_for_fetch_postings() ? _terms[0]->hit_ratio() : _terms[0]->estimate();
- hit_rate = hit_rate * estimate;
+ hit_rate = hit_rate * _terms[i]->estimate();
}
}
diff --git a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h
index 6a988e67149..06c20339e81 100644
--- a/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h
+++ b/searchlib/src/vespa/searchlib/queryeval/same_element_blueprint.h
@@ -34,7 +34,7 @@ public:
// used by create visitor
void addTerm(Blueprint::UP term);
- void optimize_self(OptimizePass pass) override;
+ void optimize_self(OptimizePass pass, bool sort_by_cost) override;
void fetchPostings(const ExecuteInfo &execInfo) override;
std::unique_ptr<SameElementSearch> create_same_element_search(search::fef::TermFieldMatchData& tfmd, bool strict) const;
diff --git a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
index 828ca4be08d..f3028f5159a 100644
--- a/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/wand/parallel_weak_and_search.cpp
@@ -1,7 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "parallel_weak_and_search.h"
-#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/queryeval/docid_with_weight_search_iterator.h>
#include <vespa/searchlib/queryeval/monitoring_dump_iterator.h>
#include <vespa/searchlib/fef/matchdatalayout.h>
#include <vespa/vespalib/objects/visit.h>
@@ -243,7 +243,7 @@ ParallelWeakAndSearch::create(search::fef::TermFieldMatchData &tfmd,
assert(childrenMatchData->getNumTermFields() == dict_entries.size());
wand::Terms terms;
for (size_t i = 0; i < dict_entries.size(); ++i) {
- terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]),
+ terms.push_back(wand::Term(new DocidWithWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]),
weights[i],
dict_entries[i].posting_size,
childrenMatchData->resolveTermField(handles[i])));
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
index 1cecbca7660..0929f80a8f0 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.cpp
@@ -2,7 +2,7 @@
#include "weighted_set_term_search.h"
#include <vespa/searchlib/common/bitvector.h>
-#include <vespa/searchlib/attribute/document_weight_or_filter_search.h>
+#include <vespa/searchlib/attribute/multi_term_or_filter_search.h>
#include <vespa/vespalib/objects/visit.h>
#include <vespa/searchcommon/attribute/i_search_context.h>
@@ -175,7 +175,7 @@ WeightedSetTermSearch::create(const std::vector<SearchIterator *> &children,
using HeapImpl = WeightedSetTermSearchImpl<vespalib::LeftHeap, SearchIteratorPack>;
if (tmd.isNotNeeded()) {
- return attribute::DocumentWeightOrFilterSearch::create(children, std::move(match_data));
+ return attribute::MultiTermOrFilterSearch::create(children, std::move(match_data));
}
if (children.size() < 128) {
diff --git a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
index a497a647ac6..a9ab86e2c5f 100644
--- a/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
+++ b/searchlib/src/vespa/searchlib/queryeval/weighted_set_term_search.h
@@ -27,6 +27,9 @@ protected:
WeightedSetTermSearch() = default;
public:
+ static constexpr bool filter_search = false;
+ static constexpr bool require_btree_iterators = false;
+
// TODO: pass ownership with unique_ptr
static SearchIterator::UP create(const std::vector<SearchIterator *> &children,
search::fef::TermFieldMatchData &tmd,
diff --git a/searchlib/src/vespa/searchlib/test/CMakeLists.txt b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
index e7401d74c71..a4db57a44cd 100644
--- a/searchlib/src/vespa/searchlib/test/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/test/CMakeLists.txt
@@ -4,6 +4,8 @@ vespa_add_library(searchlib_test
attribute_builder.cpp
document_weight_attribute_helper.cpp
doc_builder.cpp
+ ft_test_app.cpp
+ ft_test_app_base.cpp
imported_attribute_fixture.cpp
initrange.cpp
make_attribute_map_lookup_node.cpp
diff --git a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
index 059456c383b..10d64b0aa6d 100644
--- a/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
+++ b/searchlib/src/vespa/searchlib/test/document_weight_attribute_helper.h
@@ -6,7 +6,7 @@
#include <vespa/searchlib/attribute/multinumericattribute.h>
#include <vespa/searchlib/attribute/multinumericpostattribute.hpp>
#include <vespa/searchlib/attribute/attributefactory.h>
-#include <vespa/vespalib/testkit/test_kit.h>
+#include <cassert>
namespace search::test {
@@ -25,8 +25,8 @@ public:
_int_attr(dynamic_cast<IntegerAttribute *>(_attr.get())),
_dww(_attr->as_docid_with_weight_posting_store())
{
- ASSERT_TRUE(_int_attr != nullptr);
- ASSERT_TRUE(_dww != nullptr);
+ assert(_int_attr != nullptr);
+ assert(_dww != nullptr);
}
~DocumentWeightAttributeHelper();
@@ -36,7 +36,7 @@ public:
_attr->addDoc(docid);
}
_attr->commit();
- ASSERT_EQUAL((limit - 1), docid);
+ assert((limit - 1) == docid);
}
void set_doc(uint32_t docid, int64_t key, int32_t weight) {
diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app.cpp b/searchlib/src/vespa/searchlib/test/ft_test_app.cpp
new file mode 100644
index 00000000000..1d9d7c05d76
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/ft_test_app.cpp
@@ -0,0 +1,5 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "ft_test_app.h"
+
+// Out-of-line definition of the destructor that ft_test_app.h declares as an override.
+FtTestApp::~FtTestApp() = default;
diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app.h b/searchlib/src/vespa/searchlib/test/ft_test_app.h
new file mode 100644
index 00000000000..432d9d80e4d
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/ft_test_app.h
@@ -0,0 +1,13 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "ft_test_app_base.h"
+#include <vespa/vespalib/testkit/testapp.h>
+
+/*
+ * Test application used by feature unit tests: a vespalib::TestApp that
+ * also inherits the static helper functions of FtTestAppBase.
+ */
+struct FtTestApp : vespalib::TestApp, FtTestAppBase {
+    ~FtTestApp() override;
+};
diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp b/searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp
new file mode 100644
index 00000000000..eee5631dcc5
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/ft_test_app_base.cpp
@@ -0,0 +1,286 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "ft_test_app.h"
+#include <vespa/searchlib/fef/test/dummy_dependency_handler.h>
+#include <vespa/vespalib/util/stringfmt.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".ft_test_app_base");
+
+namespace fieldmatch = search::features::fieldmatch;
+using search::fef::test::DummyDependencyHandler;
+using search::fef::FieldInfo;
+using search::fef::FieldType;
+using search::fef::Properties;
+using search::fef::test::RankResult;
+
+// Convenience overload: runs the failing-setup check against a default-constructed
+// index environment.
+void
+FtTestAppBase::FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params)
+{
+    search::fef::test::IndexEnvironment defaultEnv;
+    FT_SETUP_FAIL(prototype, defaultEnv, params);
+}
+
+// Logs the test input, creates an instance of the prototype and expects that
+// setup(env, params) on that instance fails.
+void
+FtTestAppBase::FT_SETUP_FAIL(const search::fef::Blueprint &prototype,
+                             const search::fef::test::IndexEnvironment &env,
+                             const StringList &params)
+{
+    FT_LOG(prototype, env, params);
+    search::fef::Blueprint::UP bp = prototype.createInstance();
+    DummyDependencyHandler dependencies(*bp);
+    EXPECT_TRUE(!bp->setup(env, params));
+}
+
+// Convenience overload: runs the successful-setup check against a default-constructed
+// index environment.
+void
+FtTestAppBase::FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params,
+                           const StringList &expectedIn, const StringList &expectedOut)
+{
+    search::fef::test::IndexEnvironment defaultEnv;
+    FT_SETUP_OK(prototype, defaultEnv, params, expectedIn, expectedOut);
+}
+
+// Logs the test input, creates an instance of the prototype, requires setup(env, params)
+// to succeed and then compares the dependency handler's input and output lists with
+// expectedIn and expectedOut.
+void
+FtTestAppBase::FT_SETUP_OK(const search::fef::Blueprint &prototype,
+                           const search::fef::test::IndexEnvironment &env,
+                           const StringList &params,
+                           const StringList &expectedIn,
+                           const StringList &expectedOut)
+{
+    FT_LOG(prototype, env, params);
+    search::fef::Blueprint::UP bp = prototype.createInstance();
+    DummyDependencyHandler dependencies(*bp);
+    ASSERT_TRUE(bp->setup(env, params));
+    FT_EQUAL(expectedIn, dependencies.input, "In, ");
+    FT_EQUAL(expectedOut, dependencies.output, "Out,");
+}
+
+// Expects that the blueprint registered as 'baseName' dumps no features.
+void
+FtTestAppBase::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName)
+{
+    FT_DUMP(factory, baseName, StringList());
+}
+
+// Expects that the blueprint registered as 'baseName' dumps no features for the given
+// index environment.
+void
+FtTestAppBase::FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+                             search::fef::test::IndexEnvironment &env)
+{
+    FT_DUMP(factory, baseName, env, StringList());
+}
+
+// Convenience overload: checks the dumped features against 'expected' using a
+// default-constructed index environment.
+void
+FtTestAppBase::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+                       const StringList &expected)
+{
+    search::fef::test::IndexEnvironment defaultEnv;
+    FT_DUMP(factory, baseName, defaultEnv, expected);
+}
+
+// Creates the blueprint named 'baseName' from the factory, lets it visit its dump
+// features for 'env' and compares the collected feature names with 'expected'.
+// A missing blueprint is logged and fails the test.
+void
+FtTestAppBase::FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+                       search::fef::test::IndexEnvironment &env,
+                       const StringList &expected)
+{
+    search::fef::Blueprint::SP bp = factory.createBlueprint(baseName);
+    if ( ! bp) {
+        LOG(error, "Blueprint '%s' does not exist in factory, did you forget to add it?", baseName.c_str());
+        ASSERT_TRUE(bp);
+    }
+    FtDumpFeatureVisitor visitor;
+    bp->visitDumpFeatures(env, visitor);
+    FT_EQUAL(expected, visitor.features(), "Dump");
+}
+
+// Logs both lists (tagged with 'prefix') and then requires them to have the same size
+// and the same element at every position.
+void
+FtTestAppBase::FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual,
+                        const vespalib::string &prefix)
+{
+    const vespalib::string expectedLabel = prefix + " expected";
+    const vespalib::string actualLabel = prefix + " actual  ";
+    FT_LOG(expectedLabel, expected);
+    FT_LOG(actualLabel, actual);
+
+    // EXPECT_* reports the mismatch, the following ASSERT_* stops the test.
+    EXPECT_EQUAL(expected.size(), actual.size());
+    ASSERT_TRUE(expected.size() == actual.size());
+    for (size_t i = 0; i < expected.size(); ++i) {
+        EXPECT_EQUAL(expected[i], actual[i]);
+        ASSERT_TRUE(expected[i] == actual[i]);
+    }
+}
+
+// Logs which blueprint is being tested together with the field names of the index
+// environment and the setup parameters.
+void
+FtTestAppBase::FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+                      const StringList &params)
+{
+    LOG(info, "Testing blueprint '%s'.", prototype.getBaseName().c_str());
+
+    std::vector<vespalib::string> fieldNames;
+    for (const auto & field : env.getFields()) {
+        fieldNames.push_back(field.name());
+    }
+    FT_LOG("Environment ", fieldNames);
+    FT_LOG("Parameters  ", params);
+}
+
+// Logs the list as "<prefix> = [ 'a', 'b', ... ]" at info level.
+void
+FtTestAppBase::FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr)
+{
+    vespalib::string line = prefix + " = [ ";
+    const char *separator = "";   // empty before the first element, ", " afterwards
+    for (const auto & item : arr) {
+        line.append(separator).append("'").append(item).append("'");
+        separator = ", ";
+    }
+    line.append(" ]");
+    LOG(info, "%s", line.c_str());
+}
+
+// Sets up 'test' for a query given as a string, adding one query term per character of
+// 'query', and then builds match data for 'docId' from 'index' (field name -> content).
+// Each position of a field's content that occurs in 'query' is added as an occurrence
+// of the term at the matching query position.
+void
+FtTestAppBase::FT_SETUP(FtFeatureTest &test, const vespalib::string &query, const StringMap &index,
+                        uint32_t docId)
+{
+    LOG(info, "Setup test for query '%s'.", query.c_str());
+
+    // Add all query terms.
+    FtQueryEnvironment &queryEnv = test.getQueryEnv();
+    for (uint32_t i = 0; i < query.size(); ++i) {
+        queryEnv.getBuilder().addAllFields();
+    }
+    ASSERT_TRUE(test.setup());
+
+    // Add all occurrences.
+    search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
+    for (auto it = index.begin(); it != index.end(); ++it) {
+        ASSERT_TRUE(mdb->setFieldLength(it->first, it->second.size()));
+        for (uint32_t i = 0; i < it->second.size(); ++i) {
+            size_t pos = query.find_first_of(it->second[i]);
+            if (pos != vespalib::string::npos) {
+                // 'i' is a uint32_t, so it is formatted with %u (as in the sibling overload).
+                LOG(debug, "Occurence of '%c' added to field '%s' at position %u.", query[pos], it->first.c_str(), i);
+                ASSERT_TRUE(mdb->addOccurence(it->first, pos, i));
+            }
+        }
+    }
+    ASSERT_TRUE(mdb->apply(docId));
+}
+
+// Sets up 'test' for a query given as a list of terms and builds match data for 'docId'
+// from 'index' (field name -> list of tokens). Every query term equal to a token is
+// given an occurrence at that token's position, including repeated query terms.
+void
+FtTestAppBase::FT_SETUP(FtFeatureTest & test, const std::vector<FtQueryTerm> & query, const StringVectorMap & index,
+                        uint32_t docId)
+{
+    setupQueryEnv(test.getQueryEnv(), query);
+    ASSERT_TRUE(test.setup());
+
+    search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
+
+    // Add all occurrences.
+    for (auto itr = index.begin(); itr != index.end(); ++itr) {
+        ASSERT_TRUE(mdb->setFieldLength(itr->first, itr->second.size()));
+        for (uint32_t i = 0; i < itr->second.size(); ++i) {
+            // Walk over every query term that matches the token at position i.
+            for (auto fitr = std::find(query.begin(), query.end(), FtQueryTerm(itr->second[i]));
+                 fitr != query.end();
+                 fitr = std::find(fitr + 1, query.end(), FtQueryTerm(itr->second[i])))
+            {
+                uint32_t termId = fitr - query.begin();
+                LOG(debug, "Occurence of '%s' added to field '%s' at position %u.", fitr->term.c_str(), itr->first.c_str(), i);
+                ASSERT_TRUE(mdb->addOccurence(itr->first, termId, i));
+            }
+        }
+    }
+    ASSERT_TRUE(mdb->apply(docId));
+}
+
+// Sets up 'test' for 'query' and builds match data for 'docId' from an FtIndex: each
+// element of each field is added with its weight and token count, and every token that
+// equals a query term is added as an occurrence of that term within the element.
+void
+FtTestAppBase::FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId)
+{
+    setupQueryEnv(test.getQueryEnv(), query);
+    ASSERT_TRUE(test.setup());
+    search::fef::test::MatchDataBuilder::UP mdb = test.createMatchDataBuilder();
+
+    // Add all occurrences.
+    for (auto itr = index.index.begin(); itr != index.index.end(); ++itr) {
+        for (size_t e = 0; e < itr->second.size(); ++e) {
+            const FtIndex::Element &element = itr->second[e];
+            ASSERT_TRUE(mdb->addElement(itr->first, element.weight, element.tokens.size()));
+            for (size_t t = 0; t < element.tokens.size(); ++t) {
+                for (size_t q = 0; q < query.size(); ++q) {
+                    if (!(query[q].term == element.tokens[t])) {
+                        continue;
+                    }
+                    ASSERT_TRUE(mdb->addOccurence(itr->first, q, t, e));
+                }
+            }
+        }
+    }
+    ASSERT_TRUE(mdb->apply(docId));
+}
+
+// Adds one term per query entry to the query environment and sets its phrase length (1),
+// unique id and weight. Significance is published as the property
+// "vespa.term.<i>.significance"; for every term but the first, the index of the previous
+// term and the connexity are added under "vespa.term.<i>.connexity".
+void
+FtTestAppBase::setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query)
+{
+    // Add all query terms.
+    for (uint32_t i = 0; i < query.size(); ++i) {
+        queryEnv.getBuilder().addAllFields();
+        queryEnv.getTerms()[i].setPhraseLength(1);
+        queryEnv.getTerms()[i].setUniqueId(i);
+        queryEnv.getTerms()[i].setWeight(query[i].termWeight);
+        if (i > 0) {
+            const vespalib::string connexityKey = vespalib::make_string("vespa.term.%u.connexity", i);
+            const vespalib::string previousTerm = vespalib::make_string("%u", i - 1);
+            const vespalib::string connexityValue = vespalib::make_string("%f", query[i].connexity);
+            // Two values under the same key: first the previous term, then the connexity.
+            queryEnv.getProperties().add(connexityKey, previousTerm);
+            queryEnv.getProperties().add(connexityKey, connexityValue);
+        }
+        const vespalib::string significanceKey = vespalib::make_string("vespa.term.%u.significance", i);
+        const vespalib::string significanceValue = vespalib::make_string("%f", query[i].significance);
+        queryEnv.getProperties().add(significanceKey, significanceValue);
+        LOG(debug, "Add term node: '%s'", query[i].term.c_str());
+    }
+}
+
+// Prepares 'ft' for a fieldMatch test on a single index field named 'indexName'.
+// When 'params' is given, its values are published as "fieldMatch(<indexName>).*" index
+// properties. Positive totalTermWeight / totalSignificance are published as query
+// properties. Finally the tokenized 'field' is matched against the query via FT_SETUP.
+void
+FtTestAppBase::setupFieldMatch(FtFeatureTest & ft, const vespalib::string & indexName,
+                               const vespalib::string & query, const vespalib::string & field,
+                               const fieldmatch::Params * params, uint32_t totalTermWeight, feature_t totalSignificance,
+                               uint32_t docId)
+{
+    ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, FieldInfo::CollectionType::SINGLE, indexName);
+
+    // Common prefix of all property names used below.
+    const vespalib::string feature = "fieldMatch(" + indexName + ")";
+
+    if (params != nullptr) {
+        Properties & props = ft.getIndexEnv().getProperties();
+        props.add(feature + ".proximityLimit", vespalib::make_string("%u", params->getProximityLimit()));
+        props.add(feature + ".maxAlternativeSegmentations", vespalib::make_string("%u", params->getMaxAlternativeSegmentations()));
+        props.add(feature + ".maxOccurrences", vespalib::make_string("%u", params->getMaxOccurrences()));
+        props.add(feature + ".proximityCompletenessImportance", vespalib::make_string("%f", params->getProximityCompletenessImportance()));
+        props.add(feature + ".relatednessImportance", vespalib::make_string("%f", params->getRelatednessImportance()));
+        props.add(feature + ".earlinessImportance", vespalib::make_string("%f", params->getEarlinessImportance()));
+        props.add(feature + ".segmentProximityImportance", vespalib::make_string("%f", params->getSegmentProximityImportance()));
+        props.add(feature + ".occurrenceImportance", vespalib::make_string("%f", params->getOccurrenceImportance()));
+        props.add(feature + ".fieldCompletenessImportance", vespalib::make_string("%f", params->getFieldCompletenessImportance()));
+        for (double entry : params->getProximityTable()) {
+            props.add(feature + ".proximityTable", vespalib::make_string("%f", entry));
+        }
+    }
+
+    if (totalTermWeight > 0) {
+        ft.getQueryEnv().getProperties().add(feature + ".totalTermWeight",
+                                             vespalib::make_string("%u", totalTermWeight));
+    }
+
+    if (totalSignificance > 0.0f) {
+        ft.getQueryEnv().getProperties().add(feature + ".totalTermSignificance",
+                                             vespalib::make_string("%f", totalSignificance));
+    }
+
+    std::map<vespalib::string, std::vector<vespalib::string> > fieldTokens;
+    fieldTokens[indexName] = FtUtil::tokenize(field);
+    FT_SETUP(ft, FtUtil::toQuery(query), fieldTokens, docId);
+}
+
+
+// Thin forwarder to FtUtil::toRankResult with the same arguments.
+RankResult
+FtTestAppBase::toRankResult(const vespalib::string & baseName,
+ const vespalib::string & result,
+ const vespalib::string & separator)
+{
+ return FtUtil::toRankResult(baseName, result, separator);
+}
diff --git a/searchlib/src/vespa/searchlib/test/ft_test_app_base.h b/searchlib/src/vespa/searchlib/test/ft_test_app_base.h
new file mode 100644
index 00000000000..329d93e4c47
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/test/ft_test_app_base.h
@@ -0,0 +1,61 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/features/fieldmatch/params.h>
+#include <vespa/searchlib/fef/test/ftlib.h>
+#include <vespa/vespalib/testkit/test_macros.h>
+
+/*
+ * Base class for test application used by feature unit tests.
+ *
+ * All members are static helpers; failures are reported through the testkit
+ * macros used in the implementation.
+ */
+struct FtTestAppBase {
+    using string = vespalib::string;
+
+    // Blueprint setup checks. Overloads without 'env' use a default-constructed index environment.
+    static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const StringList &params);
+    static void FT_SETUP_FAIL(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+                              const StringList &params);
+    static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const StringList &params,
+                            const StringList &expectedIn, const StringList &expectedOut);
+    static void FT_SETUP_OK(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env,
+                            const StringList &params, const StringList &expectedIn, const StringList &expectedOut);
+
+    // Dump feature checks. FT_DUMP_EMPTY expects an empty list of dumped features.
+    static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName);
+    static void FT_DUMP_EMPTY(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+                              search::fef::test::IndexEnvironment &env);
+    static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+                        const StringList &expected);
+    static void FT_DUMP(search::fef::BlueprintFactory &factory, const vespalib::string &baseName,
+                        search::fef::test::IndexEnvironment &env,
+                        const StringList &expected);
+
+    // Logs both lists and requires them to be equal element by element.
+    static void FT_EQUAL(const std::vector<string> &expected, const std::vector<string> &actual,
+                         const vespalib::string & prefix = "");
+
+    // Logging helpers for test input.
+    static void FT_LOG(const search::fef::Blueprint &prototype, const search::fef::test::IndexEnvironment &env, const StringList &params);
+    static void FT_LOG(const vespalib::string &prefix, const std::vector<vespalib::string> &arr);
+
+
+    // Query / match data setup for a feature test, for different query and index representations.
+    static void FT_SETUP(FtFeatureTest & test, const vespalib::string & query, const StringMap & index, uint32_t docId);
+    static void FT_SETUP(FtFeatureTest & test, const FtQuery & query, const StringVectorMap & index, uint32_t docId);
+
+    static void FT_SETUP(FtFeatureTest &test, const FtQuery &query, const FtIndex &index, uint32_t docId);
+
+    static void setupQueryEnv(FtQueryEnvironment & queryEnv, const FtQuery & query);
+    static void setupFieldMatch(FtFeatureTest & test, const vespalib::string & indexName,
+                                const vespalib::string & query, const vespalib::string & field,
+                                const search::features::fieldmatch::Params * params,
+                                uint32_t totalTermWeight, feature_t totalSignificance,
+                                uint32_t docId);
+
+    static search::fef::test::RankResult toRankResult(const vespalib::string & baseName,
+                                                      const vespalib::string & result,
+                                                      const vespalib::string & separator = " ");
+
+    // Checks that prototype.createInstance() yields an object of the prototype's own type T
+    // whose base name equals 'baseName'. Returns false (after reporting) on the first mismatch.
+    template <typename T>
+    static bool assertCreateInstance(const T & prototype, const vespalib::string & baseName) {
+        search::fef::Blueprint::UP bp = prototype.createInstance();
+        if (!EXPECT_TRUE(dynamic_cast<T*>(bp.get()) != nullptr)) return false;
+        if (!EXPECT_EQUAL(bp->getBaseName(), baseName)) return false;
+        return true;
+    }
+};