diff options
Diffstat (limited to 'searchlib/src/tests')
6 files changed, 259 insertions, 60 deletions
diff --git a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp index c7bd0e917f3..c2a39779061 100644 --- a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp +++ b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp @@ -8,6 +8,7 @@ #include <vespa/vespalib/fuzzy/levenshtein_dfa.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/util/time.h> +#include <vespa/vespalib/text/utf8.h> #include <filesystem> #include <fstream> #include <iostream> @@ -26,13 +27,24 @@ using namespace search::attribute; using namespace search; using vespalib::FuzzyMatcher; using vespalib::datastore::AtomicEntryRef; +using vespalib::datastore::EntryRef; using vespalib::fuzzy::LevenshteinDfa; +using vespalib::Utf8Reader; +using vespalib::Utf8Writer; using StringEnumStore = EnumStoreT<const char*>; using DictionaryEntry = std::pair<std::string, size_t>; using RawDictionary = std::vector<DictionaryEntry>; using StringVector = std::vector<std::string>; +namespace { + +const char* char_from_u8(const char8_t* p) { + return reinterpret_cast<const char*>(p); +} + +} + RawDictionary read_dictionary() { @@ -109,11 +121,11 @@ struct MatchStats { template <bool collect_matches> void -brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, MatchStats& stats, StringVector& matched_words) +brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words) { auto view = store.get_dictionary().get_posting_dictionary().getFrozenView(); vespalib::Timer timer; - FuzzyMatcher matcher(target, 2, 0, false); + FuzzyMatcher matcher(target, 2, prefix_size, cased); auto itr = view.begin(); size_t matches = 0; size_t seeks = 0; @@ -133,15 +145,33 @@ brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumS template <bool collect_matches> void -dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, MatchStats& stats, StringVector& matched_words) +dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words) { auto view = store.get_dictionary().get_posting_dictionary().getFrozenView(); vespalib::Timer timer; - DfaFuzzyMatcher matcher(target, 2, false, LevenshteinDfa::DfaType::Explicit); - auto itr = view.begin(); + DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, LevenshteinDfa::DfaType::Explicit); + Utf8Reader reader(vespalib::stringref(target.data(), target.size())); + std::string target_copy; + Utf8Writer<std::string> writer(target_copy); + for (size_t pos = 0; pos < prefix_size && reader.hasMore(); ++pos) { + auto code_point = reader.getChar(); + writer.putChar(code_point); + } + auto prefix_cmp = store.make_folded_comparator_prefix(target_copy.c_str()); + auto itr = prefix_size > 0 ? view.lowerBound(AtomicEntryRef(), prefix_cmp) : view.begin(); + auto itr_end = itr; + if (itr_end.valid()) { + if (prefix_size > 0) { + if (!prefix_cmp.less(EntryRef(), itr_end.getKey().load_relaxed())) { + itr_end.seekPast(AtomicEntryRef(), prefix_cmp); + } + } else { + itr_end.end(); + } + } size_t matches = 0; size_t seeks = 0; - while (itr.valid()) { + while (itr != itr_end) { auto word = store.get_value(itr.getKey().load_relaxed()); if (matcher.is_match(word, itr, store.get_data_store())) { ++itr; @@ -156,10 +186,58 @@ dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& st stats.add_sample(matches, seeks, timer.elapsed()); } -struct DfaFuzzyMatcherTest : public ::testing::Test { +template <bool collect_matches> +void +dfa_fuzzy_match_in_dictionary_no_skip(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words) +{ + auto view = store.get_dictionary().get_posting_dictionary().getFrozenView(); + vespalib::Timer timer; + DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, LevenshteinDfa::DfaType::Explicit); + auto itr = view.begin(); + size_t matches = 0; + size_t seeks = 0; + for (;itr.valid(); ++itr) { + auto word = store.get_value(itr.getKey().load_relaxed()); + if (matcher.is_match(word)) { + ++matches; + if (collect_matches) { + matched_words.push_back(word); + } + } else { + ++seeks; + } + } + stats.add_sample(matches, seeks, timer.elapsed()); +} + +struct TestParam +{ + vespalib::string _name; + bool _cased; + + TestParam(vespalib::string name, bool cased) + : _name(std::move(name)), + _cased(cased) + { + } + TestParam(const TestParam&); + ~TestParam(); +}; + +TestParam::TestParam(const TestParam&) = default; + +TestParam::~TestParam() = default; + +std::ostream& operator<<(std::ostream& os, const TestParam& param) +{ + os << param._name; + return os; +} + +struct DfaFuzzyMatcherTest : public ::testing::TestWithParam<TestParam> { StringEnumStore store; DfaFuzzyMatcherTest() - : store(true, DictionaryConfig(DictionaryConfig::Type::BTREE, DictionaryConfig::Match::UNCASED)) + : store(true, DictionaryConfig(DictionaryConfig::Type::BTREE, GetParam()._cased ? DictionaryConfig::Match::CASED : DictionaryConfig::Match::UNCASED)) {} void populate_dictionary(const StringVector& words) { auto updater = store.make_batch_updater(); @@ -170,18 +248,31 @@ struct DfaFuzzyMatcherTest : public ::testing::Test { updater.commit(); store.freeze_dictionary(); } - void expect_matches(std::string_view target, const StringVector& exp_matches) { + void expect_prefix_matches(std::string_view target, uint32_t prefix_size, const StringVector& exp_matches) { MatchStats stats; StringVector brute_force_matches; StringVector dfa_matches; - brute_force_fuzzy_match_in_dictionary<true>(target, store, stats, brute_force_matches); - dfa_fuzzy_match_in_dictionary<true>(target, store, stats, dfa_matches); + StringVector dfa_no_skip_matches; + bool cased = GetParam()._cased; + SCOPED_TRACE(target); + brute_force_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, stats, brute_force_matches); + dfa_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, stats, dfa_matches); + dfa_fuzzy_match_in_dictionary_no_skip<true>(target, store, prefix_size, cased, stats, dfa_no_skip_matches); EXPECT_EQ(exp_matches, brute_force_matches); EXPECT_EQ(exp_matches, dfa_matches); + EXPECT_EQ(exp_matches, dfa_no_skip_matches); + } + void expect_matches(std::string_view target, const StringVector& exp_matches) { + expect_prefix_matches(target, 0, exp_matches); } }; -TEST_F(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary) +INSTANTIATE_TEST_SUITE_P(DfaFuzzyMatcherMultiTest, + DfaFuzzyMatcherTest, + testing::Values(TestParam("uncased", false), TestParam("cased", true)), + testing::PrintToStringParamName()); + +TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary) { StringVector words = { "board", "boat", "bob", "door", "food", "foot", "football", "foothill", "for", "forbid", "force", "ford", "forearm", "forecast", "forest" }; @@ -194,23 +285,67 @@ TEST_F(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary) expect_matches("forcecast", {"forecast"}); } +TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary_with_prefix_size) +{ + bool cased = GetParam()._cased; + StringVector words = { "board", "boat", "bob", "door", "food", "foot", "football", "foothill", + "for", "forbid", "force", "ford", "forearm", "forecast", "forest", "H", "HA", "h", "ha", char_from_u8(u8"Ørn"), char_from_u8(u8"øre"), char_from_u8(u8"Ås"), char_from_u8(u8"ås")}; + populate_dictionary(words); + expect_prefix_matches("a", 1, {}); + expect_prefix_matches("b", 1, {"bob"}); + expect_prefix_matches("board", 1, {"board", "boat"}); + expect_prefix_matches("c", 1, {}); + expect_prefix_matches("food", 1, {"food", "foot", "for", "ford"}); + expect_prefix_matches("food", 2, {"food", "foot", "for", "ford"}); + expect_prefix_matches("food", 3, {"food", "foot"}); + expect_prefix_matches("foothill", 1, {"football", "foothill"}); + expect_prefix_matches("for", 1, {"food", "foot", "for", "force", "ford"}); + expect_prefix_matches("for", 2, {"food", "foot", "for", "force", "ford"}); + expect_prefix_matches("for", 3, {"for", "force", "ford"}); + expect_prefix_matches("force", 1, {"for", "force", "ford"}); + expect_prefix_matches("forcecast", 1, {"forecast"}); + expect_prefix_matches("forcecast", 4, {}); + expect_prefix_matches("z", 1, {}); + if (cased) { + expect_prefix_matches("h", 1, {"h", "ha"}); + expect_prefix_matches(char_from_u8(u8"Ø"), 1, {char_from_u8(u8"Ørn")}); + expect_prefix_matches(char_from_u8(u8"ø"), 1, {char_from_u8(u8"øre")}); + expect_prefix_matches(char_from_u8(u8"å"), 1, {char_from_u8(u8"ås")}); + /* Corner case: prefix length > target length means exact match */ + expect_prefix_matches("h", 2, {"h"}); + } else { + expect_prefix_matches("h", 1, {"H", "h", "HA", "ha"}); + expect_prefix_matches(char_from_u8(u8"ø"), 1, {char_from_u8(u8"øre"), char_from_u8(u8"Ørn")}); + expect_prefix_matches(char_from_u8(u8"å"), 1, {char_from_u8(u8"Ås"), char_from_u8(u8"ås")}); + /* Corner case: prefix length > target length means exact match */ + expect_prefix_matches("h", 2, {"H", "h"}); + } +} + void -benchmark_fuzzy_match_in_dictionary(const StringEnumStore& store, const RawDictionary& dict, size_t words_to_match, bool dfa_algorithm) +benchmark_fuzzy_match_in_dictionary(const StringEnumStore& store, const RawDictionary& dict, size_t words_to_match, bool cased, bool dfa_algorithm) { MatchStats stats; StringVector dummy; for (size_t i = 0; i < std::min(words_to_match, dict.size()); ++i) { const auto& entry = dict[i]; if (dfa_algorithm) { - dfa_fuzzy_match_in_dictionary<false>(entry.first, store, stats, dummy); + dfa_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, stats, dummy); } else { - brute_force_fuzzy_match_in_dictionary<false>(entry.first, store, stats, dummy); + brute_force_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, stats, dummy); } } std::cout << (dfa_algorithm ? "DFA:" : "Brute force:") << " samples=" << stats.samples << ", avg_matches=" << stats.avg_matches() << ", avg_seeks=" << stats.avg_seeks() << ", avg_elapsed_ms=" << stats.avg_elapsed_ms() << std::endl; } -TEST_F(DfaFuzzyMatcherTest, benchmark_fuzzy_match_in_dictionary) +using DfaFuzzyMatcherBenchmarkTest = DfaFuzzyMatcherTest; + +INSTANTIATE_TEST_SUITE_P(DfaFuzzyMatcherBenchmarkMultiTest, + DfaFuzzyMatcherBenchmarkTest, + testing::Values(TestParam("uncased", false)), + testing::PrintToStringParamName()); + +TEST_P(DfaFuzzyMatcherBenchmarkTest, benchmark_fuzzy_match_in_dictionary) { if (!benchmarking_enabled()) { GTEST_SKIP() << "benchmarking not enabled"; @@ -219,8 +354,9 @@ TEST_F(DfaFuzzyMatcherTest, benchmark_fuzzy_match_in_dictionary) populate_dictionary(to_string_vector(dict)); std::cout << "Unique words: " << store.get_num_uniques() << std::endl; sort_by_freq(dict); - benchmark_fuzzy_match_in_dictionary(store, dict, dfa_words_to_match, true); - benchmark_fuzzy_match_in_dictionary(store, dict, brute_force_words_to_match, false); + bool cased = GetParam()._cased; + benchmark_fuzzy_match_in_dictionary(store, dict, dfa_words_to_match, cased, true); + benchmark_fuzzy_match_in_dictionary(store, dict, brute_force_words_to_match, cased, false); } int diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp index b9c70d76934..1fd9dde09c7 100644 --- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp +++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp @@ -24,14 +24,14 @@ class DocumentWeightOrFilterSearchTest : public ::testing::Test { uint32_t _range_end; public: DocumentWeightOrFilterSearchTest(); - ~DocumentWeightOrFilterSearchTest(); + ~DocumentWeightOrFilterSearchTest() override; void inc_generation(); size_t num_trees() const { return _trees.size(); } Iterator get_tree(size_t idx) const { if (idx < _trees.size()) { return _postings.beginFrozen(_trees[idx]); } else { - return Iterator(); + return {}; } } void ensure_tree(size_t idx) { @@ -39,13 +39,13 @@ public: _trees.resize(idx + 1); } } - void add_tree(size_t idx, std::vector<uint32_t> keys) { + void add_tree(size_t idx, const std::vector<uint32_t>& keys) { ensure_tree(idx); std::vector<KeyData> adds; std::vector<uint32_t> removes; adds.reserve(keys.size()); for (auto& key : keys) { - adds.emplace_back(KeyData(key, 1)); + adds.emplace_back(key, 1); } _postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size()); } @@ -67,7 +67,7 @@ public: return result; }; - std::vector<uint32_t> eval_daat(SearchIterator &iterator) { + std::vector<uint32_t> eval_daat(SearchIterator &iterator) const { std::vector<uint32_t> result; uint32_t doc_id = _range_start; while (doc_id < _range_end) { @@ -81,7 +81,7 @@ public: return result; } - std::vector<uint32_t> frombv(const BitVector &bv) { + std::vector<uint32_t> frombv(const BitVector &bv) const { std::vector<uint32_t> result; uint32_t doc_id = _range_start; doc_id = bv.getNextTrueBit(doc_id); @@ -93,7 +93,7 @@ public: return result; } - std::unique_ptr<BitVector> tobv(std::vector<uint32_t> values) { + std::unique_ptr<BitVector> tobv(const std::vector<uint32_t> & values) const { auto bv = BitVector::create(_range_start, _range_end); for (auto value : values) { bv->setBit(value); @@ -102,7 +102,7 @@ public: return bv; } - void expect_result(std::vector<uint32_t> exp, std::vector<uint32_t> act) + static void expect_result(const std::vector<uint32_t> & exp, const std::vector<uint32_t> & act) { EXPECT_EQ(exp, act); } @@ -227,7 +227,7 @@ public: } _test.inc_generation(); } - ~Verifier() { + ~Verifier() override { for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) { _test.clear_tree(tree_id); } diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp index d7a854e0afc..6c6f05fd5e2 100644 --- a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp +++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -29,14 +29,14 @@ using namespace search::attribute::test; namespace { void -setupAttributeManager(MockAttributeManager &manager) +setupAttributeManager(MockAttributeManager &manager, bool isFilter) { AttributeVector::DocId docId; { - AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64"))); + AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64")).setIsFilter(isFilter)); manager.addAttribute(attr_sp); - IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + auto *attr = (IntegerAttribute*)(attr_sp.get()); for (size_t i = 1; i < 10; ++i) { attr->addDoc(docId); assert(i == docId); @@ -45,10 +45,10 @@ setupAttributeManager(MockAttributeManager &manager) } } { - AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string"))); + AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string")).setIsFilter(isFilter)); manager.addAttribute(attr_sp); - StringAttribute *attr = (StringAttribute*)(attr_sp.get()); + auto *attr = (StringAttribute*)(attr_sp.get()); for (size_t i = 1; i < 10; ++i) { attr->addDoc(docId); assert(i == docId); @@ -58,9 +58,9 @@ setupAttributeManager(MockAttributeManager &manager) } { AttributeVector::SP attr_sp = AttributeFactory::createAttribute( - "multi", Config(BasicType("int64"), search::attribute::CollectionType("array"))); + "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")).setIsFilter(isFilter)); manager.addAttribute(attr_sp); - IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + auto *attr = (IntegerAttribute*)(attr_sp.get()); for (size_t i = 1; i < 10; ++i) { attr->addDoc(docId); assert(i == docId); @@ -78,35 +78,43 @@ struct WS { TermFieldHandle handle; std::vector<std::pair<std::string, uint32_t> > tokens; - WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() { + explicit WS(IAttributeManager & manager) + : attribute_manager(manager), + layout(), handle(layout.allocTermField(fieldId)), + tokens() + { MatchData::UP tmp = layout.createMatchData(); ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); } WS &add(const std::string &token, uint32_t weight) { - tokens.push_back(std::make_pair(token, weight)); + tokens.emplace_back(token, weight); return *this; } Node::UP createNode() const { - SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); - for (size_t i = 0; i < tokens.size(); ++i) { - node->addTerm(tokens[i].first, Weight(tokens[i].second)); + auto *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0)); + for (const auto & token : tokens) { + node->addTerm(token.first, Weight(token.second)); } return Node::UP(node); } - bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const { + SearchIterator::UP + createSearch(Searchable &searchable, const std::string &field, bool strict) const { AttributeContext ac(attribute_manager); FakeRequestContext requestContext(&ac); MatchData::UP md = layout.createMatchData(); Node::UP node = createNode(); FieldSpecList fields; - fields.add(FieldSpec(field, fieldId, handle)); + fields.add(FieldSpec(field, fieldId, handle, ac.getAttribute(field)->getIsFilter())); queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); bp->fetchPostings(queryeval::ExecuteInfo::create(strict)); SearchIterator::UP sb = bp->createSearch(*md, strict); - return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0); + return sb; + } + bool isWeightedSetTermSearch(Searchable &searchable, const std::string &field, bool strict) const { + return dynamic_cast<WeightedSetTermSearch *>(createSearch(searchable, field, strict).get()) != nullptr; } FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { @@ -140,23 +148,58 @@ struct WS { } // namespace <unnamed> +void test_tokens(bool isFilter, const std::vector<uint32_t> & docs) { + MockAttributeManager manager; + setupAttributeManager(manager, isFilter); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult(); + WS ws = WS(manager); + for (uint32_t doc : docs) { + auto docS = vespalib::stringify(doc); + int32_t weight = doc * 10; + expect.doc(doc).weight(weight).pos(0); + ws.add(docS, weight); + } + + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "integer", true)); + EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); +} TEST("attribute_weighted_set_test") { + test_tokens(false, {3, 5, 7}); + test_tokens(true, {3, 5, 7}); + test_tokens(false, {3}); +} + +TEST("attribute_weighted_set_single_token_filter_lifted_out") { MockAttributeManager manager; - setupAttributeManager(manager); + setupAttributeManager(manager, true); AttributeBlueprintFactory adapter; - FakeResult expect = FakeResult() - .doc(3).elem(0).weight(30).pos(0) - .doc(5).elem(0).weight(50).pos(0) - .doc(7).elem(0).weight(70).pos(0); - WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); - - EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); - EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); - EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + FakeResult expect = FakeResult().doc(3).elem(0).weight(30).pos(0); + WS ws = WS(manager).add("3", 30); + + EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >", + ws.createSearch(adapter, "integer", true)->getClassName()); + EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >", + ws.createSearch(adapter, "integer", false)->getClassName()); + EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >", + ws.createSearch(adapter, "string", true)->getClassName()); + EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >", + ws.createSearch(adapter, "string", false)->getClassName()); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false)); EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp index 40d4b20aaf2..ac1042dda6c 100644 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -4,6 +4,7 @@ #include <vespa/searchlib/attribute/attributefactory.h> #include <vespa/searchlib/attribute/attributeiterators.h> #include <vespa/searchlib/attribute/flagattribute.h> +#include <vespa/searchlib/attribute/postinglistsearchcontext.h> #include <vespa/searchlib/attribute/searchcontextelementiterator.h> #include <vespa/searchlib/attribute/singleboolattribute.h> #include <vespa/searchlib/attribute/stringbase.h> @@ -1424,6 +1425,25 @@ SearchContextTest::testPrefixSearch(const vespalib::string& name, const Config& } } } + + // Long range of prefixes with unique strings that causes + // PostingListFoldedSearchContextT<DataT>::countHits() to populate + // partial vector of posting indexes, with scan resumed by + // fillArray or fillBitVector. + auto& vec = dynamic_cast<StringAttribute &>(*attr.get()); + uint32_t old_size = attr->getNumDocs(); + constexpr uint32_t longrange_values = search::attribute::PostingListFoldedSearchContextT<int32_t>::MAX_POSTING_INDEXES_SIZE + 100; + attr->addDocs(longrange_values); + DocSet exp_longrange; + for (uint32_t i = 0; i < longrange_values; ++i) { + vespalib::asciistream ss; + ss << "lpref" << i; + vespalib::string sss(ss.str()); + exp_longrange.put(old_size + i); + vec.update(old_size + i, vespalib::string(ss.str()).c_str()); + } + attr->commit(); + performSearch(*attr, "lpref", exp_longrange, TermType::PREFIXTERM); } diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index e9d0f8cb736..52329f31ba7 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -389,8 +389,8 @@ testSingleValue(Attribute & svsa, Config &cfg) TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(SearchContext)); - EXPECT_EQUAL(40u, sizeof(StringSearchHelper)); - EXPECT_EQUAL(96u, sizeof(attribute::SingleStringEnumSearchContext)); + EXPECT_EQUAL(48u, sizeof(StringSearchHelper)); + EXPECT_EQUAL(104u, sizeof(attribute::SingleStringEnumSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp index ef0fd56840a..c617db871a7 100644 --- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -128,9 +128,9 @@ TEST("test And propagates updated histestimate") { const RememberExecuteInfo & child = dynamic_cast<const RememberExecuteInfo &>(bp.getChild(i)); EXPECT_EQUAL((i == 0), child.executeInfo.isStrict()); } - EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate()); - EXPECT_EQUAL(1.0/250, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate()); - EXPECT_EQUAL(1.0/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f/250, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate()); + EXPECT_EQUAL(1.0f/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate()); } TEST("test And Blueprint") { |