about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib/src/tests
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests')
-rw-r--r-- searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp | 172
-rw-r--r-- searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp | 18
-rw-r--r-- searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp | 99
-rw-r--r-- searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp | 20
-rw-r--r-- searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp | 4
-rw-r--r-- searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp | 6
6 files changed, 259 insertions, 60 deletions
diff --git a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
index c7bd0e917f3..c2a39779061 100644
--- a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
+++ b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
@@ -8,6 +8,7 @@
#include <vespa/vespalib/fuzzy/levenshtein_dfa.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/util/time.h>
+#include <vespa/vespalib/text/utf8.h>
#include <filesystem>
#include <fstream>
#include <iostream>
@@ -26,13 +27,24 @@ using namespace search::attribute;
using namespace search;
using vespalib::FuzzyMatcher;
using vespalib::datastore::AtomicEntryRef;
+using vespalib::datastore::EntryRef;
using vespalib::fuzzy::LevenshteinDfa;
+using vespalib::Utf8Reader;
+using vespalib::Utf8Writer;
using StringEnumStore = EnumStoreT<const char*>;
using DictionaryEntry = std::pair<std::string, size_t>;
using RawDictionary = std::vector<DictionaryEntry>;
using StringVector = std::vector<std::string>;
+namespace {
+
+const char* char_from_u8(const char8_t* p) {
+ return reinterpret_cast<const char*>(p);
+}
+
+}
+
RawDictionary
read_dictionary()
{
@@ -109,11 +121,11 @@ struct MatchStats {
template <bool collect_matches>
void
-brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, MatchStats& stats, StringVector& matched_words)
+brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words)
{
auto view = store.get_dictionary().get_posting_dictionary().getFrozenView();
vespalib::Timer timer;
- FuzzyMatcher matcher(target, 2, 0, false);
+ FuzzyMatcher matcher(target, 2, prefix_size, cased);
auto itr = view.begin();
size_t matches = 0;
size_t seeks = 0;
@@ -133,15 +145,33 @@ brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumS
template <bool collect_matches>
void
-dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, MatchStats& stats, StringVector& matched_words)
+dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words)
{
auto view = store.get_dictionary().get_posting_dictionary().getFrozenView();
vespalib::Timer timer;
- DfaFuzzyMatcher matcher(target, 2, false, LevenshteinDfa::DfaType::Explicit);
- auto itr = view.begin();
+ DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, LevenshteinDfa::DfaType::Explicit);
+ Utf8Reader reader(vespalib::stringref(target.data(), target.size()));
+ std::string target_copy;
+ Utf8Writer<std::string> writer(target_copy);
+ for (size_t pos = 0; pos < prefix_size && reader.hasMore(); ++pos) {
+ auto code_point = reader.getChar();
+ writer.putChar(code_point);
+ }
+ auto prefix_cmp = store.make_folded_comparator_prefix(target_copy.c_str());
+ auto itr = prefix_size > 0 ? view.lowerBound(AtomicEntryRef(), prefix_cmp) : view.begin();
+ auto itr_end = itr;
+ if (itr_end.valid()) {
+ if (prefix_size > 0) {
+ if (!prefix_cmp.less(EntryRef(), itr_end.getKey().load_relaxed())) {
+ itr_end.seekPast(AtomicEntryRef(), prefix_cmp);
+ }
+ } else {
+ itr_end.end();
+ }
+ }
size_t matches = 0;
size_t seeks = 0;
- while (itr.valid()) {
+ while (itr != itr_end) {
auto word = store.get_value(itr.getKey().load_relaxed());
if (matcher.is_match(word, itr, store.get_data_store())) {
++itr;
@@ -156,10 +186,58 @@ dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& st
stats.add_sample(matches, seeks, timer.elapsed());
}
-struct DfaFuzzyMatcherTest : public ::testing::Test {
+template <bool collect_matches>
+void
+dfa_fuzzy_match_in_dictionary_no_skip(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words)
+{
+ auto view = store.get_dictionary().get_posting_dictionary().getFrozenView();
+ vespalib::Timer timer;
+ DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, LevenshteinDfa::DfaType::Explicit);
+ auto itr = view.begin();
+ size_t matches = 0;
+ size_t seeks = 0;
+ for (;itr.valid(); ++itr) {
+ auto word = store.get_value(itr.getKey().load_relaxed());
+ if (matcher.is_match(word)) {
+ ++matches;
+ if (collect_matches) {
+ matched_words.push_back(word);
+ }
+ } else {
+ ++seeks;
+ }
+ }
+ stats.add_sample(matches, seeks, timer.elapsed());
+}
+
+struct TestParam
+{
+ vespalib::string _name;
+ bool _cased;
+
+ TestParam(vespalib::string name, bool cased)
+ : _name(std::move(name)),
+ _cased(cased)
+ {
+ }
+ TestParam(const TestParam&);
+ ~TestParam();
+};
+
+TestParam::TestParam(const TestParam&) = default;
+
+TestParam::~TestParam() = default;
+
+std::ostream& operator<<(std::ostream& os, const TestParam& param)
+{
+ os << param._name;
+ return os;
+}
+
+struct DfaFuzzyMatcherTest : public ::testing::TestWithParam<TestParam> {
StringEnumStore store;
DfaFuzzyMatcherTest()
- : store(true, DictionaryConfig(DictionaryConfig::Type::BTREE, DictionaryConfig::Match::UNCASED))
+ : store(true, DictionaryConfig(DictionaryConfig::Type::BTREE, GetParam()._cased ? DictionaryConfig::Match::CASED : DictionaryConfig::Match::UNCASED))
{}
void populate_dictionary(const StringVector& words) {
auto updater = store.make_batch_updater();
@@ -170,18 +248,31 @@ struct DfaFuzzyMatcherTest : public ::testing::Test {
updater.commit();
store.freeze_dictionary();
}
- void expect_matches(std::string_view target, const StringVector& exp_matches) {
+ void expect_prefix_matches(std::string_view target, uint32_t prefix_size, const StringVector& exp_matches) {
MatchStats stats;
StringVector brute_force_matches;
StringVector dfa_matches;
- brute_force_fuzzy_match_in_dictionary<true>(target, store, stats, brute_force_matches);
- dfa_fuzzy_match_in_dictionary<true>(target, store, stats, dfa_matches);
+ StringVector dfa_no_skip_matches;
+ bool cased = GetParam()._cased;
+ SCOPED_TRACE(target);
+ brute_force_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, stats, brute_force_matches);
+ dfa_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, stats, dfa_matches);
+ dfa_fuzzy_match_in_dictionary_no_skip<true>(target, store, prefix_size, cased, stats, dfa_no_skip_matches);
EXPECT_EQ(exp_matches, brute_force_matches);
EXPECT_EQ(exp_matches, dfa_matches);
+ EXPECT_EQ(exp_matches, dfa_no_skip_matches);
+ }
+ void expect_matches(std::string_view target, const StringVector& exp_matches) {
+ expect_prefix_matches(target, 0, exp_matches);
}
};
-TEST_F(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary)
+INSTANTIATE_TEST_SUITE_P(DfaFuzzyMatcherMultiTest,
+ DfaFuzzyMatcherTest,
+ testing::Values(TestParam("uncased", false), TestParam("cased", true)),
+ testing::PrintToStringParamName());
+
+TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary)
{
StringVector words = { "board", "boat", "bob", "door", "food", "foot", "football", "foothill",
"for", "forbid", "force", "ford", "forearm", "forecast", "forest" };
@@ -194,23 +285,67 @@ TEST_F(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary)
expect_matches("forcecast", {"forecast"});
}
+TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary_with_prefix_size)
+{
+ bool cased = GetParam()._cased;
+ StringVector words = { "board", "boat", "bob", "door", "food", "foot", "football", "foothill",
+ "for", "forbid", "force", "ford", "forearm", "forecast", "forest", "H", "HA", "h", "ha", char_from_u8(u8"Ørn"), char_from_u8(u8"øre"), char_from_u8(u8"Ås"), char_from_u8(u8"ås")};
+ populate_dictionary(words);
+ expect_prefix_matches("a", 1, {});
+ expect_prefix_matches("b", 1, {"bob"});
+ expect_prefix_matches("board", 1, {"board", "boat"});
+ expect_prefix_matches("c", 1, {});
+ expect_prefix_matches("food", 1, {"food", "foot", "for", "ford"});
+ expect_prefix_matches("food", 2, {"food", "foot", "for", "ford"});
+ expect_prefix_matches("food", 3, {"food", "foot"});
+ expect_prefix_matches("foothill", 1, {"football", "foothill"});
+ expect_prefix_matches("for", 1, {"food", "foot", "for", "force", "ford"});
+ expect_prefix_matches("for", 2, {"food", "foot", "for", "force", "ford"});
+ expect_prefix_matches("for", 3, {"for", "force", "ford"});
+ expect_prefix_matches("force", 1, {"for", "force", "ford"});
+ expect_prefix_matches("forcecast", 1, {"forecast"});
+ expect_prefix_matches("forcecast", 4, {});
+ expect_prefix_matches("z", 1, {});
+ if (cased) {
+ expect_prefix_matches("h", 1, {"h", "ha"});
+ expect_prefix_matches(char_from_u8(u8"Ø"), 1, {char_from_u8(u8"Ørn")});
+ expect_prefix_matches(char_from_u8(u8"ø"), 1, {char_from_u8(u8"øre")});
+ expect_prefix_matches(char_from_u8(u8"å"), 1, {char_from_u8(u8"ås")});
+ /* Corner case: prefix length > target length means exact match */
+ expect_prefix_matches("h", 2, {"h"});
+ } else {
+ expect_prefix_matches("h", 1, {"H", "h", "HA", "ha"});
+ expect_prefix_matches(char_from_u8(u8"ø"), 1, {char_from_u8(u8"øre"), char_from_u8(u8"Ørn")});
+ expect_prefix_matches(char_from_u8(u8"å"), 1, {char_from_u8(u8"Ås"), char_from_u8(u8"ås")});
+ /* Corner case: prefix length > target length means exact match */
+ expect_prefix_matches("h", 2, {"H", "h"});
+ }
+}
+
void
-benchmark_fuzzy_match_in_dictionary(const StringEnumStore& store, const RawDictionary& dict, size_t words_to_match, bool dfa_algorithm)
+benchmark_fuzzy_match_in_dictionary(const StringEnumStore& store, const RawDictionary& dict, size_t words_to_match, bool cased, bool dfa_algorithm)
{
MatchStats stats;
StringVector dummy;
for (size_t i = 0; i < std::min(words_to_match, dict.size()); ++i) {
const auto& entry = dict[i];
if (dfa_algorithm) {
- dfa_fuzzy_match_in_dictionary<false>(entry.first, store, stats, dummy);
+ dfa_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, stats, dummy);
} else {
- brute_force_fuzzy_match_in_dictionary<false>(entry.first, store, stats, dummy);
+ brute_force_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, stats, dummy);
}
}
std::cout << (dfa_algorithm ? "DFA:" : "Brute force:") << " samples=" << stats.samples << ", avg_matches=" << stats.avg_matches() << ", avg_seeks=" << stats.avg_seeks() << ", avg_elapsed_ms=" << stats.avg_elapsed_ms() << std::endl;
}
-TEST_F(DfaFuzzyMatcherTest, benchmark_fuzzy_match_in_dictionary)
+using DfaFuzzyMatcherBenchmarkTest = DfaFuzzyMatcherTest;
+
+INSTANTIATE_TEST_SUITE_P(DfaFuzzyMatcherBenchmarkMultiTest,
+ DfaFuzzyMatcherBenchmarkTest,
+ testing::Values(TestParam("uncased", false)),
+ testing::PrintToStringParamName());
+
+TEST_P(DfaFuzzyMatcherBenchmarkTest, benchmark_fuzzy_match_in_dictionary)
{
if (!benchmarking_enabled()) {
GTEST_SKIP() << "benchmarking not enabled";
@@ -219,8 +354,9 @@ TEST_F(DfaFuzzyMatcherTest, benchmark_fuzzy_match_in_dictionary)
populate_dictionary(to_string_vector(dict));
std::cout << "Unique words: " << store.get_num_uniques() << std::endl;
sort_by_freq(dict);
- benchmark_fuzzy_match_in_dictionary(store, dict, dfa_words_to_match, true);
- benchmark_fuzzy_match_in_dictionary(store, dict, brute_force_words_to_match, false);
+ bool cased = GetParam()._cased;
+ benchmark_fuzzy_match_in_dictionary(store, dict, dfa_words_to_match, cased, true);
+ benchmark_fuzzy_match_in_dictionary(store, dict, brute_force_words_to_match, cased, false);
}
int
diff --git a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
index b9c70d76934..1fd9dde09c7 100644
--- a/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
+++ b/searchlib/src/tests/attribute/document_weight_or_filter_search/document_weight_or_filter_search_test.cpp
@@ -24,14 +24,14 @@ class DocumentWeightOrFilterSearchTest : public ::testing::Test {
uint32_t _range_end;
public:
DocumentWeightOrFilterSearchTest();
- ~DocumentWeightOrFilterSearchTest();
+ ~DocumentWeightOrFilterSearchTest() override;
void inc_generation();
size_t num_trees() const { return _trees.size(); }
Iterator get_tree(size_t idx) const {
if (idx < _trees.size()) {
return _postings.beginFrozen(_trees[idx]);
} else {
- return Iterator();
+ return {};
}
}
void ensure_tree(size_t idx) {
@@ -39,13 +39,13 @@ public:
_trees.resize(idx + 1);
}
}
- void add_tree(size_t idx, std::vector<uint32_t> keys) {
+ void add_tree(size_t idx, const std::vector<uint32_t>& keys) {
ensure_tree(idx);
std::vector<KeyData> adds;
std::vector<uint32_t> removes;
adds.reserve(keys.size());
for (auto& key : keys) {
- adds.emplace_back(KeyData(key, 1));
+ adds.emplace_back(key, 1);
}
_postings.apply(_trees[idx], adds.data(), adds.data() + adds.size(), removes.data(), removes.data() + removes.size());
}
@@ -67,7 +67,7 @@ public:
return result;
};
- std::vector<uint32_t> eval_daat(SearchIterator &iterator) {
+ std::vector<uint32_t> eval_daat(SearchIterator &iterator) const {
std::vector<uint32_t> result;
uint32_t doc_id = _range_start;
while (doc_id < _range_end) {
@@ -81,7 +81,7 @@ public:
return result;
}
- std::vector<uint32_t> frombv(const BitVector &bv) {
+ std::vector<uint32_t> frombv(const BitVector &bv) const {
std::vector<uint32_t> result;
uint32_t doc_id = _range_start;
doc_id = bv.getNextTrueBit(doc_id);
@@ -93,7 +93,7 @@ public:
return result;
}
- std::unique_ptr<BitVector> tobv(std::vector<uint32_t> values) {
+ std::unique_ptr<BitVector> tobv(const std::vector<uint32_t> & values) const {
auto bv = BitVector::create(_range_start, _range_end);
for (auto value : values) {
bv->setBit(value);
@@ -102,7 +102,7 @@ public:
return bv;
}
- void expect_result(std::vector<uint32_t> exp, std::vector<uint32_t> act)
+ static void expect_result(const std::vector<uint32_t> & exp, const std::vector<uint32_t> & act)
{
EXPECT_EQ(exp, act);
}
@@ -227,7 +227,7 @@ public:
}
_test.inc_generation();
}
- ~Verifier() {
+ ~Verifier() override {
for (uint32_t tree_id = 0; tree_id < _test.num_trees(); ++tree_id) {
_test.clear_tree(tree_id);
}
diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
index d7a854e0afc..6c6f05fd5e2 100644
--- a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
+++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
@@ -29,14 +29,14 @@ using namespace search::attribute::test;
namespace {
void
-setupAttributeManager(MockAttributeManager &manager)
+setupAttributeManager(MockAttributeManager &manager, bool isFilter)
{
AttributeVector::DocId docId;
{
- AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64")));
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute("integer", Config(BasicType("int64")).setIsFilter(isFilter));
manager.addAttribute(attr_sp);
- IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get());
+ auto *attr = (IntegerAttribute*)(attr_sp.get());
for (size_t i = 1; i < 10; ++i) {
attr->addDoc(docId);
assert(i == docId);
@@ -45,10 +45,10 @@ setupAttributeManager(MockAttributeManager &manager)
}
}
{
- AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string")));
+ AttributeVector::SP attr_sp = AttributeFactory::createAttribute("string", Config(BasicType("string")).setIsFilter(isFilter));
manager.addAttribute(attr_sp);
- StringAttribute *attr = (StringAttribute*)(attr_sp.get());
+ auto *attr = (StringAttribute*)(attr_sp.get());
for (size_t i = 1; i < 10; ++i) {
attr->addDoc(docId);
assert(i == docId);
@@ -58,9 +58,9 @@ setupAttributeManager(MockAttributeManager &manager)
}
{
AttributeVector::SP attr_sp = AttributeFactory::createAttribute(
- "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")));
+ "multi", Config(BasicType("int64"), search::attribute::CollectionType("array")).setIsFilter(isFilter));
manager.addAttribute(attr_sp);
- IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get());
+ auto *attr = (IntegerAttribute*)(attr_sp.get());
for (size_t i = 1; i < 10; ++i) {
attr->addDoc(docId);
assert(i == docId);
@@ -78,35 +78,43 @@ struct WS {
TermFieldHandle handle;
std::vector<std::pair<std::string, uint32_t> > tokens;
- WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() {
+ explicit WS(IAttributeManager & manager)
+ : attribute_manager(manager),
+ layout(), handle(layout.allocTermField(fieldId)),
+ tokens()
+ {
MatchData::UP tmp = layout.createMatchData();
ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId);
}
WS &add(const std::string &token, uint32_t weight) {
- tokens.push_back(std::make_pair(token, weight));
+ tokens.emplace_back(token, weight);
return *this;
}
Node::UP createNode() const {
- SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0));
- for (size_t i = 0; i < tokens.size(); ++i) {
- node->addTerm(tokens[i].first, Weight(tokens[i].second));
+ auto *node = new SimpleWeightedSetTerm(tokens.size(), "view", 0, Weight(0));
+ for (const auto & token : tokens) {
+ node->addTerm(token.first, Weight(token.second));
}
return Node::UP(node);
}
- bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const {
+ SearchIterator::UP
+ createSearch(Searchable &searchable, const std::string &field, bool strict) const {
AttributeContext ac(attribute_manager);
FakeRequestContext requestContext(&ac);
MatchData::UP md = layout.createMatchData();
Node::UP node = createNode();
FieldSpecList fields;
- fields.add(FieldSpec(field, fieldId, handle));
+ fields.add(FieldSpec(field, fieldId, handle, ac.getAttribute(field)->getIsFilter()));
queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node);
bp->fetchPostings(queryeval::ExecuteInfo::create(strict));
SearchIterator::UP sb = bp->createSearch(*md, strict);
- return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0);
+ return sb;
+ }
+ bool isWeightedSetTermSearch(Searchable &searchable, const std::string &field, bool strict) const {
+ return dynamic_cast<WeightedSetTermSearch *>(createSearch(searchable, field, strict).get()) != nullptr;
}
FakeResult search(Searchable &searchable, const std::string &field, bool strict) const {
@@ -140,23 +148,58 @@ struct WS {
} // namespace <unnamed>
+void test_tokens(bool isFilter, const std::vector<uint32_t> & docs) {
+ MockAttributeManager manager;
+ setupAttributeManager(manager, isFilter);
+ AttributeBlueprintFactory adapter;
+
+ FakeResult expect = FakeResult();
+ WS ws = WS(manager);
+ for (uint32_t doc : docs) {
+ auto docS = vespalib::stringify(doc);
+ int32_t weight = doc * 10;
+ expect.doc(doc).weight(weight).pos(0);
+ ws.add(docS, weight);
+ }
+
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "integer", true));
+ EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "integer", false));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "string", true));
+ EXPECT_TRUE(!ws.isWeightedSetTermSearch(adapter, "string", false));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false));
+
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", false));
+}
TEST("attribute_weighted_set_test") {
+ test_tokens(false, {3, 5, 7});
+ test_tokens(true, {3, 5, 7});
+ test_tokens(false, {3});
+}
+
+TEST("attribute_weighted_set_single_token_filter_lifted_out") {
MockAttributeManager manager;
- setupAttributeManager(manager);
+ setupAttributeManager(manager, true);
AttributeBlueprintFactory adapter;
- FakeResult expect = FakeResult()
- .doc(3).elem(0).weight(30).pos(0)
- .doc(5).elem(0).weight(50).pos(0)
- .doc(7).elem(0).weight(70).pos(0);
- WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30);
-
- EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true));
- EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false));
- EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true));
- EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false));
- EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true));
- EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false));
+ FakeResult expect = FakeResult().doc(3).elem(0).weight(30).pos(0);
+ WS ws = WS(manager).add("3", 30);
+
+ EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >",
+ ws.createSearch(adapter, "integer", true)->getClassName());
+ EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleNumericSearchContext<long, search::attribute::NumericMatcher<long> > >",
+ ws.createSearch(adapter, "integer", false)->getClassName());
+ EXPECT_EQUAL("search::FilterAttributeIteratorStrict<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >",
+ ws.createSearch(adapter, "string", true)->getClassName());
+ EXPECT_EQUAL("search::FilterAttributeIteratorT<search::attribute::SingleEnumSearchContext<char const*, search::attribute::StringSearchContext> >",
+ ws.createSearch(adapter, "string", false)->getClassName());
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", true));
+ EXPECT_TRUE(ws.isWeightedSetTermSearch(adapter, "multi", false));
EXPECT_EQUAL(expect, ws.search(adapter, "integer", true));
EXPECT_EQUAL(expect, ws.search(adapter, "integer", false));
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 40d4b20aaf2..ac1042dda6c 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -4,6 +4,7 @@
#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributeiterators.h>
#include <vespa/searchlib/attribute/flagattribute.h>
+#include <vespa/searchlib/attribute/postinglistsearchcontext.h>
#include <vespa/searchlib/attribute/searchcontextelementiterator.h>
#include <vespa/searchlib/attribute/singleboolattribute.h>
#include <vespa/searchlib/attribute/stringbase.h>
@@ -1424,6 +1425,25 @@ SearchContextTest::testPrefixSearch(const vespalib::string& name, const Config&
}
}
}
+
+ // A long range of unique strings sharing one prefix, which causes
+ // PostingListFoldedSearchContextT<DataT>::countHits() to populate
+ // only a partial vector of posting indexes, with the scan resumed
+ // by fillArray or fillBitVector.
+ auto& vec = dynamic_cast<StringAttribute &>(*attr.get());
+ uint32_t old_size = attr->getNumDocs();
+ constexpr uint32_t longrange_values = search::attribute::PostingListFoldedSearchContextT<int32_t>::MAX_POSTING_INDEXES_SIZE + 100;
+ attr->addDocs(longrange_values);
+ DocSet exp_longrange;
+ for (uint32_t i = 0; i < longrange_values; ++i) {
+ vespalib::asciistream ss;
+ ss << "lpref" << i;
+ vespalib::string sss(ss.str());
+ exp_longrange.put(old_size + i);
+ vec.update(old_size + i, vespalib::string(ss.str()).c_str());
+ }
+ attr->commit();
+ performSearch(*attr, "lpref", exp_longrange, TermType::PREFIXTERM);
}
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index e9d0f8cb736..52329f31ba7 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -389,8 +389,8 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(SearchContext));
- EXPECT_EQUAL(40u, sizeof(StringSearchHelper));
- EXPECT_EQUAL(96u, sizeof(attribute::SingleStringEnumSearchContext));
+ EXPECT_EQUAL(48u, sizeof(StringSearchHelper));
+ EXPECT_EQUAL(104u, sizeof(attribute::SingleStringEnumSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
index ef0fd56840a..c617db871a7 100644
--- a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
+++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp
@@ -128,9 +128,9 @@ TEST("test And propagates updated histestimate") {
const RememberExecuteInfo & child = dynamic_cast<const RememberExecuteInfo &>(bp.getChild(i));
EXPECT_EQUAL((i == 0), child.executeInfo.isStrict());
}
- EXPECT_EQUAL(1.0, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate());
- EXPECT_EQUAL(1.0/250, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate());
- EXPECT_EQUAL(1.0/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(0)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f/250, dynamic_cast<const RememberExecuteInfo &>(bp.getChild(1)).executeInfo.hitRate());
+ EXPECT_EQUAL(1.0f/(250*25), dynamic_cast<const RememberExecuteInfo &>(bp.getChild(2)).executeInfo.hitRate());
}
TEST("test And Blueprint") {