From d47dc1ca38ff5c0611923acfbac8d50ae5ea7807 Mon Sep 17 00:00:00 2001 From: Geir Storli Date: Tue, 28 Jan 2020 11:30:28 +0000 Subject: Rename searchcontext.cpp -> searchcontext_test.cpp --- .../tests/attribute/searchcontext/CMakeLists.txt | 2 +- .../attribute/searchcontext/searchcontext.cpp | 1964 -------------------- .../attribute/searchcontext/searchcontext_test.cpp | 1964 ++++++++++++++++++++ 3 files changed, 1965 insertions(+), 1965 deletions(-) delete mode 100644 searchlib/src/tests/attribute/searchcontext/searchcontext.cpp create mode 100644 searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp (limited to 'searchlib') diff --git a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt index a705fd5ecb5..377d91bf634 100644 --- a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt +++ b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt @@ -1,7 +1,7 @@ # Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. vespa_add_executable(searchlib_searchcontext_test_app TEST SOURCES - searchcontext.cpp + searchcontext_test.cpp DEPENDS searchlib searchlib_test diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp deleted file mode 100644 index 7d4a2d63355..00000000000 --- a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp +++ /dev/null @@ -1,1964 +0,0 @@ -// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -LOG_SETUP("searchcontext_test"); - -namespace search { - -namespace { - -bool -isUnsignedSmallIntAttribute(const AttributeVector &a) -{ - switch (a.getBasicType()) - { - case attribute::BasicType::BOOL: - case attribute::BasicType::UINT2: - case attribute::BasicType::UINT4: - return true; - default: - return false; - } -} - -} - -typedef AttributeVector::SP AttributePtr; -typedef std::unique_ptr SearchContextPtr; -typedef AttributeVector::SearchContext SearchContext; -using attribute::Config; -using attribute::BasicType; -using attribute::CollectionType; -typedef AttributeVector::largeint_t largeint_t; -typedef queryeval::SearchIterator::UP SearchBasePtr; -typedef std::unique_ptr ResultSetPtr; - -using queryeval::HitCollector; -using queryeval::SearchIterator; -using fef::MatchData; -using fef::TermFieldMatchData; -using fef::TermFieldMatchDataArray; -using fef::TermFieldMatchDataPosition; - -class DocSet : public std::set -{ -public: - DocSet(); - ~DocSet(); - DocSet(const uint32_t *b, const uint32_t *e) : std::set(b, e) {} - DocSet & put(const uint32_t &v) { - insert(v); - return *this; - } -}; - -DocSet::DocSet() = default; -DocSet::~DocSet() = default; - -template -class PostingList -{ -private: - V * _vec; - T _value; - DocSet _hits; - -public: - PostingList(V & vec, T value); - ~PostingList(); - const V & getAttribute() const { return *_vec; } - V & getAttribute() { return *_vec; } - const T & getValue() const { return _value; } - DocSet & getHits() { return _hits; } - const DocSet & getHits() const { return _hits; } - uint32_t getHitCount() const { return _hits.size(); } -}; - -template -PostingList::PostingList(V & vec, T value) : _vec(&vec), _value(value), _hits() {} - -template -PostingList::~PostingList() = default; - -class DocRange -{ -public: - uint32_t start; - uint32_t end; - DocRange(uint32_t start_, uint32_t end_) : start(start_), end(end_) {} -}; - -class SearchContextTest : public vespalib::TestApp -{ -public: - // helper functions - static void addReservedDoc(AttributeVector &ptr); - static void addDocs(AttributeVector & ptr, uint32_t numDocs); - template - static SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); -private: - typedef std::map ConfigMap; - // Map of all config objects - ConfigMap _integerCfg; - ConfigMap _floatCfg; - ConfigMap _stringCfg; - - - template - void fillVector(std::vector & values, size_t numValues); - template - void fillAttribute(V & vec, const std::vector & values); - template - void resetAttribute(V & vec, const T & value); - template - void fillPostingList(PostingList & pl, const DocRange & range); - template - void fillPostingList(PostingList & pl); - static void buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, - QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); - - ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); - template - ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); - template - void performSearch(const V & vec, const vespalib::string & term, - const DocSet & expected, QueryTermSimple::SearchTerm termType); - void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector); - - template - void testSearchIterator(const std::vector & keys, const vespalib::string &keyAsString, const ConfigMap &cfgs); - void testSearchIteratorConformance(); - // test search functionality - template - void testFind(const PostingList & first); - - template - void testSearch(V & attribute, uint32_t numDocs, const std::vector & values); - template - void testSearch(const ConfigMap & cfgs); - template - void testMultiValueSearchHelper(V & vec, const std::vector & values); - template - void testMultiValueSearch(V& attr, uint32_t num_docs, const std::vector & values); - void testSearch(); - - class IteratorTester { - public: - virtual bool matches(const SearchIterator & base) const = 0; - virtual ~IteratorTester() = default; - }; - class AttributeIteratorTester : public IteratorTester - { - public: - bool matches(const SearchIterator & base) const override { - return dynamic_cast(&base) != nullptr; - } - }; - class FlagAttributeIteratorTester : public IteratorTester - { - public: - bool matches(const SearchIterator & base) const override { - return (dynamic_cast(&base) != nullptr) || - (dynamic_cast(&base) != nullptr) || - (dynamic_cast(&base) != nullptr); - } - }; - class AttributePostingListIteratorTester : public IteratorTester - { - public: - bool matches(const SearchIterator & base) const override { - return dynamic_cast(&base) != nullptr || - dynamic_cast(&base) != nullptr; - - } - }; - - - // test search iterator functionality - void testStrictSearchIterator(SearchContext & threeHits, SearchContext & noHits, const IteratorTester & typeTester); - void testNonStrictSearchIterator(SearchContext & threeHits, SearchContext & noHits, const IteratorTester & typeTester); - void fillForSearchIteratorTest(IntegerAttribute * ia); - void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia); - void testSearchIterator(); - - - // test search iterator unpacking - void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra); - void testSearchIteratorUnpacking(const AttributePtr & ptr, SearchContext & sc, bool extra, bool strict) { - sc.fetchPostings(queryeval::ExecuteInfo::create(strict, 1.0)); - for (bool withElementId : {false, true}) { - testSearchIteratorUnpacking(ptr, sc, extra, strict, withElementId); - } - } - void testSearchIteratorUnpacking(const AttributePtr & ptr, SearchContext & sc, - bool extra, bool strict, bool withElementId); - void testSearchIteratorUnpacking(); - - - // test range search - template - void performRangeSearch(const VectorType & vec, const vespalib::string & term, const DocSet & expected); - template - void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector values); - void testRangeSearch(); - void testRangeSearchLimited(); - - - // test case insensitive search - void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, const DocSet & expected); - void testCaseInsensitiveSearch(const AttributePtr & ptr); - void testCaseInsensitiveSearch(); - void testRegexSearch(const AttributePtr & ptr); - void testRegexSearch(); - - - // test prefix search - void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, - const DocSet & expected, QueryTermSimple::SearchTerm termType); - void testPrefixSearch(const AttributePtr & ptr); - void testPrefixSearch(); - - // test that search is working after clear doc - template - void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg, - ValueType startValue, const vespalib::string & term); - void requireThatSearchIsWorkingAfterClearDoc(); - - // test that search is working after load and clear doc - template - void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg, - ValueType startValue, ValueType defaultValue, - const vespalib::string & term); - void requireThatSearchIsWorkingAfterLoadAndClearDoc(); - - template - void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, const Config & cfg, - ValueType value1, ValueType value2); - void requireThatSearchIsWorkingAfterUpdates(); - - void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded(); - - template - void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, const Config & cfg, ValueType value); - void requireThatInvalidSearchTermGivesZeroHits(); - - void requireThatFlagAttributeHandlesTheByteRange(); - - void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, const Config &cfg, int64_t maxValue); - void requireThatOutOfBoundsSearchTermGivesZeroHits(); - - // init maps with config objects - void initIntegerConfig(); - void initFloatConfig(); - void initStringConfig(); - -public: - SearchContextTest(); - ~SearchContextTest() override; - int Main() override; -}; - - -void -SearchContextTest::addReservedDoc(AttributeVector &ptr) -{ - ptr.addReservedDoc(); -} - - -void -SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs) -{ - uint32_t docId; - addReservedDoc(ptr); - for (uint32_t i = 1; i <= numDocs; ++i) { - ptr.addDoc(docId); - EXPECT_EQUAL(docId, i); - } - ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1); -} - -template -void -SearchContextTest::fillVector(std::vector & values, size_t numValues) -{ - values.clear(); - values.reserve(numValues); - for (size_t i = 1; i <= numValues; ++i) { - values.push_back(static_cast(i)); - } -} - -template <> -void -SearchContextTest::fillVector(std::vector & values, size_t numValues) -{ - values.clear(); - values.reserve(numValues); - for (size_t i = 0; i < numValues; ++i) { - vespalib::asciistream ss; - ss << "string" << (i < 10 ? "0" : "") << i; - values.emplace_back(ss.str()); - } -} - -template -void -SearchContextTest::fillAttribute(V & vec, const std::vector & values) -{ - for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { - ASSERT_TRUE(doc < vec.getNumDocs()); - vec.clearDoc(doc); - uint32_t valueCount = doc % (values.size() + 1); - for (uint32_t i = 0; i < valueCount; ++i) { - // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl; - EXPECT_TRUE(vec.append(doc, values[i], 1)); - } - } - vec.commit(true); -} - -template -void -SearchContextTest::resetAttribute(V & vec, const T & value) -{ - for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { - ASSERT_TRUE(doc < vec.getNumDocs()); - EXPECT_TRUE(vec.update(doc, value)); - } - vec.commit(true); -} - -template -void -SearchContextTest::fillPostingList(PostingList & pl, const DocRange & range) -{ - pl.getHits().clear(); - for (uint32_t doc = range.start; doc < range.end; ++doc) { - ASSERT_TRUE(doc < pl.getAttribute().getNumDocs()); - EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue())); - pl.getHits().insert(doc); - } - pl.getAttribute().commit(true); -} - -template -void -SearchContextTest::fillPostingList(PostingList & pl) -{ - auto & vec = dynamic_cast(pl.getAttribute()); - pl.getHits().clear(); - uint32_t sz = vec.getMaxValueCount(); - T * buf = new T[sz]; - for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { - uint32_t valueCount = vec.get(doc, buf, sz); - EXPECT_TRUE(valueCount <= sz); - for (uint32_t i = 0; i < valueCount; ++i) { - if (buf[i] == pl.getValue()) { - //std::cout << "hit for doc(" << doc << "): buf[" << i << "] (=" << buf[i] << ") == " << pl.getValue() << std::endl; - pl.getHits().insert(doc); - break; - } - } - } - delete [] buf; -} - -void -SearchContextTest::buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, QueryTermSimple::SearchTerm termType) -{ - uint32_t indexLen = index.size(); - uint32_t termLen = term.size(); - uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; - uint32_t p = 0; - buffer.resize(queryPacketSize); - switch (termType) { - case QueryTermSimple::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break; - case QueryTermSimple::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break; - default: - buffer[p++] = ParseItem::ITEM_TERM; - break; - } - p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); - memcpy(&buffer[p], index.c_str(), indexLen); - p += indexLen; - p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); - memcpy(&buffer[p], term.c_str(), termLen); - p += termLen; - buffer.resize(p); -} - -template -SearchContextPtr -SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) -{ - std::vector query; - vespalib::asciistream ss; - ss << term; - buildTermQuery(query, vec.getName(), ss.str(), termType); - - return (dynamic_cast(vec)). - getSearch(vespalib::stringref(&query[0], query.size()), - attribute::SearchContextParams()); -} - -ResultSetPtr -SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs) -{ - HitCollector hc(numDocs, numDocs); - sb.initRange(1, numDocs); - // assume strict toplevel search object located at start - for (sb.seek(1u); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { - hc.addHit(sb.getDocId(), 0.0); - } - return hc.getResultSet(); -} - -template -ResultSetPtr -SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) -{ - TermFieldMatchData dummy; - SearchContextPtr sc = getSearch(vec, term, termType); - sc->fetchPostings(queryeval::ExecuteInfo::TRUE); - SearchBasePtr sb = sc->createIterator(&dummy, true); - ResultSetPtr rs = performSearch(*sb, vec.getNumDocs()); - return rs; -} - -template -void -SearchContextTest::performSearch(const V & vec, const vespalib::string & term, - const DocSet & expected, QueryTermSimple::SearchTerm termType) -{ -#if 0 - std::cout << "performSearch[" << term << "]: {"; - std::copy(expected.begin(), expected.end(), std::ostream_iterator(std::cout, ", ")); - std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl; -#endif - { // strict search iterator - ResultSetPtr rs = performSearch(vec, term, termType); - checkResultSet(*rs, expected, false); - } -} - -void -SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector) -{ - EXPECT_EQUAL(rs.getNumHits(), expected.size()); - if (bitVector) { - const BitVector * vec = rs.getBitOverflow(); - if ( ! expected.empty()) { - ASSERT_TRUE(vec != nullptr); - for (const auto & expect : expected) { - EXPECT_TRUE(vec->testBit(expect)); - } - } - } else { - const RankedHit * array = rs.getArray(); - if ( ! expected.empty()) { - ASSERT_TRUE(array != nullptr); - uint32_t i = 0; - for (auto iter = expected.begin(); iter != expected.end(); ++iter, ++i) { - EXPECT_TRUE(array[i]._docId == *iter); - } - } - } -} - - -//----------------------------------------------------------------------------- -// Test search functionality -//----------------------------------------------------------------------------- -template -void -SearchContextTest::testFind(const PostingList & pl) -{ - { // strict search iterator - SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue()); - sc->fetchPostings(queryeval::ExecuteInfo::TRUE); - TermFieldMatchData dummy; - SearchBasePtr sb = sc->createIterator(&dummy, true); - ResultSetPtr rs = performSearch(*sb, pl.getAttribute().getNumDocs()); - checkResultSet(*rs, pl.getHits(), false); - } -} - -template -void -SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector & values) -{ - LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values", - attribute.getName().c_str(), numDocs, values.size()); - - // fill attribute vectors - addDocs(attribute, numDocs); - - std::vector > lists; - - // fill posting lists - ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0); - uint32_t hitCount = attribute.getNumDocs() / values.size(); - for (uint32_t i = 0; i < values.size(); ++i) { - // for each value a range with hitCount documents will hit on that value - lists.push_back(PostingList(attribute, values[i])); - fillPostingList(lists.back(), DocRange(i * hitCount + 1, (i + 1) * hitCount + 1)); - } - - // test find() - for (const auto & list : lists) { - testFind(list); - } -} - -template -void -SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector & values) -{ - std::vector > lists; - - // fill posting lists based on attribute content - for (const T & value : values) { - lists.push_back(PostingList(vec, value)); - fillPostingList(lists.back()); - } - - // test find() - for (const auto & list : lists) { - //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue() - // << ", hit count = " << lists[i].getHitCount() << std::endl; - testFind(list); - } -} - -AttributePtr -create_as(const AttributeVector& attr, const std::string& name_suffix) -{ - return AttributeFactory::createAttribute(attr.getName() + name_suffix, attr.getConfig()); -} - - -template -void -SearchContextTest::testMultiValueSearch(V& attr, uint32_t num_docs, const std::vector & values) -{ - addDocs(attr, num_docs); - LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values", - attr.getName().c_str(), attr.getNumDocs(), values.size()); - - fillAttribute(attr, values); - - testMultiValueSearchHelper(attr, values); - - auto attr2 = create_as(attr, "_2"); - ASSERT_TRUE(attr.save(attr2->getBaseFileName())); - ASSERT_TRUE(attr2->load()); - - testMultiValueSearchHelper(static_cast(*attr2.get()), values); - - size_t sz = values.size(); - ASSERT_TRUE(sz > 2); - std::vector subset; - // values[sz - 2] is not used -> 0 hits - // values[sz - 1] is used once -> 1 hit - for (size_t i = 0; i < sz - 2; ++i) { - subset.push_back(values[i]); - } - - fillAttribute(attr, subset); - - ASSERT_TRUE(1u < attr.getNumDocs()); - EXPECT_TRUE(attr.append(1u, values[sz - 1], 1)); - attr.commit(true); - - testMultiValueSearchHelper(attr, values); - - auto attr3 = create_as(attr, "_3"); - ASSERT_TRUE(attr.save(attr3->getBaseFileName())); - ASSERT_TRUE(attr3->load()); - - testMultiValueSearchHelper(static_cast(*attr3.get()), values); -} - -template -void SearchContextTest::testSearch(const ConfigMap & cfgs) { - uint32_t numDocs = 100; - uint32_t numUniques = 20; - std::vector values; - fillVector(values, numUniques); - for (const auto & cfg : cfgs) { - AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second); - testSearch(*(dynamic_cast(second.get())), numDocs, values); - if (second->hasMultiValue()) { - AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second); - testMultiValueSearch(*(dynamic_cast(first.get())), second->getNumDocs(), values); - } - } -} - - -template -class Verifier : public search::test::SearchIteratorVerifier { -public: - Verifier(const std::vector & keys, const vespalib::string & keyAsString, const vespalib::string & name, - const Config & cfg, bool withElementId); - ~Verifier() override; - SearchIterator::UP - create(bool strict) const override { - _sc->fetchPostings(queryeval::ExecuteInfo::create(strict, 1.0)); - auto search = _sc->createIterator(&_dummy, strict); - if (_withElementId) { - search = std::make_unique(std::move(search), *_sc, _dummy); - } - return search; - } -private: - mutable TermFieldMatchData _dummy; - const bool _withElementId; - AttributePtr _attribute; - SearchContextPtr _sc; -}; - -template -Verifier::Verifier(const std::vector & keys, const vespalib::string & keyAsString, const vespalib::string & name, - const Config & cfg, bool withElementId) - : _withElementId(withElementId), - _attribute(AttributeFactory::createAttribute(name + "-initrange", cfg)), - _sc() -{ - SearchContextTest::addDocs(*_attribute, getDocIdLimit()); - size_t i(0); - for (uint32_t doc : getExpectedDocIds()) { - EXPECT_TRUE(nullptr != dynamic_cast(_attribute.get())); - EXPECT_TRUE(dynamic_cast(_attribute.get())->update(doc, keys[(i++)%keys.size()])); - } - _attribute->commit(true); - _sc = SearchContextTest::getSearch(*_attribute, keyAsString); - ASSERT_TRUE(_sc->valid()); -} - -template -Verifier::~Verifier() = default; - -template -void SearchContextTest::testSearchIterator(const std::vector & keys, const vespalib::string &keyAsString, const ConfigMap &cfgs) { - - for (bool withElementId : {false, true} ) { - for (const auto & cfg : cfgs) { - { - Verifier verifier(keys, keyAsString, cfg.first, cfg.second, withElementId); - verifier.verify(); - } - { - Config withFilter(cfg.second); - withFilter.setIsFilter(true); - Verifier verifier(keys, keyAsString, cfg.first + "-filter", withFilter, withElementId); - verifier.verify(); - } - } - } - -} - -void SearchContextTest::testSearchIteratorConformance() { - testSearchIterator({42,45,46}, "[0;100]", _integerCfg); - testSearchIterator({42}, "42", _integerCfg); - testSearchIterator({42.42}, "42.42", _floatCfg); - testSearchIterator({"any-key"}, "any-key", _stringCfg); -} - -void -SearchContextTest::testSearch() -{ - const uint32_t numDocs = 100; - const uint32_t numUniques = 20; - - { // IntegerAttribute - for (const auto & cfg : _integerCfg) { - AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); - SearchContextPtr sc = getSearch(*attribute, "100"); - ASSERT_TRUE(sc->valid()); - sc = getSearch(*attribute, "1A0"); - EXPECT_FALSE( sc->valid() ); - } - - - { // CollectionType::ARRAY Flags. - std::vector values; - fillVector(values, numUniques); - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg); - testSearch(*(dynamic_cast(second.get())), numDocs, values); - AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg); - testMultiValueSearch(*(dynamic_cast(first.get())), second->getNumDocs(), values); - } - } - - { // FloatingPointAttribute - for (const auto & cfg : _floatCfg) { - AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); - SearchContextPtr sc = getSearch(*attribute, "100"); - ASSERT_TRUE(sc->valid()); - sc = getSearch(*attribute, "7.3"); - ASSERT_TRUE( sc->valid() ); - sc = getSearch(*attribute, "1A0"); - EXPECT_FALSE( sc->valid() ); - } - } - - testSearch(_integerCfg); - testSearch(_floatCfg); - testSearch(_stringCfg); -} - -//----------------------------------------------------------------------------- -// Test search iterator functionality -//----------------------------------------------------------------------------- -void -SearchContextTest::testStrictSearchIterator(SearchContext & threeHits, - SearchContext & noHits, - const IteratorTester & typeTester) -{ - TermFieldMatchData dummy; - { // search for value with 3 hits - threeHits.fetchPostings(queryeval::ExecuteInfo::TRUE); - SearchBasePtr sb = threeHits.createIterator(&dummy, true); - sb->initRange(1, threeHits.attribute().getCommittedDocIdLimit()); - EXPECT_TRUE(typeTester.matches(*sb)); - EXPECT_TRUE(sb->getDocId() == sb->beginId() || - sb->getDocId() == 1u); - EXPECT_TRUE(sb->seek(1)); - EXPECT_EQUAL(sb->getDocId(), 1u); - EXPECT_TRUE(!sb->seek(2)); - EXPECT_EQUAL(sb->getDocId(), 3u); - EXPECT_TRUE(sb->seek(3)); - EXPECT_EQUAL(sb->getDocId(), 3u); - EXPECT_TRUE(!sb->seek(4)); - EXPECT_EQUAL(sb->getDocId(), 5u); - EXPECT_TRUE(sb->seek(5)); - EXPECT_EQUAL(sb->getDocId(), 5u); - EXPECT_TRUE(!sb->seek(6)); - EXPECT_TRUE(sb->isAtEnd()); - } - - { // search for value with no hits - noHits.fetchPostings(queryeval::ExecuteInfo::TRUE); - SearchBasePtr sb = noHits.createIterator(&dummy, true); - sb->initRange(1, noHits.attribute().getCommittedDocIdLimit()); - ASSERT_TRUE(typeTester.matches(*sb)); - EXPECT_TRUE(sb->getDocId() == sb->beginId() || - sb->isAtEnd()); - EXPECT_TRUE(!sb->seek(1)); - EXPECT_TRUE(sb->isAtEnd()); - } -} - -void -SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits, - SearchContext & noHits, - const IteratorTester & typeTester) -{ - TermFieldMatchData dummy; - { // search for value with three hits - threeHits.fetchPostings(queryeval::ExecuteInfo::FALSE); - SearchBasePtr sb = threeHits.createIterator(&dummy, false); - sb->initRange(1, threeHits.attribute().getCommittedDocIdLimit()); - EXPECT_TRUE(typeTester.matches(*sb)); - EXPECT_TRUE(sb->seek(1)); - EXPECT_EQUAL(sb->getDocId(), 1u); - EXPECT_TRUE(!sb->seek(2)); - EXPECT_EQUAL(sb->getDocId(), 1u); - EXPECT_TRUE(sb->seek(3)); - EXPECT_EQUAL(sb->getDocId(), 3u); - EXPECT_TRUE(!sb->seek(4)); - EXPECT_EQUAL(sb->getDocId(), 3u); - EXPECT_TRUE(sb->seek(5)); - EXPECT_EQUAL(sb->getDocId(), 5u); - EXPECT_TRUE(!sb->seek(6)); - EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd()); - } - { // search for value with no hits - noHits.fetchPostings(queryeval::ExecuteInfo::FALSE); - SearchBasePtr sb = noHits.createIterator(&dummy, false); - sb->initRange(1, threeHits.attribute().getCommittedDocIdLimit()); - - EXPECT_TRUE(typeTester.matches(*sb)); - EXPECT_TRUE(sb->getDocId() == sb->beginId() || - sb->isAtEnd()); - EXPECT_TRUE(!sb->seek(1)); - EXPECT_NOT_EQUAL(sb->getDocId(), 1u); - EXPECT_TRUE(!sb->seek(6)); - EXPECT_NOT_EQUAL(sb->getDocId(), 6u); - } -} - -void -SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia) -{ - addReservedDoc(*ia); - ia->addDocs(5); - ia->update(1, 10); - ia->update(2, 20); - ia->update(3, 10); - ia->update(4, 20); - ia->update(5, 10); - ia->commit(true); -} - -void -SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia) -{ - addReservedDoc(*ia); - ia->addDocs(5); - ia->update(1, 1); - ia->update(2, 2); - ia->update(3, 1); - ia->update(4, 2); - ia->update(5, 1); - ia->commit(true); -} - -void -SearchContextTest::testSearchIterator() -{ - { - Config cfg(BasicType::INT32, CollectionType::SINGLE); - AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg); - fillForSearchIteratorTest(dynamic_cast(ptr.get())); - - SearchContextPtr threeHits = getSearch(*ptr.get(), 10); - SearchContextPtr noHits = getSearch(*ptr.get(), 30); - AttributeIteratorTester tester; - testStrictSearchIterator(*threeHits, *noHits, tester); - threeHits = getSearch(*ptr.get(), 10); - noHits = getSearch(*ptr.get(), 30); - testNonStrictSearchIterator(*threeHits, *noHits, tester); - } - { - Config cfg(BasicType::UINT2, CollectionType::SINGLE); - AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg); - fillForSemiNibbleSearchIteratorTest(dynamic_cast - (ptr.get())); - - SearchContextPtr threeHits = getSearch(*ptr.get(), 1); - SearchContextPtr noHits = getSearch(*ptr.get(), 3); - AttributeIteratorTester tester; - testStrictSearchIterator(*threeHits, *noHits, tester); - threeHits = getSearch(*ptr.get(), 1); - noHits = getSearch(*ptr.get(), 3); - testNonStrictSearchIterator(*threeHits, *noHits, tester); - } - { - Config cfg(BasicType::INT32, CollectionType::SINGLE); - cfg.setFastSearch(true); - AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg); - fillForSearchIteratorTest(dynamic_cast(ptr.get())); - - SearchContextPtr threeHits = getSearch(*ptr.get(), 10); - SearchContextPtr noHits = getSearch(*ptr.get(), 30); - AttributePostingListIteratorTester tester; - testStrictSearchIterator(*threeHits, *noHits, tester); - } - { - Config cfg(BasicType::STRING, CollectionType::SINGLE); - cfg.setFastSearch(true); - AttributePtr ptr = AttributeFactory::createAttribute("sfs-string", cfg); - auto * sa = dynamic_cast(ptr.get()); - addReservedDoc(*ptr); - ptr->addDocs(5); - sa->update(1, "three"); - sa->update(2, "two"); - sa->update(3, "three"); - sa->update(4, "two"); - sa->update(5, "three"); - ptr->commit(true); - - SearchContextPtr threeHits = getSearch(*ptr.get(), "three"); - SearchContextPtr noHits = getSearch(*ptr.get(), "none"); - AttributePostingListIteratorTester tester; - testStrictSearchIterator(*threeHits, *noHits, tester); - } - { - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); - fillForSearchIteratorTest(dynamic_cast(ptr.get())); - - SearchContextPtr threeHits = getSearch(*ptr.get(), 10); - SearchContextPtr noHits = getSearch(*ptr.get(), 30); - FlagAttributeIteratorTester tester; - testStrictSearchIterator(*threeHits, *noHits, tester); - threeHits = getSearch(*ptr.get(), 10); - noHits = getSearch(*ptr.get(), 30); - testNonStrictSearchIterator(*threeHits, *noHits, tester); - } -} - - - -//----------------------------------------------------------------------------- -// Test search iterator unpacking -//----------------------------------------------------------------------------- -void -SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, - bool extra) -{ - addReservedDoc(*ia); - ia->addDocs(3); - if (ia->getCollectionType() == CollectionType::SINGLE) { - ia->update(1, 10); - ia->update(2, 10); - ia->update(3, 10); - } else if (ia->getCollectionType() == CollectionType::ARRAY) { - ia->append(1, 10, 1); - ia->append(2, 10, 1); - ia->append(2, 10, 1); - ia->append(3, 10, 1); - ia->append(3, 10, 1); - ia->append(3, 10, 1); - } else { // WEIGHTED SET - ia->append(1, 10, -50); - ia->append(2, 10, 0); - ia->append(3, 10, 50); - } - ia->commit(true); - if (!extra) - return; - ia->addDocs(20); - for (uint32_t d = 4; d < 24; ++d) { - if (ia->getCollectionType() == CollectionType::SINGLE) - ia->update(d, 10); - else - ia->append(d, 10, 1); - } - ia->commit(true); -} - -void -SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, SearchContext & sc, - bool extra, bool strict, bool withElementId) -{ - LOG(info, "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str()); - - TermFieldMatchData md; - md.reset(100); - - TermFieldMatchDataPosition pos; - pos.setElementWeight(100); - md.appendPosition(pos); - - SearchBasePtr sb = sc.createIterator(&md, strict); - if (withElementId) { - sb = std::make_unique(std::move(sb), sc, md); - } - sb->initFullRange(); - - std::vector weights(3); - if (attr->getCollectionType() == CollectionType::SINGLE || - (attr->getCollectionType() == CollectionType::ARRAY && attr->getBasicType() == BasicType::INT8)) - { - weights[0] = 1; - weights[1] = 1; - weights[2] = 1; - } else if (attr->getCollectionType() == CollectionType::ARRAY) { - weights[0] = 1; - weights[1] = 2; - weights[2] = 3; - } else { - weights[0] = -50; - weights[1] = 0; - weights[2] = 50; - } - - // unpack and check weights - sb->unpack(1); - EXPECT_EQUAL(sb->getDocId(), 1u); - EXPECT_EQUAL(md.getDocId(), 1u); - EXPECT_EQUAL(md.getWeight(), weights[0]); - - sb->unpack(2); - EXPECT_EQUAL(sb->getDocId(), 2u); - EXPECT_EQUAL(md.getDocId(), 2u); - if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) { - EXPECT_EQUAL(2, md.end()- md.begin()); - EXPECT_EQUAL(md.begin()[0].getElementId(), 0u); - EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1); - EXPECT_EQUAL(md.begin()[1].getElementId(), 1u); - EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1); - } else { - EXPECT_EQUAL(md.getWeight(), weights[1]); - } - - sb->unpack(3); - EXPECT_EQUAL(sb->getDocId(), 3u); - EXPECT_EQUAL(md.getDocId(), 3u); - if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) { - EXPECT_EQUAL(3, md.end()- md.begin()); - EXPECT_EQUAL(md.begin()[0].getElementId(), 0u); - EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1); - EXPECT_EQUAL(md.begin()[1].getElementId(), 1u); - EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1); - EXPECT_EQUAL(md.begin()[2].getElementId(), 2u); - EXPECT_EQUAL(md.begin()[2].getElementWeight(), 1); - } else { - EXPECT_EQUAL(md.getWeight(), weights[2]); - } - if (extra) { - sb->unpack(4); - EXPECT_EQUAL(sb->getDocId(), 4u); - EXPECT_EQUAL(md.getDocId(), 4u); - EXPECT_EQUAL(md.getWeight(), 1); - } -} - -void -SearchContextTest::testSearchIteratorUnpacking() -{ - std::vector > config; - - { - Config cfg(BasicType::INT32, CollectionType::SINGLE); - config.emplace_back("s-int32", cfg); - } - { - Config cfg(BasicType::UINT4, CollectionType::SINGLE); - config.emplace_back("s-uint4", cfg); - } - { - Config cfg(BasicType::INT32, CollectionType::ARRAY); - config.emplace_back("a-int32", cfg); - } - { - Config cfg(BasicType::INT32, CollectionType::WSET); - config.emplace_back("w-int32", cfg); - } - { - Config cfg(BasicType::INT32, CollectionType::SINGLE); - cfg.setFastSearch(true); - config.emplace_back("sfs-int32", cfg); - } - { - Config cfg(BasicType::INT32, CollectionType::ARRAY); - cfg.setFastSearch(true); - config.emplace_back("afs-int32", cfg); - } - { - Config cfg(BasicType::INT32, CollectionType::WSET); - cfg.setFastSearch(true); - config.emplace_back("wfs-int32", cfg); - } - { - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - config.emplace_back("flags", cfg); - } - - for (const auto & cfg : config) { - AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); - fillForSearchIteratorUnpackingTest(dynamic_cast(ptr.get()), false); - SearchContextPtr sc = getSearch(*ptr.get(), 10); - testSearchIteratorUnpacking(ptr, *sc, false, true); - sc = getSearch(*ptr.get(), 10); - testSearchIteratorUnpacking(ptr, *sc, false, false); - if (cfg.second.fastSearch()) { - AttributePtr ptr2 = AttributeFactory::createAttribute(cfg.first + "-extra", cfg.second); - fillForSearchIteratorUnpackingTest(dynamic_cast(ptr2.get()), true); - SearchContextPtr sc2 = getSearch(*ptr2.get(), 10); - testSearchIteratorUnpacking(ptr2, *sc2, true, true); - sc2 = getSearch(*ptr2.get(), 10); - testSearchIteratorUnpacking(ptr2, *sc2, true, false); - } - } -} - - - -//----------------------------------------------------------------------------- -// Test range search -//----------------------------------------------------------------------------- - -template -void -SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, - const DocSet & expected) -{ - performSearch(vec, term, expected, QueryTermSimple::WORD); -} - -template -void -SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector values) -{ - LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str()); - - auto & vec = dynamic_cast(*ptr.get()); - - addDocs(vec, numDocs); - - std::map postingList; - - uint32_t docCnt = 0; - for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) { - //std::cout << "postingList[" << values[i] << "]: {"; - for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) { - EXPECT_TRUE(vec.update(docCnt + 1u, values[i])); - postingList[values[i]].insert(docCnt + 1u); - //std::cout << docCnt << ", "; - } - //std::cout << "}" << std::endl; - } - ptr->commit(true); - uint32_t smallHits = 0; - ValueType zeroValue = 0; - bool smallUInt = isUnsignedSmallIntAttribute(vec); - if (smallUInt) { - for (uint32_t i = docCnt ; i < numDocs; ++i) { - postingList[zeroValue].insert(i + 1u); - ++smallHits; - } - } - - // test less than ("a") - for (uint32_t i = 0; i < values.size(); ++i) { - vespalib::asciistream ss; - ss << ">" << values[i]; - DocSet expected; - for (uint32_t j = i + 1; j < values.size(); ++j) { - expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); - } - performRangeSearch(vec, ss.str(), expected); - } - - // test range ("[a;b]") - for (uint32_t i = 0; i < values.size(); ++i) { - for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i - vespalib::asciistream ss; - ss << "[" << values[i] << ";" << values[j] << "]"; - DocSet expected; - for (uint32_t k = i; k < j + 1; ++k) { - expected.insert(postingList[values[k]].begin(), postingList[values[k]].end()); - } - performRangeSearch(vec, ss.str(), expected); - } - } - - { // test large range - vespalib::asciistream ss; - ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]"; - DocSet expected; - for (uint32_t doc = 0; doc < numDocs; ++doc) { - expected.insert(doc + 1); - } - performRangeSearch(vec, ss.str(), expected); - } -} - -void -SearchContextTest::testRangeSearchLimited() -{ - largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 }; - std::vector values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0])); - Config cfg(BasicType::INT32, CollectionType::SINGLE); - cfg.setFastSearch(true); - AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg); - auto & vec = dynamic_cast(*ptr); - addDocs(vec, values.size()); - for (size_t i(1); i < values.size(); i++) { - EXPECT_TRUE(vec.update(i, values[i])); - } - ptr->commit(true); - - DocSet expected; - for (size_t i(1); i < 12; i++) { - expected.put(i); - } - performRangeSearch(vec, "[1;9]", expected); - performRangeSearch(vec, "[1;9;100]", expected); - performRangeSearch(vec, "[1;9;-100]", expected); - expected.clear(); - expected.put(3); - performRangeSearch(vec, "<1;3>", expected); - expected.put(4); - performRangeSearch(vec, "<1;3]", expected); - expected.clear(); - expected.put(1).put(2).put(3); - performRangeSearch(vec, "[1;3>", expected); - expected.put(4); - performRangeSearch(vec, "[1;3]", expected); - expected.clear(); - expected.put(1).put(2); - performRangeSearch(vec, "[1;9;1]", expected); - performRangeSearch(vec, "[1;9;2]", expected); - expected.put(3); - performRangeSearch(vec, "[1;9;3]", expected); - expected.clear(); - expected.put(10).put(11); - performRangeSearch(vec, "[1;9;-1]", expected); - performRangeSearch(vec, "[1;9;-2]", expected); - expected.put(9); - performRangeSearch(vec, "[1;9;-3]", expected); - performRangeSearch(vec, "[1;9;-3]", expected); - - expected.clear(); - for (size_t i(1); i < 13; i++) { - expected.put(i); - } - performRangeSearch(vec, "[;;100]", expected); - performRangeSearch(vec, "[;;-100]", expected); - - expected.clear(); - expected.put(1).put(2); - performRangeSearch(vec, "[;;1]", expected); - expected.clear(); - expected.put(12); - performRangeSearch(vec, "[;;-1]", expected); -} - -void -SearchContextTest::testRangeSearch() -{ - const uint32_t numDocs = 100; - const uint32_t numValues = 20; - const uint32_t numNibbleValues = 9; - - { // IntegerAttribute - std::vector values; - std::vector nibbleValues; - largeint_t start = 1; - - for (uint32_t i = 0; i < numValues; ++i) { - values.push_back(start + i); - } - for (uint32_t i = 0; i < numNibbleValues; ++i) { - nibbleValues.push_back(start + i); - } - - for (const auto & cfg : _integerCfg) { - AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); - testRangeSearch(ptr, numDocs, values); - } - { // CollectionType::ARRAY Flags. - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); - testRangeSearch(ptr, numDocs, values); - } - { - Config cfg(BasicType::UINT4, CollectionType::SINGLE); - AttributePtr ptr = AttributeFactory::createAttribute("s-uint4", cfg); - testRangeSearch(ptr, numDocs, nibbleValues); - } - } - - { // FloatingPointAttribute - std::vector values; - double start = 1; - - for (uint32_t i = 0; i < numValues; ++i) { - values.push_back(start + i); - } - - for (const auto & cfg : _floatCfg) { - AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); - testRangeSearch(ptr, numDocs, values); - } - } -} - - -//----------------------------------------------------------------------------- -// Test case insensitive search -//----------------------------------------------------------------------------- - -void -SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, - const DocSet & expected) -{ - performSearch(vec, term, expected, QueryTermSimple::WORD); -} - -void -SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr) -{ - LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str()); - - auto & vec = dynamic_cast(*ptr.get()); - - uint32_t numDocs = 5 * 5; - addDocs(*ptr.get(), numDocs); - - const char * terms[][5] = { - {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower - {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper - {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper - {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase - {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase - }; - - uint32_t doc = 1; - for (uint32_t j = 0; j < 5; ++j) { - for (uint32_t i = 0; i < 5; ++i) { - ASSERT_TRUE(doc < vec.getNumDocs()); - EXPECT_TRUE(vec.update(doc++, terms[i][j])); - } - } - - ptr->commit(true); - - const char * buffer[1]; - doc = 1; - for (uint32_t j = 0; j < 5; ++j) { - for (uint32_t i = 0; i < 5; ++i) { - EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1)); - EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j])); - } - } - - DocSet empty; - for (uint32_t j = 0; j < 5; ++j) { - DocSet expected; - for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) { - expected.insert(doc); - } - // for non-posting attributes only lower case search terms should give hits - performCaseInsensitiveSearch(vec, terms[0][j], expected); - - if (ptr->getConfig().fastSearch()) { - for (uint32_t i = 1; i < 5; ++i) { - performCaseInsensitiveSearch(vec, terms[i][j], expected); - } - } else { - for (uint32_t i = 1; i < 4; ++i) { - performCaseInsensitiveSearch(vec, terms[i][j], empty); - } - } - } - performCaseInsensitiveSearch(vec, "none", empty); - performCaseInsensitiveSearch(vec, "NONE", empty); - performCaseInsensitiveSearch(vec, "None", empty); -} - -void -SearchContextTest::testRegexSearch(const AttributePtr & ptr) -{ - LOG(info, "testRegexSearch: vector '%s'", ptr->getName().c_str()); - - auto & vec = dynamic_cast(*ptr.get()); - - uint32_t numDocs = 6; - addDocs(*ptr.get(), numDocs); - - const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"}; - std::vector terms = { "abc", "bc2de" }; - - for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { - ASSERT_TRUE(doc < vec.getNumDocs()); - EXPECT_TRUE(vec.update(doc, strings[doc - 1])); - } - - ptr->commit(true); - - std::vector expected; - DocSet empty; - { - uint32_t docs[] = {1, 2, 3, 4, 5, 6}; - expected.emplace_back(docs, docs + 6); // "abc" - } - { - uint32_t docs[] = {2, 3}; - expected.emplace_back(docs, docs + 2); // "bc2de" - } - - for (uint32_t i = 0; i < terms.size(); ++i) { - performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP); - performSearch(vec, terms[i], empty, QueryTermSimple::WORD); - } -} - - -void -SearchContextTest::testCaseInsensitiveSearch() -{ - for (const auto & cfg : _stringCfg) { - testCaseInsensitiveSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); - } -} - -void -SearchContextTest::testRegexSearch() -{ - for (const auto & cfg : _stringCfg) { - testRegexSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); - } -} - - -//----------------------------------------------------------------------------- -// Test prefix search -//----------------------------------------------------------------------------- - -void -SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, - const DocSet & expected, QueryTermSimple::SearchTerm termType) -{ - performSearch(vec, term, expected, termType); -} - -void -SearchContextTest::testPrefixSearch(const AttributePtr & ptr) -{ - LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str()); - - auto & vec = dynamic_cast(*ptr.get()); - - uint32_t numDocs = 6; - addDocs(*ptr.get(), numDocs); - - const char * strings [] = {"prefixsearch", "PREFIXSEARCH", "PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"}; - const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"}, - {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}}; - - for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { - ASSERT_TRUE(doc < vec.getNumDocs()); - EXPECT_TRUE(vec.update(doc, strings[doc - 1])); - } - - ptr->commit(true); - - std::vector expected; - DocSet empty; - { - uint32_t docs[] = {1, 2, 3, 4, 5, 6}; - expected.emplace_back(docs, docs + 6); // "pre" - } - { - uint32_t docs[] = {1, 2, 3}; - expected.emplace_back(docs, docs + 3); // "pref" - } - { - uint32_t docs[] = {4, 5, 6}; - expected.emplace_back(docs, docs + 3); // "prec" - } - expected.emplace_back(); // "prex" - - for (uint32_t i = 0; i < 4; ++i) { - for (uint32_t j = 0; j < 3; ++j) { - if (j == 0 || ptr->getConfig().fastSearch()) { - performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM); - performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); - } else { - performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM); - performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); - } - } - } -} - - -void -SearchContextTest::testPrefixSearch() -{ - for (const auto & cfg : _stringCfg) { - testPrefixSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); - } -} - -template -void -SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, - const Config & cfg, - ValueType startValue, - const vespalib::string & term) -{ - AttributePtr a = AttributeFactory::createAttribute(name, cfg); - LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'", - a->getName().c_str(), term.c_str()); - addReservedDoc(*a); - a->addDocs(4); - auto & v = dynamic_cast(*a); - resetAttribute(v, startValue); - { - ResultSetPtr rs = performSearch(v, term); - EXPECT_EQUAL(4u, rs->getNumHits()); - ASSERT_TRUE(4u == rs->getNumHits()); - const RankedHit * array = rs->getArray(); - EXPECT_EQUAL(1u, array[0]._docId); - EXPECT_EQUAL(2u, array[1]._docId); - EXPECT_EQUAL(3u, array[2]._docId); - EXPECT_EQUAL(4u, array[3]._docId); - } - a->clearDoc(1); - a->clearDoc(3); - a->commit(true); - { - ResultSetPtr rs = performSearch(v, term); - EXPECT_EQUAL(2u, rs->getNumHits()); - const RankedHit * array = rs->getArray(); - EXPECT_EQUAL(2u, array[0]._docId); - EXPECT_EQUAL(4u, array[1]._docId); - } -} - -void -SearchContextTest::requireThatSearchIsWorkingAfterClearDoc() -{ - for (const auto & cfg : _integerCfg) { - requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10, "10"); - requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10, "<11"); - } - - for (const auto & cfg : _floatCfg) { - requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10.5, "10.5"); - requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10.5, "<10.6"); - } - - for (const auto & cfg : _stringCfg) { - requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, "start", "start"); - } -} - -template -void -SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, - const Config & cfg, - ValueType startValue, - ValueType defaultValue, - const vespalib::string & term) -{ - AttributePtr a = AttributeFactory::createAttribute(name, cfg); - LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'", - a->getName().c_str(), term.c_str()); - addReservedDoc(*a); - a->addDocs(15); - auto & va = dynamic_cast(*a); - resetAttribute(va, startValue); // triggers vector vector in posting list (count 15) - AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg); - EXPECT_TRUE(a->save(b->getBaseFileName())); - EXPECT_TRUE(b->load()); - b->clearDoc(6); // goes from vector vector to single vector with count 14 - b->commit(true); - { - ResultSetPtr rs = performSearch(dynamic_cast(*b), term); - EXPECT_EQUAL(14u, rs->getNumHits()); - const RankedHit * array = rs->getArray(); - for (uint32_t i = 0; i < 14; ++i) { - if (i < 5) { - EXPECT_EQUAL(i + 1, array[i]._docId); - } else - EXPECT_EQUAL(i + 2, array[i]._docId); - } - } - ValueType buf; - if (cfg.collectionType().isMultiValue()) { - EXPECT_EQUAL(0u, b->get(6, &buf, 1)); - } else { - EXPECT_EQUAL(1u, b->get(6, &buf, 1)); - EXPECT_EQUAL(defaultValue, buf); - } -} - -void -SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc() -{ - { - int64_t value = 10; - int64_t defValue = search::attribute::getUndefined(); - requireThatSearchIsWorkingAfterLoadAndClearDoc("s-fs-int32", _integerCfg["s-fs-int32"], - value, defValue, "10"); - requireThatSearchIsWorkingAfterLoadAndClearDoc("a-fs-int32", _integerCfg["a-fs-int32"], - value, defValue, "10"); - } - { - vespalib::string value = "foo"; - vespalib::string defValue = ""; - requireThatSearchIsWorkingAfterLoadAndClearDoc("s-fs-str", _stringCfg["s-fs-str"], - value, defValue, value); - requireThatSearchIsWorkingAfterLoadAndClearDoc("a-fs-str", _stringCfg["a-fs-str"], - value, defValue, value); - } -} - -template -void -SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, - const Config & cfg, - ValueType value1, - ValueType value2) -{ - AttributePtr a = AttributeFactory::createAttribute(name, cfg); - auto & va = dynamic_cast(*a); - LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str()); - addReservedDoc(*a); - a->addDocs(2); - va.update(1, value1); - va.commit(true); - va.update(2, value1); - va.update(2, value2); - va.commit(true); - { - ResultSetPtr rs = performSearch(va, value1); - EXPECT_EQUAL(1u, rs->getNumHits()); // doc 1 should not have this value - } - { - ResultSetPtr rs = performSearch(va, value2); - EXPECT_EQUAL(1u, rs->getNumHits()); - } -} - -void -SearchContextTest::requireThatSearchIsWorkingAfterUpdates() -{ - for (const auto & cfg : _integerCfg) { - requireThatSearchIsWorkingAfterUpdates(cfg.first, cfg.second, 10, 20); - } - - for (const auto & cfg : _stringCfg) { - requireThatSearchIsWorkingAfterUpdates(cfg.first, cfg.second, "foo", "bar"); - } -} - -void -SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded() -{ - LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()"); - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - { - cfg.setGrowStrategy(GrowStrategy::make(1, 0, 1)); - AttributePtr a = AttributeFactory::createAttribute("flags", cfg); - auto & fa = dynamic_cast(*a); - addReservedDoc(fa); - fa.addDocs(1); - fa.append(1, 10, 1); - fa.append(1, 24, 1); - fa.commit(true); - fa.addDocs(1); - fa.append(2, 20, 1); - fa.append(2, 24, 1); - fa.commit(true); - fa.addDocs(1); - fa.append(3, 30, 1); - fa.append(3, 26, 1); - fa.commit(true); - fa.addDocs(1); - fa.append(4, 40, 1); - fa.append(4, 24, 1); - fa.commit(true); - { - ResultSetPtr rs = performSearch(fa, "<24"); - EXPECT_EQUAL(2u, rs->getNumHits()); - EXPECT_EQUAL(1u, rs->getArray()[0]._docId); - EXPECT_EQUAL(2u, rs->getArray()[1]._docId); - } - { - ResultSetPtr rs = performSearch(fa, "24"); - EXPECT_EQUAL(3u, rs->getNumHits()); - EXPECT_EQUAL(1u, rs->getArray()[0]._docId); - EXPECT_EQUAL(2u, rs->getArray()[1]._docId); - EXPECT_EQUAL(4u, rs->getArray()[2]._docId); - } - } - { - cfg.setGrowStrategy(GrowStrategy::make(4, 0, 4)); - AttributePtr a = AttributeFactory::createAttribute("flags", cfg); - auto & fa = dynamic_cast(*a); - std::vector exp50; - std::vector exp60; - addReservedDoc(fa); - for (uint32_t i = 0; i < 200; ++i) { - uint32_t docId; - EXPECT_TRUE(fa.addDoc(docId)); - if (i % 2 == 0) { - fa.append(docId, 50, 1); - exp50.push_back(docId); - } else { - fa.append(docId, 60, 1); - exp60.push_back(docId); - } - fa.commit(true); - { - ResultSetPtr rs1 = performSearch(fa, "50"); - ResultSetPtr rs2 = performSearch(fa, "<51"); - EXPECT_EQUAL(exp50.size(), rs1->getNumHits()); - EXPECT_EQUAL(exp50.size(), rs2->getNumHits()); - for (size_t j = 0; j < exp50.size(); ++j) { - EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId); - EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId); - } - } - { - ResultSetPtr rs = performSearch(fa, "60"); - EXPECT_EQUAL(exp60.size(), rs->getNumHits()); - for (size_t j = 0; j < exp60.size(); ++j) { - EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId); - } - } - } - } -} - -template -void -SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, - const Config & cfg, - ValueType value) -{ - AttributePtr a = AttributeFactory::createAttribute(name, cfg); - auto & va = dynamic_cast(*a); - LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", a->getName().c_str()); - addReservedDoc(*a); - a->addDocs(1); - va.update(1, value); - va.commit(true); - ResultSetPtr rs = performSearch(va, "foo"); - EXPECT_EQUAL(0u, rs->getNumHits()); -} - -void -SearchContextTest::requireThatInvalidSearchTermGivesZeroHits() -{ - for (const auto & cfg : _integerCfg) { - requireThatInvalidSearchTermGivesZeroHits(cfg.first, cfg.second, 10); - } - for (const auto & cfg : _floatCfg) { - requireThatInvalidSearchTermGivesZeroHits(cfg.first, cfg.second, 10); - } -} - -void -SearchContextTest::requireThatFlagAttributeHandlesTheByteRange() -{ - LOG(info, "requireThatFlagAttributeHandlesTheByteRange()"); - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - - AttributePtr a = AttributeFactory::createAttribute("flags", cfg); - auto & fa = dynamic_cast(*a); - addReservedDoc(fa); - fa.addDocs(5); - fa.append(1, -128, 1); - fa.append(2, -64, 1); - fa.append(2, -8, 1); - fa.append(3, 0, 1); - fa.append(3, 8, 1); - fa.append(4, 64, 1); - fa.append(4, 24, 1); - fa.append(5, 127, 1); - fa.commit(true); - - performSearch(fa, "-128", DocSet().put(1), QueryTermSimple::WORD); - performSearch(fa, "127", DocSet().put(5), QueryTermSimple::WORD); - performSearch(fa, ">-128", DocSet().put(2).put(3).put(4).put(5), QueryTermSimple::WORD); - performSearch(fa, "<127", DocSet().put(1).put(2).put(3).put(4), QueryTermSimple::WORD); - performSearch(fa, "[-128;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); - performSearch(fa, "[-8;8]", DocSet().put(2).put(3), QueryTermSimple::WORD); - performSearch(fa, "[8;127]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); - performSearch(fa, "[-129;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); - performSearch(fa, "[8;128]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); -} - -void -SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, - const Config &cfg, - int64_t maxValue) -{ - AttributePtr a = AttributeFactory::createAttribute(name, cfg); - auto &ia = dynamic_cast(*a); - addReservedDoc(*a); - a->addDocs(1); - ia.update(1, maxValue); - ia.commit(true); - vespalib::string term = vespalib::make_string("%" PRIu64 "", (int64_t) maxValue + 1); - LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str()); - ResultSetPtr rs = performSearch(ia, term); - EXPECT_EQUAL(0u, rs->getNumHits()); -} - -void -SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits() -{ - for (const auto & cfg : _integerCfg) { - int32_t maxValue = std::numeric_limits::max(); - requireThatOutOfBoundsSearchTermGivesZeroHits(cfg.first, cfg.second, maxValue); - } - { - Config cfg(BasicType::INT8, CollectionType::ARRAY); - cfg.setFastSearch(true); - int8_t maxValue = std::numeric_limits::max(); - requireThatOutOfBoundsSearchTermGivesZeroHits("flags", cfg, maxValue); - } -} - -void -SearchContextTest::initIntegerConfig() -{ - { // CollectionType::SINGLE - Config cfg(BasicType::INT32, CollectionType::SINGLE); - _integerCfg["s-int32"] = cfg; - } - { // CollectionType::SINGLE && fastSearch - Config cfg(BasicType::INT32, CollectionType::SINGLE); - cfg.setFastSearch(true); - _integerCfg["s-fs-int32"] = cfg; - } - { // CollectionType::ARRAY - Config cfg(BasicType::INT32, CollectionType::ARRAY); - _integerCfg["a-int32"] = cfg; - } - { // CollectionType::ARRAY && fastSearch - Config cfg(BasicType::INT32, CollectionType::ARRAY); - cfg.setFastSearch(true); - _integerCfg["a-fs-int32"] = cfg; - } - { // CollectionType::WSET - Config cfg(BasicType::INT32, CollectionType::WSET); - _integerCfg["w-int32"] = cfg; - } - { // CollectionType::WSET && fastSearch - Config cfg(BasicType::INT32, CollectionType::WSET); - cfg.setFastSearch(true); - _integerCfg["w-fs-int32"] = cfg; - } -} - -void -SearchContextTest::initFloatConfig() -{ - { // CollectionType::SINGLE - Config cfg(BasicType::FLOAT, CollectionType::SINGLE); - _floatCfg["s-float"] = cfg; - } - { // CollectionType::SINGLE && fastSearch - Config cfg(BasicType::FLOAT, CollectionType::SINGLE); - cfg.setFastSearch(true); - _floatCfg["s-fs-float"] = cfg; - } - { // CollectionType::ARRAY - Config cfg(BasicType::FLOAT, CollectionType::ARRAY); - _floatCfg["a-float"] = cfg; - } - { // CollectionType::ARRAY && fastSearch - Config cfg(BasicType::FLOAT, CollectionType::ARRAY); - cfg.setFastSearch(true); - _floatCfg["a-fs-float"] = cfg; - } - { // CollectionType::WSET - Config cfg(BasicType::FLOAT, CollectionType::WSET); - _floatCfg["w-float"] = cfg; - } - { // CollectionType::WSET && fastSearch - Config cfg(BasicType::FLOAT, CollectionType::WSET); - cfg.setFastSearch(true); - _floatCfg["w-fs-float"] = cfg; - } -} - -void -SearchContextTest::initStringConfig() -{ - { // CollectionType::SINGLE - Config cfg(BasicType::STRING, CollectionType::SINGLE); - _stringCfg["s-str"] = cfg; - } - { // CollectionType::ARRAY - Config cfg(BasicType::STRING, CollectionType::ARRAY); - _stringCfg["a-str"] = cfg; - } - { // CollectionType::WSET - Config cfg(BasicType::STRING, CollectionType::WSET); - _stringCfg["w-str"] = cfg; - } - { // CollectionType::SINGLE && fastSearch - Config cfg(BasicType::STRING, CollectionType::SINGLE); - cfg.setFastSearch(true); - _stringCfg["s-fs-str"] = cfg; - } - { // CollectionType::ARRAY && fastSearch - Config cfg(BasicType::STRING, CollectionType::ARRAY); - cfg.setFastSearch(true); - _stringCfg["a-fs-str"] = cfg; - } - { // CollectionType::WSET && fastSearch - Config cfg(BasicType::STRING, CollectionType::WSET); - cfg.setFastSearch(true); - _stringCfg["w-fs-str"] = cfg; - } -} - -SearchContextTest::SearchContextTest() : - _integerCfg(), - _floatCfg(), - _stringCfg() -{ - initIntegerConfig(); - initFloatConfig(); - initStringConfig(); -} - -SearchContextTest::~SearchContextTest() = default; - -int -SearchContextTest::Main() -{ - TEST_INIT("searchcontext_test"); - EXPECT_TRUE(true); - - testSearch(); - testSearchIterator(); - testRangeSearch(); - testRangeSearchLimited(); - testCaseInsensitiveSearch(); - testRegexSearch(); - testPrefixSearch(); - testSearchIteratorConformance(); - testSearchIteratorUnpacking(); - TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); - TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc()); - TEST_DO(requireThatSearchIsWorkingAfterUpdates()); - TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()); - TEST_DO(requireThatInvalidSearchTermGivesZeroHits()); - TEST_DO(requireThatFlagAttributeHandlesTheByteRange()); - TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits()); - - TEST_DONE(); -} - -} - -TEST_APPHOOK(search::SearchContextTest); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp new file mode 100644 index 00000000000..7d4a2d63355 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp @@ -0,0 +1,1964 @@ +// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +LOG_SETUP("searchcontext_test"); + +namespace search { + +namespace { + +bool +isUnsignedSmallIntAttribute(const AttributeVector &a) +{ + switch (a.getBasicType()) + { + case attribute::BasicType::BOOL: + case attribute::BasicType::UINT2: + case attribute::BasicType::UINT4: + return true; + default: + return false; + } +} + +} + +typedef AttributeVector::SP AttributePtr; +typedef std::unique_ptr SearchContextPtr; +typedef AttributeVector::SearchContext SearchContext; +using attribute::Config; +using attribute::BasicType; +using attribute::CollectionType; +typedef AttributeVector::largeint_t largeint_t; +typedef queryeval::SearchIterator::UP SearchBasePtr; +typedef std::unique_ptr ResultSetPtr; + +using queryeval::HitCollector; +using queryeval::SearchIterator; +using fef::MatchData; +using fef::TermFieldMatchData; +using fef::TermFieldMatchDataArray; +using fef::TermFieldMatchDataPosition; + +class DocSet : public std::set +{ +public: + DocSet(); + ~DocSet(); + DocSet(const uint32_t *b, const uint32_t *e) : std::set(b, e) {} + DocSet & put(const uint32_t &v) { + insert(v); + return *this; + } +}; + +DocSet::DocSet() = default; +DocSet::~DocSet() = default; + +template +class PostingList +{ +private: + V * _vec; + T _value; + DocSet _hits; + +public: + PostingList(V & vec, T value); + ~PostingList(); + const V & getAttribute() const { return *_vec; } + V & getAttribute() { return *_vec; } + const T & getValue() const { return _value; } + DocSet & getHits() { return _hits; } + const DocSet & getHits() const { return _hits; } + uint32_t getHitCount() const { return _hits.size(); } +}; + +template +PostingList::PostingList(V & vec, T value) : _vec(&vec), _value(value), _hits() {} + +template +PostingList::~PostingList() = default; + +class DocRange +{ +public: + uint32_t start; + uint32_t end; + DocRange(uint32_t start_, uint32_t end_) : start(start_), end(end_) {} +}; + +class SearchContextTest : public vespalib::TestApp +{ +public: + // helper functions + static void addReservedDoc(AttributeVector &ptr); + static void addDocs(AttributeVector & ptr, uint32_t numDocs); + template + static SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); +private: + typedef std::map ConfigMap; + // Map of all config objects + ConfigMap _integerCfg; + ConfigMap _floatCfg; + ConfigMap _stringCfg; + + + template + void fillVector(std::vector & values, size_t numValues); + template + void fillAttribute(V & vec, const std::vector & values); + template + void resetAttribute(V & vec, const T & value); + template + void fillPostingList(PostingList & pl, const DocRange & range); + template + void fillPostingList(PostingList & pl); + static void buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, + QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + + ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); + template + ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template + void performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector); + + template + void testSearchIterator(const std::vector & keys, const vespalib::string &keyAsString, const ConfigMap &cfgs); + void testSearchIteratorConformance(); + // test search functionality + template + void testFind(const PostingList & first); + + template + void testSearch(V & attribute, uint32_t numDocs, const std::vector & values); + template + void testSearch(const ConfigMap & cfgs); + template + void testMultiValueSearchHelper(V & vec, const std::vector & values); + template + void testMultiValueSearch(V& attr, uint32_t num_docs, const std::vector & values); + void testSearch(); + + class IteratorTester { + public: + virtual bool matches(const SearchIterator & base) const = 0; + virtual ~IteratorTester() = default; + }; + class AttributeIteratorTester : public IteratorTester + { + public: + bool matches(const SearchIterator & base) const override { + return dynamic_cast(&base) != nullptr; + } + }; + class FlagAttributeIteratorTester : public IteratorTester + { + public: + bool matches(const SearchIterator & base) const override { + return (dynamic_cast(&base) != nullptr) || + (dynamic_cast(&base) != nullptr) || + (dynamic_cast(&base) != nullptr); + } + }; + class AttributePostingListIteratorTester : public IteratorTester + { + public: + bool matches(const SearchIterator & base) const override { + return dynamic_cast(&base) != nullptr || + dynamic_cast(&base) != nullptr; + + } + }; + + + // test search iterator functionality + void testStrictSearchIterator(SearchContext & threeHits, SearchContext & noHits, const IteratorTester & typeTester); + void testNonStrictSearchIterator(SearchContext & threeHits, SearchContext & noHits, const IteratorTester & typeTester); + void fillForSearchIteratorTest(IntegerAttribute * ia); + void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia); + void testSearchIterator(); + + + // test search iterator unpacking + void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra); + void testSearchIteratorUnpacking(const AttributePtr & ptr, SearchContext & sc, bool extra, bool strict) { + sc.fetchPostings(queryeval::ExecuteInfo::create(strict, 1.0)); + for (bool withElementId : {false, true}) { + testSearchIteratorUnpacking(ptr, sc, extra, strict, withElementId); + } + } + void testSearchIteratorUnpacking(const AttributePtr & ptr, SearchContext & sc, + bool extra, bool strict, bool withElementId); + void testSearchIteratorUnpacking(); + + + // test range search + template + void performRangeSearch(const VectorType & vec, const vespalib::string & term, const DocSet & expected); + template + void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector values); + void testRangeSearch(); + void testRangeSearchLimited(); + + + // test case insensitive search + void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, const DocSet & expected); + void testCaseInsensitiveSearch(const AttributePtr & ptr); + void testCaseInsensitiveSearch(); + void testRegexSearch(const AttributePtr & ptr); + void testRegexSearch(); + + + // test prefix search + void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void testPrefixSearch(const AttributePtr & ptr); + void testPrefixSearch(); + + // test that search is working after clear doc + template + void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, const vespalib::string & term); + void requireThatSearchIsWorkingAfterClearDoc(); + + // test that search is working after load and clear doc + template + void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, ValueType defaultValue, + const vespalib::string & term); + void requireThatSearchIsWorkingAfterLoadAndClearDoc(); + + template + void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, const Config & cfg, + ValueType value1, ValueType value2); + void requireThatSearchIsWorkingAfterUpdates(); + + void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded(); + + template + void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, const Config & cfg, ValueType value); + void requireThatInvalidSearchTermGivesZeroHits(); + + void requireThatFlagAttributeHandlesTheByteRange(); + + void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, const Config &cfg, int64_t maxValue); + void requireThatOutOfBoundsSearchTermGivesZeroHits(); + + // init maps with config objects + void initIntegerConfig(); + void initFloatConfig(); + void initStringConfig(); + +public: + SearchContextTest(); + ~SearchContextTest() override; + int Main() override; +}; + + +void +SearchContextTest::addReservedDoc(AttributeVector &ptr) +{ + ptr.addReservedDoc(); +} + + +void +SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs) +{ + uint32_t docId; + addReservedDoc(ptr); + for (uint32_t i = 1; i <= numDocs; ++i) { + ptr.addDoc(docId); + EXPECT_EQUAL(docId, i); + } + ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1); +} + +template +void +SearchContextTest::fillVector(std::vector & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 1; i <= numValues; ++i) { + values.push_back(static_cast(i)); + } +} + +template <> +void +SearchContextTest::fillVector(std::vector & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? "0" : "") << i; + values.emplace_back(ss.str()); + } +} + +template +void +SearchContextTest::fillAttribute(V & vec, const std::vector & values) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + vec.clearDoc(doc); + uint32_t valueCount = doc % (values.size() + 1); + for (uint32_t i = 0; i < valueCount; ++i) { + // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl; + EXPECT_TRUE(vec.append(doc, values[i], 1)); + } + } + vec.commit(true); +} + +template +void +SearchContextTest::resetAttribute(V & vec, const T & value) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, value)); + } + vec.commit(true); +} + +template +void +SearchContextTest::fillPostingList(PostingList & pl, const DocRange & range) +{ + pl.getHits().clear(); + for (uint32_t doc = range.start; doc < range.end; ++doc) { + ASSERT_TRUE(doc < pl.getAttribute().getNumDocs()); + EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue())); + pl.getHits().insert(doc); + } + pl.getAttribute().commit(true); +} + +template +void +SearchContextTest::fillPostingList(PostingList & pl) +{ + auto & vec = dynamic_cast(pl.getAttribute()); + pl.getHits().clear(); + uint32_t sz = vec.getMaxValueCount(); + T * buf = new T[sz]; + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + uint32_t valueCount = vec.get(doc, buf, sz); + EXPECT_TRUE(valueCount <= sz); + for (uint32_t i = 0; i < valueCount; ++i) { + if (buf[i] == pl.getValue()) { + //std::cout << "hit for doc(" << doc << "): buf[" << i << "] (=" << buf[i] << ") == " << pl.getValue() << std::endl; + pl.getHits().insert(doc); + break; + } + } + } + delete [] buf; +} + +void +SearchContextTest::buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, QueryTermSimple::SearchTerm termType) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + switch (termType) { + case QueryTermSimple::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break; + case QueryTermSimple::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break; + default: + buffer[p++] = ParseItem::ITEM_TERM; + break; + } + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + +template +SearchContextPtr +SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + std::vector query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), termType); + + return (dynamic_cast(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + attribute::SearchContextParams()); +} + +ResultSetPtr +SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs) +{ + HitCollector hc(numDocs, numDocs); + sb.initRange(1, numDocs); + // assume strict toplevel search object located at start + for (sb.seek(1u); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +template +ResultSetPtr +SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + TermFieldMatchData dummy; + SearchContextPtr sc = getSearch(vec, term, termType); + sc->fetchPostings(queryeval::ExecuteInfo::TRUE); + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, vec.getNumDocs()); + return rs; +} + +template +void +SearchContextTest::performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ +#if 0 + std::cout << "performSearch[" << term << "]: {"; + std::copy(expected.begin(), expected.end(), std::ostream_iterator(std::cout, ", ")); + std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl; +#endif + { // strict search iterator + ResultSetPtr rs = performSearch(vec, term, termType); + checkResultSet(*rs, expected, false); + } +} + +void +SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector) +{ + EXPECT_EQUAL(rs.getNumHits(), expected.size()); + if (bitVector) { + const BitVector * vec = rs.getBitOverflow(); + if ( ! expected.empty()) { + ASSERT_TRUE(vec != nullptr); + for (const auto & expect : expected) { + EXPECT_TRUE(vec->testBit(expect)); + } + } + } else { + const RankedHit * array = rs.getArray(); + if ( ! expected.empty()) { + ASSERT_TRUE(array != nullptr); + uint32_t i = 0; + for (auto iter = expected.begin(); iter != expected.end(); ++iter, ++i) { + EXPECT_TRUE(array[i]._docId == *iter); + } + } + } +} + + +//----------------------------------------------------------------------------- +// Test search functionality +//----------------------------------------------------------------------------- +template +void +SearchContextTest::testFind(const PostingList & pl) +{ + { // strict search iterator + SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue()); + sc->fetchPostings(queryeval::ExecuteInfo::TRUE); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, pl.getAttribute().getNumDocs()); + checkResultSet(*rs, pl.getHits(), false); + } +} + +template +void +SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector & values) +{ + LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values", + attribute.getName().c_str(), numDocs, values.size()); + + // fill attribute vectors + addDocs(attribute, numDocs); + + std::vector > lists; + + // fill posting lists + ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0); + uint32_t hitCount = attribute.getNumDocs() / values.size(); + for (uint32_t i = 0; i < values.size(); ++i) { + // for each value a range with hitCount documents will hit on that value + lists.push_back(PostingList(attribute, values[i])); + fillPostingList(lists.back(), DocRange(i * hitCount + 1, (i + 1) * hitCount + 1)); + } + + // test find() + for (const auto & list : lists) { + testFind(list); + } +} + +template +void +SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector & values) +{ + std::vector > lists; + + // fill posting lists based on attribute content + for (const T & value : values) { + lists.push_back(PostingList(vec, value)); + fillPostingList(lists.back()); + } + + // test find() + for (const auto & list : lists) { + //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue() + // << ", hit count = " << lists[i].getHitCount() << std::endl; + testFind(list); + } +} + +AttributePtr +create_as(const AttributeVector& attr, const std::string& name_suffix) +{ + return AttributeFactory::createAttribute(attr.getName() + name_suffix, attr.getConfig()); +} + + +template +void +SearchContextTest::testMultiValueSearch(V& attr, uint32_t num_docs, const std::vector & values) +{ + addDocs(attr, num_docs); + LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values", + attr.getName().c_str(), attr.getNumDocs(), values.size()); + + fillAttribute(attr, values); + + testMultiValueSearchHelper(attr, values); + + auto attr2 = create_as(attr, "_2"); + ASSERT_TRUE(attr.save(attr2->getBaseFileName())); + ASSERT_TRUE(attr2->load()); + + testMultiValueSearchHelper(static_cast(*attr2.get()), values); + + size_t sz = values.size(); + ASSERT_TRUE(sz > 2); + std::vector subset; + // values[sz - 2] is not used -> 0 hits + // values[sz - 1] is used once -> 1 hit + for (size_t i = 0; i < sz - 2; ++i) { + subset.push_back(values[i]); + } + + fillAttribute(attr, subset); + + ASSERT_TRUE(1u < attr.getNumDocs()); + EXPECT_TRUE(attr.append(1u, values[sz - 1], 1)); + attr.commit(true); + + testMultiValueSearchHelper(attr, values); + + auto attr3 = create_as(attr, "_3"); + ASSERT_TRUE(attr.save(attr3->getBaseFileName())); + ASSERT_TRUE(attr3->load()); + + testMultiValueSearchHelper(static_cast(*attr3.get()), values); +} + +template +void SearchContextTest::testSearch(const ConfigMap & cfgs) { + uint32_t numDocs = 100; + uint32_t numUniques = 20; + std::vector values; + fillVector(values, numUniques); + for (const auto & cfg : cfgs) { + AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second); + testSearch(*(dynamic_cast(second.get())), numDocs, values); + if (second->hasMultiValue()) { + AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second); + testMultiValueSearch(*(dynamic_cast(first.get())), second->getNumDocs(), values); + } + } +} + + +template +class Verifier : public search::test::SearchIteratorVerifier { +public: + Verifier(const std::vector & keys, const vespalib::string & keyAsString, const vespalib::string & name, + const Config & cfg, bool withElementId); + ~Verifier() override; + SearchIterator::UP + create(bool strict) const override { + _sc->fetchPostings(queryeval::ExecuteInfo::create(strict, 1.0)); + auto search = _sc->createIterator(&_dummy, strict); + if (_withElementId) { + search = std::make_unique(std::move(search), *_sc, _dummy); + } + return search; + } +private: + mutable TermFieldMatchData _dummy; + const bool _withElementId; + AttributePtr _attribute; + SearchContextPtr _sc; +}; + +template +Verifier::Verifier(const std::vector & keys, const vespalib::string & keyAsString, const vespalib::string & name, + const Config & cfg, bool withElementId) + : _withElementId(withElementId), + _attribute(AttributeFactory::createAttribute(name + "-initrange", cfg)), + _sc() +{ + SearchContextTest::addDocs(*_attribute, getDocIdLimit()); + size_t i(0); + for (uint32_t doc : getExpectedDocIds()) { + EXPECT_TRUE(nullptr != dynamic_cast(_attribute.get())); + EXPECT_TRUE(dynamic_cast(_attribute.get())->update(doc, keys[(i++)%keys.size()])); + } + _attribute->commit(true); + _sc = SearchContextTest::getSearch(*_attribute, keyAsString); + ASSERT_TRUE(_sc->valid()); +} + +template +Verifier::~Verifier() = default; + +template +void SearchContextTest::testSearchIterator(const std::vector & keys, const vespalib::string &keyAsString, const ConfigMap &cfgs) { + + for (bool withElementId : {false, true} ) { + for (const auto & cfg : cfgs) { + { + Verifier verifier(keys, keyAsString, cfg.first, cfg.second, withElementId); + verifier.verify(); + } + { + Config withFilter(cfg.second); + withFilter.setIsFilter(true); + Verifier verifier(keys, keyAsString, cfg.first + "-filter", withFilter, withElementId); + verifier.verify(); + } + } + } + +} + +void SearchContextTest::testSearchIteratorConformance() { + testSearchIterator({42,45,46}, "[0;100]", _integerCfg); + testSearchIterator({42}, "42", _integerCfg); + testSearchIterator({42.42}, "42.42", _floatCfg); + testSearchIterator({"any-key"}, "any-key", _stringCfg); +} + +void +SearchContextTest::testSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numUniques = 20; + + { // IntegerAttribute + for (const auto & cfg : _integerCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + + + { // CollectionType::ARRAY Flags. + std::vector values; + fillVector(values, numUniques); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg); + testSearch(*(dynamic_cast(second.get())), numDocs, values); + AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg); + testMultiValueSearch(*(dynamic_cast(first.get())), second->getNumDocs(), values); + } + } + + { // FloatingPointAttribute + for (const auto & cfg : _floatCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "7.3"); + ASSERT_TRUE( sc->valid() ); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + } + + testSearch(_integerCfg); + testSearch(_floatCfg); + testSearch(_stringCfg); +} + +//----------------------------------------------------------------------------- +// Test search iterator functionality +//----------------------------------------------------------------------------- +void +SearchContextTest::testStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with 3 hits + threeHits.fetchPostings(queryeval::ExecuteInfo::TRUE); + SearchBasePtr sb = threeHits.createIterator(&dummy, true); + sb->initRange(1, threeHits.attribute().getCommittedDocIdLimit()); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->getDocId() == 1u); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->isAtEnd()); + } + + { // search for value with no hits + noHits.fetchPostings(queryeval::ExecuteInfo::TRUE); + SearchBasePtr sb = noHits.createIterator(&dummy, true); + sb->initRange(1, noHits.attribute().getCommittedDocIdLimit()); + ASSERT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_TRUE(sb->isAtEnd()); + } +} + +void +SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with three hits + threeHits.fetchPostings(queryeval::ExecuteInfo::FALSE); + SearchBasePtr sb = threeHits.createIterator(&dummy, false); + sb->initRange(1, threeHits.attribute().getCommittedDocIdLimit()); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd()); + } + { // search for value with no hits + noHits.fetchPostings(queryeval::ExecuteInfo::FALSE); + SearchBasePtr sb = noHits.createIterator(&dummy, false); + sb->initRange(1, threeHits.attribute().getCommittedDocIdLimit()); + + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_NOT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_NOT_EQUAL(sb->getDocId(), 6u); + } +} + +void +SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 10); + ia->update(2, 20); + ia->update(3, 10); + ia->update(4, 20); + ia->update(5, 10); + ia->commit(true); +} + +void +SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 1); + ia->update(2, 2); + ia->update(3, 1); + ia->update(4, 2); + ia->update(5, 1); + ia->commit(true); +} + +void +SearchContextTest::testSearchIterator() +{ + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg); + fillForSearchIteratorTest(dynamic_cast(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::UINT2, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg); + fillForSemiNibbleSearchIteratorTest(dynamic_cast + (ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 1); + SearchContextPtr noHits = getSearch(*ptr.get(), 3); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 1); + noHits = getSearch(*ptr.get(), 3); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg); + fillForSearchIteratorTest(dynamic_cast(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfs-string", cfg); + auto * sa = dynamic_cast(ptr.get()); + addReservedDoc(*ptr); + ptr->addDocs(5); + sa->update(1, "three"); + sa->update(2, "two"); + sa->update(3, "three"); + sa->update(4, "two"); + sa->update(5, "three"); + ptr->commit(true); + + SearchContextPtr threeHits = getSearch(*ptr.get(), "three"); + SearchContextPtr noHits = getSearch(*ptr.get(), "none"); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + fillForSearchIteratorTest(dynamic_cast(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + FlagAttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } +} + + + +//----------------------------------------------------------------------------- +// Test search iterator unpacking +//----------------------------------------------------------------------------- +void +SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, + bool extra) +{ + addReservedDoc(*ia); + ia->addDocs(3); + if (ia->getCollectionType() == CollectionType::SINGLE) { + ia->update(1, 10); + ia->update(2, 10); + ia->update(3, 10); + } else if (ia->getCollectionType() == CollectionType::ARRAY) { + ia->append(1, 10, 1); + ia->append(2, 10, 1); + ia->append(2, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + } else { // WEIGHTED SET + ia->append(1, 10, -50); + ia->append(2, 10, 0); + ia->append(3, 10, 50); + } + ia->commit(true); + if (!extra) + return; + ia->addDocs(20); + for (uint32_t d = 4; d < 24; ++d) { + if (ia->getCollectionType() == CollectionType::SINGLE) + ia->update(d, 10); + else + ia->append(d, 10, 1); + } + ia->commit(true); +} + +void +SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, SearchContext & sc, + bool extra, bool strict, bool withElementId) +{ + LOG(info, "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str()); + + TermFieldMatchData md; + md.reset(100); + + TermFieldMatchDataPosition pos; + pos.setElementWeight(100); + md.appendPosition(pos); + + SearchBasePtr sb = sc.createIterator(&md, strict); + if (withElementId) { + sb = std::make_unique(std::move(sb), sc, md); + } + sb->initFullRange(); + + std::vector weights(3); + if (attr->getCollectionType() == CollectionType::SINGLE || + (attr->getCollectionType() == CollectionType::ARRAY && attr->getBasicType() == BasicType::INT8)) + { + weights[0] = 1; + weights[1] = 1; + weights[2] = 1; + } else if (attr->getCollectionType() == CollectionType::ARRAY) { + weights[0] = 1; + weights[1] = 2; + weights[2] = 3; + } else { + weights[0] = -50; + weights[1] = 0; + weights[2] = 50; + } + + // unpack and check weights + sb->unpack(1); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_EQUAL(md.getDocId(), 1u); + EXPECT_EQUAL(md.getWeight(), weights[0]); + + sb->unpack(2); + EXPECT_EQUAL(sb->getDocId(), 2u); + EXPECT_EQUAL(md.getDocId(), 2u); + if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) { + EXPECT_EQUAL(2, md.end()- md.begin()); + EXPECT_EQUAL(md.begin()[0].getElementId(), 0u); + EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1); + EXPECT_EQUAL(md.begin()[1].getElementId(), 1u); + EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1); + } else { + EXPECT_EQUAL(md.getWeight(), weights[1]); + } + + sb->unpack(3); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_EQUAL(md.getDocId(), 3u); + if (withElementId && attr->hasMultiValue() && !attr->hasWeightedSetType()) { + EXPECT_EQUAL(3, md.end()- md.begin()); + EXPECT_EQUAL(md.begin()[0].getElementId(), 0u); + EXPECT_EQUAL(md.begin()[0].getElementWeight(), 1); + EXPECT_EQUAL(md.begin()[1].getElementId(), 1u); + EXPECT_EQUAL(md.begin()[1].getElementWeight(), 1); + EXPECT_EQUAL(md.begin()[2].getElementId(), 2u); + EXPECT_EQUAL(md.begin()[2].getElementWeight(), 1); + } else { + EXPECT_EQUAL(md.getWeight(), weights[2]); + } + if (extra) { + sb->unpack(4); + EXPECT_EQUAL(sb->getDocId(), 4u); + EXPECT_EQUAL(md.getDocId(), 4u); + EXPECT_EQUAL(md.getWeight(), 1); + } +} + +void +SearchContextTest::testSearchIteratorUnpacking() +{ + std::vector > config; + + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + config.emplace_back("s-int32", cfg); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + config.emplace_back("s-uint4", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + config.emplace_back("a-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + config.emplace_back("w-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + config.emplace_back("sfs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("afs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + config.emplace_back("wfs-int32", cfg); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("flags", cfg); + } + + for (const auto & cfg : config) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast(ptr.get()), false); + SearchContextPtr sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, true); + sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, false); + if (cfg.second.fastSearch()) { + AttributePtr ptr2 = AttributeFactory::createAttribute(cfg.first + "-extra", cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast(ptr2.get()), true); + SearchContextPtr sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, true); + sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, false); + } + } +} + + + +//----------------------------------------------------------------------------- +// Test range search +//----------------------------------------------------------------------------- + +template +void +SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +template +void +SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector values) +{ + LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str()); + + auto & vec = dynamic_cast(*ptr.get()); + + addDocs(vec, numDocs); + + std::map postingList; + + uint32_t docCnt = 0; + for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) { + //std::cout << "postingList[" << values[i] << "]: {"; + for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) { + EXPECT_TRUE(vec.update(docCnt + 1u, values[i])); + postingList[values[i]].insert(docCnt + 1u); + //std::cout << docCnt << ", "; + } + //std::cout << "}" << std::endl; + } + ptr->commit(true); + uint32_t smallHits = 0; + ValueType zeroValue = 0; + bool smallUInt = isUnsignedSmallIntAttribute(vec); + if (smallUInt) { + for (uint32_t i = docCnt ; i < numDocs; ++i) { + postingList[zeroValue].insert(i + 1u); + ++smallHits; + } + } + + // test less than ("a") + for (uint32_t i = 0; i < values.size(); ++i) { + vespalib::asciistream ss; + ss << ">" << values[i]; + DocSet expected; + for (uint32_t j = i + 1; j < values.size(); ++j) { + expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + + // test range ("[a;b]") + for (uint32_t i = 0; i < values.size(); ++i) { + for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i + vespalib::asciistream ss; + ss << "[" << values[i] << ";" << values[j] << "]"; + DocSet expected; + for (uint32_t k = i; k < j + 1; ++k) { + expected.insert(postingList[values[k]].begin(), postingList[values[k]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + } + + { // test large range + vespalib::asciistream ss; + ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]"; + DocSet expected; + for (uint32_t doc = 0; doc < numDocs; ++doc) { + expected.insert(doc + 1); + } + performRangeSearch(vec, ss.str(), expected); + } +} + +void +SearchContextTest::testRangeSearchLimited() +{ + largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 }; + std::vector values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0])); + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg); + auto & vec = dynamic_cast(*ptr); + addDocs(vec, values.size()); + for (size_t i(1); i < values.size(); i++) { + EXPECT_TRUE(vec.update(i, values[i])); + } + ptr->commit(true); + + DocSet expected; + for (size_t i(1); i < 12; i++) { + expected.put(i); + } + performRangeSearch(vec, "[1;9]", expected); + performRangeSearch(vec, "[1;9;100]", expected); + performRangeSearch(vec, "[1;9;-100]", expected); + expected.clear(); + expected.put(3); + performRangeSearch(vec, "<1;3>", expected); + expected.put(4); + performRangeSearch(vec, "<1;3]", expected); + expected.clear(); + expected.put(1).put(2).put(3); + performRangeSearch(vec, "[1;3>", expected); + expected.put(4); + performRangeSearch(vec, "[1;3]", expected); + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[1;9;1]", expected); + performRangeSearch(vec, "[1;9;2]", expected); + expected.put(3); + performRangeSearch(vec, "[1;9;3]", expected); + expected.clear(); + expected.put(10).put(11); + performRangeSearch(vec, "[1;9;-1]", expected); + performRangeSearch(vec, "[1;9;-2]", expected); + expected.put(9); + performRangeSearch(vec, "[1;9;-3]", expected); + performRangeSearch(vec, "[1;9;-3]", expected); + + expected.clear(); + for (size_t i(1); i < 13; i++) { + expected.put(i); + } + performRangeSearch(vec, "[;;100]", expected); + performRangeSearch(vec, "[;;-100]", expected); + + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[;;1]", expected); + expected.clear(); + expected.put(12); + performRangeSearch(vec, "[;;-1]", expected); +} + +void +SearchContextTest::testRangeSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numValues = 20; + const uint32_t numNibbleValues = 9; + + { // IntegerAttribute + std::vector values; + std::vector nibbleValues; + largeint_t start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + for (uint32_t i = 0; i < numNibbleValues; ++i) { + nibbleValues.push_back(start + i); + } + + for (const auto & cfg : _integerCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch(ptr, numDocs, values); + } + { // CollectionType::ARRAY Flags. + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + testRangeSearch(ptr, numDocs, values); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint4", cfg); + testRangeSearch(ptr, numDocs, nibbleValues); + } + } + + { // FloatingPointAttribute + std::vector values; + double start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + + for (const auto & cfg : _floatCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch(ptr, numDocs, values); + } + } +} + + +//----------------------------------------------------------------------------- +// Test case insensitive search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +void +SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr) +{ + LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str()); + + auto & vec = dynamic_cast(*ptr.get()); + + uint32_t numDocs = 5 * 5; + addDocs(*ptr.get(), numDocs); + + const char * terms[][5] = { + {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower + {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper + {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper + {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase + {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase + }; + + uint32_t doc = 1; + for (uint32_t j = 0; j < 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc++, terms[i][j])); + } + } + + ptr->commit(true); + + const char * buffer[1]; + doc = 1; + for (uint32_t j = 0; j < 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1)); + EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j])); + } + } + + DocSet empty; + for (uint32_t j = 0; j < 5; ++j) { + DocSet expected; + for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) { + expected.insert(doc); + } + // for non-posting attributes only lower case search terms should give hits + performCaseInsensitiveSearch(vec, terms[0][j], expected); + + if (ptr->getConfig().fastSearch()) { + for (uint32_t i = 1; i < 5; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], expected); + } + } else { + for (uint32_t i = 1; i < 4; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], empty); + } + } + } + performCaseInsensitiveSearch(vec, "none", empty); + performCaseInsensitiveSearch(vec, "NONE", empty); + performCaseInsensitiveSearch(vec, "None", empty); +} + +void +SearchContextTest::testRegexSearch(const AttributePtr & ptr) +{ + LOG(info, "testRegexSearch: vector '%s'", ptr->getName().c_str()); + + auto & vec = dynamic_cast(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"}; + std::vector terms = { "abc", "bc2de" }; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.emplace_back(docs, docs + 6); // "abc" + } + { + uint32_t docs[] = {2, 3}; + expected.emplace_back(docs, docs + 2); // "bc2de" + } + + for (uint32_t i = 0; i < terms.size(); ++i) { + performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP); + performSearch(vec, terms[i], empty, QueryTermSimple::WORD); + } +} + + +void +SearchContextTest::testCaseInsensitiveSearch() +{ + for (const auto & cfg : _stringCfg) { + testCaseInsensitiveSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +void +SearchContextTest::testRegexSearch() +{ + for (const auto & cfg : _stringCfg) { + testRegexSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + + +//----------------------------------------------------------------------------- +// Test prefix search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ + performSearch(vec, term, expected, termType); +} + +void +SearchContextTest::testPrefixSearch(const AttributePtr & ptr) +{ + LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str()); + + auto & vec = dynamic_cast(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"prefixsearch", "PREFIXSEARCH", "PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"}; + const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"}, + {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}}; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.emplace_back(docs, docs + 6); // "pre" + } + { + uint32_t docs[] = {1, 2, 3}; + expected.emplace_back(docs, docs + 3); // "pref" + } + { + uint32_t docs[] = {4, 5, 6}; + expected.emplace_back(docs, docs + 3); // "prec" + } + expected.emplace_back(); // "prex" + + for (uint32_t i = 0; i < 4; ++i) { + for (uint32_t j = 0; j < 3; ++j) { + if (j == 0 || ptr->getConfig().fastSearch()) { + performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } else { + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } + } + } +} + + +void +SearchContextTest::testPrefixSearch() +{ + for (const auto & cfg : _stringCfg) { + testPrefixSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +template +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(4); + auto & v = dynamic_cast(*a); + resetAttribute(v, startValue); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(4u, rs->getNumHits()); + ASSERT_TRUE(4u == rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(1u, array[0]._docId); + EXPECT_EQUAL(2u, array[1]._docId); + EXPECT_EQUAL(3u, array[2]._docId); + EXPECT_EQUAL(4u, array[3]._docId); + } + a->clearDoc(1); + a->clearDoc(3); + a->commit(true); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(2u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(2u, array[0]._docId); + EXPECT_EQUAL(4u, array[1]._docId); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10, "10"); + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10, "<11"); + } + + for (const auto & cfg : _floatCfg) { + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10.5, "10.5"); + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, 10.5, "<10.6"); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterClearDoc(cfg.first, cfg.second, "start", "start"); + } +} + +template +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + ValueType defaultValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(15); + auto & va = dynamic_cast(*a); + resetAttribute(va, startValue); // triggers vector vector in posting list (count 15) + AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg); + EXPECT_TRUE(a->save(b->getBaseFileName())); + EXPECT_TRUE(b->load()); + b->clearDoc(6); // goes from vector vector to single vector with count 14 + b->commit(true); + { + ResultSetPtr rs = performSearch(dynamic_cast(*b), term); + EXPECT_EQUAL(14u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + for (uint32_t i = 0; i < 14; ++i) { + if (i < 5) { + EXPECT_EQUAL(i + 1, array[i]._docId); + } else + EXPECT_EQUAL(i + 2, array[i]._docId); + } + } + ValueType buf; + if (cfg.collectionType().isMultiValue()) { + EXPECT_EQUAL(0u, b->get(6, &buf, 1)); + } else { + EXPECT_EQUAL(1u, b->get(6, &buf, 1)); + EXPECT_EQUAL(defaultValue, buf); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc() +{ + { + int64_t value = 10; + int64_t defValue = search::attribute::getUndefined(); + requireThatSearchIsWorkingAfterLoadAndClearDoc("s-fs-int32", _integerCfg["s-fs-int32"], + value, defValue, "10"); + requireThatSearchIsWorkingAfterLoadAndClearDoc("a-fs-int32", _integerCfg["a-fs-int32"], + value, defValue, "10"); + } + { + vespalib::string value = "foo"; + vespalib::string defValue = ""; + requireThatSearchIsWorkingAfterLoadAndClearDoc("s-fs-str", _stringCfg["s-fs-str"], + value, defValue, value); + requireThatSearchIsWorkingAfterLoadAndClearDoc("a-fs-str", _stringCfg["a-fs-str"], + value, defValue, value); + } +} + +template +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + auto & va = dynamic_cast(*a); + LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(2); + va.update(1, value1); + va.commit(true); + va.update(2, value1); + va.update(2, value2); + va.commit(true); + { + ResultSetPtr rs = performSearch(va, value1); + EXPECT_EQUAL(1u, rs->getNumHits()); // doc 1 should not have this value + } + { + ResultSetPtr rs = performSearch(va, value2); + EXPECT_EQUAL(1u, rs->getNumHits()); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterUpdates(cfg.first, cfg.second, 10, 20); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterUpdates(cfg.first, cfg.second, "foo", "bar"); + } +} + +void +SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded() +{ + LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()"); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + { + cfg.setGrowStrategy(GrowStrategy::make(1, 0, 1)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + auto & fa = dynamic_cast(*a); + addReservedDoc(fa); + fa.addDocs(1); + fa.append(1, 10, 1); + fa.append(1, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(2, 20, 1); + fa.append(2, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(3, 30, 1); + fa.append(3, 26, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(4, 40, 1); + fa.append(4, 24, 1); + fa.commit(true); + { + ResultSetPtr rs = performSearch(fa, "<24"); + EXPECT_EQUAL(2u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + } + { + ResultSetPtr rs = performSearch(fa, "24"); + EXPECT_EQUAL(3u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + EXPECT_EQUAL(4u, rs->getArray()[2]._docId); + } + } + { + cfg.setGrowStrategy(GrowStrategy::make(4, 0, 4)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + auto & fa = dynamic_cast(*a); + std::vector exp50; + std::vector exp60; + addReservedDoc(fa); + for (uint32_t i = 0; i < 200; ++i) { + uint32_t docId; + EXPECT_TRUE(fa.addDoc(docId)); + if (i % 2 == 0) { + fa.append(docId, 50, 1); + exp50.push_back(docId); + } else { + fa.append(docId, 60, 1); + exp60.push_back(docId); + } + fa.commit(true); + { + ResultSetPtr rs1 = performSearch(fa, "50"); + ResultSetPtr rs2 = performSearch(fa, "<51"); + EXPECT_EQUAL(exp50.size(), rs1->getNumHits()); + EXPECT_EQUAL(exp50.size(), rs2->getNumHits()); + for (size_t j = 0; j < exp50.size(); ++j) { + EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId); + EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId); + } + } + { + ResultSetPtr rs = performSearch(fa, "60"); + EXPECT_EQUAL(exp60.size(), rs->getNumHits()); + for (size_t j = 0; j < exp60.size(); ++j) { + EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId); + } + } + } + } +} + +template +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + auto & va = dynamic_cast(*a); + LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(1); + va.update(1, value); + va.commit(true); + ResultSetPtr rs = performSearch(va, "foo"); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + requireThatInvalidSearchTermGivesZeroHits(cfg.first, cfg.second, 10); + } + for (const auto & cfg : _floatCfg) { + requireThatInvalidSearchTermGivesZeroHits(cfg.first, cfg.second, 10); + } +} + +void +SearchContextTest::requireThatFlagAttributeHandlesTheByteRange() +{ + LOG(info, "requireThatFlagAttributeHandlesTheByteRange()"); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + auto & fa = dynamic_cast(*a); + addReservedDoc(fa); + fa.addDocs(5); + fa.append(1, -128, 1); + fa.append(2, -64, 1); + fa.append(2, -8, 1); + fa.append(3, 0, 1); + fa.append(3, 8, 1); + fa.append(4, 64, 1); + fa.append(4, 24, 1); + fa.append(5, 127, 1); + fa.commit(true); + + performSearch(fa, "-128", DocSet().put(1), QueryTermSimple::WORD); + performSearch(fa, "127", DocSet().put(5), QueryTermSimple::WORD); + performSearch(fa, ">-128", DocSet().put(2).put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "<127", DocSet().put(1).put(2).put(3).put(4), QueryTermSimple::WORD); + performSearch(fa, "[-128;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); + performSearch(fa, "[-8;8]", DocSet().put(2).put(3), QueryTermSimple::WORD); + performSearch(fa, "[8;127]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "[-129;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); + performSearch(fa, "[8;128]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + auto &ia = dynamic_cast(*a); + addReservedDoc(*a); + a->addDocs(1); + ia.update(1, maxValue); + ia.commit(true); + vespalib::string term = vespalib::make_string("%" PRIu64 "", (int64_t) maxValue + 1); + LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str()); + ResultSetPtr rs = performSearch(ia, term); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + int32_t maxValue = std::numeric_limits::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits(cfg.first, cfg.second, maxValue); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + int8_t maxValue = std::numeric_limits::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits("flags", cfg, maxValue); + } +} + +void +SearchContextTest::initIntegerConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::INT32, CollectionType::SINGLE); + _integerCfg["s-int32"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + _integerCfg["s-fs-int32"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::INT32, CollectionType::ARRAY); + _integerCfg["a-int32"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + _integerCfg["a-fs-int32"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::INT32, CollectionType::WSET); + _integerCfg["w-int32"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + _integerCfg["w-fs-int32"] = cfg; + } +} + +void +SearchContextTest::initFloatConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + _floatCfg["s-float"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + _floatCfg["s-fs-float"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + _floatCfg["a-float"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + cfg.setFastSearch(true); + _floatCfg["a-fs-float"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::FLOAT, CollectionType::WSET); + _floatCfg["w-float"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + _floatCfg["w-fs-float"] = cfg; + } +} + +void +SearchContextTest::initStringConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::STRING, CollectionType::SINGLE); + _stringCfg["s-str"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::STRING, CollectionType::ARRAY); + _stringCfg["a-str"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::STRING, CollectionType::WSET); + _stringCfg["w-str"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + _stringCfg["s-fs-str"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + _stringCfg["a-fs-str"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + _stringCfg["w-fs-str"] = cfg; + } +} + +SearchContextTest::SearchContextTest() : + _integerCfg(), + _floatCfg(), + _stringCfg() +{ + initIntegerConfig(); + initFloatConfig(); + initStringConfig(); +} + +SearchContextTest::~SearchContextTest() = default; + +int +SearchContextTest::Main() +{ + TEST_INIT("searchcontext_test"); + EXPECT_TRUE(true); + + testSearch(); + testSearchIterator(); + testRangeSearch(); + testRangeSearchLimited(); + testCaseInsensitiveSearch(); + testRegexSearch(); + testPrefixSearch(); + testSearchIteratorConformance(); + testSearchIteratorUnpacking(); + TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterUpdates()); + TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()); + TEST_DO(requireThatInvalidSearchTermGivesZeroHits()); + TEST_DO(requireThatFlagAttributeHandlesTheByteRange()); + TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::SearchContextTest); -- cgit v1.2.3