// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. /* Overview: test application for posting list backed attribute vectors. Main runs testPostingList, testArithmeticValueUpdate, testReload, testMinMax, testStringFold, testDupValuesInIntArray and testDupValuesInStringArray. NOTE(review): this copy of the file appears to have lost its line breaks and the text between angle brackets (include targets, template arguments) - restore those from the original source before building. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include LOG_SETUP("postinglistattribute_test"); using std::shared_ptr; /* Sync is defined here to return true without doing anything - presumably to skip real disk syncs during the test; confirm. */ bool FastOS_UNIX_File::Sync() { return true; } namespace search { using attribute::CollectionType; using attribute::BasicType; using attribute::Config; using queryeval::PostingInfo; using queryeval::MinMaxPostingInfo; using search::fef::TermFieldMatchData; using search::queryeval::SearchIterator; typedef std::unique_ptr SearchContextPtr; typedef std::unique_ptr SearchBasePtr; /* Writes the doc ids the iterator hits, seeking from doc id 1 onward, to ss as a comma separated list. When md is non-null each hit is unpacked and its element weight is appended in the form [w=N]. */ void toStr(std::stringstream &ss, SearchIterator &it, TermFieldMatchData *md) { it.initFullRange(); it.seek(1u); bool first = true; while ( !it.isAtEnd()) { if (first) { first = false; } else { ss << ","; } ss << it.getDocId(); if (md != nullptr) { it.unpack(it.getDocId()); ss << "[w=" << md->begin()->getElementWeight() << "]"; } it.seek(it.getDocId() + 1); } } /* Renders the iterator with toStr and expects the text to equal exp. Returns true when the expectation held, false otherwise. */ bool assertIterator(const std::string &exp, SearchIterator &it, TermFieldMatchData *md = nullptr) { std::stringstream ss; toStr(ss, it, md); if (!EXPECT_EQUAL(exp, ss.str())) return false; return true; } /* Test application for posting list attribute vectors; the individual test groups are the private member functions declared below and are driven from Main. */ class PostingListAttributeTest : public vespalib::TestApp { private: typedef IntegerAttribute::largeint_t largeint_t; typedef AttributeVector::SP AttributePtr; typedef std::set DocSet; typedef SingleValueNumericPostingAttribute< EnumAttribute > > Int32PostingListAttribute; typedef MultiValueNumericPostingAttribute< EnumAttribute >, multivalue::Value > Int32ArrayPostingListAttribute; typedef MultiValueNumericPostingAttribute< EnumAttribute >, multivalue::WeightedValue > Int32WsetPostingListAttribute; typedef SingleValueNumericPostingAttribute< EnumAttribute > > FloatPostingListAttribute; typedef MultiValueNumericPostingAttribute< EnumAttribute >, multivalue::Value > FloatArrayPostingListAttribute; typedef 
MultiValueNumericPostingAttribute< EnumAttribute >, multivalue::WeightedValue > FloatWsetPostingListAttribute; typedef SingleValueStringPostingAttribute StringPostingListAttribute; typedef ArrayStringPostingAttribute StringArrayPostingListAttribute; typedef WeightedSetStringPostingAttribute StringWsetPostingListAttribute; template void populate(VectorType &v); template VectorType & as(AttributePtr &v); IntegerAttribute & asInt(AttributePtr &v); StringAttribute & asString(AttributePtr &v); void buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, bool prefix); template SearchContextPtr getSearch(const V & vec, const T & term, bool prefix, const attribute::SearchContextParams & params=attribute::SearchContextParams()); template SearchContextPtr getSearch(const V & vec); template SearchContextPtr getSearch2(const V & vec); bool assertSearch(const std::string &exp, StringAttribute &sa); bool assertSearch(const std::string &exp, StringAttribute &v, const std::string &key); bool assertSearch(const std::string &exp, IntegerAttribute &v, int32_t key); void addDocs(const AttributePtr & ptr, uint32_t numDocs); template void checkPostingList(const VectorType & vec, const std::vector & values, const Range & range); template void checkSearch(bool useBitVector, const AttributeVector & vec, const BufferType & term, uint32_t numHits, uint32_t docBegin, uint32_t docEnd); template void testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, uint32_t numDocs, const std::vector & values); void testPostingList(); void testPostingList(bool enableBitVector); void testPostingList(bool enableBitVector, uint32_t numDocs, uint32_t numUniqueValues); template void checkPostingList(AttributeType & vec, ValueType value, DocSet expected); template void checkNonExistantPostingList(AttributeType & vec, ValueType value); template void testArithmeticValueUpdate(const AttributePtr & ptr); void testArithmeticValueUpdate(); template void 
testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value); void testReload(); template void testMinMax(AttributePtr &ptr1, uint32_t trimmed); template void testMinMax(AttributePtr &ptr1, AttributePtr &ptr2); void testMinMax(); void testStringFold(); void testDupValuesInIntArray(); void testDupValuesInStringArray(); public: int Main() override; }; /* Fills every doc of an integer attribute. Docs 0 and 9 are only cleared. Docs 7, 20 and 25 get -43 when single valued; when multi valued they get three appends: -42 with 27, -43 with 14 (12 for doc 25) and -42 with -3. All remaining docs get -42 (appended with 3 when multi valued). The attribute is committed after each populated doc and once more at the end. */ template <> void PostingListAttributeTest::populate(IntegerAttribute &v) { for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); if (i == 0) continue; if (i == 9) continue; if (i == 7) { if (v.hasMultiValue()) { v.append(i, -42, 27); v.append(i, -43, 14); v.append(i, -42, -3); } else { EXPECT_TRUE( v.update(i, -43) ); } v.commit(); continue; } if (i == 20) { if (v.hasMultiValue()) { v.append(i, -42, 27); v.append(i, -43, 14); v.append(i, -42, -3); } else { EXPECT_TRUE( v.update(i, -43) ); } v.commit(); continue; } if (i == 25) { if (v.hasMultiValue()) { v.append(i, -42, 27); v.append(i, -43, 12); v.append(i, -42, -3); } else { EXPECT_TRUE( v.update(i, -43) ); } v.commit(); continue; } if (v.hasMultiValue()) { v.append(i, -42, 3); } else { v.update(i, -42); } v.commit(); } v.commit(); } /* String counterpart of the integer populate: same doc layout with foo in place of -42 and bar in place of -43. There is no extra commit after the loop in this variant. */ template <> void PostingListAttributeTest::populate(StringAttribute &v) { for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); if (i == 0) continue; if (i == 9) continue; if (i == 7) { if (v.hasMultiValue()) { v.append(i, "foo", 27); v.append(i, "bar", 14); v.append(i, "foo", -3); } else { EXPECT_TRUE( v.update(i, "bar") ); } v.commit(); continue; } if (i == 20) { if (v.hasMultiValue()) { v.append(i, "foo", 27); v.append(i, "bar", 14); v.append(i, "foo", -3); } else { EXPECT_TRUE( v.update(i, "bar") ); } v.commit(); continue; } if (i == 25) { if (v.hasMultiValue()) { v.append(i, "foo", 27); v.append(i, "bar", 12); v.append(i, "foo", -3); } else { EXPECT_TRUE( v.update(i, "bar") ); } v.commit(); continue; } if (v.hasMultiValue()) { v.append(i, "foo", 3); } else { v.update(i, "foo"); } 
v.commit(); } } /* Downcasts the attribute held by v to VectorType and asserts that the cast produced a non-null pointer. */ template VectorType & PostingListAttributeTest::as(AttributePtr &v) { VectorType *res = dynamic_cast(v.get()); assert(res != NULL); return *res; } IntegerAttribute & PostingListAttributeTest::asInt(AttributePtr &v) { return as(v); } StringAttribute & PostingListAttributeTest::asString(AttributePtr &v) { return as(v); } /* Serializes a single term query into buffer: one item type byte (prefix term or plain term), then the compressed length of index followed by its bytes, then the compressed length of term followed by its bytes. The buffer is first sized for the worst case and finally shrunk to the number of bytes actually written. */ void PostingListAttributeTest::buildTermQuery(std::vector &buffer, const vespalib::string &index, const vespalib::string &term, bool prefix) { uint32_t indexLen = index.size(); uint32_t termLen = term.size(); uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; uint32_t p = 0; buffer.resize(queryPacketSize); buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); memcpy(&buffer[p], index.c_str(), indexLen); p += indexLen; p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); memcpy(&buffer[p], term.c_str(), termLen); p += termLen; buffer.resize(p); } /* Formats term through an asciistream, builds a term query against the name of vec and returns the search context that vec creates for that query with the given params. */ template SearchContextPtr PostingListAttributeTest::getSearch(const V &vec, const T &term, bool prefix, const attribute::SearchContextParams & params) { std::vector query; vespalib::asciistream ss; ss << term; buildTermQuery(query, vec.getName(), ss.str(), prefix); return (static_cast(vec)).getSearch(vespalib::stringref(&query[0], query.size()), params); } /* The one argument getSearch and getSearch2 specializations below search for the two values written by populate: -42 and foo for getSearch, -43 and bar for getSearch2. */ template <> SearchContextPtr PostingListAttributeTest::getSearch(const IntegerAttribute &v) { return getSearch(v, "[-42;-42]", false); } template <> SearchContextPtr PostingListAttributeTest::getSearch(const StringAttribute &v) { return getSearch(v, "foo", false); } template <> SearchContextPtr PostingListAttributeTest::getSearch2(const IntegerAttribute &v) { return getSearch(v, "[-43;-43]", false); } template <> SearchContextPtr PostingListAttributeTest::getSearch2(const StringAttribute &v) { return getSearch(v, "bar", false); } /* Searches sa through the one argument getSearch (term foo) and expects the hit list, without weights, to render as exp. */ bool PostingListAttributeTest::assertSearch(const std::string &exp, StringAttribute &sa) { 
TermFieldMatchData md; SearchContextPtr sc = getSearch(sa); sc->fetchPostings(true); SearchBasePtr sb = sc->createIterator(&md, true); if (!EXPECT_TRUE(assertIterator(exp, *sb))) return false; return true; } /* Searches sa for key and expects the hits together with their element weights to render as exp. */ bool PostingListAttributeTest::assertSearch(const std::string &exp, StringAttribute &sa, const std::string &key) { TermFieldMatchData md; SearchContextPtr sc = getSearch(sa, key, false); sc->fetchPostings(true); SearchBasePtr sb = sc->createIterator(&md, true); if (!EXPECT_TRUE(assertIterator(exp, *sb, &md))) return false; return true; } /* Integer counterpart: searches ia for key and expects hits with element weights to render as exp. */ bool PostingListAttributeTest::assertSearch(const std::string &exp, IntegerAttribute &ia, int32_t key) { TermFieldMatchData md; SearchContextPtr sc = getSearch(ia, key, false); sc->fetchPostings(true); SearchBasePtr sb = sc->createIterator(&md, true); if (!EXPECT_TRUE(assertIterator(exp, *sb, &md))) return false; return true; } /* Adds numDocs documents to the attribute, asserting that doc ids are handed out sequentially from 0 and that the doc count grows by one per added doc. */ void PostingListAttributeTest::addDocs(const AttributePtr & ptr, uint32_t numDocs) { for (uint32_t i = 0; i < numDocs; ++i) { uint32_t doc; ASSERT_TRUE(ptr->addDoc(doc)); ASSERT_TRUE(doc == i); ASSERT_TRUE(ptr->getNumDocs() == i + 1); } ASSERT_TRUE(ptr->getNumDocs() == numDocs); } /* Maps value index i to the doc id range starting at i * part and ending before (i + 1) * part. */ class RangeAlpha { private: uint32_t _part; public: RangeAlpha(uint32_t part) : _part(part) { } uint32_t getBegin(uint32_t i) const { return i * _part; } uint32_t getEnd(uint32_t i) const { return (i + 1) * _part; } }; /* Reverse mapping: value index i covers the range starting at (numValues - 1 - i) * part and ending before (numValues - i) * part. */ class RangeBeta { private: uint32_t _part; uint32_t _numValues; public: RangeBeta(uint32_t part, uint32_t numValues) : _part(part), _numValues(numValues) { } uint32_t getBegin(uint32_t i) const { return (_numValues - 1 - i) * _part; } uint32_t getEnd(uint32_t i) const { return (_numValues - i) * _part; } }; /* For each value: asserts that it is found in the posting dictionary, expects its posting list to hold exactly the consecutive doc ids from the begin to just before the end of the range given for that value index, and then repeats the check through checkSearch without and with bit vectors. */ template void PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::vector & values, const RangeGenerator & range) { const typename VectorType::EnumStore & enumStore = vec.getEnumStore(); const typename VectorType::Dictionary & dict = enumStore.getPostingDictionary(); const typename 
VectorType::PostingList & postingList = vec.getPostingList(); for (size_t i = 0; i < values.size(); ++i) { const uint32_t docBegin = range.getBegin(i); const uint32_t docEnd = range.getEnd(i); typename VectorType::DictionaryIterator itr = dict.find(typename VectorType::EnumIndex(), typename VectorType::ComparatorType(enumStore, values[i])); ASSERT_TRUE(itr.valid()); typename VectorType::PostingList::Iterator postings; postings = postingList.begin(itr.getData()); uint32_t doc = docBegin; uint32_t numHits(0); for (; postings.valid(); ++postings) { EXPECT_EQUAL(doc++, postings.getKey()); numHits++; } EXPECT_EQUAL(doc, docEnd); checkSearch(false, vec, values[i], numHits, docBegin, docEnd); checkSearch(true, vec, values[i], numHits, docBegin, docEnd); } } /* Runs a term search for term with the requested bit vector setting. Expects approximateHits to equal numHits, then walks the iterator from docBegin and expects it to visit every doc up to docEnd - 1. When docBegin is 0 the iterator expectations start at doc 1 and expect one hit less. */ template void PostingListAttributeTest::checkSearch(bool useBitVector, const AttributeVector & vec, const BufferType & term, uint32_t numHits, uint32_t docBegin, uint32_t docEnd) { SearchContextPtr sc = getSearch(vec, term, false, attribute::SearchContextParams().useBitVector(useBitVector)); EXPECT_FALSE( ! sc ); sc->fetchPostings(true); size_t approx = sc->approximateHits(); EXPECT_EQUAL(numHits, approx); if (docBegin == 0) { // Approximation does not know about the special 0 // But the iterator does.... numHits--; docBegin++; } TermFieldMatchData tfmd; auto it = sc->createIterator(&tfmd, true); it->initFullRange(); EXPECT_TRUE(it->seekFirst(docBegin)); EXPECT_EQUAL(docBegin, it->getDocId()); size_t hits(0); uint32_t lastDocId = it->getDocId(); while (! 
it->isAtEnd()) { lastDocId = it->getDocId(); it->seek(lastDocId+1); hits++; } EXPECT_EQUAL(numHits, hits); EXPECT_GREATER_EQUAL(approx, hits); EXPECT_EQUAL(docEnd, lastDocId+1); } /* Adds numDocs docs to ptr1 and assigns each value to a contiguous run of numDocs / values.size() docs, checks the posting lists, saves ptr1 under the base file name of ptr2, loads ptr2 and checks it too; then repeats the whole sequence with the values assigned in reverse order. NOTE(review): the results of save and load are not checked here, unlike in testReload - confirm whether that is intended. */ template void PostingListAttributeTest::testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, uint32_t numDocs, const std::vector & values) { LOG(info, "testPostingList: vector '%s'", ptr1->getName().c_str()); VectorType & vec1 = static_cast(*ptr1.get()); VectorType & vec2 = static_cast(*ptr2.get()); addDocs(ptr1, numDocs); uint32_t part = numDocs / values.size(); // insert values for (uint32_t doc = 0; doc < numDocs; ++doc) { uint32_t idx = doc / part; EXPECT_TRUE(vec1.update(doc, values[idx])); } vec1.commit(); #if 0 std::cout << "***** printBuffer 0 ***** " << std::endl; vec1.getEnumStore().printBuffer(std::cout, 0); std::cout << "***** printBuffer 1 ***** " << std::endl; vec1.getEnumStore().printBuffer(std::cout, 1); std::cout << "***** printCurrentContent ***** " << std::endl; vec1.getEnumStore().printCurrentContent(std::cout); std::cout << "***** printPostingListContent *****" << std::endl; vec1.printPostingListContent(std::cout); #endif // check posting list for correct content checkPostingList(vec1, values, RangeAlpha(part)); // load and save vector ptr1->save(ptr2->getBaseFileName()); ptr2->load(); #if 0 std::cout << "***** vec2.printPostingListContent *****" << std::endl; vec2.printPostingListContent(std::cout); #endif checkPostingList(vec2, values, RangeAlpha(part)); // insert values in another order for (uint32_t doc = 0; doc < numDocs; ++doc) { uint32_t idx = values.size() - 1 - (doc / part); EXPECT_TRUE(vec1.update(doc, values[idx])); } vec1.commit(); // check posting list again for correct content checkPostingList(vec1, values, RangeBeta(part, values.size())); // load and save vector ptr1->save(ptr2->getBaseFileName()); ptr2->load(); checkPostingList(vec2, values, RangeBeta(part, values.size())); } void PostingListAttributeTest::testPostingList() { 
testPostingList(false); testPostingList(true); } void PostingListAttributeTest::testPostingList(bool enableBitVector) { testPostingList(enableBitVector, 1000, 50); testPostingList(enableBitVector, 2000, 10); // This should force bitvector } /* Runs the templated posting list test for int32, float and string attributes, each as single value, array and weighted set, with fast search enabled and bit vectors enabled as requested. Values are 0 to numUniqueValues - 1 for the numeric types and string0, string1 and so on for strings. */ void PostingListAttributeTest::testPostingList(bool enableBitVector, uint32_t numDocs, uint32_t numUniqueValues) { { // IntegerAttribute std::vector values; for (uint32_t i = 0; i < numUniqueValues; ++i) { values.push_back(i); } { Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); testPostingList(ptr1, ptr2, numDocs, values); } { Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("aint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("aint32_2", cfg); testPostingList(ptr1, ptr2, numDocs, values); } { Config cfg(Config(BasicType::INT32, CollectionType::WSET)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("wsint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("wsint32_2", cfg); testPostingList(ptr1, ptr2, numDocs, values); } } { // FloatingPointAttribute std::vector values; for (uint32_t i = 0; i < numUniqueValues; ++i) { values.push_back(i); } { Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); testPostingList(ptr1, ptr2, numDocs, values); } { Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY)); cfg.setFastSearch(true); 
cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("afloat_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("afloat_2", cfg); testPostingList(ptr1, ptr2, numDocs, values); } { Config cfg(Config(BasicType::FLOAT, CollectionType::WSET)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("wsfloat_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("wsfloat_2", cfg); testPostingList(ptr1, ptr2, numDocs, values); } } { // StringAttribute std::vector values; std::vector charValues; values.reserve(numUniqueValues); charValues.reserve(numUniqueValues); for (uint32_t i = 0; i < numUniqueValues; ++i) { vespalib::asciistream ss; ss << "string" << i; values.push_back(ss.str()); charValues.push_back(values.back().c_str()); } { Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); testPostingList(ptr1, ptr2, numDocs, charValues); } { Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("astr_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("astr_2", cfg); testPostingList(ptr1, ptr2, numDocs, charValues); } { Config cfg(Config(BasicType::STRING, CollectionType::WSET)); cfg.setFastSearch(true); cfg.setEnableBitVectors(enableBitVector); AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); testPostingList(ptr1, ptr2, numDocs, charValues); } } } /* Asserts that value is present in the posting dictionary and expects its posting list to contain exactly the doc ids in expected, in set order. NOTE(review): the loop dereferences the iterator into expected without checking it against the end, so a posting list longer than expected would read past the end of the set - consider guarding. */ template void PostingListAttributeTest::checkPostingList(AttributeType & vec, ValueType value, DocSet expected) { const typename AttributeType::EnumStore & 
enumStore = vec.getEnumStore(); const typename AttributeType::Dictionary & dict = enumStore.getPostingDictionary(); const typename AttributeType::PostingList & postingList = vec.getPostingList(); typename AttributeType::DictionaryIterator itr = dict.find(typename AttributeType::EnumIndex(), typename AttributeType::ComparatorType(vec.getEnumStore(), value)); ASSERT_TRUE(itr.valid()); typename AttributeType::PostingList::Iterator postings; postings = postingList.begin(itr.getData()); DocSet::iterator docBegin = expected.begin(); DocSet::iterator docEnd = expected.end(); for (; postings.valid(); ++postings) { EXPECT_EQUAL(*docBegin++, postings.getKey()); } EXPECT_TRUE(docBegin == docEnd); } /* Expects value to be absent from the posting dictionary of vec. */ template void PostingListAttributeTest::checkNonExistantPostingList(AttributeType & vec, ValueType value) { const typename AttributeType::Dictionary & dict = vec.getEnumStore().getPostingDictionary(); typename AttributeType::DictionaryIterator itr = dict.find(typename AttributeType::EnumIndex(), typename AttributeType::ComparatorType(vec.getEnumStore(), value)); EXPECT_TRUE(!itr.valid()); } /* Sets four docs to 100, then applies add, sub, mul and div by 10 to docs 0, 1, 2 and 3 and checks the posting lists for 110, 90, 1000 and 10. Afterwards sets all docs to 2000 and applies each operation twice inside a single commit, expecting posting lists for 2020, 1980, 200000 and 20 only; the earlier and intermediate values must no longer be in the dictionary. */ template void PostingListAttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) { LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); typedef document::ArithmeticValueUpdate Arith; AttributeType & vec = static_cast(*ptr.get()); addDocs(ptr, 4); uint32_t allDocs[] = {0, 1, 2, 3}; checkNonExistantPostingList(vec, 0); for (uint32_t doc = 0; doc < 4; ++doc) { ASSERT_TRUE(vec.update(doc, 100)); } ptr->commit(); checkNonExistantPostingList(vec, 0); checkPostingList(vec, 100, DocSet(allDocs, allDocs + 4)); EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); ptr->commit(); { uint32_t docs[] = {0}; checkPostingList(vec, 110, DocSet(docs, docs + 1)); } { uint32_t docs[] = {1}; checkPostingList(vec, 90, DocSet(docs, docs + 
1)); } { uint32_t docs[] = {2}; checkPostingList(vec, 1000, DocSet(docs, docs + 1)); } { uint32_t docs[] = {3}; checkPostingList(vec, 10, DocSet(docs, docs + 1)); } // several inside a single commit for (uint32_t doc = 0; doc < 4; ++doc) { ASSERT_TRUE(vec.update(doc, 2000)); } EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); ptr->commit(); vespalib::asciistream ss; vec.printPostingListContent(ss); std::cout << ss.str(); { uint32_t docs[] = {0}; checkPostingList(vec, 2020, DocSet(docs, docs + 1)); } { uint32_t docs[] = {1}; checkPostingList(vec, 1980, DocSet(docs, docs + 1)); } { uint32_t docs[] = {2}; checkPostingList(vec, 200000, DocSet(docs, docs + 1)); } { uint32_t docs[] = {3}; checkPostingList(vec, 20, DocSet(docs, docs + 1)); } checkNonExistantPostingList(vec, 100); checkNonExistantPostingList(vec, 110); checkNonExistantPostingList(vec, 90); checkNonExistantPostingList(vec, 1000); checkNonExistantPostingList(vec, 10); checkNonExistantPostingList(vec, 2000); checkNonExistantPostingList(vec, 2010); checkNonExistantPostingList(vec, 1990); checkNonExistantPostingList(vec, 20000); checkNonExistantPostingList(vec, 200); } void PostingListAttributeTest::testArithmeticValueUpdate() { { // IntegerAttribute Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); cfg.setFastSearch(true); AttributePtr ptr = AttributeFactory::createAttribute("sint32", cfg); testArithmeticValueUpdate(ptr); } { // FloatingPointAttribute Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); cfg.setFastSearch(true); AttributePtr ptr = AttributeFactory::createAttribute("sfloat", cfg); testArithmeticValueUpdate(ptr); } } /* Sets five docs of ptr1 to value, saves ptr1 under the base file name of ptr2, loads ptr2 and expects it to hold five docs that each read back exactly one value equal to value. */ template void 
PostingListAttributeTest::testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value) { LOG(info, "testReload: vector '%s'", ptr1->getName().c_str()); VectorType & vec1 = static_cast(*ptr1.get()); addDocs(ptr1, 5); for (uint32_t doc = 0; doc < 5; ++doc) { EXPECT_TRUE(vec1.update(doc, value)); } ptr1->commit(); ASSERT_TRUE(ptr1->save(ptr2->getBaseFileName())); ASSERT_TRUE(ptr2->load()); EXPECT_TRUE(ptr2->getNumDocs() == 5); ValueType buffer[1]; for (uint32_t doc = 0; doc < 5; ++doc) { EXPECT_TRUE(ptr2->get(doc, buffer, 1) == 1); EXPECT_EQUAL(buffer[0], value); } } /* Runs the reload test for single value int32, float and string attributes, once with an ordinary value (100 or unique) and once with 0 or the empty string. */ void PostingListAttributeTest::testReload() { { // IntegerAttribute Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); cfg.setFastSearch(true); { AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); testReload(ptr1, ptr2, 100); } { AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); testReload(ptr1, ptr2, 0); } } { // FloatingPointAttribute Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); cfg.setFastSearch(true); { AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); testReload(ptr1, ptr2, 100); } { AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); testReload(ptr1, ptr2, 0); } } { // StringAttribute Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); cfg.setFastSearch(true); { AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); testReload(ptr1, ptr2, "unique"); } { AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); AttributePtr ptr2 = 
AttributeFactory::createAttribute("sstr_2", cfg); testReload(ptr1, ptr2, ""); } } } /* Checks the min and max weights reported by the posting info of the iterators for the two populated terms (getSearch, then getSearch2). The caller passes trimmed as 0 before any doc is cleared, 1 after docs 20 and 25 were cleared and 2 after doc 7 was cleared as well; with 2 the second term is expected to have no posting info and no hits. */ template void PostingListAttributeTest::testMinMax(AttributePtr &ptr1, uint32_t trimmed) { TermFieldMatchData md; SearchContextPtr sc = getSearch(as(ptr1)); sc->fetchPostings(true); SearchBasePtr sb = sc->createIterator(&md, true); sb->initFullRange(); const PostingInfo *pi = sb->getPostingInfo(); ASSERT_TRUE(pi != NULL); const MinMaxPostingInfo *mmpi = dynamic_cast(pi); ASSERT_TRUE(mmpi != NULL); if (ptr1->hasMultiValue()) { if (trimmed == 2u) { EXPECT_EQUAL(3, mmpi->getMinWeight()); } else { EXPECT_EQUAL(-3, mmpi->getMinWeight()); } EXPECT_EQUAL(3, mmpi->getMaxWeight()); } else { EXPECT_EQUAL(1, mmpi->getMinWeight()); EXPECT_EQUAL(1, mmpi->getMaxWeight()); } sb->seek(1u); EXPECT_EQUAL(1u, sb->getDocId()); sc = getSearch2(as(ptr1)); sc->fetchPostings(true); sb = sc->createIterator(&md, true); sb->initFullRange(); pi = sb->getPostingInfo(); if (trimmed == 2) { ASSERT_TRUE(pi == NULL); } else { ASSERT_TRUE(pi != NULL); mmpi = dynamic_cast(pi); ASSERT_TRUE(mmpi != NULL); if (ptr1->hasMultiValue()) { if (trimmed == 0) { EXPECT_EQUAL(12, mmpi->getMinWeight()); } else { EXPECT_EQUAL(14, mmpi->getMinWeight()); } EXPECT_EQUAL(14, mmpi->getMaxWeight()); } else { EXPECT_EQUAL(1, mmpi->getMinWeight()); EXPECT_EQUAL(1, mmpi->getMaxWeight()); } } sb->seek(1u); if (trimmed == 2u) { EXPECT_TRUE(sb->isAtEnd()); } else { EXPECT_EQUAL(7u, sb->getDocId()); } } /* Adds 100 docs to ptr1, populates and verifies it, saves it and loads it into ptr2, verifies ptr2, then clears docs 20 and 25 and finally doc 7 on ptr2, re-verifying after each step. NOTE(review): the first check on ptr2 is not wrapped in TEST_DO unlike the other calls - confirm whether that is intended. */ template void PostingListAttributeTest::testMinMax(AttributePtr &ptr1, AttributePtr &ptr2) { uint32_t numDocs = 100; addDocs(ptr1, numDocs); populate(as(ptr1)); TEST_DO(testMinMax(ptr1, 0u)); ASSERT_TRUE(ptr1->save(ptr2->getBaseFileName())); ASSERT_TRUE(ptr2->load()); testMinMax(ptr2, 0u); ptr2->clearDoc(20); ptr2->clearDoc(25); ptr2->commit(); TEST_DO(testMinMax(ptr2, 1u)); ptr2->clearDoc(7); ptr2->commit(); TEST_DO(testMinMax(ptr2, 2u)); } /* Runs the min and max weight test for int32 and string attributes, each as single value and weighted set, with fast search enabled. */ void PostingListAttributeTest::testMinMax() { { Config cfg(Config(BasicType::INT32, 
CollectionType::SINGLE)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); testMinMax(ptr1, ptr2); } { Config cfg(Config(BasicType::INT32, CollectionType::WSET)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("wsint32_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("wsint32_2", cfg); testMinMax(ptr1, ptr2); } { Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); testMinMax(ptr1, ptr2); } { Config cfg(Config(BasicType::STRING, CollectionType::WSET)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); testMinMax(ptr1, ptr2); } } /* Verifies on a single value string attribute that the search for foo also hits docs holding differently cased spellings (FOo, FOO), and that hits disappear one by one as those docs are reset to the empty string. */ void PostingListAttributeTest::testStringFold() { Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); addDocs(ptr1, 6); StringAttribute &sa(asString(ptr1)); sa.update(1, "a"); sa.commit(); sa.update(3, "FOo"); sa.commit(); sa.update(4, "foo"); sa.commit(); sa.update(5, "z"); sa.commit(); EXPECT_TRUE(assertSearch("3,4", sa)); sa.update(2, "FOO"); sa.commit(); EXPECT_TRUE(assertSearch("2,3,4", sa)); sa.update(4, ""); sa.commit(); EXPECT_TRUE(assertSearch("2,3", sa)); sa.update(2, ""); sa.commit(); EXPECT_TRUE(assertSearch("3", sa)); sa.update(3, ""); sa.commit(); EXPECT_TRUE(assertSearch("", sa)); } /* Verifies on an int32 array attribute that a value appended twice to the same doc is reported with element weight 2 and a value appended once with weight 1, also after the docs are cleared and refilled the other way round. */ void PostingListAttributeTest::testDupValuesInIntArray() { Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("aint32_3", cfg); addDocs(ptr1, 6); IntegerAttribute &ia(asInt(ptr1)); ia.append(1, 1, 1); 
ia.append(1, 1, 1); ia.append(2, 1, 1); ia.commit(); EXPECT_TRUE(assertSearch("1[w=2],2[w=1]", ia, 1)); ia.clearDoc(1); ia.append(1, 1, 1); ia.clearDoc(2); ia.append(2, 1, 1); ia.append(2, 1, 1); ia.commit(); EXPECT_TRUE(assertSearch("1[w=1],2[w=2]", ia, 1)); } /* String array variant of the duplicate value test. In addition to exact duplicates of foo it appends bar and BAR to the same doc and expects the search for bar to report weight 2 for that doc. */ void PostingListAttributeTest::testDupValuesInStringArray() { Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); cfg.setFastSearch(true); AttributePtr ptr1 = AttributeFactory::createAttribute("astr_3", cfg); addDocs(ptr1, 6); StringAttribute &sa(asString(ptr1)); sa.append(1, "foo", 1); sa.append(1, "foo", 1); sa.append(2, "foo", 1); sa.append(3, "bar", 1); sa.append(3, "BAR", 1); sa.append(4, "bar", 1); sa.commit(); EXPECT_TRUE(assertSearch("1[w=2],2[w=1]", sa, "foo")); EXPECT_TRUE(assertSearch("3[w=2],4[w=1]", sa, "bar")); sa.clearDoc(1); sa.append(1, "foo", 1); sa.clearDoc(2); sa.append(2, "foo", 1); sa.append(2, "foo", 1); sa.clearDoc(3); sa.append(3, "bar", 1); sa.clearDoc(4); sa.append(4, "bar", 1); sa.append(4, "BAR", 1); sa.commit(); EXPECT_TRUE(assertSearch("1[w=1],2[w=2]", sa, "foo")); EXPECT_TRUE(assertSearch("3[w=1],4[w=2]", sa, "bar")); } /* Test entry point: runs every test group in sequence between TEST_INIT and TEST_DONE. */ int PostingListAttributeTest::Main() { TEST_INIT("postinglistattribute_test"); testPostingList(); testArithmeticValueUpdate(); testReload(); testMinMax(); testStringFold(); testDupValuesInIntArray(); testDupValuesInStringArray(); TEST_DONE(); } } TEST_APPHOOK(search::PostingListAttributeTest);