// NOTE(review): this file reached review collapsed onto a few very long physical
// lines, and the text that would sit between angle brackets (the #include targets,
// template parameter lists, explicit template arguments, cast target types)
// appears to be missing throughout. The code below is left byte-identical because
// restoring those pieces would mean guessing; recover them from the original file.
// While the text stays collapsed, every `//` comment (starting with the copyright
// notice) also swallows the remainder of its physical line.
//
// Overview of this part, in order of appearance:
//  * using-declarations and the SearchContextPtr / SearchBasePtr typedefs.
//  * FastOS_UNIX_File::Sync(): defined here to just return true.
//  * MemAttrFileWriter: IAttributeFileWriter keeping written data in `_buf`.
//      - allocBuf(size) returns a new buffer built from (size, 4096).
//      - writeBuf(buf_in) adopts the first buffer by move; later buffers are
//        appended to `_buf` with writeBytes().
//      - allocBufferWriter() makes sure `_buf` exists (allocBuf(1)) and returns a
//        writer constructed from *this.
//  * MemAttr: search::IAttributeSaveTarget with four MemAttrFileWriter members
//    (dat, idx, weight, udat). setup() returns true, close() does nothing.
//      - bufEqual(): two null buffers are equal; otherwise both must be non-null
//        with the same getDataLen() and memcmp-identical data. Each step is
//        wrapped in EXPECT_TRUE.
//      - operator==: bufEqual() on the dat, idx, weight and udat buffers, in order.
//  * EnumeratedSaveTest: the fixture class; only declarations appear in the class.
//  * EnumeratedSaveTest::addDocs(v, sz): no-op for sz == 0; otherwise calls
//    addDoc() sz times, expects the last doc id + 1 == sz and getNumDocs() == sz,
//    then commit(true).
//  * EnumeratedSaveTest::populate(...) for IntegerAttribute, FloatingPointAttribute
//    and StringAttribute. For every doc i: clearDoc(i); doc 9 is left cleared;
//    doc 7 gets fixed content (-42/-43/-42, -42.0/-43.0/-42.0 or "foo"/"bar"/"foO"
//    with weights 27/14/-3 when multi-value, a single -42 / -42.0 / "foo"
//    otherwise); any other doc gets i + 1 appended values when multi-value, or one
//    updated value otherwise. The integer variant masks values with 0x7f (INT8)
//    or 0x7fff (INT16).
//    NOTE(review): the integer and floating point variants take the weighted-set
//    weight from rand() and the single-value case from the global lrand48(),
//    not from the seeded `rnd`; the string variant uses `rnd` throughout.
//    Confirm whether that is intended.
//  * equalsHelper(): operator== by default; the float and double specializations
//    treat two NaN values as equal and NaN vs. non-NaN as different.
// Copyright 2017 Yahoo Holdings. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using search::AttributeFactory; using search::AttributeMemoryFileBufferWriter; using search::BufferWriter; using search::FloatingPointAttribute; using search::IAttributeFileWriter; using search::IntegerAttribute; using search::ParseItem; using search::RandomGenerator; using search::StringAttribute; using search::attribute::BasicType; using search::attribute::CollectionType; using search::attribute::Config; using search::attribute::SearchContextParams; using search::fef::TermFieldMatchData; typedef std::unique_ptr SearchContextPtr; typedef std::unique_ptr SearchBasePtr; bool FastOS_UNIX_File::Sync() { // LOG(info, "Skip sync"); return true; } class MemAttrFileWriter : public IAttributeFileWriter { private: Buffer _buf; public: MemAttrFileWriter() : _buf() { } virtual Buffer allocBuf(size_t size) override { return std::make_unique(size, 4096); } virtual void writeBuf(Buffer buf_in) override { if (!_buf) { _buf = std::move(buf_in); } else { _buf->writeBytes(buf_in->getData(), buf_in->getDataLen()); } } const Buffer &buf() const { return _buf; } std::unique_ptr allocBufferWriter() override; }; std::unique_ptr MemAttrFileWriter::allocBufferWriter() { if (!_buf) { _buf = allocBuf(1); } return std::make_unique(*this); } class MemAttr : public search::IAttributeSaveTarget { private: MemAttrFileWriter _datWriter; MemAttrFileWriter _idxWriter; MemAttrFileWriter _weightWriter; MemAttrFileWriter _udatWriter; public: typedef std::shared_ptr SP; MemAttr(); ~MemAttr(); // Implements IAttributeSaveTarget virtual bool setup() override { return true; } virtual void close() override {} virtual IAttributeFileWriter &datWriter() override { return 
_datWriter; } virtual IAttributeFileWriter &idxWriter() override { return _idxWriter; } virtual IAttributeFileWriter &weightWriter() override { return _weightWriter; } virtual IAttributeFileWriter &udatWriter() override { return _udatWriter; } bool bufEqual(const Buffer &lhs, const Buffer &rhs) const; bool operator==(const MemAttr &rhs) const; }; MemAttr::MemAttr() : _datWriter(), _idxWriter(), _weightWriter(), _udatWriter() { } MemAttr::~MemAttr() {} class EnumeratedSaveTest { private: typedef AttributeVector::SP AttributePtr; template VectorType & as(AttributePtr &v); IntegerAttribute & asInt(AttributePtr &v); StringAttribute & asString(AttributePtr &v); FloatingPointAttribute & asFloat(AttributePtr &v); void addDocs(const AttributePtr &v, size_t sz); template void populate(VectorType &v, unsigned seed, BasicType bt); template void compare(VectorType &a, VectorType &b); void buildTermQuery(std::vector & buffer, const vespalib::string & index, const vespalib::string & term, bool prefix); template SearchContextPtr getSearch(const V & vec, const T & term, bool prefix); template SearchContextPtr getSearch(const V & vec); MemAttr::SP saveMem(AttributeVector &v); void checkMem(AttributeVector &v, const MemAttr &e, bool enumerated); MemAttr::SP saveBoth(AttributePtr v); AttributePtr make(Config cfg, const vespalib::string &pref, bool fastSearch = false); void load(AttributePtr v, const vespalib::string &name); template void checkLoad(AttributePtr v, const vespalib::string &name, AttributePtr ev); template void testReload(AttributePtr v0, AttributePtr v1, AttributePtr v2, MemAttr::SP mv0, MemAttr::SP mv1, MemAttr::SP mv2, MemAttr::SP emv0, MemAttr::SP emv1, MemAttr::SP emv2, Config cfg, const vespalib::string &pref, bool fastSearch); public: template void test(BasicType bt, CollectionType ct, const vespalib::string &pref); EnumeratedSaveTest() { } }; bool MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const { if (!EXPECT_TRUE((lhs.get() != NULL) == (rhs.get() != 
NULL))) return false; if (lhs.get() == NULL) return true; if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) return false; if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(), lhs->getDataLen()) == 0)) return false; return true; } bool MemAttr::operator==(const MemAttr &rhs) const { if (!EXPECT_TRUE(bufEqual(_datWriter.buf(), rhs._datWriter.buf()))) return false; if (!EXPECT_TRUE(bufEqual(_idxWriter.buf(), rhs._idxWriter.buf()))) return false; if (!EXPECT_TRUE(bufEqual(_weightWriter.buf(), rhs._weightWriter.buf()))) return false; if (!EXPECT_TRUE(bufEqual(_udatWriter.buf(), rhs._udatWriter.buf()))) return false; return true; } void EnumeratedSaveTest::addDocs(const AttributePtr &v, size_t sz) { if (sz) { AttributeVector::DocId docId; for(size_t i(0); i< sz; i++) { EXPECT_TRUE( v->addDoc(docId) ); } EXPECT_TRUE( docId+1 == sz ); EXPECT_TRUE( v->getNumDocs() == sz ); v->commit(true); } } template <> void EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed, BasicType bt) { search::Rand48 rnd; IntegerAttribute::largeint_t mask(std::numeric_limits ::max()); switch (bt.type()) { case BasicType::INT8: mask = 0x7f; break; case BasicType::INT16: mask = 0x7fff; break; default: ; } rnd.srand48(seed); int weight = 1; for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); if (i == 9) continue; if (i == 7) { if (v.hasMultiValue()) { v.append(i, -42, 27); v.append(i, -43, 14); v.append(i, -42, -3); } else { EXPECT_TRUE( v.update(i, -42) ); } v.commit(); continue; } if (v.hasMultiValue()) { if (v.hasWeightedSetType()) { weight = (rand() % 256) - 128; } for (size_t j(0); j <= i; j++) { EXPECT_TRUE( v.append(i, rnd.lrand48() & mask, weight) ); } v.commit(); if (!v.hasWeightedSetType()) { EXPECT_EQUAL(static_cast(v.getValueCount(i)), i + 1); ASSERT_TRUE(static_cast(v.getValueCount(i)) == i + 1); } } else { EXPECT_TRUE( v.update(i, lrand48() & mask) ); } } v.commit(); } template <> void EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned 
seed, BasicType bt) { (void) bt; search::Rand48 rnd; rnd.srand48(seed); int weight = 1; for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); if (i == 9) continue; if (i == 7) { if (v.hasMultiValue()) { v.append(i, -42.0, 27); v.append(i, -43.0, 14); v.append(i, -42.0, -3); } else { EXPECT_TRUE( v.update(i, -42.0) ); } v.commit(); continue; } if (v.hasMultiValue()) { if (v.hasWeightedSetType()) { weight = (rand() % 256) - 128; } for (size_t j(0); j <= i; j++) { EXPECT_TRUE( v.append(i, rnd.lrand48(), weight) ); } v.commit(); if (!v.hasWeightedSetType()) { EXPECT_EQUAL(static_cast(v.getValueCount(i)), i + 1); ASSERT_TRUE(static_cast(v.getValueCount(i)) == i + 1); } } else { EXPECT_TRUE( v.update(i, lrand48()) ); } } v.commit(); } template <> void EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed, BasicType bt) { (void) bt; RandomGenerator rnd(seed); int weight = 1; for(size_t i(0), m(v.getNumDocs()); i < m; i++) { v.clearDoc(i); if (i == 9) continue; if (i == 7) { if (v.hasMultiValue()) { v.append(i, "foo", 27); v.append(i, "bar", 14); v.append(i, "foO", -3); } else { EXPECT_TRUE( v.update(i, "foo") ); } v.commit(); continue; } if (v.hasMultiValue()) { if (v.hasWeightedSetType()) { weight = rnd.rand(0, 256) - 128; } for (size_t j(0); j <= i; j++) { EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) ); } v.commit(); if (!v.hasWeightedSetType()) { EXPECT_EQUAL(static_cast(v.getValueCount(i)), i + 1); } } else { EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) ); } } v.commit(); } namespace { template inline bool equalsHelper(const T &lhs, const T &rhs) { return lhs == rhs; } template <> inline bool equalsHelper(const float &lhs, const float &rhs) { if (std::isnan(lhs)) return std::isnan(rhs); if (std::isnan(rhs)) return false; return lhs == rhs; } template <> inline bool equalsHelper(const double &lhs, const double &rhs) { if (std::isnan(lhs)) return std::isnan(rhs); if (std::isnan(rhs)) return false; return lhs == rhs; } } 
// NOTE(review): this part of the file is collapsed onto a few very long physical
// lines and the text that would sit between angle brackets (template parameter
// lists, explicit template arguments, cast target types) appears to be missing.
// The code is left byte-identical; restore it from the original file rather than
// by guessing. While collapsed, the two `//` comments inside compare() and
// testReload() also swallow the rest of their physical lines.
//
// Overview, in order of appearance (members of EnumeratedSaveTest unless noted):
//  * compare(a, b): expects equal getNumDocs() and getCommittedDocIdLimit(), then
//    for every doc expects equal getValueCount() and, via equalsHelper(), equal
//    values element by element. The two scratch arrays are sized by
//    getMaxValueCount(), allocated with new[] and released with delete[].
//    NOTE(review): nothing releases the arrays if an ASSERT_TRUE leaves the
//    function early - confirm how the test kit's ASSERT macros exit.
//  * as(v): dynamic_cast of v.get(), guarded only by assert(); asInt(), asString()
//    and asFloat() forward to it.
//  * buildTermQuery(buffer, index, term, prefix): writes one item-type byte
//    (ITEM_PREFIXTERM when prefix is set, else ITEM_TERM), then the compressed
//    index length and the index bytes, then the compressed term length and the
//    term bytes. The buffer is first sized to 1 + 2 * 4 + indexLen + termLen and
//    finally resized to the number of bytes written.
//  * getSearch(vec, term, prefix): renders term through a vespalib::asciistream,
//    builds the query with buildTermQuery() using vec.getName() as index, and asks
//    the vector for a search context with a default-constructed
//    SearchContextParams. The one-argument specializations search for
//    "[-42;-42]" (integer), "[-42.0;-42.0]" (floating point) and "foo" (string),
//    always with prefix == false.
//  * saveMem(v): saves v into a new MemAttr under v.getBaseFileName().
//  * checkMem(v, e, enumerated): saves v into a local MemAttr with enumerated save
//    set to `enumerated`, sets it back to false, and asserts the result == e.
//  * saveBoth(v): v->save(); then makes a fast-search attribute v2 from v's config,
//    named after v's base file name, loads it, enables enumerated save and saves
//    it as "<basename>_e". The following `if` is always entered because of
//    `|| true`: it saves v2 to an AttributeMemorySaveTarget as "<basename>_ee" and
//    writes that to file. Returns saveMem(*v2).
//  * make(cfg, pref, fastSearch): sets fast-search on its config copy and creates
//    the attribute through AttributeFactory.
//  * load(v, name) / checkLoad(v, name, ev): set the base file name and expect
//    load() to succeed; checkLoad() additionally compare()s against ev.
//  * testReload(...): flagAttr is ARRAY + INT8 + fastSearch; supportsEnumerated is
//    (fastSearch or STRING) and not flagAttr. Reloads one attribute from the
//    "0"/"1"/"2" files in varying order, then from the "_e" files, checking
//    contents each time and running checkMem() against the plain snapshot
//    (enumerated off) and against the enumerated snapshot when supportsEnumerated,
//    else the plain one (enumerated on). Finally runs the type's getSearch():
//    seek(1) must land on doc 7, and the unpacked weight must be 1 (SINGLE or
//    flagAttr), 2 (ARRAY), 24 (STRING, remaining collection types) or -3
//    (everything else).
//  * test(bt, ct, pref): creates attributes "<pref>0/1/2" with 0, 10 and 30 docs,
//    populates them with seeds 0, 10 and 30, takes MemAttr snapshots via saveMem()
//    and saveBoth(), checks that the "_ee" files load into a fast-search
//    attribute, and runs testReload() with fastSearch false and then true.
//  * TEST_F cases: one per combination of int8/int16/int32/int64/float/double/
//    string and single/array/weighted set; TEST_MAIN runs them all.
template void EnumeratedSaveTest::compare(VectorType &a, VectorType &b) { EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs()); ASSERT_TRUE(a.getNumDocs() == b.getNumDocs()); // EXPECT_EQUAL(a.getMaxValueCount(), b.getMaxValueCount()); EXPECT_EQUAL(a.getCommittedDocIdLimit(), b.getCommittedDocIdLimit()); uint32_t asz(a.getMaxValueCount()); uint32_t bsz(b.getMaxValueCount()); BufferType *av = new BufferType[asz]; BufferType *bv = new BufferType[bsz]; for (size_t i(0), m(a.getNumDocs()); i < m; i++) { ASSERT_TRUE(asz >= static_cast(a.getValueCount(i))); ASSERT_TRUE(bsz >= static_cast(b.getValueCount(i))); EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i)); ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i)); EXPECT_EQUAL(static_cast(a).get(i, av, asz), static_cast(a.getValueCount(i))); EXPECT_EQUAL(static_cast(b).get(i, bv, bsz), static_cast(b.getValueCount(i))); for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); j < k; j++) { EXPECT_TRUE(equalsHelper(av[j], bv[j])); } } delete [] bv; delete [] av; } template VectorType & EnumeratedSaveTest::as(AttributePtr &v) { VectorType *res = dynamic_cast(v.get()); assert(res != NULL); return *res; } IntegerAttribute & EnumeratedSaveTest::asInt(AttributePtr &v) { return as(v); } StringAttribute & EnumeratedSaveTest::asString(AttributePtr &v) { return as(v); } FloatingPointAttribute & EnumeratedSaveTest::asFloat(AttributePtr &v) { return as(v); } void EnumeratedSaveTest::buildTermQuery(std::vector &buffer, const vespalib::string &index, const vespalib::string &term, bool prefix) { uint32_t indexLen = index.size(); uint32_t termLen = term.size(); uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; uint32_t p = 0; buffer.resize(queryPacketSize); buffer[p++] = prefix ? 
ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); memcpy(&buffer[p], index.c_str(), indexLen); p += indexLen; p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); memcpy(&buffer[p], term.c_str(), termLen); p += termLen; buffer.resize(p); } template SearchContextPtr EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix) { std::vector query; vespalib::asciistream ss; ss << term; buildTermQuery(query, vec.getName(), ss.str(), prefix); return (static_cast(vec)). getSearch(vespalib::stringref(&query[0], query.size()), SearchContextParams()); } template <> SearchContextPtr EnumeratedSaveTest::getSearch(const IntegerAttribute &v) { return getSearch(v, "[-42;-42]", false); } template <> SearchContextPtr EnumeratedSaveTest::getSearch(const FloatingPointAttribute &v) { return getSearch(v, "[-42.0;-42.0]", false); } template <> SearchContextPtr EnumeratedSaveTest::getSearch(const StringAttribute &v) { return getSearch (v, "foo", false); } MemAttr::SP EnumeratedSaveTest::saveMem(AttributeVector &v) { MemAttr::SP res(new MemAttr); EXPECT_TRUE(v.save(*res, v.getBaseFileName())); return res; } void EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e, bool enumerated) { MemAttr m; v.enableEnumeratedSave(enumerated); EXPECT_TRUE(v.save(m, v.getBaseFileName())); v.enableEnumeratedSave(false); ASSERT_TRUE(m == e); } MemAttr::SP EnumeratedSaveTest::saveBoth(AttributePtr v) { EXPECT_TRUE(v->save()); vespalib::string basename = v->getBaseFileName(); AttributePtr v2 = make(v->getConfig(), basename, true); EXPECT_TRUE(v2->load()); v2->enableEnumeratedSave(true); EXPECT_TRUE(v2->save(basename + "_e")); if ((v->getConfig().basicType() == BasicType::INT32 && v->getConfig().collectionType() == CollectionType::WSET) || true) { search::AttributeMemorySaveTarget ms; search::TuneFileAttributes tune; search::index::DummyFileHeaderContext fileHeaderContext; 
EXPECT_TRUE(v2->save(ms, basename + "_ee")); EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext)); } return saveMem(*v2); } EnumeratedSaveTest::AttributePtr EnumeratedSaveTest::make(Config cfg, const vespalib::string &pref, bool fastSearch) { cfg.setFastSearch(fastSearch); AttributePtr v = AttributeFactory::createAttribute(pref, cfg); return v; } void EnumeratedSaveTest::load(AttributePtr v, const vespalib::string &name) { v->setBaseFileName(name); EXPECT_TRUE(v->load()); } template void EnumeratedSaveTest::checkLoad(AttributePtr v, const vespalib::string &name, AttributePtr ev) { v->setBaseFileName(name); EXPECT_TRUE(v->load()); compare(as(v), as(ev)); } template void EnumeratedSaveTest::testReload(AttributePtr v0, AttributePtr v1, AttributePtr v2, MemAttr::SP mv0, MemAttr::SP mv1, MemAttr::SP mv2, MemAttr::SP emv0, MemAttr::SP emv1, MemAttr::SP emv2, Config cfg, const vespalib::string &pref, bool fastSearch) { // typedef AttributePtr AVP; bool flagAttr = cfg.collectionType() == CollectionType::ARRAY && cfg.basicType() == BasicType::INT8 && fastSearch; bool supportsEnumerated = (fastSearch || cfg.basicType() == BasicType::STRING) && !flagAttr; AttributePtr v = make(cfg, pref, fastSearch); TEST_DO((checkLoad(v, pref + "0", v0))); TEST_DO((checkLoad(v, pref + "1", v1))); TEST_DO((checkLoad(v, pref + "2", v2))); TEST_DO((checkLoad(v, pref + "1", v1))); TEST_DO((checkLoad(v, pref + "0", v0))); TEST_DO((checkLoad(v, pref + "0", v0))); TEST_DO(checkMem(*v, *mv0, false)); TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); TEST_DO((checkLoad(v, pref + "1", v1))); TEST_DO(checkMem(*v, *mv1, false)); TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); TEST_DO((checkLoad(v, pref + "2", v2))); TEST_DO(checkMem(*v, *mv2, false)); TEST_DO(checkMem(*v, supportsEnumerated ? 
*emv2 : *mv2, true)); TEST_DO((checkLoad(v, pref + "0_e", v0))); TEST_DO((checkLoad(v, pref + "1_e", v1))); TEST_DO((checkLoad(v, pref + "2_e", v2))); TEST_DO((checkLoad(v, pref + "1_e", v1))); TEST_DO((checkLoad(v, pref + "0_e", v0))); TEST_DO((checkLoad(v, pref + "0_e", v0))); TEST_DO(checkMem(*v, *mv0, false)); TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); TEST_DO((checkLoad(v, pref + "1_e", v1))); TEST_DO(checkMem(*v, *mv1, false)); TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); TEST_DO((checkLoad(v, pref + "2_e", v2))); TEST_DO(checkMem(*v, *mv2, false)); TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true)); TermFieldMatchData md; SearchContextPtr sc = getSearch(as(v)); sc->fetchPostings(true); SearchBasePtr sb = sc->createIterator(&md, true); sb->initFullRange(); sb->seek(1u); EXPECT_EQUAL(7u, sb->getDocId()); sb->unpack(7u); EXPECT_EQUAL(md.getDocId(), 7u); if (v->getCollectionType() == CollectionType::SINGLE || flagAttr) { EXPECT_EQUAL(md.getWeight(), 1); } else if (v->getCollectionType() == CollectionType::ARRAY) { EXPECT_EQUAL(md.getWeight(), 2); } else { if (cfg.basicType() == BasicType::STRING) { EXPECT_EQUAL(md.getWeight(), 24); } else { EXPECT_EQUAL(md.getWeight(), -3); } } } template void EnumeratedSaveTest::test(BasicType bt, CollectionType ct, const vespalib::string &pref) { Config cfg(bt, ct); AttributePtr v0 = AttributeFactory::createAttribute(pref + "0", cfg); AttributePtr v1 = AttributeFactory::createAttribute(pref + "1", cfg); AttributePtr v2 = AttributeFactory::createAttribute(pref + "2", cfg); addDocs(v0, 0); addDocs(v1, 10); addDocs(v2, 30); populate(as(v0), 0, bt); populate(as(v1), 10, bt); populate(as(v2), 30, bt); MemAttr::SP mv0 = saveMem(*v0); MemAttr::SP mv1 = saveMem(*v1); MemAttr::SP mv2 = saveMem(*v2); MemAttr::SP emv0 = saveBoth(v0); MemAttr::SP emv1 = saveBoth(v1); MemAttr::SP emv2 = saveBoth(v2); AttributePtr v = make(cfg, pref, true); checkLoad(v, pref + "0_ee", v0); checkLoad(v, 
pref + "1_ee", v1); checkLoad(v, pref + "2_ee", v2); v.reset(); TEST_DO((testReload(v0, v1, v2, mv0, mv1, mv2, emv0, emv1, emv2, cfg, pref, false))); TEST_DO((testReload(v0, v1, v2, mv0, mv1, mv2, emv0, emv1, emv2, cfg, pref, true))); } TEST_F("Test enumerated save with single value int8", EnumeratedSaveTest) { f.template test(BasicType::INT8, CollectionType::SINGLE, "int8_sv"); } TEST_F("Test enumerated save with array value int8", EnumeratedSaveTest) { f.template test(BasicType::INT8, CollectionType::ARRAY, "int8_a"); } TEST_F("Test enumerated save with weighted set value int8", EnumeratedSaveTest) { f.template test(BasicType::INT8, CollectionType::WSET, "int8_ws"); } TEST_F("Test enumerated save with single value int16", EnumeratedSaveTest) { f.template test(BasicType::INT16, CollectionType::SINGLE, "int16_sv"); } TEST_F("Test enumerated save with array value int16", EnumeratedSaveTest) { f.template test(BasicType::INT16, CollectionType::ARRAY, "int16_a"); } TEST_F("Test enumerated save with weighted set value int16", EnumeratedSaveTest) { f.template test(BasicType::INT16, CollectionType::WSET, "int16_ws"); } TEST_F("Test enumerated save with single value int32", EnumeratedSaveTest) { f.template test(BasicType::INT32, CollectionType::SINGLE, "int32_sv"); } TEST_F("Test enumerated save with array value int32", EnumeratedSaveTest) { f.template test(BasicType::INT32, CollectionType::ARRAY, "int32_a"); } TEST_F("Test enumerated save with weighted set value int32", EnumeratedSaveTest) { f.template test(BasicType::INT32, CollectionType::WSET, "int32_ws"); } TEST_F("Test enumerated save with single value int64", EnumeratedSaveTest) { f.template test(BasicType::INT64, CollectionType::SINGLE, "int64_sv"); } TEST_F("Test enumerated save with array value int64", EnumeratedSaveTest) { f.template test(BasicType::INT64, CollectionType::ARRAY, "int64_a"); } TEST_F("Test enumerated save with weighted set value int64", EnumeratedSaveTest) { f.template test(BasicType::INT64, 
CollectionType::WSET, "int64_ws"); } TEST_F("Test enumerated save with single value float", EnumeratedSaveTest) { f.template test(BasicType::FLOAT, CollectionType::SINGLE, "float_sv"); } TEST_F("Test enumerated save with array value float", EnumeratedSaveTest) { f.template test(BasicType::FLOAT, CollectionType::ARRAY, "float_a"); } TEST_F("Test enumerated save with weighted set value float", EnumeratedSaveTest) { f.template test( BasicType::FLOAT, CollectionType::WSET, "float_ws"); } TEST_F("Test enumerated save with single value double", EnumeratedSaveTest) { f.template test(BasicType::DOUBLE, CollectionType::SINGLE, "double_sv"); } TEST_F("Test enumerated save with array value double", EnumeratedSaveTest) { f.template test(BasicType::DOUBLE, CollectionType::ARRAY, "double_a"); } TEST_F("Test enumerated save with weighted set value double", EnumeratedSaveTest) { f.template test( BasicType::DOUBLE, CollectionType::WSET, "double_ws"); } TEST_F("Test enumerated save with single value string", EnumeratedSaveTest) { f.template test(BasicType::STRING, CollectionType::SINGLE, "str_sv"); } TEST_F("Test enumerated save with array value string", EnumeratedSaveTest) { f.template test(BasicType::STRING, CollectionType::ARRAY, "str_a"); } TEST_F("Test enumerated save with weighted set value string", EnumeratedSaveTest) { f.template test( BasicType::STRING, CollectionType::WSET, "str_ws"); } TEST_MAIN() { TEST_RUN_ALL(); }