diff options
Diffstat (limited to 'searchlib/src/tests/attribute')
94 files changed, 14230 insertions, 0 deletions
diff --git a/searchlib/src/tests/attribute/.gitignore b/searchlib/src/tests/attribute/.gitignore new file mode 100644 index 00000000000..732912ab981 --- /dev/null +++ b/searchlib/src/tests/attribute/.gitignore @@ -0,0 +1,11 @@ +*.dat +*.idx +*.weight +.depend +Makefile +attribute_test +attributebenchmark +searchlib_attribute_test_app +searchlib_attributeguard_test_app +searchlib_changevector_test_app +searchlib_attributebenchmark_app diff --git a/searchlib/src/tests/attribute/CMakeLists.txt b/searchlib/src/tests/attribute/CMakeLists.txt new file mode 100644 index 00000000000..0598b5776a8 --- /dev/null +++ b/searchlib/src/tests/attribute/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attributeguard_test_app + SOURCES + attributeguard.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributeguard_test_app COMMAND sh attributeguard_test.sh) +vespa_add_executable(searchlib_attribute_test_app + SOURCES + attribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_test_app COMMAND sh attribute_test.sh) +vespa_add_executable(searchlib_changevector_test_app + SOURCES + changevector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_changevector_test_app COMMAND sh changevector_test.sh) +vespa_add_executable(searchlib_attributebenchmark_app + SOURCES + attributebenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributebenchmark_app COMMAND searchlib_attributebenchmark_app BENCHMARK) diff --git a/searchlib/src/tests/attribute/DESC b/searchlib/src/tests/attribute/DESC new file mode 100644 index 00000000000..6a9215b1a3b --- /dev/null +++ b/searchlib/src/tests/attribute/DESC @@ -0,0 +1 @@ +Unit tests for attribute use. 
diff --git a/searchlib/src/tests/attribute/FILES b/searchlib/src/tests/attribute/FILES new file mode 100644 index 00000000000..b742644b750 --- /dev/null +++ b/searchlib/src/tests/attribute/FILES @@ -0,0 +1,2 @@ +attribute.cpp +attributebenchmark.cpp diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp new file mode 100644 index 00000000000..b1d4e675e23 --- /dev/null +++ b/searchlib/src/tests/attribute/attribute_test.cpp @@ -0,0 +1,2200 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/document/fieldvalue/intfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefile.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/vespalib/io/fileutil.h> + +#include <vespa/searchlib/attribute/attributevector.hpp> + +#include <vespa/log/log.h> +LOG_SETUP("attribute_test"); + + +using namespace document; +using std::shared_ptr; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; +using search::attribute::BasicType; +using search::attribute::IAttributeVector; + +namespace +{ + + +vespalib::string empty; +vespalib::string clstmp("clstmp"); +vespalib::string 
asuDir("asutmp"); + +bool +isUnsignedSmallIntAttribute(const BasicType::Type &type) +{ + switch (type) + { + case BasicType::UINT1: + case BasicType::UINT2: + case BasicType::UINT4: + return true; + default: + return false; + } +} + +bool +isUnsignedSmallIntAttribute(const AttributeVector &a) +{ + return isUnsignedSmallIntAttribute(a.getBasicType()); +} + +template <typename BufferType> +void +expectZero(const BufferType &b) +{ + EXPECT_EQUAL(0, b); +} + +template <> +void +expectZero(const vespalib::string &b) +{ + EXPECT_EQUAL(empty, b); +} + +uint64_t +statSize(const vespalib::string &fileName) +{ + FastOS_StatInfo statInfo; + if (EXPECT_TRUE(FastOS_File::Stat(fileName.c_str(), &statInfo))) { + return statInfo._size; + } else { + return 0u; + } +} + +uint64_t +statSize(const AttributeVector &a) +{ + vespalib::string baseFileName = a.getBaseFileName(); + uint64_t resultSize = statSize(baseFileName + ".dat"); + if (a.hasMultiValue()) { + resultSize += statSize(baseFileName + ".idx"); + } + if (a.hasWeightedSetType()) { + resultSize += statSize(baseFileName + ".weight"); + } + if (a.hasEnum() && a.getEnumeratedSave()) { + resultSize += statSize(baseFileName + ".udat"); + } + return resultSize; +} + + +bool +preciseEstimatedSize(const AttributeVector &a) +{ + if (a.getBasicType() == BasicType::STRING && + EXPECT_TRUE(a.hasEnum()) && !a.getEnumeratedSave()) { + return false; // Using average of string lens, can be somewhat off + } + return true; +} + +} + +namespace search { + +using attribute::CollectionType; +using attribute::Config; + +class AttributeTest : public vespalib::TestApp +{ +private: + typedef AttributeVector::SP AttributePtr; + + void addDocs(const AttributePtr & v, size_t sz); + template <typename VectorType> + void populate(VectorType & ptr, unsigned seed); + template <typename VectorType, typename BufferType> + void compare(VectorType & a, VectorType & b); + + void testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, 
size_t numDocs); + void testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs); + template <typename VectorType, typename BufferType> + void testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c); + void testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs); + void testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs); + template <typename VectorType, typename BufferType> + void testMemorySaver(const AttributePtr & a, const AttributePtr & b); + + void testReload(); + void testHasLoadData(); + void testMemorySaver(); + + void commit(const AttributePtr & ptr); + + template <typename T> + void fillNumeric(std::vector<T> & values, uint32_t numValues); + void fillString(std::vector<vespalib::string> & values, uint32_t numValues); + template <typename VectorType, typename BufferType> + bool appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount, + const std::vector<BufferType> & values); + template <typename BufferType> + bool checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const BufferType & value); + template <typename BufferType> + bool checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t range, const std::vector<BufferType> & values); + + // CollectionType::SINGLE + template <typename VectorType, typename BufferType, typename BaseType> + void testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values); + void testSingle(); + + // CollectionType::ARRAY + template <typename BufferType> + void printArray(const AttributePtr & ptr); + template <typename VectorType, typename BufferType> + void testArray(const AttributePtr & ptr, const std::vector<BufferType> & values); + void testArray(); + + // CollectionType::WSET + template <typename BufferType> + void printWeightedSet(const AttributePtr & ptr); + template <typename VectorType, 
typename BufferType> + void testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values); + void testWeightedSet(); + void testBaseName(); + + template <typename VectorType, typename BufferType> + void testArithmeticValueUpdate(const AttributePtr & ptr); + void testArithmeticValueUpdate(); + + template <typename VectorType, typename BaseType, typename BufferType> + void testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after); + void testArithmeticWithUndefinedValue(); + + template <typename VectorType, typename BufferType> + void testMapValueUpdate(const AttributePtr & ptr, BufferType initValue, + const FieldValue & initFieldValue, const FieldValue & nonExistant, + bool removeIfZero, bool createIfNonExistant); + void testMapValueUpdate(); + + void testStatus(); + void testNullProtection(); + void testGeneration(const AttributePtr & attr, bool exactStatus); + void testGeneration(); + + void + testCreateSerialNum(void); + + template <typename VectorType, typename BufferType> + void + testCompactLidSpace(const Config &config, + bool fs, + bool es); + + template <typename VectorType, typename BufferType> + void + testCompactLidSpace(const Config &config); + + void + testCompactLidSpace(const Config &config); + + void + testCompactLidSpace(void); + + template <typename AttributeType> + void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch); + template <typename AttributeType> + void requireThatAddressSpaceUsageIsReported(const Config &config); + void requireThatAddressSpaceUsageIsReported(); + +public: + AttributeTest() { } + int Main(); +}; + +void AttributeTest::testBaseName() +{ + AttributeVector::BaseName v("attr1"); + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_TRUE(v.getSnapshotName().empty()); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_TRUE(v.getDirName().empty()); + v = "attribute/attr1/attr1"; + 
EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_TRUE(v.getSnapshotName().empty()); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_EQUAL(v.getDirName(), "attribute/attr1"); + v = "attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_EQUAL(v.getDirName(), "attribute/attr1/snapshot-X"); + v = "/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + // EXPECT_TRUE(v.getIndexName().empty()); + EXPECT_EQUAL("", v.getIndexName()); + EXPECT_EQUAL(v.getDirName(), "/attribute/attr1/snapshot-X"); + v = "index.1/1.ready/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + EXPECT_EQUAL(v.getIndexName(), "index.1"); + EXPECT_EQUAL(v.getDirName(), "index.1/1.ready/attribute/attr1/snapshot-X"); + v = "/index.1/1.ready/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + EXPECT_EQUAL(v.getIndexName(), "index.1"); + EXPECT_EQUAL(v.getDirName(), + "/index.1/1.ready/attribute/attr1/snapshot-X"); + v = "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X/attr1"; + EXPECT_EQUAL(v.getAttributeName(), "attr1"); + EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X"); + EXPECT_EQUAL(v.getIndexName(), "index.1"); + EXPECT_EQUAL(v.getDirName(), + "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X"); +} + +void AttributeTest::addDocs(const AttributePtr & v, size_t sz) +{ + if (sz) { + AttributeVector::DocId docId; + for(size_t i(0); i< sz; i++) { + EXPECT_TRUE( v->addDoc(docId) ); + } + EXPECT_TRUE( docId+1 == sz ); + EXPECT_TRUE( v->getNumDocs() == sz ); + commit(v); + } +} + + +template <> +void 
AttributeTest::populate(IntegerAttribute & v, unsigned seed) +{ + srand(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rand(), weight) ); + } + } else { + EXPECT_TRUE( v.update(i, rand()) ); + } + } + v.commit(); +} + +template <> +void AttributeTest::populate(FloatingPointAttribute & v, unsigned seed) +{ + srand(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rand() * 1.25, weight) ); + } + } else { + EXPECT_TRUE( v.update(i, rand() * 1.25) ); + } + } + v.commit(); +} + +template <> +void AttributeTest::populate(StringAttribute & v, unsigned seed) +{ + RandomGenerator rnd(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = rnd.rand(0, 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) ); + } + } else { + EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) ); + } + } + v.commit(); +} + +template <typename VectorType, typename BufferType> +void AttributeTest::compare(VectorType & a, VectorType & b) +{ + EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs()); + ASSERT_TRUE(a.getNumDocs() == b.getNumDocs()); + uint32_t asz(a.getMaxValueCount()); + uint32_t bsz(b.getMaxValueCount()); + BufferType *av = new BufferType[asz]; + BufferType *bv = new BufferType[bsz]; + + for (size_t i(0), m(a.getNumDocs()); i < m; i++) { + ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i))); + ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i))); + EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i)); + 
ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i)); + EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av, asz), static_cast<uint32_t>(a.getValueCount(i))); + EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv, bsz), static_cast<uint32_t>(b.getValueCount(i))); + for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); j < k; j++) { + EXPECT_TRUE(av[j] == bv[j]); + } + } + delete [] bv; + delete [] av; +} + +void AttributeTest::testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs) +{ + addDocs(a, numDocs); + addDocs(b, numDocs); + populate(static_cast<IntegerAttribute &>(*a.get()), 17); + populate(static_cast<IntegerAttribute &>(*b.get()), 17); + if (a->hasWeightedSetType()) { + testReload<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b, c); + } else { + testReload<IntegerAttribute, IntegerAttribute::largeint_t>(a, b, c); + } +} + + +void AttributeTest::testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs) +{ + addDocs(a, numDocs); + addDocs(b, numDocs); + populate(static_cast<StringAttribute &>(*a.get()), 17); + populate(static_cast<StringAttribute &>(*b.get()), 17); + if (a->hasWeightedSetType()) { + testReload<StringAttribute, StringAttribute::WeightedString>(a, b, c); + } else { + testReload<StringAttribute, vespalib::string>(a, b, c); + } +} + +template <typename VectorType, typename BufferType> +void AttributeTest::testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c) +{ + LOG(info, "testReload: vector '%s'", a->getName().c_str()); + + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get()))); + a->setCreateSerialNum(43u); + EXPECT_TRUE( a->saveAs(b->getBaseFileName()) ); + if (preciseEstimatedSize(*a)) { + EXPECT_EQUAL(statSize(*b), a->getEstimatedSaveByteSize()); + } else { + double estSize = a->getEstimatedSaveByteSize(); + double 
actSize = statSize(*b); + EXPECT_LESS_EQUAL(actSize * 1.0, estSize * 1.3); + EXPECT_GREATER_EQUAL(actSize * 1.0, estSize * 0.7); + } + EXPECT_TRUE( a->saveAs(c->getBaseFileName()) ); + if (preciseEstimatedSize(*a)) { + EXPECT_EQUAL(statSize(*c), a->getEstimatedSaveByteSize()); + } + EXPECT_TRUE( b->load() ); + EXPECT_EQUAL(43u, b->getCreateSerialNum()); + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get()))); + EXPECT_TRUE( c->load() ); + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(c.get()))); + + if (isUnsignedSmallIntAttribute(*a)) + return; + populate(static_cast<VectorType &>(*b.get()), 700); + populate(static_cast<VectorType &>(*c.get()), 700); + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(b.get())), *(static_cast<VectorType *>(c.get()))); + + { + ReadAttributeFile readC(c->getBaseFileName(), c->getConfig()); + WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(), + DummyFileHeaderContext(), + c->getNumDocs()); + std::unique_ptr<AttributeFile::Record> record(readC.getRecord()); + ASSERT_TRUE(record.get()); + for (size_t i(0), m(c->getNumDocs()); i < m; i++) { + EXPECT_TRUE(readC.read(*record)); + EXPECT_TRUE(writeC.write(*record)); + } + EXPECT_TRUE( ! readC.read(*record)); + } + EXPECT_TRUE( b->load() ); + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(a.get())), + *(static_cast<VectorType *>(b.get()))); + { + ReadAttributeFile readC(c->getBaseFileName(), c->getConfig()); + WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(), + DummyFileHeaderContext(), + c->getNumDocs()); + readC.enableDirectIO(); + writeC.enableDirectIO(); + std::unique_ptr<AttributeFile::Record> record(readC.getRecord()); + ASSERT_TRUE(record.get()); + for (size_t i(0), m(c->getNumDocs()); i < m; i++) { + EXPECT_TRUE(readC.read(*record)); + EXPECT_TRUE(writeC.write(*record)); + } + EXPECT_TRUE( ! 
readC.read(*record)); + } + EXPECT_TRUE( b->load() ); + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get()))); +} + + +void AttributeTest::testReload() +{ + // IntegerAttribute + // CollectionType::SINGLE + { + AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1", Config(BasicType::INT32, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2", Config(BasicType::INT32, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("sint32_3", Config(BasicType::INT32, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1", Config(BasicType::UINT4, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2", Config(BasicType::UINT4, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("suint4_3", Config(BasicType::UINT4, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint2_1", Config(BasicType::UINT2, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint2_2", Config(BasicType::UINT2, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("suint2_3", Config(BasicType::UINT2, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint1_1", Config(BasicType::UINT1, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint1_2", Config(BasicType::UINT1, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("suint1_3", Config(BasicType::UINT1, CollectionType::SINGLE)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } 
+ { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("sfsint32_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("sfsint32_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("sfsint32_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + // CollectionType::ARRAY + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("flag_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("flag_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("flag_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1", Config(BasicType::INT32, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2", Config(BasicType::INT32, CollectionType::ARRAY)); + AttributePtr iv3 = AttributeFactory::createAttribute("aint32_3", Config(BasicType::INT32, CollectionType::ARRAY)); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("afsint32_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("afsint32_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("afsint32_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + // CollectionType::WSET + { + AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1", Config(BasicType::INT32, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2", Config(BasicType::INT32, CollectionType::WSET)); + AttributePtr iv3 = AttributeFactory::createAttribute("wint32_3", Config(BasicType::INT32, CollectionType::WSET)); + 
testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("wfsint32_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("wfsint32_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("wfsint32_3", cfg); + testReloadInt(iv1, iv2, iv3, 0); + testReloadInt(iv1, iv2, iv3, 100); + } + + + // StringAttribute + { + AttributePtr iv1 = AttributeFactory::createAttribute("sstring_1", Config(BasicType::STRING, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sstring_2", Config(BasicType::STRING, CollectionType::SINGLE)); + AttributePtr iv3 = AttributeFactory::createAttribute("sstring_3", Config(BasicType::STRING, CollectionType::SINGLE)); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("astring_1", Config(BasicType::STRING, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("astring_2", Config(BasicType::STRING, CollectionType::ARRAY)); + AttributePtr iv3 = AttributeFactory::createAttribute("astring_3", Config(BasicType::STRING, CollectionType::ARRAY)); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("wstring_1", Config(BasicType::STRING, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wstring_2", Config(BasicType::STRING, CollectionType::WSET)); + AttributePtr iv3 = AttributeFactory::createAttribute("wstring_3", Config(BasicType::STRING, CollectionType::WSET)); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("sfsstring_1", cfg); + 
AttributePtr iv2 = AttributeFactory::createAttribute("sfsstring_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("sfsstring_3", cfg); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("afsstring_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("afsstring_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("afsstring_3", cfg); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr iv1 = AttributeFactory::createAttribute("wsfsstring_1", cfg); + AttributePtr iv2 = AttributeFactory::createAttribute("wsfsstring_2", cfg); + AttributePtr iv3 = AttributeFactory::createAttribute("wsfsstring_3", cfg); + testReloadString(iv1, iv2, iv3, 0); + testReloadString(iv1, iv2, iv3, 100); + } +} + +void AttributeTest::testHasLoadData() +{ + { // single value + AttributePtr av = AttributeFactory::createAttribute("loaddata1", Config(BasicType::INT32)); + EXPECT_TRUE(!av->hasLoadData()); + av->save(); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata2"); + av = AttributeFactory::createAttribute("loaddata2", Config(BasicType::INT32)); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata3"); + } + { // array + AttributePtr av = AttributeFactory::createAttribute("loaddata3", Config(BasicType::INT32, CollectionType::ARRAY)); + EXPECT_TRUE(!av->hasLoadData()); + av->save(); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata4"); + av = AttributeFactory::createAttribute("loaddata4", Config(BasicType::INT32, CollectionType::ARRAY)); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata5"); + } + { // wset + AttributePtr av = AttributeFactory::createAttribute("loaddata5", Config(BasicType::INT32, CollectionType::WSET)); + 
EXPECT_TRUE(!av->hasLoadData()); + av->save(); + EXPECT_TRUE(av->hasLoadData()); + av->saveAs("loaddata6"); + av = AttributeFactory::createAttribute("loaddata6", Config(BasicType::INT32, CollectionType::WSET)); + EXPECT_TRUE(av->hasLoadData()); + } +} + +void +AttributeTest::testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs) +{ + addDocs(a, numDocs); + populate(static_cast<IntegerAttribute &>(*a.get()), 21); + if (a->hasWeightedSetType()) { + testMemorySaver<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b); + } else { + testMemorySaver<IntegerAttribute, IntegerAttribute::largeint_t>(a, b); + } +} + +void +AttributeTest::testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs) +{ + addDocs(a, numDocs); + populate(static_cast<StringAttribute &>(*a.get()), 21); + if (a->hasWeightedSetType()) { + testMemorySaver<StringAttribute, StringAttribute::WeightedString>(a, b); + } else { + testMemorySaver<StringAttribute, vespalib::string>(a, b); + } +} + +template <typename VectorType, typename BufferType> +void +AttributeTest::testMemorySaver(const AttributePtr & a, const AttributePtr & b) +{ + LOG(info, "testMemorySaver: vector '%s'", a->getName().c_str()); + + AttributeMemorySaveTarget saveTarget; + EXPECT_TRUE(a->saveAs(b->getBaseFileName(), saveTarget)); + FastOS_StatInfo statInfo; + vespalib::string datFile = vespalib::make_string("%s.dat", b->getBaseFileName().c_str()); + EXPECT_TRUE(!FastOS_File::Stat(datFile.c_str(), &statInfo)); + EXPECT_TRUE(saveTarget.writeToFile(TuneFileAttributes(), + DummyFileHeaderContext())); + EXPECT_TRUE(FastOS_File::Stat(datFile.c_str(), &statInfo)); + EXPECT_TRUE(b->load()); + compare<VectorType, BufferType> + (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get()))); +} + +void +AttributeTest::testMemorySaver() +{ + // CollectionType::SINGLE + { + AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1ms", Config(BasicType::INT32, 
CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2ms", Config(BasicType::INT32, CollectionType::SINGLE)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1ms", Config(BasicType::UINT4, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2ms", Config(BasicType::UINT4, CollectionType::SINGLE)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("sstr_1ms", Config(BasicType::STRING, CollectionType::SINGLE)); + AttributePtr iv2 = AttributeFactory::createAttribute("sstr_2ms", Config(BasicType::STRING, CollectionType::SINGLE)); + testMemorySaverString(iv1, iv2, 100); + } + // CollectionType::ARRAY + { + AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1ms", Config(BasicType::INT32, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2ms", Config(BasicType::INT32, CollectionType::ARRAY)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("astr_1ms", Config(BasicType::STRING, CollectionType::ARRAY)); + AttributePtr iv2 = AttributeFactory::createAttribute("astr_2ms", Config(BasicType::STRING, CollectionType::ARRAY)); + testMemorySaverString(iv1, iv2, 100); + } + // CollectionType::WSET + { + AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1ms", Config(BasicType::INT32, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2ms", Config(BasicType::INT32, CollectionType::WSET)); + testMemorySaverInt(iv1, iv2, 100); + } + { + AttributePtr iv1 = AttributeFactory::createAttribute("wstr_1ms", Config(BasicType::STRING, CollectionType::WSET)); + AttributePtr iv2 = AttributeFactory::createAttribute("wstr_2ms", Config(BasicType::STRING, CollectionType::WSET)); + testMemorySaverString(iv1, iv2, 100); + } +} + + +template <typename T> +void 
+AttributeTest::fillNumeric(std::vector<T> & values, uint32_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(static_cast<T>(i)); + } +} + +void +AttributeTest::fillString(std::vector<vespalib::string> & values, uint32_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? "0" : "") << i; + values.push_back(ss.str()); + } +} + +template <typename VectorType, typename BufferType> +bool +AttributeTest::appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount, + const std::vector<BufferType> & values) +{ + bool retval = true; + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE((retval = retval && v.append(doc, values[i], 1))); + } + return retval; +} + +template <typename BufferType> +bool +AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const BufferType & value) +{ + std::vector<BufferType> buffer(valueCount); + if (!EXPECT_EQUAL(valueCount, ptr->getValueCount(doc))) return false; + if (!EXPECT_EQUAL(valueCount, ptr->get(doc, &buffer[0], buffer.size()))) return false; + if (!EXPECT_EQUAL(numValues, static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), value)))) return false; + return true; +} + +template <typename BufferType> +bool +AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount, + uint32_t range, const std::vector<BufferType> & values) +{ + std::vector<BufferType> buffer(valueCount); + bool retval = true; + EXPECT_TRUE((retval = retval && (static_cast<uint32_t>(ptr->getValueCount(doc)) == valueCount))); + EXPECT_TRUE((retval = retval && (ptr->get(doc, &buffer[0], buffer.size()) == valueCount))); + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range]))); + } + return retval; +} + + 
+//----------------------------------------------------------------------------- +// CollectionType::SINGLE +//----------------------------------------------------------------------------- + +template <typename VectorType, typename BufferType, typename BaseType> +void +AttributeTest::testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values) +{ + LOG(info, "testSingle: vector '%s' with %u documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size())); + + VectorType & v = *(static_cast<VectorType *>(ptr.get())); + uint32_t numUniques = values.size(); + std::vector<BufferType> buffer(1); + + // test update() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(ptr->getValueCount(doc) == 1); + uint32_t i = doc % numUniques; + uint32_t j = (doc + 1) % numUniques; + + EXPECT_TRUE(v.update(doc, values[i])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i])); + + EXPECT_TRUE(v.update(doc, values[j])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[j])); + } + EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0])); + + // test append() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(!v.append(doc, values[0], 1)); + } + EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1)); + + // test remove() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(!v.remove(doc, values[0], 1)); + } + EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1)); + + bool smallUInt = isUnsignedSmallIntAttribute(*ptr); + // test clearDoc() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t i = (doc + 2) % numUniques; + + EXPECT_TRUE(v.update(doc, values[i])); + if (doc % 2 == 0) { // alternate clearing + ptr->clearDoc(doc); + } + ptr->commit(); + EXPECT_EQUAL(1u, ptr->get(doc, &buffer[0], buffer.size())); + if (doc % 2 == 0) { + if (smallUInt) { + expectZero(buffer[0]); + } else { + 
EXPECT_TRUE(attribute::isUndefined<BaseType>(buffer[0])); + } + } else { + EXPECT_TRUE(!attribute::isUndefined<BaseType>(buffer[0])); + EXPECT_EQUAL(values[i], buffer[0]); + } + } + EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs())); +} + +void +AttributeTest::testSingle() +{ + uint32_t numDocs = 1000; + uint32_t numUniques = 50; + uint32_t numUniqueNibbles = 9; + { + std::vector<AttributeVector::largeint_t> values; + fillNumeric(values, numUniques); + std::vector<AttributeVector::largeint_t> nibbleValues; + fillNumeric(nibbleValues, numUniqueNibbles); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-int32", Config(BasicType::INT32, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-uint4", Config(BasicType::UINT4, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<IntegerAttribute, AttributeVector::largeint_t, int8_t>(ptr, nibbleValues); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-post-int32", cfg); + addDocs(ptr, numDocs); + testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values); + } + } + { + std::vector<double> values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-float", Config(BasicType::FLOAT, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<FloatingPointAttribute, double, float>(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-post-float", cfg); + addDocs(ptr, numDocs); + testSingle<FloatingPointAttribute, double, float>(ptr, values); + } + + } + { + std::vector<vespalib::string> values; + fillString(values, numUniques); + { + AttributePtr ptr = 
AttributeFactory::createAttribute("sv-string", Config(BasicType::STRING, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<StringAttribute, vespalib::string, vespalib::string>(ptr, values); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-fs-string", cfg); + addDocs(ptr, numDocs); + testSingle<StringAttribute, vespalib::string, vespalib::string>(ptr, values); + } + } +} + + +//----------------------------------------------------------------------------- +// CollectionType::ARRAY +//----------------------------------------------------------------------------- + +template <typename VectorType, typename BufferType> +void +AttributeTest::testArray(const AttributePtr & ptr, const std::vector<BufferType> & values) +{ + LOG(info, "testArray: vector '%s' with %i documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size())); + + VectorType & v = *(static_cast<VectorType *>(ptr.get())); + uint32_t numUniques = values.size(); + ASSERT_TRUE(numUniques >= 6); + + + // test update() + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + size_t sumAppends(0); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + ptr->clearDoc(doc); + + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + sumAppends += valueCount; + + uint32_t i = doc % numUniques; + EXPECT_TRUE(v.update(doc, values[i])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i])); + } + EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0])); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), (1 + 2)*ptr->getNumDocs() + sumAppends); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), sumAppends); + + + // test append() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + 
uint32_t valueCount = doc % numUniques; + ptr->clearDoc(doc); + + // append unique values + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + EXPECT_TRUE(checkContent(ptr, doc, valueCount, valueCount, values)); + + // append duplicates + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + EXPECT_TRUE(checkContent(ptr, doc, valueCount * 2, valueCount, values)); + } + EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1)); + + + // test remove() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + ptr->clearDoc(doc); + + EXPECT_TRUE(v.append(doc, values[1], 1)); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_TRUE(v.append(doc, values[3], 1)); + } + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(v.append(doc, values[5], 1)); + } + + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[0], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[1], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 8, 0, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 8, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 8, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[5], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 3, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[5])); + } + EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1)); + + + // test clearDoc() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + + ptr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[0], 1)); + } + ptr->clearDoc(doc); + for (uint32_t j 
= 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[1], 1)); + } + ptr->commit(); + + EXPECT_TRUE(checkCount(ptr, doc, valueCount, valueCount, values[1])); + } + EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs())); +} + +template <typename BufferType> +void +AttributeTest::printArray(const AttributePtr & ptr) +{ + uint32_t bufferSize = ptr->getMaxValueCount(); + std::vector<BufferType> buffer(bufferSize); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + std::cout << "doc[" << doc << "][" << i << "] = " << buffer[i] + << std::endl; + } + } +} + +void +AttributeTest::testArray() +{ + uint32_t numDocs = 100; + uint32_t numUniques = 50; + { // IntegerAttribute + std::vector<AttributeVector::largeint_t> values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-int32", Config(BasicType::INT32, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + addDocs(ptr, numDocs); + testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); + addDocs(ptr, numDocs); + testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); + } + } + { // FloatingPointAttribute + std::vector<double> values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-float", Config(BasicType::FLOAT, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray<FloatingPointAttribute, double>(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + 
cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("a-fs-float", cfg); + addDocs(ptr, numDocs); + testArray<FloatingPointAttribute, double>(ptr, values); + } + } + { // StringAttribute + std::vector<vespalib::string> values; + fillString(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-string", Config(BasicType::STRING, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray<StringAttribute, vespalib::string>(ptr, values); + } + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("afs-string", cfg); + addDocs(ptr, numDocs); + testArray<StringAttribute, vespalib::string>(ptr, values); + } + } +} + + +//----------------------------------------------------------------------------- +// CollectionType::WSET +//----------------------------------------------------------------------------- + +template <typename BufferType> +void +AttributeTest::printWeightedSet(const AttributePtr & ptr) +{ + std::vector<BufferType> buffer(ptr->getMaxValueCount()); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + std::cout << "doc[" << doc << "][" << i << "] = {" << buffer[i].getValue() + << ", " << buffer[i].getWeight() << "}" << std::endl; + } + } +} + +template <typename VectorType, typename BufferType> +void +AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values) +{ + LOG(info, "testWeightedSet: vector '%s' with %u documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size())); + + VectorType & v = *(static_cast<VectorType *>(ptr.get())); + uint32_t numDocs = v.getNumDocs(); + ASSERT_TRUE(values.size() >= numDocs + 10); + uint32_t bufferSize = numDocs + 10; + std::vector<BufferType> buffer(bufferSize); + + // fill and 
check + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + v.clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight())); + } + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(buffer[j].getValue() == values[j].getValue()); + EXPECT_TRUE(buffer[j].getWeight() == values[j].getWeight()); + } + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + // test append() + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + + // append non-existent value + EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight())); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue()); + EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight()); + + // append existent value + EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue()); + EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight() + 10); + + // append non-existent value two times + EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight())); + EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue()); + EXPECT_TRUE(buffer[doc + 1].getWeight() == values[doc + 1].getWeight() 
+ 10); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + // test remove() + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + + // remove non-existent value + EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2); + EXPECT_TRUE(v.remove(doc, values[doc + 2].getValue(), 0)); + commit(ptr); + EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2); + + // remove existent value + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue()); + EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + for (uint32_t i = 0; i < valueCount + 1; ++i) { + EXPECT_TRUE(buffer[i].getValue() != values[doc + 1].getValue()); + } + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); +} + +void +AttributeTest::testWeightedSet() +{ + uint32_t numDocs = 100; + uint32_t numValues = numDocs + 10; + { // IntegerAttribute + std::vector<AttributeVector::WeightedInt> values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(AttributeVector::WeightedInt(i, i + numValues)); + } + + { + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType::WSET)); + addDocs(ptr, numDocs); + testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(ptr, values); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); + addDocs(ptr, numDocs); + testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(ptr, 
values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("1", e)); + } + } + { // FloatingPointAttribute + std::vector<AttributeVector::WeightedFloat> values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(AttributeVector::WeightedFloat(i, i + numValues)); + } + + { + Config cfg(BasicType::FLOAT, CollectionType::WSET); + AttributePtr ptr = AttributeFactory::createAttribute("ws-float", cfg); + addDocs(ptr, numDocs); + testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-float", cfg); + addDocs(ptr, numDocs); + testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("1", e)); + } + } + { // StringAttribute + std::vector<AttributeVector::WeightedString> values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? 
"0" : "") << i; + values.push_back(AttributeVector::WeightedString(ss.str(), i + numValues)); + } + + { + AttributePtr ptr = AttributeFactory::createAttribute + ("wsstr", Config(BasicType::STRING, CollectionType::WSET)); + addDocs(ptr, numDocs); + testWeightedSet<StringAttribute, AttributeVector::WeightedString>(ptr, values); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", cfg); + addDocs(ptr, numDocs); + testWeightedSet<StringAttribute, AttributeVector::WeightedString>(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("string00", e)); + } + } +} + +template <typename VectorType, typename BufferType> +void +AttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) +{ + LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + VectorType & vec = static_cast<VectorType &>(*ptr.get()); + addDocs(ptr, 13); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + for (uint32_t doc = 0; doc < 13; ++doc) { + ASSERT_TRUE(vec.update(doc, 100)); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + ptr->commit(); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Add, -10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Sub, -10))); + EXPECT_TRUE(vec.apply(4, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(5, Arith(Arith::Mul, -10))); + EXPECT_TRUE(vec.apply(6, Arith(Arith::Div, 10))); + EXPECT_TRUE(vec.apply(7, Arith(Arith::Div, -10))); + EXPECT_TRUE(vec.apply(8, Arith(Arith::Add, 10.5))); + EXPECT_TRUE(vec.apply(9, Arith(Arith::Sub, 10.5))); + EXPECT_TRUE(vec.apply(10, Arith(Arith::Mul, 1.2))); + EXPECT_TRUE(vec.apply(11, 
Arith(Arith::Mul, 0.8))); + EXPECT_TRUE(vec.apply(12, Arith(Arith::Div, 0.8))); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 26u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u); + ptr->commit(); + + std::vector<BufferType> buf(1); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(1, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + ptr->get(2, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + ptr->get(3, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(4, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1000); + ptr->get(5, &buf[0], 1); + EXPECT_EQUAL(buf[0], -1000); + ptr->get(6, &buf[0], 1); + EXPECT_EQUAL(buf[0], 10); + ptr->get(7, &buf[0], 1); + EXPECT_EQUAL(buf[0], -10); + if (ptr->getBasicType() == BasicType::INT32) { + ptr->get(8, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(9, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + } else if (ptr->getBasicType() == BasicType::FLOAT || + ptr->getBasicType() == BasicType::DOUBLE) + { + ptr->get(8, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110.5); + ptr->get(9, &buf[0], 1); + EXPECT_EQUAL(buf[0], 89.5); + } else { + ASSERT_TRUE(false); + } + ptr->get(10, &buf[0], 1); + EXPECT_EQUAL(buf[0], 120); + ptr->get(11, &buf[0], 1); + EXPECT_EQUAL(buf[0], 80); + ptr->get(12, &buf[0], 1); + EXPECT_EQUAL(buf[0], 125); + + + // try several arithmetic operations on the same document in a single commit + ASSERT_TRUE(vec.update(0, 1100)); + ASSERT_TRUE(vec.update(1, 1100)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 28u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u); + for (uint32_t i = 0; i < 10; ++i) { + ASSERT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + ASSERT_TRUE(vec.apply(1, Arith(Arith::Add, 10))); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 48u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u); + ptr->commit(); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1200); + ptr->get(1, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1200); + + 
ASSERT_TRUE(vec.update(0, 10)); + ASSERT_TRUE(vec.update(1, 10)); + ASSERT_TRUE(vec.update(2, 10)); + ASSERT_TRUE(vec.update(3, 10)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 52u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u); + for (uint32_t i = 0; i < 8; ++i) { + EXPECT_TRUE(vec.apply(0, Arith(Arith::Mul, 1.2))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Mul, 2.3))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 3.4))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Mul, 5.6))); + ptr->commit(); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 84u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + + + // try divide by zero + ASSERT_TRUE(vec.update(0, 100)); + EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0))); + ptr->commit(); + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 86u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 66u); + } else { // does not apply for interger attributes + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + } + ptr->get(0, &buf[0], 1); + if (ptr->getBasicType() == BasicType::INT32) { + EXPECT_EQUAL(buf[0], 100); + } + + // try divide by zero with empty change vector + EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0))); + ptr->commit(); + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 87u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 67u); + } else { // does not apply for interger attributes + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + } +} + +void +AttributeTest::testArithmeticValueUpdate() +{ + { + AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE)); + testArithmeticValueUpdate<IntegerAttribute, 
IntegerAttribute::largeint_t>(ptr); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE)); + testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsint32", cfg); + testArithmeticValueUpdate<IntegerAttribute, IntegerAttribute::largeint_t>(ptr); + } + { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsfloat", cfg); + testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr); + } + { + Config cfg(BasicType::DOUBLE, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsdouble", cfg); + testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr); + } +} + + +template <typename VectorType, typename BaseType, typename BufferType> +void +AttributeTest::testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after) +{ + LOG(info, "testArithmeticWithUndefinedValue: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + VectorType & vec = static_cast<VectorType &>(*ptr.get()); + addDocs(ptr, 1); + ASSERT_TRUE(vec.update(0, before)); + ptr->commit(); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + ptr->commit(); + + std::vector<BufferType> buf(1); + ptr->get(0, &buf[0], 1); + + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_TRUE(std::isnan(buf[0])); + } else { + EXPECT_EQUAL(buf[0], after); + } +} + +void +AttributeTest::testArithmeticWithUndefinedValue() +{ + { + AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue<IntegerAttribute, int32_t, IntegerAttribute::largeint_t> + (ptr, 
std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::min()); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue<FloatingPointAttribute, float, double> + (ptr, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN()); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sdouble", Config(BasicType::DOUBLE, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue<FloatingPointAttribute, double, double> + (ptr, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()); + } +} + + +template <typename VectorType, typename BufferType> +void +AttributeTest::testMapValueUpdate(const AttributePtr & ptr, BufferType initValue, + const FieldValue & initFieldValue, const FieldValue & nonExistant, + bool removeIfZero, bool createIfNonExistant) +{ + LOG(info, "testMapValueUpdate: vector '%s'", ptr->getName().c_str()); + typedef MapValueUpdate MapVU; + typedef ArithmeticValueUpdate ArithVU; + VectorType & vec = static_cast<VectorType &>(*ptr.get()); + + addDocs(ptr, 6); + for (uint32_t doc = 0; doc < 6; ++doc) { + ASSERT_TRUE(vec.append(doc, initValue.getValue(), 100)); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 6u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, + ArithVU(ArithVU::Add, 10)))); + EXPECT_TRUE(ptr->apply(1, MapVU(initFieldValue, + ArithVU(ArithVU::Sub, 10)))); + EXPECT_TRUE(ptr->apply(2, MapVU(initFieldValue, + ArithVU(ArithVU::Mul, 10)))); + EXPECT_TRUE(ptr->apply(3, MapVU(initFieldValue, + ArithVU(ArithVU::Div, 10)))); + ptr->commit(); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 10u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 4u); + + std::vector<BufferType> buf(2); + ptr->get(0, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 110); + ptr->get(1, &buf[0], 2); + 
EXPECT_EQUAL(buf[0].getWeight(), 90); + ptr->get(2, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 1000); + ptr->get(3, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 10); + + // removeifzero + EXPECT_TRUE(ptr->apply(4, MapVU(initFieldValue, + ArithVU(ArithVU::Sub, 100)))); + ptr->commit(); + if (removeIfZero) { + EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(0)); + } else { + EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(1)); + EXPECT_EQUAL(buf[0].getWeight(), 0); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 11u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 5u); + + // createifnonexistant + EXPECT_TRUE(ptr->apply(5, MapVU(nonExistant, + ArithVU(ArithVU::Add, 10)))); + ptr->commit(); + if (createIfNonExistant) { + EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(2)); + EXPECT_EQUAL(buf[0].getWeight(), 100); + EXPECT_EQUAL(buf[1].getWeight(), 10); + } else { + EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(1)); + EXPECT_EQUAL(buf[0].getWeight(), 100); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 12u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u); + + + // try divide by zero (should be ignored) + vec.clearDoc(0); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u); + ASSERT_TRUE(vec.append(0, initValue.getValue(), 12345)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u); + EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, ArithVU(ArithVU::Div, 0)))); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u); + ptr->commit(); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0].getWeight(), 12345); +} + +void +AttributeTest::testMapValueUpdate() +{ + { // regular set + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType::WSET)); + testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt> + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), false, 
false); + } + { // remove if zero + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, true, false))); + testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt> + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), true, false); + } + { // create if non existant + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, false, true))); + testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt> + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), false, true); + } + + Config setCfg(Config(BasicType::STRING, CollectionType::WSET)); + Config setRemoveCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, true, false))); + Config setCreateCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, false, true))); + + { // regular set + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, false); + } + { // remove if zero + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setRemoveCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), true, false); + } + { // create if non existant + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCreateCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, true); + } + + // fast-search - posting lists + { // regular set + setCfg.setFastSearch(true); + AttributePtr ptr = 
AttributeFactory::createAttribute("wsfsstr", setCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, false); + } + { // remove if zero + setRemoveCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setRemoveCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), true, false); + } + { // create if non existant + setCreateCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setCreateCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, true); + } +} + + + +void +AttributeTest::commit(const AttributePtr & ptr) +{ + ptr->commit(); +} + + +void +AttributeTest::testStatus() +{ + std::vector<vespalib::string> values; + fillString(values, 16); + uint32_t numDocs = 100; + // No posting list + static constexpr size_t LeafNodeSize = + 4 + sizeof(EnumStoreBase::Index) * EnumTreeTraits::LEAF_SLOTS; + static constexpr size_t InternalNodeSize = + 8 + (sizeof(EnumStoreBase::Index) + + sizeof(btree::EntryRef)) * EnumTreeTraits::INTERNAL_SLOTS; + static constexpr size_t NestedVectorSize = 24; // sizeof(vespalib::Array) + + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + AttributePtr ptr = AttributeFactory::createAttribute("as", cfg); + addDocs(ptr, numDocs); + StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get())); + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE(appendToVector(sa, i, 1, values)); + } + ptr->commit(true); + EXPECT_EQUAL(ptr->getStatus().getNumDocs(), 100u); + EXPECT_EQUAL(ptr->getStatus().getNumValues(), 100u); + 
EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), 1u); + size_t expUsed = 0; + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree + expUsed += 1 * 32; // enum store (uniquevalues * bytes per entry) + // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex)) + expUsed += 100 * sizeof(search::multivalue::Index32) + 100 * 4; + EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); + EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); + } + + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + AttributePtr ptr = AttributeFactory::createAttribute("as", cfg); + addDocs(ptr, numDocs); + StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get())); + const size_t numUniq(16); + const size_t numValuesPerDoc(16); + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE(appendToVector(sa, i, numValuesPerDoc, values)); + } + ptr->commit(true); + EXPECT_EQUAL(ptr->getStatus().getNumDocs(), numDocs); + EXPECT_EQUAL(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc); + EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), numUniq); + size_t expUsed = 0; + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree + expUsed += numUniq * 32; // enum store (16 unique values, 32 bytes per entry) + // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex) + + // numdocs * sizeof(Array<EnumIndex>) (due to vector vector)) + expUsed += numDocs * sizeof(search::multivalue::Index32) + numDocs * numValuesPerDoc * sizeof(EnumStoreBase::Index) + ((numValuesPerDoc > search::multivalue::Index32::maxValues()) ? 
numDocs * NestedVectorSize : 0); + EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); + EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); + } +} + +void +AttributeTest::testNullProtection() +{ + size_t len1 = strlen("evil"); + size_t len2 = strlen("string"); + size_t len = len1 + 1 + len2; + vespalib::string good("good"); + vespalib::string evil("evil string"); + vespalib::string pureEvil("evil"); + EXPECT_EQUAL(strlen(evil.data()), len); + EXPECT_EQUAL(strlen(evil.c_str()), len); + evil[len1] = 0; // replace space with '\0' + EXPECT_EQUAL(strlen(evil.data()), len1); + EXPECT_EQUAL(strlen(evil.c_str()), len1); + EXPECT_EQUAL(strlen(evil.data() + len1), 0u); + EXPECT_EQUAL(strlen(evil.c_str() + len1), 0u); + EXPECT_EQUAL(strlen(evil.data() + len1 + 1), len2); + EXPECT_EQUAL(strlen(evil.c_str() + len1 + 1), len2); + EXPECT_EQUAL(evil.size(), len); + { // string + AttributeVector::DocId docId; + std::vector<vespalib::string> buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::SINGLE)); + StringAttribute &v = static_cast<StringAttribute &>(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.update(docId, evil)); + v.commit(); + size_t n = static_cast<const AttributeVector &>(v).get(docId, &buf[0], buf.size()); + EXPECT_EQUAL(n, 1u); + EXPECT_EQUAL(buf[0], pureEvil); + } + { // string array + AttributeVector::DocId docId; + std::vector<vespalib::string> buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::ARRAY)); + StringAttribute &v = static_cast<StringAttribute &>(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.append(0, good, 1)); + EXPECT_TRUE(v.append(0, evil, 1)); + EXPECT_TRUE(v.append(0, good, 1)); + v.commit(); + size_t n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 3u); + EXPECT_EQUAL(buf[0], good); + EXPECT_EQUAL(buf[1], pureEvil); + 
EXPECT_EQUAL(buf[2], good); + } + { // string set + AttributeVector::DocId docId; + std::vector<StringAttribute::WeightedString> buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::WSET)); + StringAttribute &v = static_cast<StringAttribute &>(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.append(0, good, 10)); + EXPECT_TRUE(v.append(0, evil, 20)); + v.commit(); + size_t n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 2u); + if (buf[0].getValue() != good) { + std::swap(buf[0], buf[1]); + } + EXPECT_EQUAL(buf[0].getValue(), good); + EXPECT_EQUAL(buf[0].getWeight(), 10); + EXPECT_EQUAL(buf[1].getValue(), pureEvil); + EXPECT_EQUAL(buf[1].getWeight(), 20); + + // remove + EXPECT_TRUE(v.remove(0, evil, 20)); + v.commit(); + n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 1u); + EXPECT_EQUAL(buf[0].getValue(), good); + EXPECT_EQUAL(buf[0].getWeight(), 10); + } +} + +void +AttributeTest::testGeneration(const AttributePtr & attr, bool exactStatus) +{ + LOG(info, "testGeneration(%s)", attr->getName().c_str()); + IntegerAttribute & ia = static_cast<IntegerAttribute &>(*attr.get()); + // add docs to trigger inc generation when data vector is full + AttributeVector::DocId docId; + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + ia.commit(true); + EXPECT_EQUAL(1u, ia.getCurrentGeneration()); + uint64_t lastAllocated; + uint64_t lastOnHold; + if (exactStatus) { + EXPECT_EQUAL(2u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); + } else { + EXPECT_LESS(0u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + 
{ + AttributeGuard ag(attr); // guard on generation 1 + EXPECT_TRUE(ia.addDoc(docId)); // inc gen + EXPECT_EQUAL(2u, ia.getCurrentGeneration()); + ia.commit(true); + EXPECT_EQUAL(3u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(4u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(2u, ia.getStatus().getOnHold()); // no cleanup due to guard + } else { + EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(3u, ia.getCurrentGeneration()); + { + AttributeGuard ag(attr); // guard on generation 3 + ia.commit(true); + EXPECT_EQUAL(4u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(4u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of addDoc() + } else { + EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + { + AttributeGuard ag(attr); // guard on generation 4 + EXPECT_TRUE(ia.addDoc(docId)); // inc gen + EXPECT_EQUAL(5u, ia.getCurrentGeneration()); + ia.commit(); + EXPECT_EQUAL(6u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(6u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(4u, ia.getStatus().getOnHold()); // no cleanup due to guard + } else { + EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + ia.commit(true); + EXPECT_EQUAL(7u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(6u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of commit() + } else { + 
EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold()); + } +} + +void +AttributeTest::testGeneration() +{ + { // single value attribute + Config cfg(BasicType::INT8); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("int8", cfg); + testGeneration(attr, true); + } + { // enum attribute (with fast search) + Config cfg(BasicType::INT8); + cfg.setFastSearch(true); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("faint8", cfg); + testGeneration(attr, false); + } + { // multi value attribute + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("aint8", cfg); + testGeneration(attr, false); + } + { // multi value enum attribute (with fast search) + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("faaint8", cfg); + testGeneration(attr, false); + } +} + + +void +AttributeTest::testCreateSerialNum() +{ + Config cfg(BasicType::INT32); + AttributePtr attr = AttributeFactory::createAttribute("int32", cfg); + attr->setCreateSerialNum(42u); + EXPECT_TRUE(attr->save()); + AttributePtr attr2 = AttributeFactory::createAttribute("int32", cfg); + EXPECT_TRUE(attr2->load()); + EXPECT_EQUAL(42u, attr2->getCreateSerialNum()); +} + + +template <typename VectorType, typename BufferType> +void +AttributeTest::testCompactLidSpace(const Config &config, + bool fs, + bool es) +{ + uint32_t highDocs = 100; + uint32_t trimmedDocs = 30; + vespalib::string bts = config.basicType().asString(); + vespalib::string cts = config.collectionType().asString(); + vespalib::string fas = fs ? "-fs" : ""; + vespalib::string ess = es ? 
"-es" : ""; + Config cfg = config; + cfg.setFastSearch(fs); + + vespalib::string name = clstmp + "/" + bts + "-" + cts + fas + ess; + LOG(info, "testCompactLidSpace(%s)", name.c_str()); + AttributePtr attr = AttributeFactory::createAttribute(name, cfg); + VectorType &v = static_cast<VectorType &>(*attr.get()); + attr->enableEnumeratedSave(es); + attr->addDocs(highDocs); + populate(v, 17); + AttributePtr attr2 = AttributeFactory::createAttribute(name, cfg); + VectorType &v2 = static_cast<VectorType &>(*attr2.get()); + attr2->enableEnumeratedSave(es); + attr2->addDocs(trimmedDocs); + populate(v2, 17); + EXPECT_EQUAL(trimmedDocs, attr2->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr2->getCommittedDocIdLimit()); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(highDocs, attr->getCommittedDocIdLimit()); + attr->compactLidSpace(trimmedDocs); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + EXPECT_TRUE(attr->save()); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + AttributePtr attr3 = AttributeFactory::createAttribute(name, cfg); + EXPECT_TRUE(attr3->load()); + EXPECT_EQUAL(trimmedDocs, attr3->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr3->getCommittedDocIdLimit()); + VectorType &v3 = static_cast<VectorType &>(*attr3.get()); + compare<VectorType, BufferType>(v2, v3); + attr->shrinkLidSpace(); + EXPECT_EQUAL(trimmedDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + compare<VectorType, BufferType>(v, v3); +} + + +template <typename VectorType, typename BufferType> +void +AttributeTest::testCompactLidSpace(const Config &config) +{ + testCompactLidSpace<VectorType, BufferType>(config, false, false); + testCompactLidSpace<VectorType, BufferType>(config, false, true); + bool smallUInt = isUnsignedSmallIntAttribute(config.basicType().type()); + if (smallUInt) + return; + testCompactLidSpace<VectorType, 
BufferType>(config, true, false); + testCompactLidSpace<VectorType, BufferType>(config, true, true); +} + + +void +AttributeTest::testCompactLidSpace(const Config &config) +{ + switch (config.basicType().type()) { + case BasicType::UINT1: + case BasicType::UINT2: + case BasicType::UINT4: + case BasicType::INT8: + case BasicType::INT16: + case BasicType::INT32: + case BasicType::INT64: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace<IntegerAttribute, + IntegerAttribute::WeightedInt>(config); + } else { + testCompactLidSpace<IntegerAttribute, + IntegerAttribute::largeint_t>(config); + } + break; + case BasicType::FLOAT: + case BasicType::DOUBLE: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>(config); + } else { + testCompactLidSpace<FloatingPointAttribute, double>(config); + } + break; + case BasicType::STRING: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace<StringAttribute, + StringAttribute::WeightedString>(config); + } else { + testCompactLidSpace<StringAttribute, vespalib::string>(config); + } + break; + default: + abort(); + } +} + + +void +AttributeTest::testCompactLidSpace() +{ + vespalib::rmdir(clstmp, true); + vespalib::mkdir(clstmp); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT1, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT2, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT4, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::ARRAY))); + 
TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::WSET))); + vespalib::rmdir(clstmp, true); +} + +template <typename AttributeType> +void +AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch) +{ + uint32_t numDocs = 10; + vespalib::string attrName = asuDir + "/" + config.basicType().asString() + "-" + + config.collectionType().asString() + (fastSearch ? 
"-fs" : ""); + Config cfg = config; + cfg.setFastSearch(fastSearch); + + AttributePtr attrPtr = AttributeFactory::createAttribute(attrName, cfg); + addDocs(attrPtr, numDocs); + AddressSpaceUsage before = attrPtr->getAddressSpaceUsage(); + populate(static_cast<AttributeType &>(*attrPtr.get()), 5); + AddressSpaceUsage after = attrPtr->getAddressSpaceUsage(); + if (attrPtr->hasEnum()) { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str()); + EXPECT_EQUAL(before.enumStoreUsage().used(), 0u); + EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used()); + EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit()); + EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize() + } else { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str()); + EXPECT_EQUAL(before.enumStoreUsage().used(), 0u); + EXPECT_EQUAL(after.enumStoreUsage(), before.enumStoreUsage()); + EXPECT_EQUAL(AddressSpaceUsage::defaultEnumStoreUsage(), after.enumStoreUsage()); + } + if (attrPtr->hasMultiValue()) { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has multi-value", attrName.c_str()); + EXPECT_EQUAL(before.multiValueUsage().used(), 0u); + EXPECT_GREATER(after.multiValueUsage().used(), before.multiValueUsage().used()); + EXPECT_EQUAL(after.multiValueUsage().limit(), before.multiValueUsage().limit()); + EXPECT_EQUAL(134217728u, after.multiValueUsage().limit()); // multivalue::Index32::offsetSize() + } else { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT multi-value", attrName.c_str()); + EXPECT_EQUAL(before.multiValueUsage().used(), 0u); + EXPECT_EQUAL(after.multiValueUsage(), before.multiValueUsage()); + EXPECT_EQUAL(AddressSpaceUsage::defaultMultiValueUsage(), after.multiValueUsage()); + } +} + +template <typename AttributeType> +void +AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config) +{ + 
requireThatAddressSpaceUsageIsReported<AttributeType>(config, false); + requireThatAddressSpaceUsageIsReported<AttributeType>(config, true); +} + +void +AttributeTest::requireThatAddressSpaceUsageIsReported() +{ + vespalib::rmdir(asuDir, true); + vespalib::mkdir(asuDir); + TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::ARRAY))); + TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::ARRAY))); + TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::ARRAY))); +} + +int AttributeTest::Main() +{ + TEST_INIT("attribute_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + testBaseName(); + testReload(); + testHasLoadData(); + testMemorySaver(); + + testSingle(); + testArray(); + testWeightedSet(); + testArithmeticValueUpdate(); + testArithmeticWithUndefinedValue(); + testMapValueUpdate(); + testStatus(); + testNullProtection(); + testGeneration(); + testCreateSerialNum(); + TEST_DO(testCompactLidSpace()); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + + TEST_DONE(); +} + +} + + +TEST_APPHOOK(search::AttributeTest); diff --git a/searchlib/src/tests/attribute/attribute_test.sh b/searchlib/src/tests/attribute/attribute_test.sh new file mode 100644 index 00000000000..89c52129b74 --- /dev/null +++ b/searchlib/src/tests/attribute/attribute_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_attribute_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git 
a/searchlib/src/tests/attribute/attributebenchmark.cpp b/searchlib/src/tests/attribute/attributebenchmark.cpp new file mode 100644 index 00000000000..88446ef71f7 --- /dev/null +++ b/searchlib/src/tests/attribute/attributebenchmark.cpp @@ -0,0 +1,678 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include "attributesearcher.h" +#include "attributeupdater.h" +#include <vespa/searchlib/util/randomgenerator.h> +#include "runnable.h" +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/vespalib/util/sync.h> +#include <iostream> +#include <fstream> +#include <vespa/log/log.h> + +LOG_SETUP("attributebenchmark"); + +#include <vespa/searchlib/attribute/attributevector.hpp> + +using vespalib::Monitor; +using vespalib::MonitorGuard; +using std::shared_ptr; + +typedef std::vector<uint32_t> NumVector; +typedef std::vector<vespalib::string> StringVector; +typedef AttributeVector::SP AttributePtr; +typedef AttributeVector::DocId DocId; +typedef search::attribute::Config AttrConfig; +using search::attribute::BasicType; +using search::attribute::CollectionType; + +namespace search { + +class AttributeBenchmark : public FastOS_Application +{ +private: + class Config { + public: + vespalib::string _attribute; + uint32_t _numDocs; + uint32_t _numUpdates; + uint32_t _numValues; + uint32_t _numSearchers; + uint32_t _numQueries; + bool _searchersOnly; + bool _validate; + uint32_t _populateRuns; + uint32_t _updateRuns; + uint32_t _commitFreq; + uint32_t 
_minValueCount; + uint32_t _maxValueCount; + uint32_t _minStringLen; + uint32_t _maxStringLen; + uint32_t _seed; + bool _writeAttribute; + int64_t _rangeStart; + int64_t _rangeEnd; + int64_t _rangeDelta; + bool _rangeSearch; + uint32_t _prefixLength; + bool _prefixSearch; + + + Config() : _attribute(""), _numDocs(0), _numUpdates(0), _numValues(0), + _numSearchers(0), _numQueries(0), _searchersOnly(true), _validate(false), _populateRuns(0), _updateRuns(0), + _commitFreq(0), _minValueCount(0), _maxValueCount(0), _minStringLen(0), _maxStringLen(0), _seed(0), + _writeAttribute(false), _rangeStart(0), _rangeEnd(0), _rangeDelta(0), _rangeSearch(false), + _prefixLength(0), _prefixSearch(false) {} + void printXML() const; + }; + + class Resource { + private: + std::vector<struct rusage> _usages; + struct rusage _reset; + + public: + Resource() : _usages(), _reset() { reset(); }; + void reset() { + getrusage(0, &_reset); + } + void saveUsage() { + struct rusage now; + getrusage(0, &now); + struct rusage usage = computeDifference(_reset, now); + _usages.push_back(usage); + } + void printLastXML(uint32_t opCount) { + (void) opCount; + struct rusage & usage = _usages.back(); + std::cout << "<ru_utime>" << usage.ru_utime.tv_sec * 1000 + usage.ru_utime.tv_usec / 1000 + << "</ru_utime>" << std::endl; + std::cout << "<ru_stime>" << usage.ru_stime.tv_sec * 1000 + usage.ru_stime.tv_usec / 1000 + << "</ru_stime>" << std::endl; + std::cout << "<ru_nvcsw>" << usage.ru_nvcsw << "</ru_nvcsw>" << std::endl; + std::cout << "<ru_nivcsw>" << usage.ru_nivcsw << "</ru_nivcsw>" << std::endl; + } + static struct rusage computeDifference(struct rusage & first, struct rusage & second); + }; + + FastOS_ThreadPool * _threadPool; + Config _config; + RandomGenerator _rndGen; + + void init(const Config & config); + void usage(); + + // benchmark helper methods + void addDocs(const AttributePtr & ptr, uint32_t numDocs); + template <typename Vector, typename T, typename BT> + void benchmarkPopulate(const 
AttributePtr & ptr, const std::vector<T> & values, uint32_t id); + template <typename Vector, typename T, typename BT> + void benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id); + + template <typename T> + std::vector<vespalib::string> prepareForPrefixSearch(const std::vector<T> & values) const; + template <typename T> + void benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values); + template <typename Vector, typename T, typename BT> + void benchmarkSearchWithUpdater(const AttributePtr & ptr, + const std::vector<T> & values); + + template <typename Vector, typename T, typename BT> + void benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values); + + // Numeric Attribute + void benchmarkNumeric(const AttributePtr & ptr); + + // String Attribute + void benchmarkString(const AttributePtr & ptr); + + +public: + AttributeBenchmark() : _threadPool(NULL), _config(), _rndGen() {} + ~AttributeBenchmark() { + if (_threadPool != NULL) { + delete _threadPool; + } + } + int Main(); +}; + + +void +AttributeBenchmark::Config::printXML() const +{ + std::cout << "<config>" << std::endl; + std::cout << "<attribute>" << _attribute << "</attribute>" << std::endl; + std::cout << "<num-docs>" << _numDocs << "</num-docs>" << std::endl; + std::cout << "<num-updates>" << _numUpdates << "</num-updates>" << std::endl; + std::cout << "<num-values>" << _numValues << "</num-values>" << std::endl; + std::cout << "<num-searchers>" << _numSearchers << "</num-searchers>" << std::endl; + std::cout << "<num-queries>" << _numQueries << "</num-queries>" << std::endl; + std::cout << "<searchers-only>" << (_searchersOnly ? "true" : "false") << "</searchers-only>" << std::endl; + std::cout << "<validate>" << (_validate ? 
"true" : "false") << "</validate>" << std::endl; + std::cout << "<populate-runs>" << _populateRuns << "</populate-runs>" << std::endl; + std::cout << "<update-runs>" << _updateRuns << "</update-runs>" << std::endl; + std::cout << "<commit-freq>" << _commitFreq << "</commit-freq>" << std::endl; + std::cout << "<min-value-count>" << _minValueCount << "</min-value-count>" << std::endl; + std::cout << "<max-value-count>" << _maxValueCount << "</max-value-count>" << std::endl; + std::cout << "<min-string-len>" << _minStringLen << "</min-string-len>" << std::endl; + std::cout << "<max-string-len>" << _maxStringLen << "</max-string-len>" << std::endl; + std::cout << "<seed>" << _seed << "</seed>" << std::endl; + std::cout << "<range-start>" << _rangeStart << "</range-start>" << std::endl; + std::cout << "<range-end>" << _rangeEnd << "</range-end>" << std::endl; + std::cout << "<range-delta>" << _rangeDelta << "</range-delta>" << std::endl; + std::cout << "<range-search>" << (_rangeSearch ? "true" : "false") << "</range-search>" << std::endl; + std::cout << "<prefix-length>" << _prefixLength << "</prefix-length>" << std::endl; + std::cout << "<prefix-search>" << (_prefixSearch ? 
"true" : "false") << "</prefix-search>" << std::endl; + std::cout << "</config>" << std::endl; +} + +void +AttributeBenchmark::init(const Config & config) +{ + _config = config; + _rndGen.srand(_config._seed); +} + + +//----------------------------------------------------------------------------- +// Benchmark helper methods +//----------------------------------------------------------------------------- +void +AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs) +{ + DocId startDoc; + DocId lastDoc; + bool success = ptr->addDocs(startDoc, lastDoc, numDocs); + assert(success); + (void) success; + assert(startDoc == 0); + assert(lastDoc + 1 == numDocs); + assert(ptr->getNumDocs() == numDocs); +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id) +{ + std::cout << "<!-- Populate " << _config._numDocs << " documents -->" << std::endl; + AttributeUpdater<Vector, T, BT> + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + updater.populate(); + std::cout << "<populate id='" << id << "'>" << std::endl; + updater.getStatus().printXML(); + std::cout << "</populate>" << std::endl; + if (_config._validate) { + std::cout << "<!-- All " << updater.getValidator().getTotalCnt() + << " asserts passed -->" << std::endl; + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id) +{ + std::cout << "<!-- Apply " << _config._numUpdates << " updates -->" << std::endl; + AttributeUpdater<Vector, T, BT> + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + updater.update(_config._numUpdates); + std::cout << "<update id='" << id << "'>" << std::endl; + updater.getStatus().printXML(); + std::cout << 
"</update>" << std::endl; + if (_config._validate) { + std::cout << "<!-- All " << updater.getValidator().getTotalCnt() + << " asserts passed -->" << std::endl; + } +} + +template <typename T> +std::vector<vespalib::string> +AttributeBenchmark::prepareForPrefixSearch(const std::vector<T> & values) const +{ + (void) values; + return std::vector<vespalib::string>(); +} + +template <> +std::vector<vespalib::string> +AttributeBenchmark::prepareForPrefixSearch(const std::vector<AttributeVector::WeightedString> & values) const +{ + std::vector<vespalib::string> retval; + retval.reserve(values.size()); + for (size_t i = 0; i < values.size(); ++i) { + retval.push_back(values[i].getValue().substr(0, _config._prefixLength)); + } + return retval; +} + +template <typename T> +void +AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values) +{ + std::vector<AttributeSearcher *> searchers; + if (_config._numSearchers > 0) { + std::cout << "<!-- Starting " << _config._numSearchers << " searcher threads with " + << _config._numQueries << " queries each -->" << std::endl; + + std::vector<vespalib::string> prefixStrings = prepareForPrefixSearch(values); + + for (uint32_t i = 0; i < _config._numSearchers; ++i) { + if (_config._rangeSearch) { + RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta); + searchers.push_back(new AttributeRangeSearcher(i, ptr, spec, _config._numQueries)); + } else if (_config._prefixSearch) { + searchers.push_back(new AttributePrefixSearcher(i, ptr, prefixStrings, _config._numQueries)); + } else { + searchers.push_back(new AttributeFindSearcher<T>(i, ptr, values, _config._numQueries)); + } + _threadPool->NewThread(searchers.back()); + } + + for (uint32_t i = 0; i < searchers.size(); ++i) { + searchers[i]->join(); + } + + AttributeSearcherStatus totalStatus; + for (uint32_t i = 0; i < searchers.size(); ++i) { + std::cout << "<searcher-summary id='" << i << "'>" << std::endl; + 
searchers[i]->getStatus().printXML(); + std::cout << "</searcher-summary>" << std::endl; + totalStatus.merge(searchers[i]->getStatus()); + delete searchers[i]; + } + std::cout << "<total-searcher-summary>" << std::endl; + totalStatus.printXML(); + std::cout << "</total-searcher-summary>" << std::endl; + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr, + const std::vector<T> & values) +{ + if (_config._numSearchers > 0) { + std::cout << "<!-- Starting 1 updater thread -->" << std::endl; + AttributeUpdaterThread<Vector, T, BT> + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + _threadPool->NewThread(&updater); + benchmarkSearch(ptr, values); + updater.stop(); + updater.join(); + std::cout << "<updater-summary>" << std::endl; + updater.getStatus().printXML(); + std::cout << "</updater-summary>" << std::endl; + if (_config._validate) { + std::cout << "<!-- All " << updater.getValidator().getTotalCnt() + << " asserts passed -->" << std::endl; + } + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values) +{ + addDocs(ptr, _config._numDocs); + + // populate + for (uint32_t i = 0; i < _config._populateRuns; ++i) { + benchmarkPopulate<Vector, T, BT>(ptr, values, i); + } + + // update + if (_config._numUpdates > 0) { + for (uint32_t i = 0; i < _config._updateRuns; ++i) { + benchmarkUpdate<Vector, T, BT>(ptr, values, i); + } + } + + // search + if (_config._searchersOnly) { + benchmarkSearch(ptr, values); + } else { + benchmarkSearchWithUpdater<Vector, T, BT>(ptr, values); + } + + _threadPool->Close(); +} + + +//----------------------------------------------------------------------------- +// Numeric Attribute +//----------------------------------------------------------------------------- +void 
+AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr) +{ + NumVector values; + if (_config._rangeSearch) { + values.reserve(_config._numValues); + for (uint32_t i = 0; i < _config._numValues; ++i) { + values.push_back(i); + } + } else { + _rndGen.fillRandomIntegers(values, _config._numValues); + } + + std::vector<int32_t> weights; + _rndGen.fillRandomIntegers(weights, _config._numValues); + + std::vector<AttributeVector::WeightedInt> weightedVector; + weightedVector.reserve(values.size()); + for (size_t i = 0; i < values.size(); ++i) { + if (!ptr->hasWeightedSetType()) { + weightedVector.push_back(AttributeVector::WeightedInt(values[i])); + } else { + weightedVector.push_back(AttributeVector::WeightedInt(values[i], weights[i])); + } + } + benchmarkAttribute<IntegerAttribute, AttributeVector::WeightedInt, AttributeVector::WeightedInt> + (ptr, weightedVector); +} + + +//----------------------------------------------------------------------------- +// String Attribute +//----------------------------------------------------------------------------- +void +AttributeBenchmark::benchmarkString(const AttributePtr & ptr) +{ + StringVector strings; + _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen); + + std::vector<int32_t> weights; + _rndGen.fillRandomIntegers(weights, _config._numValues); + + std::vector<AttributeVector::WeightedString> weightedVector; + weightedVector.reserve(strings.size()); + for (size_t i = 0; i < strings.size(); ++i) { + if (!ptr->hasWeightedSetType()) { + weightedVector.push_back(AttributeVector::WeightedString(strings[i])); + } else { + weightedVector.push_back(AttributeVector::WeightedString(strings[i], weights[i])); + } + } + benchmarkAttribute<StringAttribute, AttributeVector::WeightedString, AttributeVector::WeightedString> + (ptr, weightedVector); +} + + +//----------------------------------------------------------------------------- +// Resource utilization 
+//----------------------------------------------------------------------------- +struct rusage +AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second) +{ + struct rusage result; + // utime + uint64_t firstutime = first.ru_utime.tv_sec * 1000000 + first.ru_utime.tv_usec; + uint64_t secondutime = second.ru_utime.tv_sec * 1000000 + second.ru_utime.tv_usec; + uint64_t resultutime = secondutime - firstutime; + result.ru_utime.tv_sec = resultutime / 1000000; + result.ru_utime.tv_usec = resultutime % 1000000; + + // stime + uint64_t firststime = first.ru_stime.tv_sec * 1000000 + first.ru_stime.tv_usec; + uint64_t secondstime = second.ru_stime.tv_sec * 1000000 + second.ru_stime.tv_usec; + uint64_t resultstime = secondstime - firststime; + result.ru_stime.tv_sec = resultstime / 1000000; + result.ru_stime.tv_usec = resultstime % 1000000; + + result.ru_maxrss = second.ru_maxrss; // - first.ru_maxrss; + result.ru_ixrss = second.ru_ixrss; // - first.ru_ixrss; + result.ru_idrss = second.ru_idrss; // - first.ru_idrss; + result.ru_isrss = second.ru_isrss; // - first.ru_isrss; + result.ru_minflt = second.ru_minflt - first.ru_minflt; + result.ru_majflt = second.ru_majflt - first.ru_majflt; + result.ru_nswap = second.ru_nswap - first.ru_nswap; + result.ru_inblock = second.ru_inblock - first.ru_inblock; + result.ru_oublock = second.ru_oublock - first.ru_oublock; + result.ru_msgsnd = second.ru_msgsnd - first.ru_msgsnd; + result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv; + result.ru_nsignals = second.ru_nsignals - first.ru_nsignals; + result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw; + result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw; + + return result; +} + + +void +AttributeBenchmark::usage() +{ + std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl; + std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl; + std::cout << " [-c commitFrequency] 
[-l minValueCount] [-h maxValueCount]" << std::endl; + std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl; + std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl; + std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl; + std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl; + std::cout << " <attribute>" << std::endl; + std::cout << " <attribute> : s-uint32, a-uint32, ws-uint32" << std::endl; + std::cout << " s-fa-uint32, a-fa-uint32, ws-fa-uint32" << std::endl; + std::cout << " s-fs-uint32, a-fs-uint32, ws-fs-uint32 ws-frs-uint32" << std::endl; + std::cout << " s-string, a-string, ws-string" << std::endl; + std::cout << " s-fs-string, a-fs-string, ws-fs-string ws-frs-string" << std::endl; +} + +int +AttributeBenchmark::Main() +{ + Config dc; + dc._numDocs = 50000; + dc._numUpdates = 50000; + dc._numValues = 1000; + dc._numSearchers = 0; + dc._numQueries = 1000; + dc._searchersOnly = true; + dc._validate = false; + dc._populateRuns = 1; + dc._updateRuns = 1; + dc._commitFreq = 1000; + dc._minValueCount = 0; + dc._maxValueCount = 20; + dc._minStringLen = 1; + dc._maxStringLen = 50; + dc._seed = 555; + dc._writeAttribute = false; + dc._rangeStart = 0; + dc._rangeEnd = 1000; + dc._rangeDelta = 10; + dc._rangeSearch = false; + dc._prefixLength = 2; + dc._prefixSearch = false; + + int idx = 1; + char opt; + const char * arg; + bool optError = false; + while ((opt = GetOpt("n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw", arg, idx)) != -1) { + switch (opt) { + case 'n': + dc._numDocs = atoi(arg); + break; + case 'u': + dc._numUpdates = atoi(arg); + break; + case 'v': + dc._numValues = atoi(arg); + break; + case 's': + dc._numSearchers = atoi(arg); + break; + case 'q': + dc._numQueries = atoi(arg); + break; + case 'p': + dc._populateRuns = atoi(arg); + break; + case 'r': + dc._updateRuns = atoi(arg); + break; + case 'c': + dc._commitFreq = 
atoi(arg); + break; + case 'l': + dc._minValueCount = atoi(arg); + break; + case 'h': + dc._maxValueCount = atoi(arg); + break; + case 'i': + dc._minStringLen = atoi(arg); + break; + case 'a': + dc._maxStringLen = atoi(arg); + break; + case 'e': + dc._seed = atoi(arg); + break; + case 'S': + dc._rangeStart = strtoll(arg, NULL, 10); + break; + case 'E': + dc._rangeEnd = strtoll(arg, NULL, 10); + break; + case 'D': + dc._rangeDelta = strtoll(arg, NULL, 10); + break; + case 'L': + dc._prefixLength = atoi(arg); + break; + case 'b': + dc._searchersOnly = false; + break; + case 'R': + dc._rangeSearch = true; + break; + case 'P': + dc._prefixSearch = true; + break; + case 't': + dc._validate = true; + break; + case 'w': + dc._writeAttribute = true; + break; + default: + optError = true; + break; + } + } + + if (_argc != (idx + 1) || optError) { + usage(); + return -1; + } + + dc._attribute = vespalib::string(_argv[idx]); + + _threadPool = new FastOS_ThreadPool(256000); + + std::cout << "<attribute-benchmark>" << std::endl; + init(dc); + _config.printXML(); + + AttributePtr ptr; + + if (_config._attribute == "s-int32") { + std::cout << "<!-- Benchmark SingleValueNumericAttribute<int32_t> -->" << std::endl; + ptr = AttributeFactory::createAttribute("s-int32", AttrConfig(BasicType::INT32, CollectionType::SINGLE)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "a-int32") { + std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (array) -->" << std::endl; + ptr = AttributeFactory::createAttribute("a-int32", AttrConfig(BasicType::INT32, CollectionType::ARRAY)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "ws-int32") { + std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (wset) -->" << std::endl; + ptr = AttributeFactory::createAttribute("ws-int32", AttrConfig(BasicType::INT32, CollectionType::WSET)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "s-fs-int32") { + std::cout << "<!-- Benchmark 
SingleValueNumericPostingAttribute<int32_t> -->" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("s-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "a-fs-int32") { + std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (array) -->" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "ws-fs-int32") { + std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (wset) -->" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "s-string") { + std::cout << "<!-- Benchmark SingleValueStringAttribute -->" << std::endl; + ptr = AttributeFactory::createAttribute("s-string", AttrConfig(BasicType::STRING, CollectionType::SINGLE)); + benchmarkString(ptr); + + } else if (_config._attribute == "a-string") { + std::cout << "<!-- Benchmark ArrayStringAttribute (array) -->" << std::endl; + ptr = AttributeFactory::createAttribute("a-string", AttrConfig(BasicType::STRING, CollectionType::ARRAY)); + benchmarkString(ptr); + + } else if (_config._attribute == "ws-string") { + std::cout << "<!-- Benchmark WeightedSetStringAttribute (wset) -->" << std::endl; + ptr = AttributeFactory::createAttribute("ws-string", AttrConfig(BasicType::STRING, CollectionType::WSET)); + benchmarkString(ptr); + + } else if (_config._attribute == "s-fs-string") { + std::cout << "<!-- Benchmark SingleValueStringPostingAttribute (single fast search) -->" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("s-fs-string", cfg); + 
benchmarkString(ptr); + + } else if (_config._attribute == "a-fs-string") { + std::cout << "<!-- Benchmark ArrayStringPostingAttribute (array fast search) -->" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("a-fs-string", cfg); + benchmarkString(ptr); + + } else if (_config._attribute == "ws-fs-string") { + std::cout << "<!-- Benchmark WeightedSetStringPostingAttribute (wset fast search) -->" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("ws-fs-string", cfg); + benchmarkString(ptr); + + } + + if (dc._writeAttribute) { + std::cout << "<!-- Writing attribute to disk -->" << std::endl; + ptr->saveAs(ptr->getBaseFileName()); + } + + std::cout << "</attribute-benchmark>" << std::endl; + + return 0; +} +} + +int main(int argc, char ** argv) +{ + search::AttributeBenchmark myapp; + return myapp.Entry(argc, argv); +} + diff --git a/searchlib/src/tests/attribute/attributebenchmark.rb b/searchlib/src/tests/attribute/attributebenchmark.rb new file mode 100644 index 00000000000..44b08ec4389 --- /dev/null +++ b/searchlib/src/tests/attribute/attributebenchmark.rb @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
# Drives ./attributebenchmark over a grid of attribute vectors, document
# counts and unique-value ratios. Every run's output is redirected to a log
# file under 03-27-full/ (the directory must already exist).

# Vectors to benchmark; the "-old" variants are currently disabled:
# "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"
vectors = ["sv-num-new", "mv-num-new", "sv-string-new", "mv-string-new"]
num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
unique_percent = [0.001, 0.01, 0.05, 0.20, 0.50]

vectors.each do |vector|
  num_docs.each do |num|
    unique_percent.each do |percent|
      # number of unique values, as a fraction of the document count
      unique = num * percent

      # populate + update run with a single searcher
      command = "./attributebenchmark -n #{num} -u 1000000 -v #{unique} -p 2 -r 1 -s 1 -q 1000 #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-p2-r1-s1-q1000.log 2>&1"
      puts command
      `#{command}`

      # searcher scaling runs with a concurrent updater: 1, 2, 4, 8, 16 searchers
      [1, 2, 4, 8, 16].each do |s|
        command = "./attributebenchmark -n #{num} -v #{unique} -p 1 -r 0 -s #{s} -q 100 -b #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-s#{s}-q100-b.log 2>&1"
        puts command
        `#{command}`
      end
    end
  end
end
+vespa_add_executable(searchlib_attributefilewriter_test_app + SOURCES + attributefilewriter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributefilewriter_test_app COMMAND searchlib_attributefilewriter_test_app) diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp new file mode 100644 index 00000000000..acf61cd58bb --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attributefilewriter_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/attribute/attributefilewriter.h> +#include <vespa/searchlib/attribute/attributefilebufferwriter.h> +#include <vespa/searchlib/util/fileutil.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> + +using search::index::DummyFileHeaderContext; + +namespace search +{ + +namespace +{ + +vespalib::string testFileName("test.dat"); +vespalib::string hello("Hello world"); + +void removeTestFile() { FastOS_File::Delete(testFileName.c_str()); } + +struct Fixture { + TuneFileAttributes _tuneFileAttributes; + DummyFileHeaderContext _fileHeaderContext; + IAttributeSaveTarget::Config _cfg; + const vespalib::string _desc; + AttributeFileWriter _writer; + + Fixture() + : _tuneFileAttributes(), + _fileHeaderContext(), + _cfg(), + _desc("Attribute file sample description"), + _writer(_tuneFileAttributes, + _fileHeaderContext, + _cfg, + _desc) + { + removeTestFile(); + } + + ~Fixture() { + removeTestFile(); + } + +}; + +} + + 
+TEST_F("Test that we can write empty attribute file", Fixture) +{ + EXPECT_TRUE(f._writer.open(testFileName)); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(0u, loaded->size()); +} + + +TEST_F("Test that we destroy writer without calling close", Fixture) +{ + EXPECT_TRUE(f._writer.open(testFileName)); +} + + +TEST_F("Test that buffer writer passes on written data", Fixture) +{ + std::vector<int> a; + const size_t mysize = 3000000; + const size_t writerBufferSize = AttributeFileBufferWriter::BUFFER_SIZE; + EXPECT_GREATER(mysize * sizeof(int), writerBufferSize); + a.reserve(mysize); + search::Rand48 rnd; + for (uint32_t i = 0; i < mysize; ++i) { + a.emplace_back(rnd.lrand48()); + } + EXPECT_TRUE(f._writer.open(testFileName)); + std::unique_ptr<BufferWriter> writer(f._writer.allocBufferWriter()); + writer->write(&a[0], a.size() * sizeof(int)); + writer->flush(); + writer.reset(); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(a.size() * sizeof(int), loaded->size()); + EXPECT_TRUE(memcmp(&a[0], loaded->buffer(), loaded->size()) == 0); +} + + +TEST_F("Test that we can pass buffer directly", Fixture) +{ + using Buffer = IAttributeFileWriter::Buffer; + Buffer buf = f._writer.allocBuf(hello.size()); + buf->writeBytes(hello.c_str(), hello.size()); + EXPECT_TRUE(f._writer.open(testFileName)); + f._writer.writeBuf(std::move(buf)); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(hello.size(), loaded->size()); + EXPECT_TRUE(memcmp(hello.c_str(), loaded->buffer(), loaded->size()) == 0); +} + + +} + + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/attribute/attributeguard.cpp b/searchlib/src/tests/attribute/attributeguard.cpp new file mode 100644 index 00000000000..5c90caa094b --- /dev/null +++ b/searchlib/src/tests/attribute/attributeguard.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 
#!/bin/bash
# Runs the attribute guard unit test (optionally under $VALGRIND), removes the
# files it may leave behind, and exits with the test's own exit status so a
# failing test is not masked by the cleanup commands.
$VALGRIND ./searchlib_attributeguard_test_app
status=$?
rm -rf *.dat
rm -rf *.idx
rm -rf *.weight
rm -rf clstmp
rm -rf alstmp
exit $status
+vespa_add_executable(searchlib_attributemanager_test_app + SOURCES + attributemanager_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributemanager_test_app COMMAND searchlib_attributemanager_test_app) diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp new file mode 100644 index 00000000000..bf247668843 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -0,0 +1,422 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attribute_test"); +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributemanager.h> +#include <vespa/searchlib/attribute/configconverter.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.hpp> +#include <vespa/searchlib/attribute/stringattribute.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <algorithm> + +using namespace config; +using namespace vespa::config::search; +using namespace search; +using namespace search::attribute; +using vespalib::tensor::TensorType; +using std::shared_ptr; + +typedef BasicType BT; +typedef CollectionType CT; +typedef AttributeVector::SP AVSP; + +namespace search { + +class AttributeManagerTest : public vespalib::TestApp +{ +private: + void verifyLoad(AttributeVector & v); + void testLoad(); + void testGuards(); + void testConfigConvert(); + void testContext(); + + bool + assertDataType(BT::Type exp, + AttributesConfig::Attribute::Datatype in); + + bool + assertCollectionType(CollectionType exp, + AttributesConfig::Attribute::Collectiontype in, + bool removeIfZ = false, + bool 
createIfNe = false); + +public: + AttributeManagerTest() + { + } + int Main(); +}; + + +typedef MultiValueNumericAttribute< IntegerAttributeTemplate<int32_t>, + multivalue::MVMTemplateArg< + multivalue::Value<int32_t>, multivalue::Index32> > +TestAttributeBase; + +class TestAttribute : public TestAttributeBase +{ +public: + TestAttribute(const std::string &name) + : + TestAttributeBase(name) + { + } + + generation_t + getGen() const + { + return getCurrentGeneration(); + } + + uint32_t + getRefCount(generation_t gen) const + { + return getGenerationRefCount(gen); + } + + void + incGen() + { + incGeneration(); + } + + void + updateFirstUsedGen(void) + { + updateFirstUsedGeneration(); + } + + generation_t + getFirstUsedGen() const + { + return getFirstUsedGeneration(); + } +}; + + +void +AttributeManagerTest::testGuards() +{ + AttributeVector::SP vec(new TestAttribute("mvint") ); + TestAttribute * v = static_cast<TestAttribute *> (vec.get()); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + { + AttributeGuard g0(vec); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + { + AttributeGuard g1(vec); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(2)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + + v->incGen(); + EXPECT_EQUAL(v->getGen(), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + { + AttributeGuard g0(vec); + EXPECT_EQUAL(v->getGen(), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + 
EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + { + v->incGen(); + AttributeGuard g1(vec); + EXPECT_EQUAL(v->getGen(), unsigned(2)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + v->updateFirstUsedGeneration(); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2)); + EXPECT_EQUAL(v->getGen(), unsigned(2)); +} + + +void +AttributeManagerTest::verifyLoad(AttributeVector & v) +{ + EXPECT_TRUE( !v.isLoaded() ); + EXPECT_TRUE( v.load() ); + EXPECT_TRUE( v.isLoaded() ); + EXPECT_EQUAL( v.getNumDocs(), size_t(100) ); +} + + +void +AttributeManagerTest::testLoad() +{ + { + TestAttributeBase v("mvint"); + EXPECT_TRUE(!v.isLoaded()); + for(size_t i(0); i < 100; i++) { + AttributeVector::DocId doc; + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_TRUE( doc == i); + } + EXPECT_TRUE( v.getNumDocs() == 100); + for(size_t i(0); i < 100; i++) { + for(size_t j(0); j < i; j++) { + EXPECT_TRUE( v.append(i, j, 1) ); + } + v.commit(); + EXPECT_TRUE(size_t(v.getValueCount(i)) == i); + EXPECT_EQUAL(v.getMaxValueCount(), std::max(size_t(1), i)); + } + EXPECT_TRUE(v.isLoaded()); + EXPECT_TRUE(v.save()); + EXPECT_TRUE(v.isLoaded()); + } + { + TestAttributeBase v("mvint"); + verifyLoad(v); + } + { + AttributeVector::Config config(BT::INT32, + CollectionType::ARRAY); + TestAttributeBase v("mvint", config); + verifyLoad(v); + } + { + AttributeManager manager; + AttributeVector::Config config(BT::INT32, + 
CollectionType::ARRAY); + EXPECT_TRUE(manager.addVector("mvint", config)); + AttributeManager::AttributeList list; + manager.getAttributeList(list); + EXPECT_TRUE(list.size() == 1); + EXPECT_TRUE( list[0]->isLoaded()); + AttributeGuard::UP attrG(manager.getAttribute("mvint")); + EXPECT_TRUE( attrG->valid() ); + } +} + + +bool +AttributeManagerTest::assertDataType(BT::Type exp, + AttributesConfig::Attribute::Datatype in) +{ + AttributesConfig::Attribute a; + a.datatype = in; + return EXPECT_EQUAL(exp, ConfigConverter::convert(a).basicType().type()); +} + + +bool +AttributeManagerTest:: +assertCollectionType(CollectionType exp, + AttributesConfig::Attribute::Collectiontype in, + bool removeIfZ, + bool createIfNe) +{ + AttributesConfig::Attribute a; + a.collectiontype = in; + a.removeifzero = removeIfZ; + a.createifnonexistent = createIfNe; + AttributeVector::Config out = ConfigConverter::convert(a); + return EXPECT_EQUAL(exp.type(), out.collectionType().type()) && + EXPECT_EQUAL(exp.removeIfZero(), out.collectionType().removeIfZero()) && + EXPECT_EQUAL(exp.createIfNonExistant(), + out.collectionType().createIfNonExistant()); +} + + +void +AttributeManagerTest::testConfigConvert() +{ + // typedef AttributeVector::Config AVC; + typedef BT AVBT; + typedef CollectionType AVCT; + typedef AttributesConfig::Attribute CACA; + typedef ConfigConverter CC; + + EXPECT_TRUE(assertDataType(AVBT::STRING, CACA::STRING)); + EXPECT_TRUE(assertDataType(AVBT::INT8, CACA::INT8)); + EXPECT_TRUE(assertDataType(AVBT::INT16, CACA::INT16)); + EXPECT_TRUE(assertDataType(AVBT::INT32, CACA::INT32)); + EXPECT_TRUE(assertDataType(AVBT::INT64, CACA::INT64)); + EXPECT_TRUE(assertDataType(AVBT::FLOAT, CACA::FLOAT)); + EXPECT_TRUE(assertDataType(AVBT::DOUBLE, CACA::DOUBLE)); + EXPECT_TRUE(assertDataType(AVBT::PREDICATE, CACA::PREDICATE)); + EXPECT_TRUE(assertDataType(AVBT::TENSOR, CACA::TENSOR)); + EXPECT_TRUE(assertDataType(AVBT::NONE, CACA::NONE)); + + EXPECT_TRUE(assertCollectionType(AVCT::SINGLE, 
CACA::SINGLE)); + EXPECT_TRUE(assertCollectionType(AVCT::ARRAY, CACA::ARRAY)); + EXPECT_TRUE(assertCollectionType(AVCT::WSET, CACA::WEIGHTEDSET)); + EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, true, false), + CACA::SINGLE, true, false)); + EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, false, true), + CACA::SINGLE, false, true)); + + { // fastsearch + CACA a; + EXPECT_TRUE(!CC::convert(a).fastSearch()); + a.fastsearch = true; + EXPECT_TRUE(CC::convert(a).fastSearch()); + } + { // huge + CACA a; + EXPECT_TRUE(!CC::convert(a).huge()); + a.huge = true; + EXPECT_TRUE(CC::convert(a).huge()); + } + { // fastAccess + CACA a; + EXPECT_TRUE(!CC::convert(a).fastAccess()); + a.fastaccess = true; + EXPECT_TRUE(CC::convert(a).fastAccess()); + } + { // tensor + CACA a; + a.datatype = CACA::TENSOR; + a.tensortype = "tensor(x[5])"; + AttributeVector::Config out = ConfigConverter::convert(a); + EXPECT_EQUAL("tensor(x[5])", out.tensorType().toSpec()); + } +} + +bool gt_attribute(const attribute::IAttributeVector * a, const attribute::IAttributeVector * b) { + return a->getName() < b->getName(); +} + +void +AttributeManagerTest::testContext() +{ + std::vector<AVSP> attrs; + // create various attributes vectors + attrs.push_back(AttributeFactory::createAttribute("sint32", + Config(BT::INT32, CT::SINGLE))); + attrs.push_back(AttributeFactory::createAttribute("aint32", + Config(BT::INT32, CT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("wsint32", + Config(BT::INT32, CT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("dontcare", + Config(BT::INT32, CT::SINGLE))); + + // add docs + for (uint32_t i = 0; i < attrs.size(); ++i) { + attrs[i]->addDocs(64); + } + + // commit all attributes (current generation -> 1); + for (uint32_t i = 0; i < attrs.size(); ++i) { + attrs[i]->commit(); + } + + AttributeManager manager; + // add to manager + for (uint32_t i = 0; i < attrs.size(); ++i) { + manager.add(attrs[i]); + } + + { + IAttributeContext::UP first = 
manager.createContext(); + + // no generation guards taken yet + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u); + } + + for (uint32_t i = 0; i < 2; ++i) { + EXPECT_TRUE(first->getAttribute("sint32") != NULL); + EXPECT_TRUE(first->getAttribute("aint32") != NULL); + EXPECT_TRUE(first->getAttribute("wsint32") != NULL); + EXPECT_TRUE(first->getAttributeStableEnum("wsint32") != NULL); + } + EXPECT_TRUE(first->getAttribute("foo") == NULL); + EXPECT_TRUE(first->getAttribute("bar") == NULL); + + // one generation guard taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 2u : 1u) : 0u); + } + + { + IAttributeContext::UP second = manager.createContext(); + + EXPECT_TRUE(second->getAttribute("sint32") != NULL); + EXPECT_TRUE(second->getAttribute("aint32") != NULL); + EXPECT_TRUE(second->getAttribute("wsint32") != NULL); + EXPECT_TRUE(second->getAttributeStableEnum("wsint32") != NULL); + + // two generation guards taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 4u : 2u) : 0u); + } + } + + // one generation guard taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 
2u : 1u) : 0u); + } + } + + // no generation guards taken + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u); + } + + { + IAttributeContext::UP ctx = manager.createContext(); + std::vector<const attribute::IAttributeVector *> all; + ctx->getAttributeList(all); + EXPECT_EQUAL(4u, all.size()); + std::sort(all.begin(), all.end(), gt_attribute); + EXPECT_EQUAL("aint32", all[0]->getName()); + EXPECT_EQUAL("dontcare", all[1]->getName()); + EXPECT_EQUAL("sint32", all[2]->getName()); + EXPECT_EQUAL("wsint32", all[3]->getName()); + } +} + +int AttributeManagerTest::Main() +{ + TEST_INIT("attributemanager_test"); + + testLoad(); + testGuards(); + testConfigConvert(); + testContext(); + + TEST_DONE(); +} + +} // namespace search + + +TEST_APPHOOK(search::AttributeManagerTest); diff --git a/searchlib/src/tests/attribute/attributesearcher.h b/searchlib/src/tests/attribute/attributesearcher.h new file mode 100644 index 00000000000..7456d22f306 --- /dev/null +++ b/searchlib/src/tests/attribute/attributesearcher.h @@ -0,0 +1,265 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "runnable.h" +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/queryeval/hitcollector.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/compress.h> + +namespace search { + +std::unique_ptr<ResultSet> +performSearch(queryeval::SearchIterator & sb, uint32_t numDocs) +{ + queryeval::HitCollector hc(numDocs, numDocs, 0); + // assume strict toplevel search object located at start + for (sb.seek(1); ! 
/**
 * Accumulated statistics for one or more searcher clients.
 *
 * Search time is in milliseconds. The derived figures return 0 instead of
 * dividing by zero when no queries have been run or no time has elapsed, so
 * the XML report never contains NaN or inf.
 */
class AttributeSearcherStatus
{
public:
    double _totalSearchTime;   // ms
    uint64_t _totalHitCount;
    uint64_t _numQueries;
    uint64_t _numClients;

    AttributeSearcherStatus() : _totalSearchTime(0), _totalHitCount(0), _numQueries(0), _numClients(0) {}

    /** Adds the counters of another status object to this one. */
    void merge(const AttributeSearcherStatus & status) {
        _totalSearchTime += status._totalSearchTime;
        _totalHitCount += status._totalHitCount;
        _numQueries += status._numQueries;
        _numClients += status._numClients;
    }

    /** Prints the raw and derived figures as XML elements on stdout. */
    void printXML() const {
        std::cout << "<total-search-time>" << _totalSearchTime << "</total-search-time>" << std::endl; // ms
        std::cout << "<avg-search-time>" << avgSearchTime() << "</avg-search-time>" << std::endl; // ms
        std::cout << "<search-throughput>" << searchThroughout() << "</search-throughput>" << std::endl; // per/sec
        std::cout << "<total-hit-count>" << _totalHitCount << "</total-hit-count>" << std::endl;
        std::cout << "<avg-hit-count>" << avgHitCount() << "</avg-hit-count>" << std::endl;
    }

    /** Average time per query in ms; 0 when no queries were run. */
    double avgSearchTime() const {
        if (_numQueries == 0) {
            return 0.0;
        }
        return _totalSearchTime / _numQueries;
    }

    /** Queries per second across all clients; 0 when no time has elapsed. */
    double searchThroughout() const {
        if (!(_totalSearchTime > 0)) {
            return 0.0;
        }
        return _numClients * 1000 * _numQueries / _totalSearchTime;
    }

    /** Average number of hits per query; 0 when no queries were run. */
    double avgHitCount() const {
        if (_numQueries == 0) {
            return 0.0;
        }
        return _totalHitCount / static_cast<double>(_numQueries);
    }
};
+AttributeSearcher::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = strlen(term); + uint32_t termIdx = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + uint32_t queryPacketSize = vespalib::compress::Integer::compressedPositiveLength(termIdx) + + vespalib::compress::Integer::compressedPositiveLength(indexLen) + + vespalib::compress::Integer::compressedPositiveLength(termLen) + + indexLen + termLen; + buffer.resize(queryPacketSize); + char * p = &buffer[0]; + p += vespalib::compress::Integer::compressPositive(termIdx, p); + p += vespalib::compress::Integer::compressPositive(indexLen, p); + memcpy(p, index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, p); + memcpy(p, term, termLen); + p += termLen; + assert(p == (&buffer[0] + buffer.size())); +} + + +template <typename T> +class AttributeFindSearcher : public AttributeSearcher +{ +private: + const std::vector<T> & _values; + std::vector<char> _query; + +public: + AttributeFindSearcher(uint32_t id, const AttributePtr & attrPtr, const std::vector<T> & values, + uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +template <typename T> +void +AttributeFindSearcher<T>::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple term query + vespalib::asciistream ss; + ss << _values[i % _values.size()].getValue(); + this->buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = 
searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class RangeSpec +{ +public: + int64_t _min; + int64_t _max; + int64_t _range; + RangeSpec(int64_t min, int64_t max, int64_t range) : + _min(min), _max(max), _range(range) + { + assert(_min < _max); + assert(_range <= (_max - _min)); + } +}; + +class RangeIterator +{ +private: + RangeSpec _spec; + int64_t _a; + int64_t _b; + +public: + RangeIterator(const RangeSpec & spec) : _spec(spec), _a(spec._min), _b(spec._min + _spec._range) {} + RangeIterator & operator++() { + _a += _spec._range; + _b += _spec._range; + if (_b > _spec._max) { + _a = _spec._min; + _b = _spec._min + _spec._range; + } + return *this; + } + int64_t a() const { return _a; } + int64_t b() const { return _b; } +}; + +class AttributeRangeSearcher : public AttributeSearcher +{ +private: + RangeSpec _spec; + std::vector<char> _query; + +public: + AttributeRangeSearcher(uint32_t id, const AttributePtr & attrPtr, const RangeSpec & spec, + uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _spec(spec), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +void +AttributeRangeSearcher::doRun() +{ + _timer.SetNow(); + RangeIterator iter(_spec); + for (uint32_t i = 0; i < _status._numQueries; ++i, ++iter) { + // build simple range term query + vespalib::asciistream ss; + ss << "[" << iter.a() << ";" << iter.b() << "]"; + buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, 
true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class AttributePrefixSearcher : public AttributeSearcher +{ +private: + const std::vector<vespalib::string> & _values; + std::vector<char> _query; + +public: + AttributePrefixSearcher(uint32_t id, const AttributePtr & attrPtr, + const std::vector<vespalib::string> & values, uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +void +AttributePrefixSearcher::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple prefix term query + buildTermQuery(_query, _attrPtr->getName(), _values[i % _values.size()].c_str(), true); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + + +} // search + diff --git a/searchlib/src/tests/attribute/attributeupdater.h b/searchlib/src/tests/attribute/attributeupdater.h new file mode 100644 index 00000000000..5193ca0f873 --- /dev/null +++ b/searchlib/src/tests/attribute/attributeupdater.h @@ -0,0 +1,299 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/searchlib/util/randomgenerator.h> +#include "runnable.h" +#include <vespa/searchlib/attribute/attribute.h> + +#define VALIDATOR_STR(str) #str +#define VALIDATOR_ASSERT(rc) reportAssert(rc, __FILE__, __LINE__, VALIDATOR_STR(rc)) +#define VALIDATOR_ASSERT_EQUAL(a, b) reportAssertEqual(__FILE__, __LINE__, VALIDATOR_STR(a), VALIDATOR_STR(b), a, b) + +namespace search { + +class AttributeValidator +{ +private: + uint32_t _totalCnt; + +public: + AttributeValidator() : _totalCnt(0) {} + uint32_t getTotalCnt() const { return _totalCnt; } + bool reportAssert(bool rc, const vespalib::string & file, uint32_t line, const vespalib::string & str) { + _totalCnt++; + if (!rc) { + std::cout << "Assert " << _totalCnt << " failed: \"" << str << "\" (" + << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } + template <class A, class B> + bool reportAssertEqual(const vespalib::string & file, uint32_t line, + const vespalib::string & aStr, const vespalib::string & bStr, + const A & a, const B & b) { + _totalCnt++; + if (!(a == b)) { + std::cout << "Assert equal failed: " << std::endl; + std::cout << aStr << ": " << a << std::endl; + std::cout << bStr << ": " << b << std::endl; + std::cout << "(" << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } +}; + +class AttributeUpdaterStatus +{ +public: + double _totalUpdateTime; + uint64_t _numDocumentUpdates; + uint64_t _numValueUpdates; + + AttributeUpdaterStatus() : + _totalUpdateTime(0), _numDocumentUpdates(0), _numValueUpdates(0) {} + void reset() { + _totalUpdateTime = 0; + _numDocumentUpdates = 0; + _numValueUpdates = 0; + } + void printXML() const { + std::cout << "<total-update-time>" << _totalUpdateTime << "</total-update-time>" << std::endl; + std::cout << "<documents-updated>" << _numDocumentUpdates << "</documents-updated>" << std::endl; + std::cout << "<document-update-throughput>" << documentUpdateThroughput() << "</document-update-throughput>" 
<< std::endl; + std::cout << "<avg-document-update-time>" << avgDocumentUpdateTime() << "</avg-document-update-time>" << std::endl; + std::cout << "<values-updated>" << _numValueUpdates << "</values-updated>" << std::endl; + std::cout << "<value-update-throughput>" << valueUpdateThroughput() << "</value-update-throughput>" << std::endl; + std::cout << "<avg-value-update-time>" << avgValueUpdateTime() << "</avg-value-update-time>" << std::endl; + } + double documentUpdateThroughput() const { + return _numDocumentUpdates * 1000 / _totalUpdateTime; + } + double avgDocumentUpdateTime() const { + return _totalUpdateTime / _numDocumentUpdates; + } + double valueUpdateThroughput() const { + return _numValueUpdates * 1000 / _totalUpdateTime; + } + double avgValueUpdateTime() const { + return _totalUpdateTime / _numValueUpdates; + } +}; + +// AttributeVectorInstance, AttributeVectorType, AttributeVectorBufferType +template <typename Vector, typename T, typename BT> +class AttributeUpdater +{ +protected: + typedef AttributeVector::SP AttributePtr; + typedef std::map<uint32_t, std::vector<T> > AttributeCommit; + + const AttributePtr & _attrPtr; + Vector & _attrVec; + const std::vector<T> & _values; + std::vector<T> _buffer; + std::vector<BT> _getBuffer; + RandomGenerator & _rndGen; + AttributeCommit _expected; + FastOS_Time _timer; + AttributeUpdaterStatus _status; + AttributeValidator _validator; + + // config + bool _validate; + uint32_t _commitFreq; + uint32_t _minValueCount; + uint32_t _maxValueCount; + + uint32_t getRandomCount() { + return _rndGen.rand(_minValueCount, _maxValueCount); + } + uint32_t getRandomDoc() { + return _rndGen.rand(0, _attrPtr->getNumDocs() - 1); + } + const T & getRandomValue() { + return _values[_rndGen.rand(0, _values.size() - 1)]; + } + void updateValues(uint32_t doc); + void commit(); + +public: + AttributeUpdater(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + 
uint32_t minValueCount, uint32_t maxValueCount) : + _attrPtr(attrPtr), _attrVec(*(static_cast<Vector *>(attrPtr.get()))), + _values(values), _buffer(), _getBuffer(), _rndGen(rndGen), _expected(), _timer(), _status(), _validator(), + _validate(validate), _commitFreq(commitFreq), _minValueCount(minValueCount), _maxValueCount(maxValueCount) + { + } + void resetStatus() { + _status.reset(); + } + const AttributeUpdaterStatus & getStatus() const { + return _status; + } + const AttributeValidator & getValidator() const { + return _validator; + } + void populate(); + void update(uint32_t numUpdates); +}; + +template <typename Vector, typename T, typename BT> +class AttributeUpdaterThread : public AttributeUpdater<Vector, T, BT>, public Runnable +{ +private: + typedef AttributeVector::SP AttributePtr; + +public: + AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount) : + AttributeUpdater<Vector, T, BT>(attrPtr, values, rndGen, validate, commitFreq, minValueCount, maxValueCount), + Runnable(0) {} + + virtual void doRun(); +}; + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::updateValues(uint32_t doc) +{ + uint32_t valueCount = getRandomCount(); + + if (_validate) { + _buffer.clear(); + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + if (_attrPtr->hasWeightedSetType()) { + bool exists = false; + for (typename std::vector<T>::iterator iter = _buffer.begin(); iter != _buffer.end(); ++iter) { + if (iter->getValue() == value.getValue()) { + exists = true; + iter->setWeight(value.getWeight()); + break; + } + } + if (!exists) { + _buffer.push_back(value); + } + } else { + _buffer.push_back(value); + } + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + 
_buffer.push_back(getRandomValue()); + _attrVec.update(doc, _buffer.back().getValue()); + } + _expected[doc] = _buffer; + + } else { + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + _attrVec.update(doc, getRandomValue().getValue()); + } + } + + _status._numDocumentUpdates++; + _status._numValueUpdates += (_attrPtr->hasMultiValue() ? valueCount: 1); +} + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::commit() +{ + AttributeGuard guard(this->_attrPtr); + if (_validate) { + _attrPtr->commit(); + _getBuffer.resize(_maxValueCount); + for (typename AttributeCommit::iterator iter = _expected.begin(); + iter != _expected.end(); ++iter) + { + uint32_t valueCount = _attrPtr->get(iter->first, &_getBuffer[0], _getBuffer.size()); + _validator.VALIDATOR_ASSERT(_minValueCount <= valueCount && valueCount <= _maxValueCount); + if (valueCount != iter->second.size()) { + std::cout << "validate(" << iter->first << ")" << std::endl; + std::cout << "expected(" << iter->second.size() << ")" << std::endl; + for (size_t i = 0; i < iter->second.size(); ++i) { + std::cout << " [" << iter->second[i].getValue() << ", " << iter->second[i].getWeight() << "]" << std::endl; + } + std::cout << "actual(" << valueCount << ")" << std::endl; + for (size_t i = 0; i < valueCount; ++i) { + std::cout << " [" << _getBuffer[i].getValue() << ", " << _getBuffer[i].getWeight() << "]" << std::endl; + } + } + _validator.VALIDATOR_ASSERT_EQUAL(valueCount, iter->second.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getValue(), iter->second[i].getValue()); + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getWeight(), iter->second[i].getWeight()); + } + } + _expected.clear(); + } else { + _attrPtr->commit(); + } +} + +template <typename Vector, 
typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::populate() +{ + _timer.SetNow(); + for (uint32_t doc = 0; doc < _attrPtr->getNumDocs(); ++doc) { + updateValues(doc); + if (doc % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::update(uint32_t numUpdates) +{ + _timer.SetNow(); + for (uint32_t i = 0; i < numUpdates; ++i) { + uint32_t doc = getRandomDoc(); + updateValues(doc); + if (i % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdaterThread<Vector, T, BT>::doRun() +{ + this->_timer.SetNow(); + while(!_done) { + uint32_t doc = this->getRandomDoc(); + this->updateValues(doc); + if (this->_status._numDocumentUpdates % this->_commitFreq == (this->_commitFreq - 1)) { + this->commit(); + } + } + this->commit(); + this->_status._totalUpdateTime += this->_timer.MilliSecsToNow(); +} + + +} // search + diff --git a/searchlib/src/tests/attribute/benchmarkplotter.rb b/searchlib/src/tests/attribute/benchmarkplotter.rb new file mode 100644 index 00000000000..d77c92c8acd --- /dev/null +++ b/searchlib/src/tests/attribute/benchmarkplotter.rb @@ -0,0 +1,134 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
require 'rexml/document'

# Writes a gnuplot command file ("plot_graph.cmd") and runs gnuplot on it.
#
# plot_data    - path of the whitespace separated data file
# plot_png     - path of the PNG file gnuplot should produce
# title        - graph title
# xlabel       - label of the x axis
# ylabel       - label of the y axis
# graph_titles - one legend title per data series; series i is read from
#                column i + 2 of the data file (column 1 is the x value)
def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles)
  plots = graph_titles.each_with_index.map do |graph_title, idx|
    "\"#{plot_data}\" using 1:#{idx + 2} title \"#{graph_title}\" with linespoints"
  end

  plot_cmd = "set terminal png\n"
  plot_cmd += "set output \"#{plot_png}\"\n"
  plot_cmd += "set title \"#{title}\"\n"
  plot_cmd += "set xlabel \"#{xlabel}\"\n"
  plot_cmd += "set ylabel \"#{ylabel}\"\n"
  plot_cmd += "plot " + plots.join(", ")

  # Block form closes the file even if the write fails.
  File.open("plot_graph.cmd", "w") { |cmd_file| cmd_file.write(plot_cmd) }

  cmd = "gnuplot plot_graph.cmd"
  puts cmd
  puts `#{cmd}`
end

# Reads one benchmark result file and extracts a value from it.
#
# filename   - XML file to read
# xml_getter - name (symbol) of the method called with the XML root element
#
# Returns what the getter returns, or 0 when the file is missing or is not
# well-formed XML (a message is printed in both cases).
def read_benchmark_value(filename, xml_getter)
  # File.read does not leave an open file handle behind.
  xml_root = REXML::Document.new(File.read(filename)).root
  send(xml_getter, xml_root)
rescue REXML::ParseException
  puts "Could not parse file: #{filename}"
  0
rescue Errno::ENOENT
  puts "Could not open file: #{filename}"
  0
end

# Builds a data file with one line per document count and one column per
# unique-value percentage.
#
# num_docs    - list of document counts (first column of the output)
# percentages - list of fractions; the number of unique values is num * prc
# input       - file name pattern; "#N" is replaced by the document count and
#               "#V" by the number of unique values
# output      - path of the data file to write
# xml_getter  - see read_benchmark_value
def extract_alpha(num_docs, percentages, input, output, xml_getter)
  File.open(output, "w") do |plot_data|
    num_docs.each do |num|
      data_line = "#{num} "
      percentages.each do |prc|
        # NOTE(review): num * prc is a Float, so "#V" is replaced by e.g.
        # "500.0" and not "500" - confirm this matches the log file names.
        unique = num * prc
        filename = input.sub("#N", "#{num}").sub("#V", "#{unique}")
        data_line += "#{read_benchmark_value(filename, xml_getter)} "
      end
      plot_data.write(data_line + "\n")
    end
  end
end

# Builds a data file with one line per document count and one column per
# searcher thread count, for a fixed unique-value percentage.
#
# num_docs    - list of document counts (first column of the output)
# percentage  - fraction; the number of unique values is num * percentage
# num_threads - list of thread counts
# input       - file name pattern; "#N", "#V" and "#S" are replaced by the
#               document count, the number of unique values and the thread count
# output      - path of the data file to write
# xml_getter  - see read_benchmark_value
def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter)
  File.open(output, "w") do |plot_data|
    num_docs.each do |num|
      data_line = "#{num} "
      # NOTE(review): Float, see the note in extract_alpha.
      unique = num * percentage
      num_threads.each do |thread|
        filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}")
        data_line += "#{read_benchmark_value(filename, xml_getter)} "
      end
      plot_data.write(data_line + "\n")
    end
  end
end

# Returns the text of <throughput> inside the <update id='0'> element.
def xml_getter_update_0_throughput(xml_root)
  return xml_root.elements["update[@id='0']"].elements["throughput"].text
end

+def xml_getter_search_throughput(xml_root) + return xml_root.elements["total-searcher-summary"].elements["search-throughput"].text +end + +def xml_getter_updater_thread_throughput(xml_root) + return throughput = xml_root.elements["updater-summary"].elements["throughput"].text +end + + +vectors = ["mv-num-new"]#, "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"] +num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000] +unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50] +num_threads = [1, 2, 4, 8, 16] + +inputs = ["03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log", + "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"] +graph_titles = [[], []] +unique_percentages.each do |percentage| + graph_titles[0].push("#{percentage * 100} % uniques") +end +num_threads.each do |thread| + graph_titles[1].push("#{thread} searcher thread(s)") +end + +vectors.each do |vector| + extract_alpha(num_docs, unique_percentages, + inputs[0].sub("#AV", vector), + "#{vector}-update-speed.dat", + :xml_getter_update_0_throughput) + plot_graph("#{vector}-update-speed.dat", + "#{vector}-update-speed.png", + "Update speed when applying 1M updates", + "Number of documents", "Updates per/sec", graph_titles[0]) + + extract_alpha(num_docs, unique_percentages, + inputs[0].sub("#AV", vector), + "#{vector}-search-speed.dat", + :xml_getter_search_throughput) + plot_graph("#{vector}-search-speed.dat", + "#{vector}-search-speed.png", + "Search speed with 1 searcher thread", + "Number of documents", "Queries per/sec", graph_titles[0]) + + extract_beta(num_docs, 0.01, num_threads, + inputs[1].sub("#AV", vector), + "#{vector}-search-speed-multiple.dat", + :xml_getter_search_throughput) + plot_graph("#{vector}-search-speed-multiple.dat", + "#{vector}-search-speed-multiple.png", + "Search speed with 1 update thread and X searcher threads", + "Number of documents", "Queries per/sec", graph_titles[1]) + + extract_beta(num_docs, 0.01, num_threads, + 
inputs[1].sub("#AV", vector), + "#{vector}-update-speed-multiple.dat", + :xml_getter_updater_thread_throughput) + plot_graph("#{vector}-update-speed-multiple.dat", + "#{vector}-update-speed-multiple.png", + "Update speed with 1 update thread and X searcher threads", + "Number of documents", "Updates per/sec", graph_titles[1]) +end diff --git a/searchlib/src/tests/attribute/bitvector/.gitignore b/searchlib/src/tests/attribute/bitvector/.gitignore new file mode 100644 index 00000000000..05ec0a4df59 --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/.gitignore @@ -0,0 +1 @@ +searchlib_bitvector_test_app diff --git a/searchlib/src/tests/attribute/bitvector/CMakeLists.txt b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..bc65fc04dc4 --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bitvector_test_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_bitvector_test_app COMMAND searchlib_bitvector_test_app) diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..85f83d217eb --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -0,0 +1,632 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bitvector_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/i_document_weight_attribute.h> +#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/common/bitvectoriterator.h> + +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::AttributeFactory; +using search::FloatingPointAttribute; +using search::IntegerAttribute; +using search::StringAttribute; +using search::AttributeVector; +using search::ParseItem; +using search::fef::TermFieldMatchData; +using search::BitVector; +using search::BitVectorIterator; +using search::queryeval::SearchIterator; + +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr; + +struct BitVectorTest +{ + typedef AttributeVector::SP AttributePtr; + + BitVectorTest() { } + + ~BitVectorTest() { } + + template <typename VectorType> + VectorType & as(AttributePtr &v); + IntegerAttribute & asInt(AttributePtr &v); + StringAttribute & asString(AttributePtr &v); + FloatingPointAttribute & asFloat(AttributePtr &v); + + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter); + + void + addDocs(const AttributePtr &v, size_t sz); + + template <typename VectorType> + void populate(VectorType &v, + uint32_t low, + uint32_t 
high, + bool set); + + template <typename VectorType> + void populateAll(VectorType &v, + uint32_t low, + uint32_t high, + bool set); + + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template <typename V> + vespalib::string + getSearchStr(); + + template <typename V, typename T> + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); + + template <typename V> + SearchContextPtr + getSearch(const V & vec, bool useBitVector); + + void + checkSearch(AttributePtr v, + SearchBasePtr sb, + TermFieldMatchData &md, + uint32_t expFirstDocId, + uint32_t expFastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride); + + void + checkSearch(AttributePtr v, + SearchContextPtr sc, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride); + + template <typename VectorType, typename BufferType> + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter); + + template <typename VectorType, typename BufferType> + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref); +}; + + +template <typename VectorType> +VectorType & +BitVectorTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast<VectorType *>(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +BitVectorTest::asInt(AttributePtr &v) +{ + return as<IntegerAttribute>(v); +} + + +StringAttribute & +BitVectorTest::asString(AttributePtr &v) +{ + return as<StringAttribute>(v); +} + + +FloatingPointAttribute & +BitVectorTest::asFloat(AttributePtr &v) +{ + return as<FloatingPointAttribute>(v); +} + + +void +BitVectorTest::buildTermQuery(std::vector<char> &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t 
termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <> +vespalib::string +BitVectorTest::getSearchStr<IntegerAttribute>() +{ + return "[-42;-42]"; +} + +template <> +vespalib::string +BitVectorTest::getSearchStr<FloatingPointAttribute>() +{ + return "[-42.0;-42.0]"; +} + +template <> +vespalib::string +BitVectorTest::getSearchStr<StringAttribute>() +{ + return "foo"; +} + + +template <typename V, typename T> +SearchContextPtr +BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, + bool useBitVector) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast<const AttributeVector &>(vec)). 
+ getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params().useBitVector(useBitVector)); +} + + +template <> +SearchContextPtr +BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, + bool useBitVector) +{ + return getSearch<IntegerAttribute>(v, "[-42;-42]", false, useBitVector); +} + +template <> +SearchContextPtr +BitVectorTest:: +getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, + bool useBitVector) +{ + return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, + useBitVector); +} + +template <> +SearchContextPtr +BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, + bool useBitVector) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false, useBitVector); +} + + +BitVectorTest::AttributePtr +BitVectorTest::make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter) +{ + cfg.setFastSearch(fastSearch); + cfg.setEnableBitVectors(enableBitVectors); + cfg.setEnableOnlyBitVector(enableOnlyBitVector); + cfg.setIsFilter(filter); + AttributePtr v = AttributeFactory::createAttribute(pref, cfg); + return v; +} + + +void +BitVectorTest::addDocs(const AttributePtr &v, size_t sz) +{ + while (v->getNumDocs() < sz) { + AttributeVector::DocId docId = 0; + EXPECT_TRUE(v->addDoc(docId)); + v->clearDoc(docId); + } + EXPECT_TRUE(v->getNumDocs() == sz); + v->commit(true); +} + + +template <> +void +BitVectorTest::populate(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE(v.update(i, -42)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populate(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); 
i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE(v.update(i, -42.0)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populate(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE(v.update(i, "foo")); + } + } + v.commit(); +} + +template <> +void +BitVectorTest::populateAll(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE(v.update(i, -42)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populateAll(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE(v.update(i, -42.0)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populateAll(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE(v.update(i, "foo")); + } + } + v.commit(); +} + + +void +BitVectorTest::checkSearch(AttributePtr v, + SearchBasePtr sb, + TermFieldMatchData &md, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride) +{ + 
sb->initFullRange(); + sb->seek(1u); + uint32_t docId = sb->getDocId(); + uint32_t lastDocId = 0; + uint32_t docFreq = 0; + EXPECT_EQUAL(expFirstDocId, docId); + while (docId != search::endDocId) { + lastDocId = docId; + ++docFreq, + assert(!checkStride || (docId % 5) == 2u); + sb->unpack(docId); + EXPECT_EQUAL(md.getDocId(), docId); + if (v->getCollectionType() == CollectionType::SINGLE || + !weights) { + EXPECT_EQUAL(1, md.getWeight()); + } else if (v->getCollectionType() == CollectionType::ARRAY) { + EXPECT_EQUAL(2, md.getWeight()); + } else { + if (v->getBasicType() == BasicType::STRING) { + EXPECT_EQUAL(24, md.getWeight()); + } else { + EXPECT_EQUAL(-3, md.getWeight()); + } + } + sb->seek(docId + 1); + docId = sb->getDocId(); + } + EXPECT_EQUAL(expLastDocId, lastDocId); + EXPECT_EQUAL(expDocFreq, docFreq); +} + + +void +BitVectorTest::checkSearch(AttributePtr v, + SearchContextPtr sc, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride) +{ + TermFieldMatchData md; + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + checkSearch(v, std::move(sb), md, + expFirstDocId, expLastDocId, expDocFreq, weights, + checkStride); +} + + +template <typename VectorType, typename BufferType> +void +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter) +{ + Config cfg(bt, ct); + AttributePtr v = make(cfg, pref, fastSearch, + enableBitVectors, enableOnlyBitVector, filter); + addDocs(v, 1024); + VectorType &tv = as<VectorType>(v); + populate(tv, 2, 1023, true); + + SearchContextPtr sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, + true); + sc = getSearch<VectorType>(tv, false); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && + !filter, true); + const search::IDocumentWeightAttribute *dwa = + 
v->asDocumentWeightAttribute(); + if (dwa != NULL) { + search::IDocumentWeightAttribute::LookupResult lres = + dwa->lookup(getSearchStr<VectorType>()); + typedef search::queryeval::DocumentWeightSearchIterator DWSI; + typedef search::queryeval::SearchIterator SI; + TermFieldMatchData md; + SI::UP dwsi(new DWSI(md, *dwa, lres)); + if (!enableOnlyBitVector) { + checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true); + } else { + dwsi->initFullRange(); + EXPECT_TRUE(dwsi->isAtEnd()); + } + } + populate(tv, 2, 973, false); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector && + !filter, true); + populate(tv, 2, 973, true); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, + true); + addDocs(v, 15000); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && + !filter, true); + populateAll(tv, 10, 15000, true); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 14999, 14992, + !enableBitVectors && !filter, + false); +} + + +template <typename VectorType, typename BufferType> +void +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref) +{ + LOG(info, + "test run, pref is %s", + pref.c_str()); + test<VectorType, BufferType>(bt, ct, pref, + false, false, false, false); + test<VectorType, BufferType>(bt, ct, pref, + false, false, false, true); + test<VectorType, BufferType>(bt, ct, pref, + true, false, false, false); + test<VectorType, BufferType>(bt, ct, pref, + true, false, false, true); + test<VectorType, BufferType>(bt, ct, pref, + true, true, false, false); + test<VectorType, BufferType>(bt, ct, pref, + true, true, false, true); + test<VectorType, BufferType>(bt, ct, pref, + true, true, true, false); + test<VectorType, BufferType>(bt, ct, pref, + true, true, true, true); +} + + +TEST_F("Test bitvectors with single value int32", 
BitVectorTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); +} + +TEST_F("Test bitvectors with array value int32", BitVectorTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); +} + +TEST_F("Test bitvectors with weighted set value int32", BitVectorTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT32, + CollectionType::WSET, + "int32_sv"); +} + +TEST_F("Test bitvectors with single value double", BitVectorTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); +} + +TEST_F("Test bitvectors with array value double", BitVectorTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::ARRAY, + "double_a"); +} + +TEST_F("Test bitvectors with weighted set value double", BitVectorTest) +{ + f.template test<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); +} + +TEST_F("Test bitvectors with single value string", BitVectorTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::SINGLE, + "string_sv"); +} + +TEST_F("Test bitvectors with array value string", BitVectorTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::ARRAY, + "string_a"); +} + +TEST_F("Test bitvectors with weighted set value string", BitVectorTest) +{ + f.template test<StringAttribute, + StringAttribute::WeightedString>(BasicType::STRING, + CollectionType::WSET, + "string_ws"); +} + +TEST("Test bitvector iterators adheres to initRange") { + search::test::InitRangeVerifier initRangeTest; + BitVector::UP bv = BitVector::create(initRangeTest.getDocIdLimit()); + for (uint32_t docId: initRangeTest.getExpectedDocIds()) { + bv->setBit(docId); + 
} + TermFieldMatchData tfmd; + initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, false)); + initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, true)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/changevector_test.cpp b/searchlib/src/tests/attribute/changevector_test.cpp new file mode 100644 index 00000000000..9f0a796fd3e --- /dev/null +++ b/searchlib/src/tests/attribute/changevector_test.cpp @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("changevector_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/attribute/changevector.h> + +using namespace search; + +template <typename T> +void verifyStrictOrdering(const T & v) { + long count(0); + for (const auto & c : v) { + count++; + EXPECT_EQUAL(count, c._data.get()); + } + EXPECT_EQUAL(v.size(), size_t(count)); +} + +class Accessor { +public: + Accessor(const std::vector<long> & v) : _size(v.size()), _current(v.begin()), _end(v.end()) { } + size_t size() const { return _size; } + void next() { _current++; } + long value() const { return *_current; } + int weight() const { return *_current; } +private: + size_t _size; + std::vector<long>::const_iterator _current; + std::vector<long>::const_iterator _end; +}; + +TEST("require insert ordering is preserved for same doc") +{ + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 7, 2)); + EXPECT_EQUAL(2u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require insert ordering is preserved ") +{ + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + 
a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 5, 2)); + EXPECT_EQUAL(2u, a.size()); + a.push_back(Change(Change::NOOP, 6, 3)); + EXPECT_EQUAL(3u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require insert ordering is preserved with mix") +{ + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 5, 2)); + EXPECT_EQUAL(2u, a.size()); + a.push_back(Change(Change::NOOP, 5, 3)); + EXPECT_EQUAL(3u, a.size()); + a.push_back(Change(Change::NOOP, 6, 10)); + EXPECT_EQUAL(4u, a.size()); + std::vector<long> v({4,5,6,7,8}); + Accessor ac(v); + a.push_back(5, ac); + EXPECT_EQUAL(9u, a.size()); + a.push_back(Change(Change::NOOP, 5, 9)); + EXPECT_EQUAL(10u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require that inserting empty vector does not affect the vector.") { + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + std::vector<long> v; + Accessor ac(v); + a.push_back(1, ac); + EXPECT_EQUAL(0u, a.size()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/changevector_test.sh b/searchlib/src/tests/attribute/changevector_test.sh new file mode 100644 index 00000000000..cb70f5465a4 --- /dev/null +++ b/searchlib/src/tests/attribute/changevector_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_changevector_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git a/searchlib/src/tests/attribute/comparator/.gitignore b/searchlib/src/tests/attribute/comparator/.gitignore new file mode 100644 index 00000000000..51c5b5944c9 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +comparator_test +searchlib_comparator_test_app diff --git 
a/searchlib/src/tests/attribute/comparator/CMakeLists.txt b/searchlib/src/tests/attribute/comparator/CMakeLists.txt new file mode 100644 index 00000000000..4a14181db3c --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_comparator_test_app + SOURCES + comparator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_comparator_test_app COMMAND searchlib_comparator_test_app) diff --git a/searchlib/src/tests/attribute/comparator/DESC b/searchlib/src/tests/attribute/comparator/DESC new file mode 100644 index 00000000000..6b3ba01c89b --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/DESC @@ -0,0 +1 @@ +comparator test. Take a look at comparator_test.cpp for details. diff --git a/searchlib/src/tests/attribute/comparator/FILES b/searchlib/src/tests/attribute/comparator/FILES new file mode 100644 index 00000000000..b4c23c09022 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/FILES @@ -0,0 +1 @@ +comparator_test.cpp diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp new file mode 100644 index 00000000000..2a4c3c6fb87 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("comparator_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/enumcomparator.h> +#include <vespa/searchlib/btree/btreeroot.h> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> + +namespace search { + +using namespace btree; + +typedef EnumStoreT<NumericEntryType<int32_t> > NumericEnumStore; +typedef EnumStoreComparatorT<NumericEntryType<int32_t> > NumericComparator; + +typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore; +typedef EnumStoreComparatorT<NumericEntryType<float> > FloatComparator; + +typedef EnumStoreT<StringEntryType> StringEnumStore; +typedef EnumStoreComparatorT<StringEntryType> StringComparator; +typedef EnumStoreFoldedComparatorT<StringEntryType> FoldedStringComparator; + +typedef EnumStoreBase::Index EnumIndex; + +typedef BTreeRoot<EnumIndex, BTreeNoLeafData, + btree::NoAggregated, + const EnumStoreComparatorWrapper> TreeType; +typedef TreeType::NodeAllocatorType NodeAllocator; + +class Test : public vespalib::TestApp { +private: + void requireThatNumericComparatorIsWorking(); + void requireThatFloatComparatorIsWorking(); + void requireThatStringComparatorIsWorking(); + void requireThatComparatorWithTreeIsWorking(); + void requireThatFoldedComparatorIsWorking(); + +public: + Test() {} + int Main(); +}; + +void +Test::requireThatNumericComparatorIsWorking() +{ + NumericEnumStore es(1024, false); + EnumIndex e1, e2; + es.addEnum(10, e1); + es.addEnum(30, e2); + NumericComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + NumericComparator cmp2(es, 20); + EXPECT_TRUE(cmp2(EnumIndex(), e2)); + EXPECT_TRUE(!cmp2(e2, EnumIndex())); +} + +void +Test::requireThatFloatComparatorIsWorking() +{ + FloatEnumStore es(1024, false); 
+ EnumIndex e1, e2, e3; + es.addEnum(10.5, e1); + es.addEnum(30.5, e2); + es.addEnum(std::numeric_limits<float>::quiet_NaN(), e3); + FloatComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + EXPECT_TRUE(cmp1(e3, e1)); // nan + EXPECT_TRUE(!cmp1(e1, e3)); // nan + EXPECT_TRUE(!cmp1(e3, e3)); // nan + FloatComparator cmp2(es, 20.5); + EXPECT_TRUE(cmp2(EnumIndex(), e2)); + EXPECT_TRUE(!cmp2(e2, EnumIndex())); +} + +void +Test::requireThatStringComparatorIsWorking() +{ + StringEnumStore es(1024, false); + EnumIndex e1, e2, e3; + es.addEnum("Aa", e1); + es.addEnum("aa", e2); + es.addEnum("aB", e3); + StringComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); // similar folded, fallback to regular + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + EXPECT_TRUE(cmp1(e2, e3)); // folded compare + EXPECT_TRUE(strcmp("aa", "aB") > 0); // regular + StringComparator cmp2(es, "AB"); + EXPECT_TRUE(cmp2(EnumIndex(), e3)); + EXPECT_TRUE(!cmp2(e3, EnumIndex())); +} + +void +Test::requireThatComparatorWithTreeIsWorking() +{ + NumericEnumStore es(2048, false); + vespalib::GenerationHandler g; + TreeType t; + NodeAllocator m; + EnumIndex ei; + for (int32_t v = 100; v > 0; --v) { + NumericComparator cmp(es, v); + EXPECT_TRUE(!t.find(EnumIndex(), m, cmp).valid()); + es.addEnum(v, ei); + t.insert(ei, BTreeNoLeafData(), m, cmp); + } + EXPECT_EQUAL(100u, t.size(m)); + int32_t exp = 1; + for (TreeType::Iterator itr = t.begin(m); itr.valid(); ++itr) { + EXPECT_EQUAL(exp++, es.getValue(itr.getKey())); + } + EXPECT_EQUAL(101, exp); + t.clear(m); + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +void +Test::requireThatFoldedComparatorIsWorking() +{ + StringEnumStore es(1024, false); + EnumIndex e1, e2, e3, e4; + es.addEnum("Aa", e1); + es.addEnum("aa", e2); + es.addEnum("aB", e3); + es.addEnum("Folded", e4); + FoldedStringComparator cmp1(es); + 
EXPECT_TRUE(!cmp1(e1, e2)); // similar folded + EXPECT_TRUE(!cmp1(e2, e1)); // similar folded + EXPECT_TRUE(cmp1(e2, e3)); // folded compare + EXPECT_TRUE(!cmp1(e3, e2)); // folded compare + FoldedStringComparator cmp2(es, "fol", false); + FoldedStringComparator cmp3(es, "fol", true); + EXPECT_TRUE(cmp2(EnumIndex(), e4)); + EXPECT_TRUE(!cmp2(e4, EnumIndex())); + EXPECT_TRUE(!cmp3(EnumIndex(), e4)); // similar when prefix + EXPECT_TRUE(!cmp3(e4, EnumIndex())); // similar when prefix +} + +int +Test::Main() +{ + TEST_INIT("comparator_test"); + + requireThatNumericComparatorIsWorking(); + requireThatFloatComparatorIsWorking(); + requireThatStringComparatorIsWorking(); + requireThatComparatorWithTreeIsWorking(); + requireThatFoldedComparatorIsWorking(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::Test); + diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore new file mode 100644 index 00000000000..08cae9a48df --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore @@ -0,0 +1 @@ +searchlib_document_weight_iterator_test_app diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt new file mode 100644 index 00000000000..2a1b36a626d --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_weight_iterator_test_app + SOURCES + document_weight_iterator_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_iterator/FILES b/searchlib/src/tests/attribute/document_weight_iterator/FILES new file mode 100644 index 00000000000..9bb94dc8770 --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/FILES @@ -0,0 +1 @@ +document_weight_iterator_test.cpp diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp new file mode 100644 index 00000000000..fbe62f80843 --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchlib/attribute/i_document_weight_attribute.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefile.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/multinumericpostattribute.hpp> +#include <vespa/searchlib/attribute/multistringpostattribute.hpp> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> + +using namespace search; +using namespace search::attribute; + +AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { + Config cfg(type, collection); + cfg.setFastSearch(fast_search); + return AttributeFactory::createAttribute("my_attribute", cfg); +} + +void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { + AttributeVector::DocId docid; + for (size_t i = 0; i < limit; ++i) { + attr_ptr->addDoc(docid); + } + attr_ptr->commit(); + ASSERT_EQUAL((limit - 1), docid); +} + +template <typename ATTR, typename KEY> 
+void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { + attr->clearDoc(docid); + attr->append(docid, key, weight); + attr->commit(); +} + +void populate_long(AttributeVector::SP attr_ptr) { + IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); + set_doc(attr, 1, int64_t(111), 20); + set_doc(attr, 5, int64_t(111), 5); + set_doc(attr, 7, int64_t(111), 10); +} + +void populate_string(AttributeVector::SP attr_ptr) { + StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); + set_doc(attr, 1, "foo", 20); + set_doc(attr, 5, "foo", 5); + set_doc(attr, 7, "foo", 10); +} + +struct LongFixture { + AttributeVector::SP attr; + const IDocumentWeightAttribute *api; + LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)), + api(attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(api != nullptr); + add_docs(attr); + populate_long(attr); + } +}; + +struct StringFixture { + AttributeVector::SP attr; + const IDocumentWeightAttribute *api; + StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)), + api(attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(api != nullptr); + add_docs(attr); + populate_string(attr); + } +}; + +TEST("require that appropriate attributes support the document weight attribute interface") { + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr); +} + +TEST("require that inappropriate attributes do not support the document weight attribute interface") { + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, 
false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr); +} + +void verify_valid_lookup(IDocumentWeightAttribute::LookupResult result) { + EXPECT_TRUE(result.posting_idx.valid()); + EXPECT_EQUAL(3u, result.posting_size); + EXPECT_EQUAL(5, result.min_weight); + EXPECT_EQUAL(20, result.max_weight); +} + +void verify_invalid_lookup(IDocumentWeightAttribute::LookupResult result) { + EXPECT_FALSE(result.posting_idx.valid()); + EXPECT_EQUAL(0u, result.posting_size); + EXPECT_EQUAL(0, result.min_weight); + EXPECT_EQUAL(0, result.max_weight); +} + +TEST_F("require that integer lookup works correctly", LongFixture) { + verify_valid_lookup(f1.api->lookup("111")); + verify_invalid_lookup(f1.api->lookup("222")); +} + +TEST_F("require string lookup works correctly", StringFixture) { + verify_valid_lookup(f1.api->lookup("foo")); + verify_invalid_lookup(f1.api->lookup("bar")); +} + +void verify_posting(const IDocumentWeightAttribute 
&api, const char *term) { + auto result = api.lookup(term); + ASSERT_TRUE(result.posting_idx.valid()); + std::vector<DocumentWeightIterator> itr_store; + api.create(result.posting_idx, itr_store); + ASSERT_EQUAL(1u, itr_store.size()); + { + DocumentWeightIterator &itr = itr_store[0]; + if (itr.valid() && itr.getKey() < 1) { + itr.linearSeek(1); + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1u, itr.getKey()); // docid + EXPECT_EQUAL(20, itr.getData()); // weight + itr.linearSeek(2); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(5u, itr.getKey()); // docid + EXPECT_EQUAL(5, itr.getData()); // weight + itr.linearSeek(6); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(7u, itr.getKey()); // docid + EXPECT_EQUAL(10, itr.getData()); // weight + itr.linearSeek(8); + EXPECT_FALSE(itr.valid()); + } +} + +TEST_F("require that integer iterators are created correctly", LongFixture) { + verify_posting(*f1.api, "111"); +} + +TEST_F("require that string iterators are created correctly", StringFixture) { + verify_posting(*f1.api, "foo"); +} + +TEST("verify init range for document weight search iterator") { + search::test::InitRangeVerifier ir; + AttributeVector::SP attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)); + add_docs(attr, ir.getDocIdLimit()); + auto docids = ir.getExpectedDocIds(); + IntegerAttribute *int_attr = static_cast<IntegerAttribute *>(attr.get()); + for (auto docid: docids) { + set_doc(int_attr, docid, int64_t(123), 1); + } + const IDocumentWeightAttribute *api(attr->asDocumentWeightAttribute()); + ASSERT_TRUE(api != nullptr); + auto dict_entry = api->lookup("123"); + ASSERT_TRUE(dict_entry.posting_idx.valid()); + fef::TermFieldMatchData tfmd; + queryeval::DocumentWeightSearchIterator itr(tfmd, *api, dict_entry); + ir.verify(itr); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/enumeratedsave/.gitignore b/searchlib/src/tests/attribute/enumeratedsave/.gitignore new file mode 100644 index 00000000000..a4680f95f72 --- 
/dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/.gitignore @@ -0,0 +1,127 @@ +/double_a0_e.udat +/double_a0_ee.udat +/double_a1_e.udat +/double_a1_ee.udat +/double_a2_e.udat +/double_a2_ee.udat +/double_sv0_e.udat +/double_sv0_ee.udat +/double_sv1_e.udat +/double_sv1_ee.udat +/double_sv2_e.udat +/double_sv2_ee.udat +/double_ws0_e.udat +/double_ws0_ee.udat +/double_ws1_e.udat +/double_ws1_ee.udat +/double_ws2_e.udat +/double_ws2_ee.udat +/float_a0_e.udat +/float_a0_ee.udat +/float_a1_e.udat +/float_a1_ee.udat +/float_a2_e.udat +/float_a2_ee.udat +/float_sv0_e.udat +/float_sv0_ee.udat +/float_sv1_e.udat +/float_sv1_ee.udat +/float_sv2_e.udat +/float_sv2_ee.udat +/float_ws0_e.udat +/float_ws0_ee.udat +/float_ws1_e.udat +/float_ws1_ee.udat +/float_ws2_e.udat +/float_ws2_ee.udat +/int16_a0_e.udat +/int16_a0_ee.udat +/int16_a1_e.udat +/int16_a1_ee.udat +/int16_a2_e.udat +/int16_a2_ee.udat +/int16_sv0_e.udat +/int16_sv0_ee.udat +/int16_sv1_e.udat +/int16_sv1_ee.udat +/int16_sv2_e.udat +/int16_sv2_ee.udat +/int16_ws0_e.udat +/int16_ws0_ee.udat +/int16_ws1_e.udat +/int16_ws1_ee.udat +/int16_ws2_e.udat +/int16_ws2_ee.udat +/int32_a0_e.udat +/int32_a0_ee.udat +/int32_a1_e.udat +/int32_a1_ee.udat +/int32_a2_e.udat +/int32_a2_ee.udat +/int32_sv0_e.udat +/int32_sv0_ee.udat +/int32_sv1_e.udat +/int32_sv1_ee.udat +/int32_sv2_e.udat +/int32_sv2_ee.udat +/int32_ws0_e.udat +/int32_ws0_ee.udat +/int32_ws1_e.udat +/int32_ws1_ee.udat +/int32_ws2_e.udat +/int32_ws2_ee.udat +/int64_a0_e.udat +/int64_a0_ee.udat +/int64_a1_e.udat +/int64_a1_ee.udat +/int64_a2_e.udat +/int64_a2_ee.udat +/int64_sv0_e.udat +/int64_sv0_ee.udat +/int64_sv1_e.udat +/int64_sv1_ee.udat +/int64_sv2_e.udat +/int64_sv2_ee.udat +/int64_ws0_e.udat +/int64_ws0_ee.udat +/int64_ws1_e.udat +/int64_ws1_ee.udat +/int64_ws2_e.udat +/int64_ws2_ee.udat +/int8_a0_e.udat +/int8_a0_ee.udat +/int8_a1_e.udat +/int8_a1_ee.udat +/int8_a2_e.udat +/int8_a2_ee.udat +/int8_sv0_e.udat +/int8_sv0_ee.udat +/int8_sv1_e.udat 
+/int8_sv1_ee.udat +/int8_sv2_e.udat +/int8_sv2_ee.udat +/int8_ws0_e.udat +/int8_ws0_ee.udat +/int8_ws1_e.udat +/int8_ws1_ee.udat +/int8_ws2_e.udat +/int8_ws2_ee.udat +/str_a0_e.udat +/str_a0_ee.udat +/str_a1_e.udat +/str_a1_ee.udat +/str_a2_e.udat +/str_a2_ee.udat +/str_sv0_e.udat +/str_sv0_ee.udat +/str_sv1_e.udat +/str_sv1_ee.udat +/str_sv2_e.udat +/str_sv2_ee.udat +/str_ws0_e.udat +/str_ws0_ee.udat +/str_ws1_e.udat +/str_ws1_ee.udat +/str_ws2_e.udat +/str_ws2_ee.udat +searchlib_enumeratedsave_test_app diff --git a/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt new file mode 100644 index 00000000000..0dbb59043c1 --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_enumeratedsave_test_app + SOURCES + enumeratedsave_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_enumeratedsave_test_app COMMAND searchlib_enumeratedsave_test_app) diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp new file mode 100644 index 00000000000..312814eb55a --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -0,0 +1,944 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/document/fieldvalue/intfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefile.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/searchlib/attribute/attributefilesavetarget.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/searchlib/util/bufferwriter.h> +#include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h> + +#include <vespa/searchlib/attribute/attributevector.hpp> + +#include <vespa/log/log.h> +LOG_SETUP("enumeratedsave_test"); +#include <limits> +#include <string> +#include <iostream> + + +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::AttributeFactory; +using search::FloatingPointAttribute; +using search::IntegerAttribute; +using search::StringAttribute; +using search::RandomGenerator; +using search::ParseItem; +using search::fef::TermFieldMatchData; +using search::IAttributeFileWriter; +using search::BufferWriter; +using search::AttributeMemoryFileBufferWriter; + +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr; + +bool +FastOS_UNIX_File::Sync(void) +{ + // LOG(info, "Skip sync"); + return true; +} + 
+ +class MemAttrFileWriter : public IAttributeFileWriter +{ +private: + Buffer _buf; + +public: + MemAttrFileWriter() + : _buf() + { + } + + virtual Buffer allocBuf(size_t size) override { + return std::make_unique<BufferBuf>(size, 4096); + } + + virtual void writeBuf(Buffer buf_in) override { + if (!_buf) { + _buf = std::move(buf_in); + } else { + _buf->writeBytes(buf_in->getData(), buf_in->getDataLen()); + } + } + + const Buffer &buf() const { return _buf; } + + std::unique_ptr<BufferWriter> allocBufferWriter() override; +}; + +std::unique_ptr<BufferWriter> +MemAttrFileWriter::allocBufferWriter() +{ + if (!_buf) { + _buf = allocBuf(1); + } + return std::make_unique<AttributeMemoryFileBufferWriter>(*this); +} + +class MemAttr : public search::IAttributeSaveTarget +{ +private: + MemAttrFileWriter _datWriter; + MemAttrFileWriter _idxWriter; + MemAttrFileWriter _weightWriter; + MemAttrFileWriter _udatWriter; + +public: + typedef std::shared_ptr<MemAttr> SP; + + MemAttr(void) + : _datWriter(), + _idxWriter(), + _weightWriter(), + _udatWriter() + { + } + + // Implements IAttributeSaveTarget + virtual bool setup() { return true; } + virtual void close() {} + virtual IAttributeFileWriter &datWriter() override { return _datWriter; } + virtual IAttributeFileWriter &idxWriter() override { return _idxWriter; } + virtual IAttributeFileWriter &weightWriter() override { + return _weightWriter; + } + virtual IAttributeFileWriter &udatWriter() override { return _udatWriter; } + + bool + bufEqual(const Buffer &lhs, const Buffer &rhs) const; + + bool + operator==(const MemAttr &rhs) const; +}; + +class EnumeratedSaveTest +{ +private: + typedef AttributeVector::SP AttributePtr; + + template <typename VectorType> + VectorType & + as(AttributePtr &v); + + IntegerAttribute & + asInt(AttributePtr &v); + + StringAttribute & + asString(AttributePtr &v); + + FloatingPointAttribute & + asFloat(AttributePtr &v); + + void + addDocs(const AttributePtr &v, size_t sz); + + template <typename 
VectorType> + void populate(VectorType &v, unsigned seed, BasicType bt); + + template <typename VectorType, typename BufferType> + void compare(VectorType &a, VectorType &b); + + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template <typename V, typename T> + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix); + + template <typename V> + SearchContextPtr + getSearch(const V & vec); + + MemAttr::SP + saveMem(AttributeVector &v); + + void + checkMem(AttributeVector &v, const MemAttr &e, bool enumerated); + + MemAttr::SP + saveBoth(AttributePtr v); + + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch = false); + + void + load(AttributePtr v, const vespalib::string &name); + + template <typename VectorType, typename BufferType> + void + checkLoad(AttributePtr v, + const vespalib::string &name, + AttributePtr ev); + + template <typename VectorType, typename BufferType> + void + testReload(AttributePtr v0, + AttributePtr v1, + AttributePtr v2, + MemAttr::SP mv0, + MemAttr::SP mv1, + MemAttr::SP mv2, + MemAttr::SP emv0, + MemAttr::SP emv1, + MemAttr::SP emv2, + Config cfg, + const vespalib::string &pref, + bool fastSearch); + +public: + template <typename VectorType, typename BufferType> + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref); + + EnumeratedSaveTest() + { + } +}; + + +bool +MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const +{ + if (!EXPECT_TRUE((lhs.get() != NULL) == (rhs.get() != NULL))) + return false; + if (lhs.get() == NULL) + return true; + if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) + return false; + if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(), + lhs->getDataLen()) == 0)) + return false; + return true; +} + +bool +MemAttr::operator==(const MemAttr &rhs) const +{ + if (!EXPECT_TRUE(bufEqual(_datWriter.buf(), rhs._datWriter.buf()))) + return false; + 
if (!EXPECT_TRUE(bufEqual(_idxWriter.buf(), rhs._idxWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_weightWriter.buf(), rhs._weightWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_udatWriter.buf(), rhs._udatWriter.buf()))) + return false; + return true; +} + + +void +EnumeratedSaveTest::addDocs(const AttributePtr &v, size_t sz) +{ + if (sz) { + AttributeVector::DocId docId; + for(size_t i(0); i< sz; i++) { + EXPECT_TRUE( v->addDoc(docId) ); + } + EXPECT_TRUE( docId+1 == sz ); + EXPECT_TRUE( v->getNumDocs() == sz ); + v->commit(true); + } +} + + +template <> +void +EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed, + BasicType bt) +{ + search::Rand48 rnd; + IntegerAttribute::largeint_t mask(std::numeric_limits + <IntegerAttribute::largeint_t>::max()); + switch (bt.type()) { + case BasicType::INT8: + mask = 0x7f; + break; + case BasicType::INT16: + mask = 0x7fff; + break; + default: + ; + } + rnd.srand48(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -42) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.lrand48() & mask, weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1); + ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) == + i + 1); + } + } else { + EXPECT_TRUE( v.update(i, lrand48() & mask) ); + } + } + v.commit(); +} + + +template <> +void +EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed, + BasicType bt) +{ + (void) bt; + search::Rand48 rnd; + rnd.srand48(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i 
== 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE( v.update(i, -42.0) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.lrand48(), weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1); + ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) == + i + 1); + } + } else { + EXPECT_TRUE( v.update(i, lrand48()) ); + } + } + v.commit(); +} + + +template <> +void +EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed, + BasicType bt) +{ + (void) bt; + RandomGenerator rnd(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE( v.update(i, "foo") ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = rnd.rand(0, 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1); + } + } else { + EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) ); + } + } + v.commit(); +} + +namespace +{ + +template <typename T> +inline bool +equalsHelper(const T &lhs, const T &rhs) +{ + return lhs == rhs; +} + +template <> +inline bool +equalsHelper<float>(const float &lhs, const float &rhs) +{ + if (std::isnan(lhs)) + return std::isnan(rhs); + if (std::isnan(rhs)) + return false; + return lhs == rhs; +} + +template <> +inline bool +equalsHelper<double>(const double &lhs, const double &rhs) +{ + if (std::isnan(lhs)) + return 
std::isnan(rhs); + if (std::isnan(rhs)) + return false; + return lhs == rhs; +} + +} + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::compare(VectorType &a, VectorType &b) +{ + EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs()); + ASSERT_TRUE(a.getNumDocs() == b.getNumDocs()); + // EXPECT_EQUAL(a.getMaxValueCount(), b.getMaxValueCount()); + EXPECT_EQUAL(a.getCommittedDocIdLimit(), b.getCommittedDocIdLimit()); + uint32_t asz(a.getMaxValueCount()); + uint32_t bsz(b.getMaxValueCount()); + BufferType *av = new BufferType[asz]; + BufferType *bv = new BufferType[bsz]; + + for (size_t i(0), m(a.getNumDocs()); i < m; i++) { + ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i))); + ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i))); + EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i)); + ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i)); + EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av, asz), + static_cast<uint32_t>(a.getValueCount(i))); + EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv, bsz), + static_cast<uint32_t>(b.getValueCount(i))); + for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); + j < k; j++) { + EXPECT_TRUE(equalsHelper(av[j], bv[j])); + } + } + delete [] bv; + delete [] av; +} + + +template <typename VectorType> +VectorType & +EnumeratedSaveTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast<VectorType *>(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +EnumeratedSaveTest::asInt(AttributePtr &v) +{ + return as<IntegerAttribute>(v); +} + + +StringAttribute & +EnumeratedSaveTest::asString(AttributePtr &v) +{ + return as<StringAttribute>(v); +} + + +FloatingPointAttribute & +EnumeratedSaveTest::asFloat(AttributePtr &v) +{ + return as<FloatingPointAttribute>(v); +} + + +void +EnumeratedSaveTest::buildTermQuery(std::vector<char> &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + 
uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <typename V, typename T> +SearchContextPtr +EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast<const AttributeVector &>(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch<IntegerAttribute>(const IntegerAttribute &v) +{ + return getSearch<IntegerAttribute>(v, "[-42;-42]", false); +} + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v) +{ + return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false); +} + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch<StringAttribute>(const StringAttribute &v) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false); +} + +MemAttr::SP +EnumeratedSaveTest::saveMem(AttributeVector &v) +{ + MemAttr::SP res(new MemAttr); + EXPECT_TRUE(v.save(*res)); + return res; +} + + +void +EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e, + bool enumerated) +{ + MemAttr m; + v.enableEnumeratedSave(enumerated); + EXPECT_TRUE(v.save(m)); + v.enableEnumeratedSave(false); + ASSERT_TRUE(m == e); +} + + +MemAttr::SP +EnumeratedSaveTest::saveBoth(AttributePtr v) +{ + 
EXPECT_TRUE(v->save()); + vespalib::string basename = v->getBaseFileName(); + AttributePtr v2 = make(v->getConfig(), basename, true); + EXPECT_TRUE(v2->load()); + v2->enableEnumeratedSave(true); + EXPECT_TRUE(v2->saveAs(basename + "_e")); + if ((v->getConfig().basicType() == BasicType::INT32 && + v->getConfig().collectionType() == CollectionType::WSET) || true) { + search::AttributeMemorySaveTarget ms; + search::TuneFileAttributes tune; + search::index::DummyFileHeaderContext fileHeaderContext; + EXPECT_TRUE(v2->saveAs(basename + "_ee", ms)); + EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext)); + } + return saveMem(*v2); +} + + +EnumeratedSaveTest::AttributePtr +EnumeratedSaveTest::make(Config cfg, + const vespalib::string &pref, + bool fastSearch) +{ + cfg.setFastSearch(fastSearch); + AttributePtr v = AttributeFactory::createAttribute(pref, cfg); + return v; +} + + +void +EnumeratedSaveTest::load(AttributePtr v, const vespalib::string &name) +{ + v->setBaseFileName(name); + EXPECT_TRUE(v->load()); +} + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::checkLoad(AttributePtr v, const vespalib::string &name, + AttributePtr ev) +{ + v->setBaseFileName(name); + EXPECT_TRUE(v->load()); + compare<VectorType, BufferType>(as<VectorType>(v), as<VectorType>(ev)); +} + + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::testReload(AttributePtr v0, + AttributePtr v1, + AttributePtr v2, + MemAttr::SP mv0, + MemAttr::SP mv1, + MemAttr::SP mv2, + MemAttr::SP emv0, + MemAttr::SP emv1, + MemAttr::SP emv2, + Config cfg, + const vespalib::string &pref, + bool fastSearch) +{ + // typedef AttributePtr AVP; + + bool flagAttr = + cfg.collectionType() == CollectionType::ARRAY && + cfg.basicType() == BasicType::INT8 && + fastSearch; + bool supportsEnumerated = (fastSearch || + cfg.basicType() == BasicType::STRING) && + !flagAttr; + + + AttributePtr v = make(cfg, pref, fastSearch); + TEST_DO((checkLoad<VectorType, 
BufferType>(v, pref + "0", v0))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2", v2))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0))); + + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0))); + TEST_DO(checkMem(*v, *mv0, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1))); + TEST_DO(checkMem(*v, *mv1, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2", v2))); + TEST_DO(checkMem(*v, *mv2, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true)); + + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0))); + + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0))); + TEST_DO(checkMem(*v, *mv0, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1))); + TEST_DO(checkMem(*v, *mv1, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2))); + TEST_DO(checkMem(*v, *mv2, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? 
/**
 * Run the enumerated save scenario for one basic type / collection type.
 *
 * Creates three attributes named pref + "0", "1" and "2", requests 0, 10
 * and 30 documents for them, populates them with seeds 0, 10 and 30, and
 * captures their saved form both directly (saveMem) and after a reload
 * with enumerated save enabled (saveBoth).  It then verifies that the
 * "_ee" files written by saveBoth load back to the same content, and runs
 * testReload once without and once with fast-search.
 *
 * @param bt    basic type of the attributes under test
 * @param ct    collection type of the attributes under test
 * @param pref  name prefix; also used as the base file name prefix
 */
template <typename VectorType, typename BufferType>
void
EnumeratedSaveTest::test(BasicType bt, CollectionType ct,
                         const vespalib::string &pref)
{
    Config cfg(bt, ct);
    AttributePtr v0 = AttributeFactory::createAttribute(pref + "0", cfg);
    AttributePtr v1 = AttributeFactory::createAttribute(pref + "1", cfg);
    AttributePtr v2 = AttributeFactory::createAttribute(pref + "2", cfg);

    // addDocs() does nothing for a size of 0, so v0 gets no documents here.
    addDocs(v0, 0);
    addDocs(v1, 10);
    addDocs(v2, 30);

    populate(as<VectorType>(v0), 0, bt);
    populate(as<VectorType>(v1), 10, bt);
    populate(as<VectorType>(v2), 30, bt);

    // Reference saves taken straight from the populated attributes.
    MemAttr::SP mv0 = saveMem(*v0);
    MemAttr::SP mv1 = saveMem(*v1);
    MemAttr::SP mv2 = saveMem(*v2);

    // saveBoth() also writes the files the checks below and testReload load.
    MemAttr::SP emv0 = saveBoth(v0);
    MemAttr::SP emv1 = saveBoth(v1);
    MemAttr::SP emv2 = saveBoth(v2);

    // The files written via the memory save target must load to equal content.
    AttributePtr v = make(cfg, pref, true);
    checkLoad<VectorType, BufferType>(v, pref + "0_ee", v0);
    checkLoad<VectorType, BufferType>(v, pref + "1_ee", v1);
    checkLoad<VectorType, BufferType>(v, pref + "2_ee", v2);
    v.reset();

    TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2,
                                                mv0, mv1, mv2,
                                                emv0, emv1, emv2,
                                                cfg, pref,
                                                false)));
    TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2,
                                                mv0, mv1, mv2,
                                                emv0, emv1, emv2,
                                                cfg, pref,
                                                true)));
}
EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT8, + CollectionType::SINGLE, + "int8_sv"); +} + +TEST_F("Test enumerated save with array value int8", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT8, + CollectionType::ARRAY, + "int8_a"); +} + +TEST_F("Test enumerated save with weighted set value int8", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT8, + CollectionType::WSET, + "int8_ws"); +} + +TEST_F("Test enumerated save with single value int16", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT16, + CollectionType::SINGLE, + "int16_sv"); +} + +TEST_F("Test enumerated save with array value int16", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT16, + CollectionType::ARRAY, + "int16_a"); +} + +TEST_F("Test enumerated save with weighted set value int16", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT16, + CollectionType::WSET, + "int16_ws"); +} + +TEST_F("Test enumerated save with single value int32", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); +} + +TEST_F("Test enumerated save with array value int32", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); +} + +TEST_F("Test enumerated save with weighted set value int32", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT32, + CollectionType::WSET, + "int32_ws"); +} + +TEST_F("Test enumerated save with single value int64", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + 
IntegerAttribute::largeint_t>(BasicType::INT64, + CollectionType::SINGLE, + "int64_sv"); +} + +TEST_F("Test enumerated save with array value int64", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT64, + CollectionType::ARRAY, + "int64_a"); +} + +TEST_F("Test enumerated save with weighted set value int64", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT64, + CollectionType::WSET, + "int64_ws"); +} + +TEST_F("Test enumerated save with single value float", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::FLOAT, + CollectionType::SINGLE, + "float_sv"); +} + +TEST_F("Test enumerated save with array value float", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::FLOAT, + CollectionType::ARRAY, + "float_a"); +} + +TEST_F("Test enumerated save with weighted set value float", + EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>( + BasicType::FLOAT, + CollectionType::WSET, + "float_ws"); +} + + +TEST_F("Test enumerated save with single value double", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); +} + +TEST_F("Test enumerated save with array value double", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::ARRAY, + "double_a"); +} + +TEST_F("Test enumerated save with weighted set value double", + EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>( + BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); +} + + +TEST_F("Test enumerated save with single value string", EnumeratedSaveTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::SINGLE, + "str_sv"); +} + +TEST_F("Test enumerated 
save with array value string", EnumeratedSaveTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::ARRAY, + "str_a"); +} + +TEST_F("Test enumerated save with weighted set value string", + EnumeratedSaveTest) +{ + f.template test<StringAttribute, + StringAttribute::WeightedString>( + BasicType::STRING, + CollectionType::WSET, + "str_ws"); +} + +TEST_MAIN() +{ + AttributeVector::enableEnumeratedLoad(); + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/attribute/enumstore/.gitignore b/searchlib/src/tests/attribute/enumstore/.gitignore new file mode 100644 index 00000000000..c58a018bbd9 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +enumstore_test +searchlib_enumstore_test_app diff --git a/searchlib/src/tests/attribute/enumstore/CMakeLists.txt b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt new file mode 100644 index 00000000000..33190553747 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_enumstore_test_app + SOURCES + enumstore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_enumstore_test_app COMMAND searchlib_enumstore_test_app) diff --git a/searchlib/src/tests/attribute/enumstore/DESC b/searchlib/src/tests/attribute/enumstore/DESC new file mode 100644 index 00000000000..514c9a47caf --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/DESC @@ -0,0 +1 @@ +This is a test for the EnumStore class. 
/**
 * Round size up to the nearest multiple of 16.
 *
 * A size that is already a multiple of 16 (including 0) is returned
 * unchanged.
 */
size_t enumStoreAlign(size_t size)
{
    const size_t alignment = 16;
    const size_t padded = size + (alignment - 1);
    // Dropping the remainder is the same as clearing the low four bits.
    return padded - (padded % alignment);
}
template <typename EnumStoreType> + void testAddEnum(bool hasPostings); + + template <typename EnumStoreType, typename Dictionary> + void + testUniques(const EnumStoreType &ses, + const std::vector<std::string> &unique); + + + void testCompaction(); + template <typename EnumStoreType> + void testCompaction(bool hasPostings, bool disableReEnumerate); + + void testReset(); + template <typename EnumStoreType> + void testReset(bool hasPostings); + + void testHoldListAndGeneration(); + void testMemoryUsage(); + void requireThatAddressSpaceUsageIsReported(); + void testBufferLimit(); + + // helper methods + typedef std::vector<std::string> StringVector; + template <typename T> + T random(T low, T high); + std::string getRandomString(uint32_t minLen, uint32_t maxLen); + StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen); + StringVector sortRandomStrings(StringVector & strings); + + struct StringEntry { + StringEntry(uint32_t e, uint32_t r, const std::string & s) : + _enum(e), _refCount(r), _string(s) {} + uint32_t _enum; + uint32_t _refCount; + std::string _string; + }; + + struct Reader { + typedef StringEnumStore::Index Index; + typedef std::vector<Index> IndexVector; + typedef std::vector<StringEntry> ExpectedVector; + uint32_t _generation; + IndexVector _indices; + ExpectedVector _expected; + Reader(uint32_t generation, const IndexVector & indices, + const ExpectedVector & expected) : + _generation(generation), _indices(indices), _expected(expected) {} + }; + + void + checkReaders(const StringEnumStore &ses, + generation_t sesGen, + const std::vector<Reader> &readers); + +public: + EnumStoreTest() {} + int Main(); +}; + +void +EnumStoreTest::testIndex() +{ + { + StringEnumStore::Index idx; + EXPECT_TRUE( ! 
idx.valid()); + EXPECT_EQUAL(idx.offset(), 0u); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx(enumStoreAlign(1000), 0); + EXPECT_TRUE(idx.offset() == enumStoreAlign(1000)); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + // Change offsets when alignment changes. + StringEnumStore::Index idx1(48, 0); + StringEnumStore::Index idx2(80, 0); + StringEnumStore::Index idx3(48, 0); + EXPECT_TRUE(!(idx1 == idx2)); + EXPECT_TRUE(idx1 == idx3); + } + { + EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2); + } +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string) +{ + StringEnumStore::insertEntry(data, enumValue, refCount, string.c_str()); +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value) +{ + NumericEnumStore::insertEntry(data, enumValue, refCount, value); +} + +void +EnumStoreTest::testStringEntry() +{ + { + char data[9]; + fillDataBuffer(data, 0, 0, ""); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("") == + StringEnumStore::alignEntrySize(8 + 1)); + + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 1); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() 
== 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + } + { + char data[18]; + fillDataBuffer(data, 10, 5, "enumstore"); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") == + StringEnumStore::alignEntrySize(8 + 1 + 9)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 6); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + } +} + +void +EnumStoreTest::testNumericEntry() +{ + { + char data[12]; + fillDataBuffer(data, 10, 20, 30); + NumericEnumStore::Entry e(data); + EXPECT_TRUE(NumericEnumStore::getEntrySize(30) == + NumericEnumStore::alignEntrySize(8 + 4)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 21); + EXPECT_TRUE(e.getValue() == 30); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + } +} + +template <typename EnumStoreType, typename T> +void +EnumStoreTest::testFloatEnumStore(EnumStoreType & es) +{ + EnumIndex idx; + + T a[5] = {-20.5f, -10.5f, -0.5f, 9.5f, 19.5f}; + T b[5] = {-25.5f, -15.5f, -5.5f, 4.5f, 14.5f}; + + for (uint32_t i = 0; i < 5; ++i) { + es.addEnum(a[i], idx); + } + + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } + + es.addEnum(std::numeric_limits<T>::quiet_NaN(), idx); + EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx)); + EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx)); + + for (uint32_t i = 0; i < 5; ++i) { + 
EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } +} + +void +EnumStoreTest::testFloatEnumStore() +{ + { + FloatEnumStore fes(1000, false); + testFloatEnumStore<FloatEnumStore, float>(fes); + } + { + DoubleEnumStore des(1000, false); + testFloatEnumStore<DoubleEnumStore, double>(des); + } +} + +void +EnumStoreTest::testAddEnum() +{ + testAddEnum<StringEnumStore>(false); + + testAddEnum<StringEnumStore>(true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testAddEnum(bool hasPostings) +{ + EnumStoreType ses(100, hasPostings); + EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES, + ses.getBuffer(0).capacity()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + EnumIndex idx; + uint64_t offset = ses.getBuffer(0).size(); + std::vector<EnumIndex> indices; + std::vector<std::string> unique; + unique.push_back(""); + unique.push_back("add"); + unique.push_back("enumstore"); + unique.push_back("unique"); + + for (uint32_t i = 0; i < unique.size(); ++i) { + ses.addEnum(unique[i].c_str(), idx); + EXPECT_EQUAL(offset, idx.offset()); + EXPECT_EQUAL(0u, idx.bufferId()); + ses.incRefCount(idx); + EXPECT_EQUAL(1u, ses.getRefCount(idx)); + indices.push_back(idx); + offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(ses.getLastEnum() == i); + } + ses.freezeTree(); + + for (uint32_t i = 0; i < indices.size(); ++i) { + uint32_t e = ses.getEnum(indices[i]); + EXPECT_EQUAL(i, e); + EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e)); + EXPECT_TRUE(ses.getEnum(btree::EntryRef(e)) == i); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(idx == indices[i]); + EXPECT_EQUAL(1u, ses.getRefCount(indices[i])); + StringEntryType::Type value = 0; + EXPECT_TRUE(ses.getValue(indices[i], value)); + 
EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0); + } + + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, unique); + } else { + testUniques<EnumStoreType, EnumTree>(ses, unique); + } +} + +template <typename EnumStoreType, typename Dictionary> +void +EnumStoreTest::testUniques +(const EnumStoreType &ses, const std::vector<std::string> &unique) +{ + const EnumStoreDict<Dictionary> *enumDict = + dynamic_cast<const EnumStoreDict<Dictionary> *> + (&ses.getEnumStoreDict()); + assert(enumDict != NULL); + const Dictionary &dict = enumDict->getDictionary(); + uint32_t i = 0; + EnumIndex idx; + for (typename Dictionary::Iterator iter = dict.begin(); + iter.valid(); ++iter, ++i) { + idx = iter.getKey(); + EXPECT_TRUE(strcmp(unique[i].c_str(), ses.getValue(idx)) == 0); + } + EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i); +} + + +void +EnumStoreTest::testCompaction() +{ + testCompaction<StringEnumStore>(false, false); + testCompaction<StringEnumStore>(true, false); + testCompaction<StringEnumStore>(false, true); + testCompaction<StringEnumStore>(true, true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) +{ + // entrySize = 15 before alignment + uint32_t entrySize = EnumStoreType::alignEntrySize(15); + uint32_t bufferSize = entrySize * 5; + EnumStoreType ses(bufferSize, hasPostings); + EnumIndex idx; + std::vector<EnumIndex> indices; + typename EnumStoreType::Type t = "foo"; + std::vector<std::string> uniques; + uniques.push_back("enum00"); + uniques.push_back("enum01"); + uniques.push_back("enum02"); + uniques.push_back("enum03"); + uniques.push_back("enum04"); + + // fill with unique values + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.getRemaining() == bufferSize - i * entrySize); + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + indices.push_back(idx); + } + EXPECT_EQUAL(0u, ses.getRemaining()); + 
EXPECT_EQUAL(0u, ses.getBuffer(0).remaining()); + EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + uint32_t failEntrySize = ses.getEntrySize("enum05"); + EXPECT_TRUE(failEntrySize > ses.getRemaining()); + + // change from enum00 -> enum01 + ses.decRefCount(indices[0]); + ses.incRefCount(indices[1]); + indices[0] = indices[1]; + + // check correct refcount + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + uint32_t refCount = ses.getRefCount(idx); + if (i == 0) { + EXPECT_TRUE(refCount == 0); + } else if (i == 1) { + EXPECT_TRUE(refCount == 2); + } else { + EXPECT_TRUE(refCount == 1); + } + } + + // free unused enums + ses.freeUnusedEnums(true); + EXPECT_TRUE(!ses.findIndex("enum00", idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + if (disableReEnumerate) { + ses.disableReEnumerate(); + } + EXPECT_TRUE(ses.performCompaction(3 * entrySize)); + if (disableReEnumerate) { + ses.enableReEnumerate(); + } + EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4); + EXPECT_TRUE(ses.getBuffer(1)._deadElems == 0); + + EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum()); + + // add new unique strings + ses.addEnum("enum05", idx); + EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx)); + ses.addEnum("enum06", idx); + EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx)); + ses.addEnum("enum00", idx); + EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx)); + + EXPECT_EQUAL((disableReEnumerate ? 
7u : 6u), ses.getLastEnum()); + + // compare old and new indices + for (uint32_t i = 0; i < indices.size(); ++i) { + EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx)); + EXPECT_TRUE(indices[i].bufferId() == 0); + EXPECT_TRUE(idx.bufferId() == 1); + EXPECT_TRUE(ses.getValue(indices[i], t)); + typename EnumStoreType::Type s = "bar"; + EXPECT_TRUE(ses.getValue(idx, s)); + EXPECT_TRUE(strcmp(t, s) == 0); + } + // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0 + EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx)); + EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset()); + EXPECT_EQUAL(entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx)); + EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset()); + EXPECT_EQUAL(2 * entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx)); + EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset()); + EXPECT_EQUAL(3 * entrySize, idx.offset()); +} + +void +EnumStoreTest::testReset() +{ + testReset<StringEnumStore>(false); + + testReset<StringEnumStore>(true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testReset(bool hasPostings) +{ + uint32_t numUniques = 10000; + srand(123456789); + StringVector rndStrings = fillRandomStrings(numUniques, 10, 15); + EXPECT_EQUAL(rndStrings.size(), size_t(numUniques)); + StringVector uniques = sortRandomStrings(rndStrings); + EXPECT_EQUAL(uniques.size(), size_t(numUniques)); + // max entrySize = 25 before alignment + uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16); + EnumStoreType ses(numUniques * maxEntrySize, hasPostings); + EnumIndex idx; + + uint32_t cnt = 0; + // 
add new unique strings + for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) { + ses.addEnum(iter->c_str(), idx); + EXPECT_EQUAL(ses.getNumUniques(), ++cnt); + } + + // check for unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, uniques); + } else { + testUniques<EnumStoreType, EnumTree>(ses, uniques); + } + + rndStrings = fillRandomStrings(numUniques, 15, 20); + StringVector newUniques = sortRandomStrings(rndStrings); + + typename EnumStoreType::Builder builder; + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + builder.insert(iter->c_str()); + } + + ses.reset(builder); + EXPECT_EQUAL(RESERVED_BYTES, ses.getRemaining()); + + // check for old unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx)); + } + + // check for new unique strings + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques); + } else { + testUniques<EnumStoreType, EnumTree>(ses, newUniques); + } +} + +void +EnumStoreTest::testHoldListAndGeneration() +{ + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6); + StringEnumStore ses(100 * entrySize, false); + StringEnumStore::Index idx; + StringVector uniques; + generation_t sesGen = 0u; + uniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? 
"enum0%u" : "enum%u", i); + uniques.push_back(tmp); + } + StringVector newUniques; + newUniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? "unique0%u" : "unique%u", i); + newUniques.push_back(tmp); + } + uint32_t generation = 0; + std::vector<Reader> readers; + + // insert first batch of unique strings + for (uint32_t i = 0; i < 100; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + + // associate readers + if (i % 10 == 9) { + Reader::IndexVector indices; + Reader::ExpectedVector expected; + for (uint32_t j = i - 9; j <= i; ++j) { + EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx)); + indices.push_back(idx); + StringEnumStore::Entry entry = ses.getEntry(idx); + EXPECT_TRUE(entry.getEnum() == j); + EXPECT_TRUE(entry.getRefCount() == 1); + EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0); + expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(), + std::string(entry.getValue()))); + } + EXPECT_TRUE(indices.size() == 10); + EXPECT_TRUE(expected.size() == 10); + sesGen = generation++; + readers.push_back(Reader(sesGen, indices, expected)); + checkReaders(ses, sesGen, readers); + } + } + + EXPECT_EQUAL(0u, ses.getRemaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // remove all uniques + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + ses.decRefCount(idx); + EXPECT_EQUAL(0u, ses.getRefCount(idx)); + } + ses.freeUnusedEnums(true); + EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + + // check readers again + checkReaders(ses, sesGen, readers); + + // fill up buffer + uint32_t i = 0; + while (ses.getRemaining() >= newEntrySize) { + //LOG(info, "fill: %s", newUniques[i].c_str()); + 
ses.addEnum(newUniques[i++].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + EXPECT_LESS(ses.getRemaining(), newEntrySize); + // buffer on hold list + EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize)); + + checkReaders(ses, sesGen, readers); + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // buffer no longer on hold list + EXPECT_LESS(ses.getRemaining(), newEntrySize); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize); +} + +void +EnumStoreTest::testMemoryUsage() +{ + StringEnumStore ses(200, false); + StringEnumStore::Index idx; + uint32_t num = 8; + std::vector<StringEnumStore::Index> indices; + std::vector<std::string> uniques; + for (uint32_t i = 0; i < num; ++i) { + std::stringstream ss; + ss << "enum" << i; + uniques.push_back(ss.str()); + } + generation_t sesGen = 0u; + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx") + + // usage before inserting enums + MemoryUsage usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0)); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + for (uint32_t i = 0; i < num; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + indices.push_back(idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + + // usage after inserting enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + // assign new enum for num / 2 of indices + for (uint32_t i = 0; i < num / 2; 
++i) { + ses.decRefCount(indices[i]); + EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx)); + ses.incRefCount(idx); + indices[i] = idx; + } + ses.freeUnusedEnums(true); + + // usage after removing enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + ses.performCompaction(400); + + // usage after compaction + MemoryUsage usage2 = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes()); + EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold()); + + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // usage after hold list trimming + MemoryUsage usage3 = ses.getMemoryUsage(); + EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes()); + EXPECT_EQUAL(0u, usage3.deadBytes()); + EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold()); +} + +namespace { + +NumericEnumStore::Index +addEnum(NumericEnumStore &store, uint32_t value) +{ + NumericEnumStore::Index result; + store.addEnum(value, result); + store.incRefCount(result); + return result; +} + +void +decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx) +{ + store.decRefCount(idx); + store.freeUnusedEnums(false); +} + +} + +void +EnumStoreTest::requireThatAddressSpaceUsageIsReported() +{ + const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize() + NumericEnumStore store(200, false); + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + NumericEnumStore::Index idx1 = addEnum(store, 10); + EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), 
/**
 * Returns the number of decimal digits needed to write num.
 * Zero counts as one digit.
 */
size_t
digits(size_t num)
{
    size_t count = 1;
    // every remaining factor of ten adds one more digit
    for (size_t rest = num; rest >= 10; rest /= 10) {
        ++count;
    }
    return count;
}
+EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen) +{ + StringVector retval; + retval.reserve(numStrings); + for (uint32_t i = 0; i < numStrings; ++i) { + retval.push_back(getRandomString(minLen, maxLen)); + } + return retval; +} + +EnumStoreTest::StringVector +EnumStoreTest::sortRandomStrings(StringVector & strings) +{ + std::sort(strings.begin(), strings.end()); + std::vector<std::string> retval; + retval.reserve(strings.size()); + std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end()); + std::copy(strings.begin(), pos, std::back_inserter(retval)); + return retval; +} + +void +EnumStoreTest::checkReaders(const StringEnumStore & ses, + generation_t sesGen, + const std::vector<Reader> & readers) +{ + (void) sesGen; + //uint32_t refCount = 1000; + StringEnumStore::Type t = ""; + for (uint32_t i = 0; i < readers.size(); ++i) { + const Reader & r = readers[i]; + for (uint32_t j = 0; j < r._indices.size(); ++j) { + EXPECT_EQUAL(r._expected[j]._enum, ses.getEnum(r._indices[j])); + EXPECT_TRUE(ses.getValue(r._indices[j], t)); + EXPECT_TRUE(r._expected[j]._string == std::string(t)); + } + } +} + + +int +EnumStoreTest::Main() +{ + TEST_INIT("enumstore_test"); + + testIndex(); + testStringEntry(); + testNumericEntry(); + testFloatEnumStore(); + testAddEnum(); + testCompaction(); + testReset(); + testHoldListAndGeneration(); + testMemoryUsage(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + if (_argc > 1) { + testBufferLimit(); // large test with 8 GB buffer + } + + TEST_DONE(); +} +} + + +TEST_APPHOOK(search::EnumStoreTest); diff --git a/searchlib/src/tests/attribute/extendattributes/.gitignore b/searchlib/src/tests/attribute/extendattributes/.gitignore new file mode 100644 index 00000000000..4018a7d4f5b --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +extendattribute_test +searchlib_extendattribute_test_app diff --git 
a/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt new file mode 100644 index 00000000000..b0803f0a232 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_extendattribute_test_app + SOURCES + extendattribute.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_extendattribute_test_app COMMAND sh extendattribute_test.sh) diff --git a/searchlib/src/tests/attribute/extendattributes/DESC b/searchlib/src/tests/attribute/extendattributes/DESC new file mode 100644 index 00000000000..4f88189a1d7 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/DESC @@ -0,0 +1 @@ +Unit tests for extendable attributes. diff --git a/searchlib/src/tests/attribute/extendattributes/FILES b/searchlib/src/tests/attribute/extendattributes/FILES new file mode 100644 index 00000000000..930039cae19 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/FILES @@ -0,0 +1 @@ +extendattribute.cpp diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp new file mode 100644 index 00000000000..0bb751d26ee --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("extendattribute_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/extendableattributes.h> + +namespace search { + +class ExtendAttributeTest : public vespalib::TestApp +{ +private: + template <typename Attribute> + void testExtendInteger(Attribute & attr); + template <typename Attribute> + void testExtendFloat(Attribute & attr); + template <typename Attribute> + void testExtendString(Attribute & attr); + +public: + int Main(); +}; + +template <typename Attribute> +void ExtendAttributeTest::testExtendInteger(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add(1, 10); + EXPECT_EQUAL(attr.getInt(0), 1); + attr.add(2, 20); + EXPECT_EQUAL(attr.getInt(0), attr.hasMultiValue() ? 1 : 2); + if (attr.hasMultiValue()) { + AttributeVector::WeightedInt v[2]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), 1); + EXPECT_EQUAL(v[1].getValue(), 2); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add(3, 30); + EXPECT_EQUAL(attr.getInt(1), 3); + if (attr.hasMultiValue()) { + AttributeVector::WeightedInt v[1]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), 3); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +template <typename Attribute> +void ExtendAttributeTest::testExtendFloat(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add(1.7, 10); + EXPECT_EQUAL(attr.getInt(0), 1); + EXPECT_EQUAL(attr.getFloat(0), 
1.7); + attr.add(2.3, 20); + EXPECT_EQUAL(attr.getFloat(0), attr.hasMultiValue() ? 1.7 : 2.3); + if (attr.hasMultiValue()) { + AttributeVector::WeightedFloat v[2]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), 1.7); + EXPECT_EQUAL(v[1].getValue(), 2.3); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add(3.6, 30); + EXPECT_EQUAL(attr.getFloat(1), 3.6); + if (attr.hasMultiValue()) { + AttributeVector::WeightedFloat v[1]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), 3.6); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +template <typename Attribute> +void ExtendAttributeTest::testExtendString(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add("1.7", 10); + EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "1.7"); + attr.add("2.3", 20); + EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), attr.hasMultiValue() ? 
"1.7" : "2.3"); + if (attr.hasMultiValue()) { + AttributeVector::WeightedString v[2]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), "1.7"); + EXPECT_EQUAL(v[1].getValue(), "2.3"); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add("3.6", 30); + EXPECT_EQUAL(std::string(attr.getString(1, NULL, 0)), "3.6"); + if (attr.hasMultiValue()) { + AttributeVector::WeightedString v[1]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), "3.6"); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +int +ExtendAttributeTest::Main() +{ + TEST_INIT("extendattribute_test"); + + SingleIntegerExtAttribute siattr("si1"); + MultiIntegerExtAttribute miattr("mi1"); + WeightedSetIntegerExtAttribute wsiattr("wsi1"); + EXPECT_TRUE( ! siattr.hasMultiValue() ); + EXPECT_TRUE( miattr.hasMultiValue() ); + EXPECT_TRUE( wsiattr.hasWeightedSetType() ); + testExtendInteger(siattr); + testExtendInteger(miattr); + testExtendInteger(wsiattr); + + SingleFloatExtAttribute sdattr("sd1"); + MultiFloatExtAttribute mdattr("md1"); + WeightedSetFloatExtAttribute wsdattr("wsd1"); + EXPECT_TRUE( ! sdattr.hasMultiValue() ); + EXPECT_TRUE( mdattr.hasMultiValue() ); + EXPECT_TRUE( wsdattr.hasWeightedSetType() ); + testExtendFloat(sdattr); + testExtendFloat(mdattr); + testExtendFloat(wsdattr); + + SingleStringExtAttribute ssattr("ss1"); + MultiStringExtAttribute msattr("ms1"); + WeightedSetStringExtAttribute wssattr("wss1"); + EXPECT_TRUE( ! 
ssattr.hasMultiValue() ); + EXPECT_TRUE( msattr.hasMultiValue() ); + EXPECT_TRUE( wssattr.hasWeightedSetType() ); + testExtendString(ssattr); + testExtendString(msattr); + testExtendString(wssattr); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::ExtendAttributeTest); diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh new file mode 100755 index 00000000000..6f335b18229 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_extendattribute_test_app +rm -rf *.dat diff --git a/searchlib/src/tests/attribute/gidmapattribute/.gitignore b/searchlib/src/tests/attribute/gidmapattribute/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchlib/src/tests/attribute/gidmapattribute/.gitignore diff --git a/searchlib/src/tests/attribute/multivaluemapping/.gitignore b/searchlib/src/tests/attribute/multivaluemapping/.gitignore new file mode 100644 index 00000000000..743c738a0a2 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +multivaluemapping_test +searchlib_multivaluemapping_test_app diff --git a/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt new file mode 100644 index 00000000000..36c66b09966 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_multivaluemapping_test_app + SOURCES + multivaluemapping_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multivaluemapping_test_app COMMAND searchlib_multivaluemapping_test_app) diff --git a/searchlib/src/tests/attribute/multivaluemapping/DESC b/searchlib/src/tests/attribute/multivaluemapping/DESC new file mode 100644 index 00000000000..44c27ec9926 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/DESC @@ -0,0 +1 @@ +This is a test for the MultivalueMapping class. diff --git a/searchlib/src/tests/attribute/multivaluemapping/FILES b/searchlib/src/tests/attribute/multivaluemapping/FILES new file mode 100644 index 00000000000..bf22403a5fe --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/FILES @@ -0,0 +1 @@ +multivaluemapping.cpp diff --git a/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp new file mode 100644 index 00000000000..e78e180856b --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp @@ -0,0 +1,836 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("multivaluemapping_test"); +#include <vespa/vespalib/testkit/testapp.h> +//#define DEBUG_MULTIVALUE_MAPPING +//#define LOG_MULTIVALUE_MAPPING +#include <vespa/searchlib/attribute/multivaluemapping.h> +#include <algorithm> +#include <limits> + +namespace search { + +namespace +{ + +uint32_t dummyCommittedDocIdLimit = std::numeric_limits<uint32_t>::max(); + +} + +typedef MultiValueMappingT<uint32_t> MvMapping; +typedef MvMapping::Index Index; +typedef multivalue::Index64 Index64; +typedef multivalue::Index32 Index32; +typedef MvMapping::Histogram Histogram; + +class MultiValueMappingTest : public vespalib::TestApp +{ +private: + typedef std::vector<Index> IndexVector; + typedef std::vector<std::vector<uint32_t> > ExpectedVector; + typedef vespalib::GenerationHandler::generation_t generation_t; + + class Reader { + public: + uint32_t _startGen; + uint32_t _endGen; + IndexVector _indices; + ExpectedVector _expected; + uint32_t numKeys() { return _indices.size(); } + Reader(uint32_t startGen, uint32_t endGen, const IndexVector & indices, + const ExpectedVector & expected) : + _startGen(startGen), _endGen(endGen), _indices(indices), _expected(expected) {} + }; + + typedef std::vector<Reader> ReaderVector; + + void testIndex32(); + void testIndex64(); + void testSimpleSetAndGet(); + void testChangingValueCount(); + + void + checkReaders(MvMapping &mvm, + generation_t mvmGen, + ReaderVector &readers); + + void testHoldListAndGeneration(); + void testManualCompaction(); + void testVariousGets(); + void testReplace(); + void testMemoryUsage(); + void testShrink(); + void testHoldElem(); + void requireThatAddressSpaceUsageIsReported(); + void requireThatDeadIsNotAccountedInAddressSpaceUsage(); + +public: + int Main(); +}; + +void +MultiValueMappingTest::testIndex32() +{ + { + Index32 idx; + EXPECT_EQUAL(idx.values(), 0u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 0u); + 
EXPECT_EQUAL(idx.offset(), 0u); + } + { + Index32 idx(3, 0, 1000); + EXPECT_EQUAL(idx.values(), 3u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 6u); + EXPECT_EQUAL(idx.offset(), 1000u); + EXPECT_EQUAL(idx.idx(), 0x300003e8u); + } + { + Index32 idx(15, 1, 134217727); + EXPECT_EQUAL(idx.values(), 15u); + EXPECT_EQUAL(idx.alternative(), 1u); + EXPECT_EQUAL(idx.vectorIdx(), 31u); + EXPECT_EQUAL(idx.offset(), 134217727u); + EXPECT_EQUAL(idx.idx(), 0xffffffffu); + } + { + EXPECT_EQUAL(Index32::maxValues(), 15u); + EXPECT_EQUAL(Index32::alternativeSize(), 2u); + } +} + +void +MultiValueMappingTest::testIndex64() +{ + { + Index64 idx; + EXPECT_EQUAL(idx.values(), 0u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 0u); + EXPECT_EQUAL(idx.offset(), 0u); + } + { + Index64 idx(3, 0, 1000); + EXPECT_EQUAL(idx.values(), 3u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 6u); + EXPECT_EQUAL(idx.offset(), 1000u); + EXPECT_EQUAL(idx.idx(), 0x3000003e8ull); + } + { + Index64 idx(15, 1, 134217727); + EXPECT_EQUAL(idx.values(), 15u); + EXPECT_EQUAL(idx.alternative(), 1u); + EXPECT_EQUAL(idx.vectorIdx(), 31u); + EXPECT_EQUAL(idx.offset(), 134217727u); + EXPECT_EQUAL(idx.idx(), 0xf87ffffffull); + } + { + EXPECT_EQUAL(Index64::maxValues(), 1023u); + EXPECT_EQUAL(Index64::alternativeSize(), 2u); + } +} + +void +MultiValueMappingTest::testSimpleSetAndGet() +{ + uint32_t maxValueCount = Index::maxValues() * 2; + uint32_t numKeys = maxValueCount * 2; + MvMapping mvm(dummyCommittedDocIdLimit, numKeys); + EXPECT_EQUAL(mvm.getNumKeys(), numKeys); + Index idx; + + // insert values + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t valueCount = key / maxValueCount; + std::vector<uint32_t> values(valueCount, key); + Histogram needed(Index::maxValues()); + needed[valueCount] = 1; + if (!mvm.enoughCapacity(needed)) { + mvm.trimHoldLists(1); + mvm.performCompaction(needed); + } + mvm.set(key, values); + 
EXPECT_EQUAL(mvm.getValueCount(key), valueCount); + idx = mvm._indices[key]; + if (valueCount < Index::maxValues()) { + EXPECT_EQUAL(idx.values(), valueCount); + } else { + EXPECT_EQUAL(idx.values(), Index::maxValues()); + } +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "------------------------------------------------------------"); +#endif + } + EXPECT_TRUE(!mvm.hasKey(numKeys)); + + // check for expected values + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t valueCount = key / maxValueCount; + EXPECT_EQUAL(mvm.getValueCount(key), valueCount); + std::vector<uint32_t> buffer(valueCount); + EXPECT_EQUAL(mvm.get(key, buffer), valueCount); + EXPECT_TRUE(buffer.size() == valueCount); + EXPECT_EQUAL(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)), valueCount); + uint32_t value; + const uint32_t * handle = NULL; + EXPECT_EQUAL(mvm.get(key, handle), valueCount); + EXPECT_TRUE(valueCount == 0 ? handle == NULL : handle != NULL); + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE(mvm.get(key, i, value)); + EXPECT_EQUAL(value, key); + EXPECT_TRUE(handle[i] == key); + } + EXPECT_TRUE(!mvm.get(key, valueCount, value)); + } + + // reset + mvm.reset(10); + EXPECT_TRUE(mvm.getNumKeys() == 10); + EXPECT_TRUE(!mvm.hasKey(10)); + EXPECT_TRUE(mvm._genHolder.getHeldBytes() == 0); + for (uint32_t key = 0; key < 10; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == 0); + std::vector<uint32_t> buffer; + EXPECT_TRUE(mvm.get(key, buffer) == 0); + EXPECT_TRUE(buffer.size() == 0); + } + + // add more keys + for (uint32_t i = 0; i < 5; ++i) { + uint32_t key; + mvm.addKey(key); + EXPECT_TRUE(key == 10 + i); + EXPECT_TRUE(mvm.getNumKeys() == 11 + i); + } +} + +void +MultiValueMappingTest::testChangingValueCount() +{ + uint32_t numKeys = 10; + uint32_t maxCount = Index::maxValues() + 1; + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 0; i < Index::maxValues(); ++i) { + initCapacity[i] = numKeys; + } + initCapacity[Index::maxValues()] = 
numKeys * 2; + MvMapping mvm(dummyCommittedDocIdLimit, numKeys, initCapacity); + + // Increasing the value count for some keys + for (uint32_t valueCount = 1; valueCount <= maxCount; ++valueCount) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "########################### %u ##############################", valueCount); +#endif + uint32_t lastValueCount = valueCount - 1; + // set values + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector<uint32_t> buffer(valueCount, key); + mvm.set(key, buffer); + } + + Histogram remaining = mvm.getRemaining(); + if (valueCount < Index::maxValues()) { + EXPECT_TRUE(remaining[valueCount] == 0); + } else { + EXPECT_TRUE(remaining[Index::maxValues()] == numKeys * (maxCount - valueCount)); + } + + if (valueCount < Index::maxValues()) { + MvMapping::SingleVectorPtr current = mvm.getSingleVector(valueCount, MvMapping::ACTIVE); + EXPECT_TRUE(current.first->used() == numKeys * (valueCount)); + EXPECT_TRUE(current.first->dead() == 0); + + if (lastValueCount != 0) { + MvMapping::SingleVectorPtr last = mvm.getSingleVector(lastValueCount, MvMapping::ACTIVE); + EXPECT_TRUE(last.first->used() == numKeys * (lastValueCount)); + EXPECT_TRUE(last.first->dead() == numKeys * (lastValueCount)); + } + } else { + MvMapping::VectorVectorPtr current = mvm.getVectorVector(MvMapping::ACTIVE); + EXPECT_TRUE(current.first->used() == numKeys * (valueCount - Index::maxValues() + 1)); + EXPECT_TRUE(current.first->dead() == numKeys * (valueCount - Index::maxValues())); + } + + // check values + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector<uint32_t> buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)) == valueCount); + } + } +} + +void +MultiValueMappingTest::checkReaders(MvMapping &mvm, + generation_t mvmGen, + ReaderVector &readers) +{ + for (ReaderVector::iterator iter = readers.begin(); + iter != readers.end(); ) { + if (iter->_endGen <= 
/**
 * Exercises hold lists and generations: simulated readers capture the
 * indices of the first numKeys keys, the mapping is then compacted and
 * every key overwritten, and checkReaders() later verifies that the data
 * behind the captured indices still matches what each reader recorded.
 *
 * One round is run for every value count in [1, Index::maxValues()]. The
 * statement order within a round matters: hold lists are trimmed with the
 * oldest reader start generation before new data is written, and the
 * generation counter is advanced right after each transferHoldLists() call.
 */
void
MultiValueMappingTest::testHoldListAndGeneration()
{
    uint32_t numKeys = 10;
    uint32_t maxCount = Index::maxValues() + 1;
    uint32_t maxKeys = numKeys * 2;

    Histogram initCapacity(Index::maxValues());
    for (uint32_t i = 1; i < maxCount; ++i) {
        initCapacity[i] = numKeys; // make enough capacity for 1/2 of the keys
    }
    MvMapping mvm(dummyCommittedDocIdLimit, maxKeys, initCapacity);
    EXPECT_TRUE(mvm.enoughCapacity(initCapacity));

    ReaderVector readers;
    uint32_t safeGen = std::numeric_limits<uint32_t>::max();
    // how many generations the next reader stays registered; cycles through
    // the values 2..5 (see the update further down)
    uint32_t readDuration = 2;
    generation_t mvmGen = 0u;

    for (uint32_t valueCount = 1; valueCount < maxCount; ++valueCount) {
#ifdef LOG_MULTIVALUE_MAPPING
        LOG(info, "#################### count(%u) - gen(%u) ####################",
            valueCount, mvm.getGeneration());
#endif

        // check and remove readers
        checkReaders(mvm, mvmGen, readers);

        // update safe generation and removeOldGenerations
        // (safeGen becomes the smallest start generation among the readers
        // still registered, or stays at the uint32_t maximum if there are none)
        safeGen = std::numeric_limits<uint32_t>::max();
        for (ReaderVector::iterator iter = readers.begin(); iter != readers.end(); ++iter) {
            if ((*iter)._startGen < safeGen) {
                safeGen= (*iter)._startGen;
            }
        }
        mvm.trimHoldLists(safeGen);

        // set new values for 1/2 of the keys
        for (uint32_t key = 0; key < numKeys; ++key) {
            std::vector<uint32_t> values(valueCount, valueCount * numKeys + key);
            mvm.set(key, values);
        }
        // check new values
        for (uint32_t key = 0; key < numKeys; ++key) {
            EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
            std::vector<uint32_t> buffer(valueCount);
            EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
            EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * numKeys + key)) == valueCount);
        }
        mvm.transferHoldLists(mvmGen);
        ++mvmGen;

        // associate reader with current generation
        // (the reader captures the current index of each key together with
        // the values just written for it)
        IndexVector indices;
        ExpectedVector expected;
        for (uint32_t key = 0; key < numKeys; ++key) {
            indices.push_back(mvm._indices[key]);
            expected.push_back(std::vector<uint32_t>(valueCount, valueCount * numKeys + key));
        }
        readers.push_back(Reader(mvmGen, mvmGen + readDuration,
                                 indices, expected));
        readDuration = (readDuration % 4) + 2;

        // perform compaction
        // (asking for room for maxKeys entries of this value count is
        // expected to exceed the current capacity)
        Histogram needed(Index::maxValues());
        needed[valueCount] = maxKeys;
        EXPECT_TRUE(!mvm.enoughCapacity(needed));
        mvm.performCompaction(needed);

        // set new value for all keys (the associated reader should see the old values)
        for (uint32_t key = 0; key < maxKeys; ++key) {
            std::vector<uint32_t> values(valueCount, valueCount * maxKeys + key);
            mvm.set(key, values);
        }
        // check new values
        for (uint32_t key = 0; key < maxKeys; ++key) {
            EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
            std::vector<uint32_t> buffer(valueCount);
            EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
            EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * maxKeys + key)) == valueCount);
        }

        mvm.transferHoldLists(mvmGen);
        ++mvmGen;
    }
    // keep advancing the generation until every remaining reader has been
    // checked and removed by checkReaders()
    while (!readers.empty()) {
        checkReaders(mvm, mvmGen, readers);
        mvm.transferHoldLists(mvmGen);
        ++mvmGen;
    }
}
use all capacity + for (uint32_t key = 1; key < maxCount; ++key) { + std::vector<uint32_t> values(key, key); + Histogram needed(Index::maxValues()); + needed[key] = 1; + EXPECT_EQUAL(mvm.getValueCount(key), 0u); + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + } +} + +void +MultiValueMappingTest::testVariousGets() +{ + MvMapping::Histogram initCapacity(Index::maxValues()); + initCapacity[5] = 1; + initCapacity[Index::maxValues()] = 1; + MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity); + Index idx; + + mvm.set(1, std::vector<uint32_t>(5, 50)); + mvm.set(2, std::vector<uint32_t>(25, 250)); + EXPECT_TRUE(25 >= Index::maxValues()); + + { + std::vector<uint32_t> buffer(5); + EXPECT_TRUE(mvm.get(0, &buffer[0], 0) == 0); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0); + } + { + std::vector<uint32_t> buffer(5); + EXPECT_TRUE(mvm.get(0, &buffer[0], 5) == 0); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0); + } + { + std::vector<uint32_t> buffer(10); + EXPECT_TRUE(mvm.get(1, &buffer[0], 3) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 3); + } + { + std::vector<uint32_t> buffer(10); + EXPECT_TRUE(mvm.get(1, &buffer[0], 10) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5); + } + { + std::vector<uint32_t> buffer(30); + EXPECT_TRUE(mvm.get(2, &buffer[0], 23) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 23); + } + { + std::vector<uint32_t> buffer(30); + EXPECT_TRUE(mvm.get(2, &buffer[0], 30) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 25); + } +} + +void +MultiValueMappingTest::testReplace() +{ + MvMapping::Histogram initCapacity(Index::maxValues()); + initCapacity[5] = 1; + 
initCapacity[Index::maxValues()] = 1; + MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity); + Index idx; + + mvm.set(1, std::vector<uint32_t>(5, 50)); + mvm.set(2, std::vector<uint32_t>(25, 100)); + EXPECT_TRUE(25 >= Index::maxValues()); + + { + EXPECT_TRUE(mvm.getValueCount(0) == 0); + std::vector<uint32_t> replace(5, 50); + mvm.replace(0, replace); + EXPECT_TRUE(mvm.getValueCount(0) == 0); + } + { + EXPECT_TRUE(mvm.getValueCount(1) == 5); + std::vector<uint32_t> buffer(5); + EXPECT_TRUE(mvm.get(1, buffer) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5); + + std::vector<uint32_t> replace(5, 55); + mvm.replace(1, replace); + EXPECT_TRUE(mvm.getValueCount(1) == 5); + EXPECT_TRUE(mvm.get(1, buffer) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)55)) == 5); + } + { + EXPECT_TRUE(mvm.getValueCount(2) == 25); + std::vector<uint32_t> buffer(25); + EXPECT_TRUE(mvm.get(2, buffer) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)100)) == 25); + + std::vector<uint32_t> replace(25, 200); + mvm.replace(2, replace); + EXPECT_TRUE(mvm.getValueCount(2) == 25); + EXPECT_TRUE(mvm.get(2, buffer) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)200)) == 25); + } +} + +void +MultiValueMappingTest::testMemoryUsage() +{ + uint32_t numKeys = Index::maxValues() + 4; + MemoryUsage exp; + exp.incAllocatedBytes(numKeys * sizeof(Index)); + exp.incUsedBytes(numKeys * sizeof(Index)); + uint32_t totalCnt = 0; + + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 0; i < Index::maxValues(); ++i) { + initCapacity[i] = 2; + exp.incAllocatedBytes(i * 2 * sizeof(uint32_t)); + } + initCapacity[Index::maxValues()] = 12; + exp.incAllocatedBytes(12 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector + + MvMapping mvm(dummyCommittedDocIdLimit, + numKeys, initCapacity, 
GrowStrategy(numKeys)); + + // usage before inserting values + MemoryUsage usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0)); + + // insert values for all keys + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t cnt = key + 1; + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), cnt); + totalCnt += cnt; + exp.incUsedBytes(cnt * sizeof(uint32_t)); + if (cnt >= Index::maxValues()) { + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector + } + } + + // usage after inserting values + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0)); + + totalCnt = 0; + // insert new values for all keys making dead bytes + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t cnt = key + 2; + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), cnt); + totalCnt += cnt; + exp.incUsedBytes(cnt * sizeof(uint32_t)); + if ((cnt - 1) < Index::maxValues()) { + exp.incDeadBytes((cnt - 1) * sizeof(uint32_t)); // the previous values are marked dead + } else { + exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + } + if (cnt >= Index::maxValues()) { + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector + } + } + + // usage after inserting new values making dead bytes + usage = mvm.getMemoryUsage(); + 
EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + + // make sure all internal vectors are put on hold list + mvm.performCompaction(initCapacity); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytes() - numKeys * sizeof(Index) + exp.allocatedBytesOnHold()); + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u); +} + + +void +MultiValueMappingTest::testShrink() +{ + uint32_t committedDocIdLimit = dummyCommittedDocIdLimit; + MvMapping mvm(committedDocIdLimit); + for (uint32_t i = 0; i < 10; ++i) { + uint32_t k; + mvm.addKey(k); + EXPECT_EQUAL(i, k); + } + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + uint32_t shrinkTarget = 4; + committedDocIdLimit = shrinkTarget; + mvm.shrinkKeys(shrinkTarget); + mvm.transferHoldLists(1); + mvm.trimHoldLists(2); + EXPECT_EQUAL(shrinkTarget, mvm.getNumKeys()); + EXPECT_EQUAL(shrinkTarget, mvm.getCapacityKeys()); +} + + +void +MultiValueMappingTest::testHoldElem() +{ + uint32_t numKeys = 1; + MemoryUsage exp; + exp.incAllocatedBytes(numKeys * sizeof(Index)); + exp.incUsedBytes(numKeys * sizeof(Index)); + + Histogram initCapacity(Index::maxValues()); + initCapacity[Index::maxValues()] = 3; + exp.incAllocatedBytes(3 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector + + 
MvMapping mvm(dummyCommittedDocIdLimit, + numKeys, initCapacity, GrowStrategy(numKeys)); + + // usage before inserting values + MemoryUsage usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), 0u); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + + uint32_t key = 0; + uint32_t cnt = Index::maxValues() + 3; + { + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(cnt * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + } + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + ++cnt; + { + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(cnt * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + } + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + exp.incDeadBytes(sizeof(vespalib::Array<uint32_t>)); + exp.decAllocatedBytes((cnt - 1) * sizeof(uint32_t)); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + 
EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u); +} + +namespace { + +void +insertValues(MvMapping &mvm, uint32_t key, uint32_t count) +{ + std::vector<uint32_t> values(count, 13); + mvm.set(key, values); +} + +Histogram +createHistogram(uint32_t numValuesPerValueClass) +{ + Histogram result(Index32::maxValues()); + for (uint32_t i = 0; i <= Index32::maxValues(); ++i) { + result[i] = numValuesPerValueClass; + } + return result; +} + +const size_t ADDRESS_LIMIT = 134217728; // Index32::offsetSize() + +struct AddressSpaceFixture +{ + MvMapping mvm; + AddressSpaceFixture() + : mvm(dummyCommittedDocIdLimit, 20, createHistogram(4), GrowStrategy(20)) + {} +}; + +} + +void +MultiValueMappingTest::requireThatAddressSpaceUsageIsReported() +{ + AddressSpaceFixture f; + MvMapping &mvm = f.mvm; + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 1); + EXPECT_EQUAL(AddressSpace(1, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 2, 2); + insertValues(mvm, 3, 2); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 4, 13); + insertValues(mvm, 5, 13); + insertValues(mvm, 6, 13); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 7, 14); + insertValues(mvm, 8, 14); + insertValues(mvm, 9, 14); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 15); + insertValues(mvm, 11, 16); + insertValues(mvm, 12, 17); + insertValues(mvm, 13, 18); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); +} + +void +MultiValueMappingTest::requireThatDeadIsNotAccountedInAddressSpaceUsage() +{ + AddressSpaceFixture f; + MvMapping &mvm = f.mvm; + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 3); + insertValues(mvm, 2, 3); + 
insertValues(mvm, 3, 3); + insertValues(mvm, 4, 3); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 4); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 2, 5); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 15); + insertValues(mvm, 11, 15); + insertValues(mvm, 12, 15); + insertValues(mvm, 13, 15); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 14); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 11, 14); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); +} + +int +MultiValueMappingTest::Main() +{ + TEST_INIT("multivaluemapping_test"); + + testIndex32(); + testIndex64(); + testSimpleSetAndGet(); + testChangingValueCount(); + testHoldListAndGeneration(); + testManualCompaction(); + testVariousGets(); + testReplace(); + testMemoryUsage(); + testShrink(); + testHoldElem(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + TEST_DO(requireThatDeadIsNotAccountedInAddressSpaceUsage()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::MultiValueMappingTest); diff --git a/searchlib/src/tests/attribute/postinglist/.gitignore b/searchlib/src/tests/attribute/postinglist/.gitignore new file mode 100644 index 00000000000..8cf10f7f9dc --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +postinglist_test +searchlib_postinglist_test_app diff --git a/searchlib/src/tests/attribute/postinglist/CMakeLists.txt b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt new file mode 100644 index 00000000000..a22d1ae2fdc --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_postinglist_test_app + SOURCES + postinglist.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_postinglist_test_app COMMAND searchlib_postinglist_test_app) diff --git a/searchlib/src/tests/attribute/postinglist/DESC b/searchlib/src/tests/attribute/postinglist/DESC new file mode 100644 index 00000000000..1499e3070fb --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/DESC @@ -0,0 +1 @@ +This is a test for the AttributePostingList class. diff --git a/searchlib/src/tests/attribute/postinglist/FILES b/searchlib/src/tests/attribute/postinglist/FILES new file mode 100644 index 00000000000..268f6c09f1e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/FILES @@ -0,0 +1 @@ +postinglist.cpp diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp new file mode 100644 index 00000000000..ab95ce27a0e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp @@ -0,0 +1,707 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("postinglist_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/util/rand48.h> +#include <algorithm> +#include <limits> +#include <map> +#include <set> + +#include <vespa/searchlib/btree/datastore.h> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreestore.hpp> + +namespace search { + +using vespalib::GenerationHandler; + +/* + * TODO: Make it pass MALLOC_OPTIONS=AJ on freebsd and valgrind on Linux. 
+ */ + +class AttributePostingListTest : public vespalib::TestApp +{ +private: + /* Limited STL version for validation of full version */ + typedef std::set<uint32_t> STLPostingList; + typedef std::map<int, STLPostingList> STLValueTree; + + class RandomValue + { + public: + uint32_t _docId; + int _value; + uint32_t _order; + + RandomValue(void) + : _docId(0), + _value(0u), + _order(0u) + { + } + + RandomValue(uint32_t docId, uint32_t value, uint32_t order) + : _docId(docId), + _value(value), + _order(order) + { + } + + bool + operator<(const RandomValue &rhs) const + { + return (_value < rhs._value || + (_value == rhs._value && + (_docId < rhs._docId || + (_docId == rhs._docId && + _order < rhs._order)))); + } + + bool + operator>(const RandomValue &rhs) const + { + return (_value > rhs._value || + (_value == rhs._value && + (_docId > rhs._docId || + (_docId == rhs._docId && + _order > rhs._order)))); + } + + bool + operator==(const RandomValue &rhs) const + { + return (_value == rhs._value && + _docId == rhs._docId && + _order == rhs._order); + } + }; + + class CompareOrder + { + public: + bool + operator()(const RandomValue &a, const RandomValue &b) + { + return (a._order < b._order || + (a._order == b._order && + (a._value < b._value || + (a._value == b._value && + a._docId < b._docId)))); + } + }; + std::vector<RandomValue> _randomValues; + +public: + typedef btree::DataStore<int> IntKeyStore; + typedef btree::BTreeKeyData<uint32_t, btree::BTreeNoLeafData> + AttributePosting; + typedef btree::BTreeStore<uint32_t, + btree::BTreeNoLeafData, + btree::NoAggregated, + std::less<uint32_t>, + btree::BTreeDefaultTraits> + PostingList; + typedef PostingList::NodeAllocatorType PostingListNodeAllocator; + typedef btree::EntryRef PostingIdx; + typedef btree::EntryRef StoreIndex; + + class IntComp { + private: + const IntKeyStore & _store; + int _value; + int getValue(const StoreIndex & idx) const { + if (idx.valid()) { + return _store.getEntry(idx); + } + return _value; + 
} + public: + IntComp(const IntKeyStore & store) : _store(store), _value(0) {} + IntComp(const IntKeyStore & store, int value) : _store(store), _value(value) {} + bool operator() (const StoreIndex & lhs, const StoreIndex & rhs) const { + return getValue(lhs) < getValue(rhs); + } + }; + + typedef btree::BTreeRoot<StoreIndex, PostingIdx, + btree::NoAggregated, + const IntComp &> IntEnumTree; + typedef IntEnumTree::NodeAllocatorType IntEnumNodeAllocator; + typedef IntEnumTree Tree; + typedef IntEnumNodeAllocator TreeManager; + typedef IntKeyStore ValueHandle; + typedef std::vector<RandomValue> RandomValuesVector; +private: + GenerationHandler _handler; + IntKeyStore *_intKeyStore; + IntEnumNodeAllocator *_intNodeAlloc; + IntEnumTree *_intTree; + PostingList *_intPostings; + STLValueTree *_stlTree; + + Rand48 _randomGenerator; + uint32_t _generation; + + void + allocTree(void); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count, + unsigned int mvcount); + + void + insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + sortRandomValues(void); + + void + doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle); + + void + doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + removeOldGenerations(Tree &tree, + ValueHandle &valueHandle, 
+ PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + static const char * + frozenName(bool frozen) + { + return frozen ? "frozen" : "thawed"; + } +public: + AttributePostingListTest(void) + : vespalib::TestApp(), + _randomValues(), + _handler(), + _intKeyStore(NULL), + _intNodeAlloc(NULL), + _intTree(NULL), + _intPostings(NULL), + _stlTree(NULL), + _randomGenerator() + { + } + + int Main(void); +}; + + + +void +AttributePostingListTest::allocTree(void) +{ + _intKeyStore = new IntKeyStore; + _intNodeAlloc = new IntEnumNodeAllocator(); + _intTree = new IntEnumTree(); + _intPostings = new PostingList(); + _stlTree = new STLValueTree; +} + + +void +AttributePostingListTest::freeTree(bool verbose) +{ + (void) verbose; + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %zu leaves", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()), + _intTree->size(*_intNodeAlloc)); + _intTree->clear(*_intNodeAlloc); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + _intNodeAlloc->freeze(); + _intPostings->freeze(); + _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intPostings->clearBuilder(); + _intPostings->transferHoldLists(_handler.getCurrentGeneration()); + _handler.incGeneration(); + _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration()); + _intPostings->trimHoldLists(_handler.getFirstUsedGeneration()); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + delete _stlTree; + _stlTree = NULL; + delete _intTree; + _intTree = NULL; + delete _intNodeAlloc; + 
_intNodeAlloc = NULL; + delete _intKeyStore; + _intKeyStore = NULL; + delete _intPostings; + _intPostings = NULL; +} + + +void +AttributePostingListTest:: +fillRandomValues(unsigned int count, + unsigned int mvcount) +{ + unsigned int i; + unsigned int j; + unsigned int mv; + unsigned int mvmax; + unsigned int mvcount2; + unsigned int mvcount3; + + mvmax = 100; + mvcount2 = mvcount * (mvmax * (mvmax - 1)) / 2; + LOG(info, + "Filling %u+%u random values", count, mvcount2); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i <count; i++) { + uint32_t docId = _randomGenerator.lrand48(); + uint32_t val = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + for (mv = 1; mv < mvmax; mv++) { + for (i = 0; i < mvcount; i++) { + for (j = 0; j < mv; j++) { + uint32_t docId = _randomGenerator.lrand48(); + uint32_t val = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + } + } + mvcount3 = 0; + for (mv = 10; mv < 4000; mv = mv * 3) + { + mvcount3 += mv * 2; + for (j = 0; j < mv; j++) { + uint32_t val = _randomGenerator.lrand48(); + uint32_t docId = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + val = _randomGenerator.lrand48(); + docId = _randomGenerator.lrand48(); + order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + } + std::sort(_randomValues.begin(), + _randomValues.end(), + CompareOrder()); + + EXPECT_TRUE(_randomValues.size() == count + mvcount2 + mvcount3); +} + + +void +AttributePostingListTest:: +insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector & + values) +{ + RandomValuesVector::iterator i; + 
RandomValuesVector::iterator ie; + + LOG(info, "insertRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + if (!itr.valid()) { +#if 0 + if (valueHandle.needResize()) + doCompactEnumStore(tree, treeMgr, valueHandle); +#endif + StoreIndex idx = valueHandle.addEntry(i->_value); + if (tree.insert(idx, PostingIdx(), treeMgr, IntComp(valueHandle))) { + itr = tree.find(idx, treeMgr, IntComp(valueHandle)); + } + } else { + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(i->_value, valueHandle.getEntry(itr.getKey())); + + /* TODO: Insert docid to postinglist */ + PostingIdx oldIdx = itr.getData(); + PostingIdx newIdx = oldIdx; + AttributePosting newPosting(i->_docId, + btree::BTreeNoLeafData()); + std::vector<AttributePosting> additions; + std::vector<uint32_t> removals; + additions.push_back(newPosting); + postings.apply(newIdx, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + if (it == stlTree->end()) { + std::pair<STLValueTree::iterator,bool> ir = + stlTree->insert(std::make_pair(i->_value, + STLPostingList())); + ASSERT_TRUE(ir.second && ir.first != stlTree->end() && + ir.first->first == i->_value); + it = ir.first; + } + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + it->second.insert(i->_docId); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = 
it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "insertRandomValues done"); +} + + +void +AttributePostingListTest:: +removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "removeRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + PostingIdx newIdx; + /* + * TODO: Remove docid from postinglist, and only remove + * value from tree if postinglist is empty + */ + if (itr.valid()) { + PostingIdx oldIdx = itr.getData(); + newIdx = oldIdx; + std::vector<AttributePosting> additions; + std::vector<uint32_t> removals; + removals.push_back(i->_docId); + postings.apply(newIdx, &additions[0], &additions[0]+additions.size(), + &removals[0], &removals[0] + removals.size()); + if (newIdx != oldIdx) { + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + } + if (!newIdx.valid()) { + if (tree.remove(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value))) { + itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + } + } + } + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + STLPostingList::iterator it2; + it2 = it->second.find(i->_docId); + ASSERT_TRUE(it2 != it->second.end() && + *it2 == i->_docId); + it->second.erase(it2); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = 
postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "removeRandomValues done"); +} + + +void +AttributePostingListTest:: +lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "lookupRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + ASSERT_TRUE(itr.valid() && + valueHandle.getEntry(itr.getKey()) == i->_value); + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(itr.getData()); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(itr.getData()); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + LOG(info, "lookupRandomValues done"); +} + + +void 
+AttributePostingListTest::doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle) +{ + LOG(info, + "doCompactEnumStore start"); + + Tree::Iterator i = tree.begin(treeMgr); + + uint32_t numBuffers = valueHandle.getNumBuffers(); + std::vector<uint32_t> toHold; + + for (uint32_t bufferId = 0; bufferId < numBuffers; ++bufferId) { + btree::BufferState &state = valueHandle.getBufferState(bufferId); + if (state._state == btree::BufferState::ACTIVE) { + toHold.push_back(bufferId); + // Freelists already disabled due to variable sized data + } + } + valueHandle.switchActiveBuffer(0, 0u); + + for (; i.valid(); ++i) + { + StoreIndex ov = i.getKey(); + StoreIndex nv = valueHandle.addEntry(valueHandle.getEntry(ov)); + + std::atomic_thread_fence(std::memory_order_release); + i.writeKey(nv); + } + typedef GenerationHandler::generation_t generation_t; + for (std::vector<uint32_t>::const_iterator + it = toHold.begin(), ite = toHold.end(); it != ite; ++it) { + valueHandle.holdBuffer(*it); + } + generation_t generation = _handler.getCurrentGeneration(); + valueHandle.transferHoldLists(generation); + _handler.incGeneration(); + valueHandle.trimHoldLists(_handler.getFirstUsedGeneration()); + + LOG(info, + "doCompactEnumStore done"); +} + + +void +AttributePostingListTest:: +doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + LOG(info, + "doCompactPostingList start"); + +#if 0 + Tree::Iterator i(tree.begin(treeMgr)); + + postings.performCompaction(i, capacityNeeded); +#else + (void) tree; + (void) treeMgr; + (void) postings; + (void) postingsAlloc; +#endif + + LOG(info, + "doCompactPostingList done"); +} + + +void +AttributePostingListTest:: +bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + (void) tree; + (void) valueHandle; + postingsAlloc.freeze(); + 
postingsAlloc.transferHoldLists(_handler.getCurrentGeneration()); + postings.transferHoldLists(_handler.getCurrentGeneration()); + _handler.incGeneration(); +} + +void +AttributePostingListTest:: +removeOldGenerations(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + (void) tree; + (void) valueHandle; + postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration()); + postings.trimHoldLists(_handler.getFirstUsedGeneration()); +} + +int +AttributePostingListTest::Main() +{ + TEST_INIT("postinglist_test"); + + fillRandomValues(1000, 10); + + allocTree(); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + _intNodeAlloc->freeze(); + _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore); + removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + freeTree(true); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::AttributePostingListTest); diff --git a/searchlib/src/tests/attribute/postinglistattribute/.gitignore b/searchlib/src/tests/attribute/postinglistattribute/.gitignore new file mode 100644 index 00000000000..9614cdd7626 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +postinglistattribute_test +searchlib_postinglistattribute_test_app diff --git a/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt new file mode 100644 index 00000000000..77d137c7b6e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_postinglistattribute_test_app + SOURCES + postinglistattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_postinglistattribute_test_app COMMAND sh postinglistattribute_test.sh) diff --git a/searchlib/src/tests/attribute/postinglistattribute/DESC b/searchlib/src/tests/attribute/postinglistattribute/DESC new file mode 100644 index 00000000000..04c97a729a0 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/DESC @@ -0,0 +1 @@ +Unit tests for subclasses of PostingListAttribute. diff --git a/searchlib/src/tests/attribute/postinglistattribute/FILES b/searchlib/src/tests/attribute/postinglistattribute/FILES new file mode 100644 index 00000000000..56029570a21 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/FILES @@ -0,0 +1 @@ +postinglistattribute.cpp diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp new file mode 100644 index 00000000000..5e248dc8758 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp @@ -0,0 +1,1021 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("postinglistattribute_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/asciistream.h> + +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/postinglistattribute.h> +#include <vespa/searchlib/attribute/singlenumericpostattribute.h> +#include <vespa/searchlib/attribute/multinumericpostattribute.h> +#include <vespa/searchlib/attribute/singlestringpostattribute.h> +#include <vespa/searchlib/attribute/multistringpostattribute.h> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/vespalib/util/compress.h> + +using std::shared_ptr; + +bool +FastOS_UNIX_File::Sync(void) +{ + // LOG(info, "Skip sync"); + return true; +} + +namespace search { + +using attribute::CollectionType; +using attribute::BasicType; +using attribute::Config; +using queryeval::PostingInfo; +using queryeval::MinMaxPostingInfo; +using search::fef::TermFieldMatchData; +using search::queryeval::SearchIterator; + +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr; + +void +toStr(std::stringstream &ss, SearchIterator &it) +{ + it.initFullRange(); + it.seek(1u); + bool first = true; + while ( !it.isAtEnd()) { + if (first) + first = false; + else + ss << ","; + ss << it.getDocId(); + it.seek(it.getDocId() + 1); + } +} + + +bool +assertIterator(const std::string &exp, SearchIterator &it) +{ + std::stringstream ss; + toStr(ss, it); + if (!EXPECT_EQUAL(exp, ss.str())) + return false; + return true; +} + + +class PostingListAttributeTest : public vespalib::TestApp +{ +private: + typedef IntegerAttribute::largeint_t largeint_t; + typedef AttributeVector::SP AttributePtr; + typedef std::set<AttributeVector::DocId> DocSet; + + typedef 
SingleValueNumericPostingAttribute< + EnumAttribute<IntegerAttributeTemplate<int32_t> > > + Int32PostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<IntegerAttributeTemplate<int32_t> >, + multivalue::MVMTemplateArg< + multivalue::Value<EnumStoreBase::Index>, + multivalue::Index32> > Int32ArrayPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<IntegerAttributeTemplate<int32_t> >, + multivalue::MVMTemplateArg< + multivalue::WeightedValue<EnumStoreBase::Index>, + multivalue::Index32> > Int32WsetPostingListAttribute; + + typedef SingleValueNumericPostingAttribute< + EnumAttribute<FloatingPointAttributeTemplate<float> > > + FloatPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<FloatingPointAttributeTemplate<float> >, + multivalue::MVMTemplateArg< + multivalue::Value<EnumStoreBase::Index>, + multivalue::Index32> > FloatArrayPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<FloatingPointAttributeTemplate<float> >, + multivalue::MVMTemplateArg< + multivalue::WeightedValue<EnumStoreBase::Index>, + multivalue::Index32> > FloatWsetPostingListAttribute; + + typedef SingleValueStringPostingAttribute StringPostingListAttribute; + typedef ArrayStringPostingAttribute StringArrayPostingListAttribute; + typedef WeightedSetStringPostingAttribute StringWsetPostingListAttribute; + + template <typename VectorType> + void + populate(VectorType &v); + + template <typename VectorType> + VectorType & + as(AttributePtr &v); + + IntegerAttribute & + asInt(AttributePtr &v); + + StringAttribute & + asString(AttributePtr &v); + + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template <typename V, typename T> + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix); + + template <typename V> + SearchContextPtr + getSearch(const V & vec); + + template <typename V> 
+ SearchContextPtr + getSearch2(const V & vec); + + bool + assertSearch(const std::string &exp, StringAttribute &sa); + + void addDocs(const AttributePtr & ptr, uint32_t numDocs); + + template <typename VectorType, typename BufferType, typename Range> + void checkPostingList(const VectorType & vec, const std::vector<BufferType> & values, const Range & range); + + template <typename VectorType, typename BufferType> + void testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, + uint32_t numDocs, const std::vector<BufferType> & values); + void testPostingList(); + + template <typename AttributeType, typename ValueType> + void checkPostingList(AttributeType & vec, ValueType value, DocSet expected); + template <typename AttributeType, typename ValueType> + void checkNonExistantPostingList(AttributeType & vec, ValueType value); + template <typename AttributeType, typename ValueType> + void testArithmeticValueUpdate(const AttributePtr & ptr); + void testArithmeticValueUpdate(); + + template <typename VectorType, typename ValueType> + void testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value); + void testReload(); + + template <typename VectorType> + void + testMinMax(AttributePtr &ptr1, uint32_t trimmed); + + template <typename VectorType> + void + testMinMax(AttributePtr &ptr1, AttributePtr &ptr2); + + void + testMinMax(void); + + void + testStringFold(void); +public: + int Main(); +}; + +template <> +void +PostingListAttributeTest::populate<IntegerAttribute>(IntegerAttribute &v) +{ + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 0) + continue; + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (i == 20) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + 
EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (i == 25) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 12); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + v.append(i, -42, 3); + } else { + v.update(i, -42); + } + v.commit(); + } + v.commit(); +} + +template <> +void +PostingListAttributeTest::populate<StringAttribute>(StringAttribute &v) +{ + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 0) + continue; + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (i == 20) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (i == 25) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 12); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + v.append(i, "foo", 3); + } else { + v.update(i, "foo"); + } + v.commit(); + } +} + + +template <typename VectorType> +VectorType & +PostingListAttributeTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast<VectorType *>(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +PostingListAttributeTest::asInt(AttributePtr &v) +{ + return as<IntegerAttribute>(v); +} + + +StringAttribute & +PostingListAttributeTest::asString(AttributePtr &v) +{ + return as<StringAttribute>(v); +} + + +void +PostingListAttributeTest::buildTermQuery(std::vector<char> &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t 
queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <typename V, typename T> +SearchContextPtr +PostingListAttributeTest::getSearch(const V &vec, const T &term, bool prefix) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast<const AttributeVector &>(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch<IntegerAttribute>(const IntegerAttribute &v) +{ + return getSearch<IntegerAttribute>(v, "[-42;-42]", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch<StringAttribute>(const StringAttribute &v) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch2<IntegerAttribute>(const IntegerAttribute &v) +{ + return getSearch<IntegerAttribute>(v, "[-43;-43]", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch2<StringAttribute>(const StringAttribute &v) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "bar", false); +} + + +bool +PostingListAttributeTest::assertSearch(const std::string &exp, + StringAttribute &sa) +{ + TermFieldMatchData md; + SearchContextPtr sc = getSearch<StringAttribute>(sa); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + if (!EXPECT_TRUE(assertIterator(exp, *sb))) + return 
false; + return true; +} + + +void +PostingListAttributeTest::addDocs(const AttributePtr & ptr, uint32_t numDocs) +{ + for (uint32_t i = 0; i < numDocs; ++i) { + uint32_t doc; + ASSERT_TRUE(ptr->addDoc(doc)); + ASSERT_TRUE(doc == i); + ASSERT_TRUE(ptr->getNumDocs() == i + 1); + } + ASSERT_TRUE(ptr->getNumDocs() == numDocs); +} + +class RangeAlpha { +private: + uint32_t _part; +public: + RangeAlpha(uint32_t part) : _part(part) { } + uint32_t getBegin(uint32_t i) const { return i * _part; } + uint32_t getEnd(uint32_t i) const { return (i + 1) * _part; } +}; + +class RangeBeta { +private: + uint32_t _part; + uint32_t _numValues; +public: + RangeBeta(uint32_t part, uint32_t numValues) : _part(part), _numValues(numValues) { } + uint32_t getBegin(uint32_t i) const { return (_numValues - 1 - i) * _part; } + uint32_t getEnd(uint32_t i) const { return (_numValues - i) * _part; } +}; + +template <typename VectorType, typename BufferType, typename RangeGenerator> +void +PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::vector<BufferType> & values, + const RangeGenerator & range) +{ + const typename VectorType::EnumStore & enumStore = vec.getEnumStore(); + const typename VectorType::Dictionary & dict = + enumStore.getPostingDictionary(); + const typename VectorType::PostingList & postingList = vec.getPostingList(); + + for (size_t i = 0; i < values.size(); ++i) { + uint32_t docBegin = range.getBegin(i); + uint32_t docEnd = range.getEnd(i); + + typename VectorType::DictionaryIterator itr = + dict.find(typename VectorType::EnumIndex(), + typename VectorType::ComparatorType(enumStore, values[i])); + ASSERT_TRUE(itr.valid()); + + typename VectorType::PostingList::Iterator postings; + postings = postingList.begin(itr.getData()); + + uint32_t doc = docBegin; + for (; postings.valid(); ++postings) { + EXPECT_EQUAL(doc++, postings.getKey()); + } + EXPECT_EQUAL(doc, docEnd); + } +} + +template <typename VectorType, typename BufferType> +void 
+PostingListAttributeTest::testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, + uint32_t numDocs, const std::vector<BufferType> & values) +{ + LOG(info, "testPostingList: vector '%s'", ptr1->getName().c_str()); + + VectorType & vec1 = static_cast<VectorType &>(*ptr1.get()); + VectorType & vec2 = static_cast<VectorType &>(*ptr2.get()); + addDocs(ptr1, numDocs); + + uint32_t part = numDocs / values.size(); + + // insert values + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t idx = doc / part; + EXPECT_TRUE(vec1.update(doc, values[idx])); + } + vec1.commit(); + +#if 0 + std::cout << "***** printBuffer 0 ***** " << std::endl; + vec1.getEnumStore().printBuffer(std::cout, 0); + std::cout << "***** printBuffer 1 ***** " << std::endl; + vec1.getEnumStore().printBuffer(std::cout, 1); + std::cout << "***** printCurrentContent ***** " << std::endl; + vec1.getEnumStore().printCurrentContent(std::cout); + std::cout << "***** printPostingListContent *****" << std::endl; + vec1.printPostingListContent(std::cout); +#endif + + // check posting list for correct content + checkPostingList(vec1, values, RangeAlpha(part)); + + // load and save vector + ptr1->saveAs(ptr2->getBaseFileName()); + ptr2->load(); +#if 0 + std::cout << "***** vec2.printPostingListContent *****" << std::endl; + vec2.printPostingListContent(std::cout); +#endif + checkPostingList(vec2, values, RangeAlpha(part)); + + // insert values in another order + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t idx = values.size() - 1 - (doc / part); + EXPECT_TRUE(vec1.update(doc, values[idx])); + } + vec1.commit(); + + // check posting list again for correct content + checkPostingList(vec1, values, RangeBeta(part, values.size())); + + // load and save vector + ptr1->saveAs(ptr2->getBaseFileName()); + ptr2->load(); + checkPostingList(vec2, values, RangeBeta(part, values.size())); +} + +void +PostingListAttributeTest::testPostingList() +{ + uint32_t numDocs = 1000; + uint32_t numValues = 
50; + + { // IntegerAttribute + std::vector<largeint_t> values; + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(i); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testPostingList<Int32PostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("aint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("aint32_2", cfg); + testPostingList<Int32ArrayPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsint32_2", cfg); + testPostingList<Int32WsetPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + } + + { // FloatingPointAttribute + std::vector<double> values; + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(i); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testPostingList<FloatPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("afloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("afloat_2", cfg); + testPostingList<FloatArrayPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::WSET)); + cfg.setFastSearch(true); + 
AttributePtr ptr1 = AttributeFactory::createAttribute("wsfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsfloat_2", cfg); + testPostingList<FloatWsetPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + } + + { // StringAttribute + std::vector<vespalib::string> values; + std::vector<const char *> charValues; + values.reserve(numValues); + charValues.reserve(numValues); + values.push_back(""); + charValues.push_back(values.back().c_str()); + for (uint32_t i = 1; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << i; + values.push_back(ss.str()); + charValues.push_back(values.back().c_str()); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testPostingList<StringPostingListAttribute>(ptr1, ptr2, numDocs, charValues); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("astr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("astr_2", cfg); + testPostingList<StringArrayPostingListAttribute>(ptr1, ptr2, numDocs, charValues); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); + testPostingList<StringWsetPostingListAttribute>(ptr1, ptr2, numDocs, charValues); + } + } +} + +template <typename AttributeType, typename ValueType> +void +PostingListAttributeTest::checkPostingList(AttributeType & vec, ValueType value, DocSet expected) +{ + const typename AttributeType::EnumStore & enumStore = vec.getEnumStore(); + const typename AttributeType::Dictionary & dict = + enumStore.getPostingDictionary(); + const typename 
AttributeType::PostingList & postingList = vec.getPostingList(); + typename AttributeType::DictionaryIterator itr = + dict.find(typename AttributeType::EnumIndex(), + typename AttributeType::ComparatorType(vec.getEnumStore(), value)); + ASSERT_TRUE(itr.valid()); + + typename AttributeType::PostingList::Iterator postings; + postings = postingList.begin(itr.getData()); + + DocSet::iterator docBegin = expected.begin(); + DocSet::iterator docEnd = expected.end(); + for (; postings.valid(); ++postings) { + EXPECT_EQUAL(*docBegin++, postings.getKey()); + } + EXPECT_TRUE(docBegin == docEnd); +} + +template <typename AttributeType, typename ValueType> +void +PostingListAttributeTest::checkNonExistantPostingList(AttributeType & vec, ValueType value) +{ + const typename AttributeType::Dictionary & dict = + vec.getEnumStore().getPostingDictionary(); + typename AttributeType::DictionaryIterator itr = + dict.find(typename AttributeType::EnumIndex(), + typename AttributeType::ComparatorType(vec.getEnumStore(), value)); + EXPECT_TRUE(!itr.valid()); +} + +template <typename AttributeType, typename ValueType> +void +PostingListAttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) +{ + LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + AttributeType & vec = static_cast<AttributeType &>(*ptr.get()); + + addDocs(ptr, 4); + + uint32_t allDocs[] = {0, 1, 2, 3}; + checkNonExistantPostingList<AttributeType, ValueType>(vec, 0); + + for (uint32_t doc = 0; doc < 4; ++doc) { + ASSERT_TRUE(vec.update(doc, 100)); + } + ptr->commit(); + + checkNonExistantPostingList<AttributeType, ValueType>(vec, 0); + checkPostingList<AttributeType, ValueType>(vec, 100, DocSet(allDocs, allDocs + 4)); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + 
ptr->commit(); + + { + uint32_t docs[] = {0}; + checkPostingList<AttributeType, ValueType>(vec, 110, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {1}; + checkPostingList<AttributeType, ValueType>(vec, 90, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {2}; + checkPostingList<AttributeType, ValueType>(vec, 1000, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {3}; + checkPostingList<AttributeType, ValueType>(vec, 10, DocSet(docs, docs + 1)); + } + + + // several inside a single commit + for (uint32_t doc = 0; doc < 4; ++doc) { + ASSERT_TRUE(vec.update(doc, 2000)); + } + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + ptr->commit(); + + vespalib::asciistream ss; + vec.printPostingListContent(ss); + std::cout << ss.str(); + { + uint32_t docs[] = {0}; + checkPostingList<AttributeType, ValueType>(vec, 2020, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {1}; + checkPostingList<AttributeType, ValueType>(vec, 1980, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {2}; + checkPostingList<AttributeType, ValueType>(vec, 200000, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {3}; + checkPostingList<AttributeType, ValueType>(vec, 20, DocSet(docs, docs + 1)); + } + checkNonExistantPostingList<AttributeType, ValueType>(vec, 100); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 110); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 90); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 1000); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 10); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 2000); + 
checkNonExistantPostingList<AttributeType, ValueType>(vec, 2010); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 1990); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 20000); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 200); +} + +void +PostingListAttributeTest::testArithmeticValueUpdate() +{ + { // IntegerAttribute + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sint32", cfg); + testArithmeticValueUpdate<Int32PostingListAttribute, largeint_t>(ptr); + } + + { // FloatingPointAttribute + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", cfg); + testArithmeticValueUpdate<FloatPostingListAttribute, double>(ptr); + } +} + + +template <typename VectorType, typename ValueType> +void +PostingListAttributeTest::testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value) +{ + LOG(info, "testReload: vector '%s'", ptr1->getName().c_str()); + + VectorType & vec1 = static_cast<VectorType &>(*ptr1.get()); + + addDocs(ptr1, 5); + for (uint32_t doc = 0; doc < 5; ++doc) { + EXPECT_TRUE(vec1.update(doc, value)); + } + ptr1->commit(); + + ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName())); + ASSERT_TRUE(ptr2->load()); + + EXPECT_TRUE(ptr2->getNumDocs() == 5); + ValueType buffer[1]; + for (uint32_t doc = 0; doc < 5; ++doc) { + EXPECT_TRUE(ptr2->get(doc, buffer, 1) == 1); + EXPECT_EQUAL(buffer[0], value); + } +} + +void +PostingListAttributeTest::testReload() +{ + { // IntegerAttribute + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testReload<Int32PostingListAttribute, largeint_t>(ptr1, ptr2, 100); + } + { + 
AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testReload<Int32PostingListAttribute, largeint_t>(ptr1, ptr2, 0); + } + } + + { // FloatingPointAttribute + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testReload<FloatPostingListAttribute, double>(ptr1, ptr2, 100); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testReload<FloatPostingListAttribute, double>(ptr1, ptr2, 0); + } + } + + { // StringAttribute + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testReload<StringPostingListAttribute, vespalib::string>(ptr1, ptr2, "unique"); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testReload<StringPostingListAttribute, vespalib::string>(ptr1, ptr2, ""); + } + } +} + +template <typename VectorType> +void +PostingListAttributeTest::testMinMax(AttributePtr &ptr1, uint32_t trimmed) +{ + TermFieldMatchData md; + SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(ptr1)); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + sb->initFullRange(); + + const PostingInfo *pi = sb->getPostingInfo(); + ASSERT_TRUE(pi != NULL); + const MinMaxPostingInfo *mmpi = + dynamic_cast<const MinMaxPostingInfo *>(pi); + ASSERT_TRUE(mmpi != NULL); + + if (ptr1->hasMultiValue()) { + if (trimmed == 2u) { + EXPECT_EQUAL(3, mmpi->getMinWeight()); + } else { + 
EXPECT_EQUAL(-3, mmpi->getMinWeight()); + } + EXPECT_EQUAL(3, mmpi->getMaxWeight()); + } else { + EXPECT_EQUAL(1, mmpi->getMinWeight()); + EXPECT_EQUAL(1, mmpi->getMaxWeight()); + } + + sb->seek(1u); + EXPECT_EQUAL(1u, sb->getDocId()); + + sc = getSearch2<VectorType>(as<VectorType>(ptr1)); + sc->fetchPostings(true); + sb = sc->createIterator(&md, true); + sb->initFullRange(); + + pi = sb->getPostingInfo(); + if (trimmed == 2) { + ASSERT_TRUE(pi == NULL); + } else { + ASSERT_TRUE(pi != NULL); + mmpi = dynamic_cast<const MinMaxPostingInfo *>(pi); + ASSERT_TRUE(mmpi != NULL); + + if (ptr1->hasMultiValue()) { + if (trimmed == 0) { + EXPECT_EQUAL(12, mmpi->getMinWeight()); + } else { + EXPECT_EQUAL(14, mmpi->getMinWeight()); + } + EXPECT_EQUAL(14, mmpi->getMaxWeight()); + } else { + EXPECT_EQUAL(1, mmpi->getMinWeight()); + EXPECT_EQUAL(1, mmpi->getMaxWeight()); + } + } + + sb->seek(1u); + if (trimmed == 2u) { + EXPECT_TRUE(sb->isAtEnd()); + } else { + EXPECT_EQUAL(7u, sb->getDocId()); + } +} + +template <typename VectorType> +void +PostingListAttributeTest::testMinMax(AttributePtr &ptr1, AttributePtr &ptr2) +{ + uint32_t numDocs = 100; + addDocs(ptr1, numDocs); + populate(as<VectorType>(ptr1)); + + TEST_DO(testMinMax<VectorType>(ptr1, 0u)); + ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName())); + ASSERT_TRUE(ptr2->load()); + testMinMax<VectorType>(ptr2, 0u); + + ptr2->clearDoc(20); + ptr2->clearDoc(25); + ptr2->commit(); + TEST_DO(testMinMax<VectorType>(ptr2, 1u)); + + ptr2->clearDoc(7); + ptr2->commit(); + TEST_DO(testMinMax<VectorType>(ptr2, 2u)); + +} + +void +PostingListAttributeTest::testMinMax(void) +{ + { + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testMinMax<IntegerAttribute>(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::WSET)); + 
cfg.setFastSearch(true); + AttributePtr ptr1 = + AttributeFactory::createAttribute("wsint32_1", cfg); + AttributePtr ptr2 = + AttributeFactory::createAttribute("wsint32_2", cfg); + testMinMax<IntegerAttribute>(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testMinMax<StringAttribute>(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); + testMinMax<StringAttribute>(ptr1, ptr2); + } +} + + +void +PostingListAttributeTest::testStringFold(void) +{ + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + + addDocs(ptr1, 6); + + StringAttribute &sa(asString(ptr1)); + + sa.update(1, "a"); + sa.commit(); + sa.update(3, "FOo"); + sa.commit(); + sa.update(4, "foo"); + sa.commit(); + sa.update(5, "z"); + sa.commit(); + + EXPECT_TRUE(assertSearch("3,4", sa)); + + sa.update(2, "FOO"); + sa.commit(); + + EXPECT_TRUE(assertSearch("2,3,4", sa)); + + sa.update(4, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("2,3", sa)); + + sa.update(2, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("3", sa)); + + sa.update(3, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("", sa)); +} + + +int +PostingListAttributeTest::Main() +{ + TEST_INIT("postinglistattribute_test"); + + testPostingList(); + testArithmeticValueUpdate(); + testReload(); + testMinMax(); + testStringFold(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::PostingListAttributeTest); diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh 
b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh new file mode 100755 index 00000000000..e6f9c214cb9 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +$VALGRIND ./searchlib_postinglistattribute_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight diff --git a/searchlib/src/tests/attribute/runnable.h b/searchlib/src/tests/attribute/runnable.h new file mode 100644 index 00000000000..418230a2fc5 --- /dev/null +++ b/searchlib/src/tests/attribute/runnable.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/util/sync.h> + +namespace search { + +class Runnable : public FastOS_Runnable +{ +protected: + uint32_t _id; + vespalib::Monitor _cond; + bool _done; + bool _stopped; + +public: + Runnable(uint32_t id) : + _id(id), _cond(), _done(false), _stopped(false) + { } + void Run(FastOS_ThreadInterface *, void *) { + doRun(); + + vespalib::MonitorGuard guard(_cond); + _stopped = true; + guard.broadcast(); + } + virtual void doRun() = 0; + void stop() { + vespalib::MonitorGuard guard(_cond); + _done = true; + } + void join() { + vespalib::MonitorGuard guard(_cond); + while (!_stopped) { + guard.wait(); + } + } +}; + +} // search + diff --git a/searchlib/src/tests/attribute/searchable/.gitignore b/searchlib/src/tests/attribute/searchable/.gitignore new file mode 100644 index 00000000000..663692907f6 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/.gitignore @@ -0,0 +1,4 @@ +/my_logctl_file +searchlib_attribute_blueprint_test_app +searchlib_attribute_searchable_adapter_test_app +searchlib_attribute_weighted_set_blueprint_test_app diff --git a/searchlib/src/tests/attribute/searchable/CMakeLists.txt b/searchlib/src/tests/attribute/searchable/CMakeLists.txt new file mode 100644 index 
00000000000..ed76520af29 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attribute_searchable_adapter_test_app + SOURCES + attribute_searchable_adapter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_searchable_adapter_test_app COMMAND sh attribute_searchable_adapter_test.sh) +vespa_add_executable(searchlib_attribute_weighted_set_blueprint_test_app + SOURCES + attribute_weighted_set_blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_weighted_set_blueprint_test_app COMMAND searchlib_attribute_weighted_set_blueprint_test_app) +vespa_add_executable(searchlib_attribute_blueprint_test_app + SOURCES + attributeblueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_blueprint_test_app COMMAND searchlib_attribute_blueprint_test_app) diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp new file mode 100644 index 00000000000..1d69f516b52 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -0,0 +1,689 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchlib/attribute/attribute_blueprint_factory.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributecontext.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/attribute/predicate_attribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.hpp> +#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/predicate_query_term.h> +#include <vespa/searchlib/query/tree/rectangle.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h> +#include <memory> + +using search::AttributeEnumGuard; +using search::AttributeFactory; +using search::AttributeGuard; +using search::AttributeVector; +using search::IAttributeManager; +using search::IntegerAttribute; +using search::SingleStringExtAttribute; +using search::attribute::IAttributeContext; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchData; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::PredicateQueryTerm; +using 
search::query::Rectangle; +using search::query::SimpleDotProduct; +using search::query::SimpleLocationTerm; +using search::query::SimplePredicateQuery; +using search::query::SimplePrefixTerm; +using search::query::SimpleRangeTerm; +using search::query::SimpleSuffixTerm; +using search::query::SimpleSubstringTerm; +using search::query::SimpleStringTerm; +using search::query::SimpleWandTerm; +using search::query::SimpleWeightedSetTerm; +using search::query::Weight; +using search::queryeval::Blueprint; +using search::queryeval::FieldSpec; +using search::queryeval::FakeRequestContext; +using search::queryeval::MinMaxPostingInfo; +using search::queryeval::ParallelWeakAndSearch; +using search::queryeval::PostingInfo; +using search::queryeval::SearchIterator; +using std::vector; +using vespalib::string; +using namespace search::attribute; +using namespace search; + +namespace { + +const string field = "field"; +const string other = "other"; +const int32_t weight = 1; +const uint32_t num_docs = 1000; + +class MyAttributeManager : public IAttributeManager { + AttributeVector::SP _attribute_vector; + AttributeVector::SP _other; + +public: + explicit MyAttributeManager(AttributeVector *attr) + : _attribute_vector(attr), _other() {} + + explicit MyAttributeManager(AttributeVector::SP attr) + : _attribute_vector(attr), _other() {} + + void set_other(AttributeVector::SP attr) { + _other = attr; + } + + virtual AttributeGuard::UP getAttribute(const string &name) const { + if (name == field) { + return AttributeGuard::UP(new AttributeGuard(_attribute_vector)); + } else if (name == other) { + return AttributeGuard::UP(new AttributeGuard(_other)); + } else { + return AttributeGuard::UP(nullptr); + } + } + + virtual AttributeGuard::UP + getAttributeStableEnum(const string &name) const { + if (name == field) { + return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector)); + } else if (name == other) { + return AttributeGuard::UP(new AttributeEnumGuard(_other)); + } else { + 
return AttributeGuard::UP(nullptr); + } + } + + virtual void getAttributeList(vector<AttributeGuard> &) const { + assert(!"Not implemented"); + } + virtual IAttributeContext::UP createContext() const { + assert(!"Not implemented"); + return IAttributeContext::UP(); + } +}; + +struct Result { + struct Hit { + uint32_t docid; + double raw_score; + int32_t match_weight; + Hit(uint32_t id, double raw, int32_t match_weight_in) + : docid(id), raw_score(raw), match_weight(match_weight_in) {} + }; + size_t est_hits; + bool est_empty; + bool has_minmax; + int32_t min_weight; + int32_t max_weight; + size_t wand_hits; + int64_t wand_initial_threshold; + double wand_boost_factor; + std::vector<Hit> hits; + vespalib::string iterator_dump; + + Result(size_t est_hits_in, bool est_empty_in) + : est_hits(est_hits_in), est_empty(est_empty_in), + has_minmax(false), min_weight(0), max_weight(0), + wand_hits(0), wand_initial_threshold(0), wand_boost_factor(0.0), + hits(), iterator_dump() {} + + void set_minmax(int32_t min, int32_t max) { + has_minmax = true; + min_weight = min; + max_weight = max; + } +}; + +void extract_posting_info(Result &result, const PostingInfo *postingInfo) { + if (postingInfo != NULL) { + const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(postingInfo); + if (minMax != NULL) { + result.set_minmax(minMax->getMinWeight(), minMax->getMaxWeight()); + } + } +} + +void extract_wand_params(Result &result, ParallelWeakAndSearch *wand) { + if (wand != nullptr) { + result.wand_hits = wand->getMatchParams().scores.getScoresToTrack(); + result.wand_initial_threshold = wand->getMatchParams().scoreThreshold; + result.wand_boost_factor = wand->getMatchParams().thresholdBoostFactor; + } +} + +Result do_search(IAttributeManager &attribute_manager, const Node &node, bool strict) { + uint32_t fieldId = 0; + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + AttributeBlueprintFactory source; + MatchDataLayout mdl; + 
TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + Blueprint::UP bp = source.createBlueprint(requestContext, FieldSpec(field, fieldId, handle), node); + ASSERT_TRUE(bp.get() != nullptr); + Result result(bp->getState().estimate().estHits, bp->getState().estimate().empty); + bp->fetchPostings(strict); + SearchIterator::UP iterator = bp->createSearch(*match_data, strict); + ASSERT_TRUE(iterator.get() != nullptr); + iterator->initFullRange(); + extract_posting_info(result, iterator->getPostingInfo()); + extract_wand_params(result, dynamic_cast<ParallelWeakAndSearch*>(iterator.get())); + result.iterator_dump = iterator->asString(); + for (uint32_t docid = 1; docid < num_docs; ++docid) { + if (iterator->seek(docid)) { + iterator->unpack(docid); + result.hits.emplace_back(docid, + match_data->resolveTermField(handle)->getRawScore(), + match_data->resolveTermField(handle)->getWeight()); + } + } + return result; +} + +bool search(const Node &node, IAttributeManager &attribute_manager, + bool fast_search = false, bool strict = true) +{ + Result result = do_search(attribute_manager, node, strict); + if (fast_search) { + EXPECT_LESS(result.est_hits, num_docs / 10); + } else { + EXPECT_TRUE(!result.est_empty); + EXPECT_EQUAL(num_docs, result.est_hits); + } + return (result.hits.size() == 1) && (result.hits[0].docid == (num_docs - 1)); +} + +bool search(const string &term, IAttributeManager &attribute_manager, + bool fast_search = false, bool strict = true) +{ + TEST_STATE(term.c_str()); + SimpleStringTerm node(term, "field", 0, Weight(0)); + return search(node, attribute_manager, fast_search, strict); +} + +template <typename T> struct AttributeVectorTypeFinder { + //typedef search::SingleValueStringAttribute Type; + typedef SingleStringExtAttribute Type; + static void add(Type & a, const T & v) { a.add(v, weight); } +}; +template <> struct AttributeVectorTypeFinder<int64_t> { + typedef 
search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type; + static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); } +}; + +void add_docs(AttributeVector *attr, size_t n) { + AttributeVector::DocId docid; + for (size_t i = 0; i < n; ++i) { + attr->addDoc(docid); + if (attr->inherits(PredicateAttribute::classId)) { + const_cast<uint8_t *>(static_cast<PredicateAttribute *>(attr)->getMinFeatureVector().first)[docid] = 0; + } + } + ASSERT_EQUAL(n - 1, docid); +} + +template <typename T> +MyAttributeManager makeAttributeManager(T value) { + typedef AttributeVectorTypeFinder<T> AT; + typedef typename AT::Type AttributeVectorType; + AttributeVectorType *attr = new AttributeVectorType(field); + add_docs(attr, num_docs); + AT::add(*attr, value); + MyAttributeManager attribute_manager(attr); + return attribute_manager; +} + +MyAttributeManager makeFastSearchLongAttributeManager(int64_t value) { + Config cfg(BasicType::INT64, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg); + IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); + add_docs(attr, num_docs); + attr->update(num_docs - 1, value); + attr->commit(); + MyAttributeManager attribute_manager(attr_ptr); + return attribute_manager; +} + +TEST("requireThatIteratorsCanBeCreated") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + EXPECT_TRUE(search("foo", attribute_manager)); +} + +TEST("requireThatRangeTermsWorkToo") { + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); + + EXPECT_TRUE(search("[23;46]", attribute_manager)); + EXPECT_TRUE(!search("[10;23]", attribute_manager)); + EXPECT_TRUE(!search(">43", attribute_manager)); + EXPECT_TRUE(search("[10;]", attribute_manager)); +} + +TEST("requireThatPrefixTermsWork") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + SimplePrefixTerm node("fo", 
"field", 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +TEST("requireThatLocationTermsWork") { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +TEST("requireThatOptimizedLocationTermsWork") { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); +} + +TEST("require that optimized location search works with wrapped bounding box (no hits)") { + // 0xcc is z-curve for (10, 10). 
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); + SimpleLocationTerm term1(Location(Rectangle(5, 5, 15, 15)), field, 0, Weight(0)); // unwrapped + SimpleLocationTerm term2(Location(Rectangle(15, 5, 5, 15)), field, 0, Weight(0)); // wrapped x + SimpleLocationTerm term3(Location(Rectangle(5, 15, 15, 5)), field, 0, Weight(0)); // wrapped y + Result result1 = do_search(attribute_manager, term1, true); + Result result2 = do_search(attribute_manager, term2, true); + Result result3 = do_search(attribute_manager, term3, true); + EXPECT_EQUAL(1u, result1.hits.size()); + EXPECT_EQUAL(0u, result2.hits.size()); + EXPECT_EQUAL(0u, result3.hits.size()); + EXPECT_TRUE(result1.iterator_dump.find("LocationPreFilterIterator") != vespalib::string::npos); + EXPECT_TRUE(result2.iterator_dump.find("EmptySearch") != vespalib::string::npos); + EXPECT_TRUE(result3.iterator_dump.find("EmptySearch") != vespalib::string::npos); +} + +void set_weights(StringAttribute *attr, uint32_t docid, + int32_t foo_weight, int32_t bar_weight, int32_t baz_weight) +{ + attr->clearDoc(docid); + if (foo_weight > 0) attr->append(docid, "foo", foo_weight); + if (bar_weight > 0) attr->append(docid, "bar", bar_weight); + if (baz_weight > 0) attr->append(docid, "baz", baz_weight); + attr->commit(); +} + +MyAttributeManager make_weighted_string_attribute_manager(bool fast_search) { + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(fast_search); + AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg); + StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); + add_docs(attr, num_docs); + set_weights(attr, 10, 0, 200, 0); + set_weights(attr, 20, 100, 200, 300); + set_weights(attr, 30, 0, 0, 300); + set_weights(attr, 40, 100, 0, 0); + set_weights(attr, 50, 1000, 0, 300); + MyAttributeManager attribute_manager(attr_ptr); + return attribute_manager; +} + +TEST("require that attribute dot product works") { + for 
(int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleDotProduct node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + ASSERT_EQUAL(5u, result.hits.size()); + if (fast_search) { + EXPECT_EQUAL(8u, result.est_hits); + } else { + // 'fox' is detected to produce no hits since it has no enum value + EXPECT_EQUAL(num_docs * 3, result.est_hits); + } + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(200.0, result.hits[0].raw_score); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(600.0, result.hits[1].raw_score); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(300.0, result.hits[2].raw_score); + EXPECT_EQUAL(40u, result.hits[3].docid); + EXPECT_EQUAL(100.0, result.hits[3].raw_score); + EXPECT_EQUAL(50u, result.hits[4].docid); + EXPECT_EQUAL(1300.0, result.hits[4].raw_score); + } +} + +TEST("require that attribute dot product can produce no hits") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleDotProduct node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, 
strict); + ASSERT_EQUAL(0u, result.hits.size()); + EXPECT_EQUAL(0u, result.est_hits); + EXPECT_TRUE(result.est_empty); + } +} + +TEST("require that direct attribute iterators work") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleStringTerm empty_node("notfoo", "", 0, Weight(1)); + Result empty_result = do_search(attribute_manager, empty_node, strict); + EXPECT_EQUAL(0u, empty_result.hits.size()); + SimpleStringTerm node("foo", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, strict); + if (fast_search) { + EXPECT_EQUAL(3u, result.est_hits); + EXPECT_TRUE(result.has_minmax); + EXPECT_EQUAL(100, result.min_weight); + EXPECT_EQUAL(1000, result.max_weight); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") + != vespalib::string::npos); + } else { + EXPECT_EQUAL(num_docs, result.est_hits); + EXPECT_FALSE(result.has_minmax); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") + == vespalib::string::npos); + } + ASSERT_EQUAL(3u, result.hits.size()); + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(40u, result.hits[1].docid); + EXPECT_EQUAL(50u, result.hits[2].docid); + } +} + +const char *as_str(bool flag) { return flag? 
"true" : "false"; } + +TEST("require that attribute parallel wand works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + EXPECT_FALSE(result.est_empty); + if (fast_search) { + EXPECT_EQUAL(8u, result.est_hits); + } else { + // 'fox' is detected to produce no hits since it has no enum value + EXPECT_EQUAL(num_docs * 3, result.est_hits); + } + if (EXPECT_EQUAL(2u, result.hits.size())) { + if (result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { + EXPECT_EQUAL(10u, result.wand_hits); + EXPECT_EQUAL(500, result.wand_initial_threshold); + EXPECT_EQUAL(1.5, result.wand_boost_factor); + } + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(600.0, result.hits[0].raw_score); + EXPECT_EQUAL(50u, result.hits[1].docid); + EXPECT_EQUAL(1300.0, result.hits[1].raw_score); + } else { + fprintf(stderr, " (fast_search: %s, strict: %s)\n", + as_str(fast_search), as_str(strict)); + assert(false); + } + } +} + +TEST("require that attribute weighted set term works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleWeightedSetTerm node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20)))); + node.append(Node::UP(new 
SimpleStringTerm("baz", "", 0, Weight(30)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40)))); + Result result = do_search(attribute_manager, node, strict); + EXPECT_FALSE(result.est_empty); + ASSERT_EQUAL(5u, result.hits.size()); + if (fast_search && result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { + fprintf(stderr, "DUMP: %s\n", result.iterator_dump.c_str()); + EXPECT_TRUE(result.iterator_dump.find("AttributeIteratorPack") != vespalib::string::npos); + } + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(20, result.hits[0].match_weight); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(30, result.hits[1].match_weight); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(30, result.hits[2].match_weight); + EXPECT_EQUAL(40u, result.hits[3].docid); + EXPECT_EQUAL(10, result.hits[3].match_weight); + EXPECT_EQUAL(50u, result.hits[4].docid); + EXPECT_EQUAL(30, result.hits[4].match_weight); + } +} + +TEST("require that predicate query in non-predicate field yields empty.") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + PredicateQueryTerm::UP term(new PredicateQueryTerm); + SimplePredicateQuery node(std::move(term), field, 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + EXPECT_TRUE(result.est_empty); + EXPECT_EQUAL(0u, result.hits.size()); +} + +TEST("require that predicate query in predicate field yields results.") { + PredicateAttribute *attr = + new PredicateAttribute( + field, Config(BasicType::PREDICATE, + CollectionType::SINGLE)); + add_docs(attr, num_docs); + attr->getIndex().indexEmptyDocument(2); // matches anything + attr->getIndex().commit(); + const_cast<PredicateAttribute::IntervalRange *>(attr->getIntervalRangeVector())[2] = 1u; + MyAttributeManager attribute_manager(attr); + + PredicateQueryTerm::UP term(new PredicateQueryTerm); + SimplePredicateQuery node(std::move(term), field, 0, Weight(1)); + Result result = 
do_search(attribute_manager, node, true); + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(1u, result.hits.size()); +} + +TEST("require that substring terms work") { + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true); + SimpleSubstringTerm node("a", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + ASSERT_EQUAL(4u, result.hits.size()); + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(50u, result.hits[3].docid); +} + +TEST("require that suffix terms work") { + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true); + SimpleSuffixTerm node("oo", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + ASSERT_EQUAL(3u, result.hits.size()); + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(40u, result.hits[1].docid); + EXPECT_EQUAL(50u, result.hits[2].docid); +} + +void set_attr_value(AttributeVector &attr, uint32_t docid, size_t value) { + IntegerAttribute *int_attr = dynamic_cast<IntegerAttribute *>(&attr); + FloatingPointAttribute *float_attr = dynamic_cast<FloatingPointAttribute *>(&attr); + StringAttribute *string_attr = dynamic_cast<StringAttribute *>(&attr); + if (int_attr != nullptr) { + int_attr->update(docid, value); + int_attr->commit(); + } else if (float_attr != nullptr) { + float_attr->update(docid, value); + float_attr->commit(); + } else if (string_attr != nullptr) { + ASSERT_LESS(value, size_t(27*26 + 26)); + vespalib::string str; + str.push_back('a' + value / 27); + str.push_back('a' + value % 27); + string_attr->update(docid, str); + string_attr->commit(); + } else { + ASSERT_TRUE(false); + } +} + +MyAttributeManager make_diversity_setup(BasicType::Type field_type, + bool field_fast_search, + BasicType::Type other_type, + bool other_fast_search) +{ + Config field_cfg(field_type, CollectionType::SINGLE); + 
field_cfg.setFastSearch(field_fast_search); + AttributeVector::SP field_attr = AttributeFactory::createAttribute(field, field_cfg); + Config other_cfg(other_type, CollectionType::SINGLE); + other_cfg.setFastSearch(other_fast_search); + AttributeVector::SP other_attr = AttributeFactory::createAttribute(other, other_cfg); + add_docs(&*field_attr, num_docs); + add_docs(&*other_attr, num_docs); + for (size_t i = 1; i < num_docs; ++i) { + set_attr_value(*field_attr, i, i / 5); + set_attr_value(*other_attr, i, i / 10); + } + MyAttributeManager attribute_manager(field_attr); + attribute_manager.set_other(other_attr); + return attribute_manager; +} + +size_t diversity_hits(IAttributeManager &manager, const vespalib::string &term, bool strict) { + SimpleRangeTerm node(term, "", 0, Weight(1)); + Result result = do_search(manager, node, strict); + return result.hits.size(); +} + +std::pair<size_t,size_t> diversity_docid_range(IAttributeManager &manager, const vespalib::string &term, bool strict) { + SimpleRangeTerm node(term, "", 0, Weight(1)); + Result result = do_search(manager, node, strict); + std::pair<size_t, size_t> range(0, 0); + for (const Result::Hit &hit: result.hits) { + if (range.first == 0) { + range.first = hit.docid; + range.second = hit.docid; + } else { + EXPECT_GREATER(size_t(hit.docid), range.second); + range.second = hit.docid; + } + } + return range; +} + +TEST("require that diversity range searches work for various types") { + for (auto field_type: std::vector<BasicType::Type>({BasicType::INT32, BasicType::DOUBLE})) { + for (auto other_type: std::vector<BasicType::Type>({BasicType::INT16, BasicType::INT32, BasicType::INT64, + BasicType::FLOAT, BasicType::DOUBLE, BasicType::STRING})) + { + for (bool other_fast_search: std::vector<bool>({true, false})) { + MyAttributeManager manager = make_diversity_setup(field_type, true, other_type, other_fast_search); + for (bool strict: std::vector<bool>({true, false})) { + 
TEST_STATE(vespalib::make_string("field_type: %s, other_type: %s, other_fast_search: %s, strict: %s", + BasicType(field_type).asString(), BasicType(other_type).asString(), + other_fast_search ? "true" : "false", strict ? "true" : "false").c_str()); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10]", strict)); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10]", strict)); + EXPECT_EQUAL(100u, diversity_hits(manager, "[;;1000;other;1]", strict)); + EXPECT_EQUAL(100u, diversity_hits(manager, "[;;-1000;other;1]", strict)); + EXPECT_EQUAL(300u, diversity_hits(manager, "[;;1000;other;3]", strict)); + EXPECT_EQUAL(300u, diversity_hits(manager, "[;;-1000;other;3]", strict)); + EXPECT_EQUAL(10u, diversity_hits(manager, "[;;10;other;3]", strict)); + EXPECT_EQUAL(10u, diversity_hits(manager, "[;;-10;other;3]", strict)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3]", strict).first); + EXPECT_EQUAL(30u, diversity_docid_range(manager, "[;;10;other;3]", strict).second); + EXPECT_EQUAL(965u, diversity_docid_range(manager, "[;;-10;other;3]", strict).first); + EXPECT_EQUAL(997u, diversity_docid_range(manager, "[;;-10;other;3]", strict).second); + } + } + } + } +} + +TEST("require that diversity also works for a single unique value") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", true)); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", true)); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", false)); + EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", false)); +} + +TEST("require that diversity range searches gives empty results for non-existing diversity attributes") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;bogus;10]", true)); + EXPECT_EQUAL(0u, 
diversity_hits(manager, "[;;-1000;bogus;10]", true)); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;;10]", true)); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;;10]", true)); +} + +TEST("require that loose diversity gives enough diversity and hits while doing less work") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10;4;loose]", true)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).first); + EXPECT_EQUAL(16u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).second); +} + +TEST("require that strict diversity gives enough diversity and hits while doing less work, even though more than loose, but more correct than loose") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10;4;strict]", true)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).first); + EXPECT_EQUAL(23u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).second); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh new file mode 100755 index 00000000000..9fcee4b1ebb --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +$VALGRIND ./searchlib_attribute_searchable_adapter_test_sh +rm -f ./my_logctl_file +VESPA_LOG_CONTROL_FILE=./my_logctl_file VESPA_LOG_LEVEL=all $VALGRIND ./searchlib_attribute_searchable_adapter_test_app diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp new file mode 100644 index 
00000000000..bd781a37a5b --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -0,0 +1,231 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/attribute/attribute_blueprint_factory.h> +#include <vespa/searchlib/attribute/attribute_weighted_set_blueprint.h> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/attribute/attributecontext.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <memory> +#include <string> +#include <map> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/attribute/singlestringattribute.h> + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using namespace search::attribute; + +namespace { + +class FakeAttributeManager : public IAttributeManager +{ +private: + typedef std::map<std::string, AttributeVector::SP> Map; + Map _map; + + AttributeVector::SP lookup(const std::string &name) const { + Map::const_iterator pos = _map.find(name); + if (pos == _map.end()) { + return AttributeVector::SP(); + } + return pos->second; + } + +public: + FakeAttributeManager() : _map() {} + + void 
addAttribute(AttributeVector::SP attr) { + _map[attr->getName()] = attr; + } + + virtual AttributeGuard::UP getAttribute(const vespalib::string &name) const { + return AttributeGuard::UP(new AttributeGuard(lookup(name))); + } + + virtual AttributeGuard::UP getAttributeStableEnum(const vespalib::string &name) const { + return AttributeGuard::UP(new AttributeEnumGuard(lookup(name))); + } + + virtual void getAttributeList(std::vector<AttributeGuard> &list) const { + Map::const_iterator pos = _map.begin(); + for (; pos != _map.end(); ++pos) { + list.push_back(pos->second); + } + } + + virtual IAttributeContext::UP createContext() const { + return IAttributeContext::UP(new AttributeContext(*this)); + } +}; + +void +setupAttributeManager(FakeAttributeManager &manager) +{ + AttributeVector::DocId docId; + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "integer", Config(BasicType("int64"))); + IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->update(docId, i); + attr->commit(); + } + manager.addAttribute(attr_sp); + } + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "string", Config(BasicType("string"))); + StringAttribute *attr = (StringAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->update(i, std::string(1, '1' + i - 1).c_str()); + attr->commit(); + } + manager.addAttribute(attr_sp); + } + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "multi", Config(BasicType("int64"), search::attribute::CollectionType("array"))); + IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->append(docId, i, 0); + 
attr->append(docId, i + 10, 1); + attr->commit(); + } + manager.addAttribute(attr_sp); + } +} + +struct WS { + static const uint32_t fieldId = 42; + IAttributeManager & attribute_manager; + MatchDataLayout layout; + TermFieldHandle handle; + std::vector<std::pair<std::string, uint32_t> > tokens; + + WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() { + MatchData::UP tmp = layout.createMatchData(); + ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); + } + + WS &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode() const { + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + 
bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + FakeResult result; + sb->initFullRange(); + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + TermFieldMatchData &data = *md->resolveTermField(handle); + FieldPositionsIterator itr = data.getIterator(); + for (; itr.valid(); itr.next()) { + result.elem(itr.getElementId()); + result.weight(itr.getElementWeight()); + result.pos(itr.getPosition()); + } + } + } + return result; + } +}; + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("attribute_weighted_set_test"); + { + FakeAttributeManager manager; + setupAttributeManager(manager); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); + + EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); + } + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp new file mode 100644 index 
00000000000..ed851d872e1 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attributeblueprint_test"); + +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchlib/attribute/attribute_blueprint_factory.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributecontext.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.hpp> +#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <memory> +#include <string> + +using search::AttributeEnumGuard; +using search::AttributeGuard; +using search::AttributeVector; +using search::IAttributeManager; +using search::SingleStringExtAttribute; +using search::attribute::IAttributeContext; +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::SimpleLocationTerm; +using search::query::SimplePrefixTerm; +using search::query::SimpleStringTerm; +using search::query::Weight; +using 
search::queryeval::Blueprint; +using search::queryeval::FieldSpec; +using search::queryeval::SearchIterator; +using search::queryeval::FakeRequestContext; +using std::string; +using std::vector; +using namespace search::attribute; +using namespace search; + +namespace { + +class Test : public vespalib::TestApp { + void requireThatIteratorsCanBeCreated(); + void requireThatRangeTermsWorkToo(); + void requireThatPrefixTermsWork(); + void requireThatLocationTermsWork(); + void requireThatFastSearchLocationTermsWork(); + + bool search(const string &term, IAttributeManager &attribute_manager); + bool search(const Node &term, IAttributeManager &attribute_manager); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("attributeblueprint_test"); + + TEST_DO(requireThatIteratorsCanBeCreated()); + TEST_DO(requireThatRangeTermsWorkToo()); + TEST_DO(requireThatPrefixTermsWork()); + TEST_DO(requireThatLocationTermsWork()); + TEST_DO(requireThatFastSearchLocationTermsWork()); + + TEST_DONE(); +} + +const string field = "field"; +const int32_t weight = 1; + +class MyAttributeManager : public IAttributeManager { + AttributeVector::SP _attribute_vector; + AttributeVector::DocId _docid; + +public: + MyAttributeManager(AttributeVector *attr) + : _attribute_vector(attr) {} + + virtual AttributeGuard::UP getAttribute(const string &) const { + return AttributeGuard::UP(new AttributeGuard(_attribute_vector)); + } + + virtual AttributeGuard::UP + getAttributeStableEnum(const string &) const { + return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector)); + } + + virtual void getAttributeList(vector<AttributeGuard> &) const { + assert(!"Not implemented"); + } + virtual IAttributeContext::UP createContext() const { + assert(!"Not implemented"); + return IAttributeContext::UP(); + } +}; + +bool Test::search(const string &term, IAttributeManager &attribute_manager) { + TEST_STATE(term.c_str()); + SimpleStringTerm node(term, "field", 0, Weight(0)); + bool ret = 
search(node, attribute_manager); + return ret; +} + +bool Test::search(const Node &node, IAttributeManager &attribute_manager) { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + AttributeBlueprintFactory source; + Blueprint::UP result = source.createBlueprint(requestContext, FieldSpec(field, 0, 0), node); + ASSERT_TRUE(result.get()); + EXPECT_TRUE(!result->getState().estimate().empty); + EXPECT_EQUAL(3u, result->getState().estimate().estHits); + result->fetchPostings(true); + SearchIterator::UP iterator = result->createSearch(*md, true); + ASSERT_TRUE((bool)iterator); + iterator->initFullRange(); + EXPECT_TRUE(!iterator->seek(1)); + return iterator->seek(2); +} + +template <typename T> struct AttributeVectorTypeFinder { + typedef SingleStringExtAttribute Type; + static void add(Type & a, const T & v) { a.add(v, weight); } +}; +template <> struct AttributeVectorTypeFinder<int64_t> { + typedef search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type; + static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); } +}; + +struct FastSearchLongAttribute { + typedef search::SingleValueNumericPostingAttribute< search::EnumAttribute<search::IntegerAttributeTemplate<int64_t> > > Type; + static void add(Type & a, int64_t v) { a.update(a.getNumDocs()-1, v); a.commit(); } +}; + +template <typename AT, typename T> +MyAttributeManager fill(typename AT::Type * attr, T value) { + AttributeVector::DocId docid; + attr->addDoc(docid); + attr->addDoc(docid); + attr->addDoc(docid); + assert(2u == docid); + AT::add(*attr, value); + MyAttributeManager attribute_manager(attr); + return attribute_manager; +} + +template <typename T> +MyAttributeManager makeAttributeManager(T value) { + typedef AttributeVectorTypeFinder<T> AT; + typedef typename AT::Type AttributeVectorType; + AttributeVectorType *attr = new AttributeVectorType(field); + return 
fill<AT, T>(attr, value); +} + +MyAttributeManager makeFastSearchLongAttribute(int64_t value) { + typedef FastSearchLongAttribute::Type AttributeVectorType; + Config cfg(BasicType::fromType(int64_t()), CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributeVectorType *attr = new AttributeVectorType(field, cfg); + return fill<FastSearchLongAttribute, int64_t>(attr, value); +} + +void Test::requireThatIteratorsCanBeCreated() { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + EXPECT_TRUE(search("foo", attribute_manager)); +} + +void Test::requireThatRangeTermsWorkToo() { + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); + + EXPECT_TRUE(search("[23;46]", attribute_manager)); + EXPECT_TRUE(!search("[10;23]", attribute_manager)); + EXPECT_TRUE(!search(">43", attribute_manager)); + EXPECT_TRUE(search("[10;]", attribute_manager)); +} + +void Test::requireThatPrefixTermsWork() +{ + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + SimplePrefixTerm node("fo", "field", 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +void Test::requireThatLocationTermsWork() { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +void Test::requireThatFastSearchLocationTermsWork() { + // 0xcc is z-curve for (10, 10). 
+ MyAttributeManager attribute_manager = makeFastSearchLongAttribute(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); +#if 0 + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +#endif +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/searchcontext/.gitignore b/searchlib/src/tests/attribute/searchcontext/.gitignore new file mode 100644 index 00000000000..61dc5e8fc8e --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +searchcontext_test +searchlib_searchcontext_test_app diff --git a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt new file mode 100644 index 00000000000..24652373a00 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_searchcontext_test_app + SOURCES + searchcontext.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_searchcontext_test_app COMMAND sh searchcontext_test.sh) diff --git a/searchlib/src/tests/attribute/searchcontext/DESC b/searchlib/src/tests/attribute/searchcontext/DESC new file mode 100644 index 00000000000..8ce9805dbb0 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/DESC @@ -0,0 +1 @@ +Unit test for AttributeVector::SearchContext using all attribute vector implementations. 
diff --git a/searchlib/src/tests/attribute/searchcontext/FILES b/searchlib/src/tests/attribute/searchcontext/FILES new file mode 100644 index 00000000000..cebd66e863f --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/FILES @@ -0,0 +1 @@ +searchcontext.cpp diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp new file mode 100644 index 00000000000..6c69e79a93b --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp @@ -0,0 +1,1900 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributeiterators.h> +#include <vespa/searchlib/attribute/flagattribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/fef/termfieldmatchdataposition.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/queryeval/hitcollector.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/searchlib/test/initrange.h> +#include <iterator> +#include <set> + +#include <vespa/searchlib/attribute/attributevector.hpp> + +LOG_SETUP("searchcontext_test"); + +namespace search { + +namespace +{ + +bool 
+isUnsignedSmallIntAttribute(const AttributeVector &a) +{ + switch (a.getBasicType()) + { + case attribute::BasicType::UINT1: + case attribute::BasicType::UINT2: + case attribute::BasicType::UINT4: + return true; + default: + return false; + } +} + +} + +typedef AttributeVector::SP AttributePtr; +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef AttributeVector::SearchContext SearchContext; +using attribute::Config; +using attribute::BasicType; +using attribute::CollectionType; +typedef AttributeVector::largeint_t largeint_t; +typedef queryeval::SearchIterator::UP SearchBasePtr; +typedef std::unique_ptr<ResultSet> ResultSetPtr; + +using queryeval::HitCollector; +using queryeval::SearchIterator; +using fef::MatchData; +using fef::TermFieldMatchData; +using fef::TermFieldMatchDataArray; +using fef::TermFieldMatchDataPosition; + +class DocSet : public std::set<uint32_t> +{ +public: + DocSet() : std::set<uint32_t>() {} + DocSet(const uint32_t *b, const uint32_t *e) : std::set<uint32_t>(b, e) {} + DocSet & put(const uint32_t &v) { + insert(v); + return *this; + } +}; + +template <typename V, typename T> +class PostingList +{ +private: + V * _vec; + T _value; + DocSet _hits; + +public: + PostingList(V & vec, T value) : _vec(&vec), _value(value), _hits() {} + const V & getAttribute() const { return *_vec; } + V & getAttribute() { return *_vec; } + const T & getValue() const { return _value; } + DocSet & getHits() { return _hits; } + const DocSet & getHits() const { return _hits; } + uint32_t getHitCount() const { return _hits.size(); } +}; + +class DocRange +{ +public: + uint32_t start; + uint32_t end; + DocRange(uint32_t start_, uint32_t end_) : start(start_), end(end_) {} +}; + +class SearchContextTest : public vespalib::TestApp +{ +private: + typedef std::map<vespalib::string, Config> ConfigMap; + // Map of all config objects + ConfigMap _integerCfg; + ConfigMap _floatCfg; + ConfigMap _stringCfg; + + + // helper functions + void + 
addReservedDoc(AttributeVector &ptr); + + void addDocs(AttributeVector & ptr, uint32_t numDocs); + template <typename T> + void fillVector(std::vector<T> & values, size_t numValues); + template <typename V, typename T> + void fillAttribute(V & vec, const std::vector<T> & values); + template <typename V, typename T> + void resetAttribute(V & vec, const T & value); + template <typename V, typename T> + void fillPostingList(PostingList<V, T> & pl, const DocRange & range); + template <typename V, typename T> + void fillPostingList(PostingList<V, T> & pl); + void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, + QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template <typename V, typename T> + SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); + template <typename V, typename T> + ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template <typename V> + void performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector); + + template<typename T, typename A> + void testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs); + void testInitRange(); + // test search functionality + template <typename V, typename T> + void testFind(const PostingList<V, T> & first); + + template <typename V, typename T> + void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values); + template<typename T, typename A> + void testSearch(const ConfigMap & cfgs); + template <typename V, typename T> + void testMultiValueSearchHelper(V & vec, const std::vector<T> & values); + template <typename V, typename T> + void testMultiValueSearch(V 
& first, V & second, const std::vector<T> & values); + void testSearch(); + + class IteratorTester { + public: + virtual bool matches(const SearchIterator & base) const = 0; + virtual ~IteratorTester() { } + }; + class AttributeIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return dynamic_cast<const AttributeIterator *>(&base) != NULL; + } + }; + class FlagAttributeIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return (dynamic_cast<const FlagAttributeIterator *>(&base) != NULL) || + (dynamic_cast<const BitVectorIterator *>(&base) != NULL) || + (dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL); + } + }; + class AttributePostingListIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return dynamic_cast<const AttributePostingListIterator *>(&base) != NULL || + dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL; + + } + }; + + + // test search iterator functionality + void testStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester); + void testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester); + void fillForSearchIteratorTest(IntegerAttribute * ia); + void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia); + void testSearchIterator(); + + + // test search iterator unpacking + void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra); + void testSearchIteratorUnpacking(const AttributePtr & ptr, + SearchContext & sc, + bool extra, + bool strict); + void testSearchIteratorUnpacking(); + + + // test range search + template <typename VectorType> + void performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected); + template <typename VectorType, typename ValueType> + void 
testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values); + void testRangeSearch(); + void testRangeSearchLimited(); + + + // test case insensitive search + void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected); + void testCaseInsensitiveSearch(const AttributePtr & ptr); + void testCaseInsensitiveSearch(); + void testRegexSearch(const AttributePtr & ptr); + void testRegexSearch(); + + + // test prefix search + void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void testPrefixSearch(const AttributePtr & ptr); + void testPrefixSearch(); + + // test that search is working after clear doc + template <typename VectorType, typename ValueType> + void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, const vespalib::string & term); + void requireThatSearchIsWorkingAfterClearDoc(); + + // test that search is working after load and clear doc + template <typename VectorType, typename ValueType> + void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, ValueType defaultValue, + const vespalib::string & term); + void requireThatSearchIsWorkingAfterLoadAndClearDoc(); + + template <typename VectorType, typename ValueType> + void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2); + void requireThatSearchIsWorkingAfterUpdates(); + + void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded(); + + template <typename VectorType, typename ValueType> + void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value); + void requireThatInvalidSearchTermGivesZeroHits(); + + void 
requireThatFlagAttributeHandlesTheByteRange(); + + void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue); + void requireThatOutOfBoundsSearchTermGivesZeroHits(); + + // init maps with config objects + void initIntegerConfig(); + void initFloatConfig(); + void initStringConfig(); + +public: + SearchContextTest(); + int Main(); +}; + + +void +SearchContextTest::addReservedDoc(AttributeVector &ptr) +{ + ptr.addReservedDoc(); +} + + +void +SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs) +{ + uint32_t docId; + addReservedDoc(ptr); + for (uint32_t i = 1; i <= numDocs; ++i) { + ptr.addDoc(docId); + EXPECT_EQUAL(docId, i); + } + ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1); +} + +template <typename T> +void +SearchContextTest::fillVector(std::vector<T> & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 1; i <= numValues; ++i) { + values.push_back(static_cast<T>(i)); + } +} + +template <> +void +SearchContextTest::fillVector(std::vector<vespalib::string> & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? 
"0" : "") << i; + values.push_back(ss.str()); + } +} + +template <typename V, typename T> +void +SearchContextTest::fillAttribute(V & vec, const std::vector<T> & values) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + vec.clearDoc(doc); + uint32_t valueCount = doc % (values.size() + 1); + for (uint32_t i = 0; i < valueCount; ++i) { + // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl; + EXPECT_TRUE(vec.append(doc, values[i], 1)); + } + } + vec.commit(true); +} + +template <typename V, typename T> +void +SearchContextTest::resetAttribute(V & vec, const T & value) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, value)); + } + vec.commit(true); +} + +template <typename V, typename T> +void +SearchContextTest::fillPostingList(PostingList<V, T> & pl, const DocRange & range) +{ + pl.getHits().clear(); + for (uint32_t doc = range.start; doc < range.end; ++doc) { + ASSERT_TRUE(doc < pl.getAttribute().getNumDocs()); + EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue())); + pl.getHits().insert(doc); + } + pl.getAttribute().commit(true); +} + +template <typename V, typename T> +void +SearchContextTest::fillPostingList(PostingList<V, T> & pl) +{ + AttributeVector & vec = dynamic_cast<AttributeVector &>(pl.getAttribute()); + pl.getHits().clear(); + uint32_t sz = vec.getMaxValueCount(); + T * buf = new T[sz]; + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + uint32_t valueCount = vec.get(doc, buf, sz); + EXPECT_TRUE(valueCount <= sz); + for (uint32_t i = 0; i < valueCount; ++i) { + if (buf[i] == pl.getValue()) { + //std::cout << "hit for doc(" << doc << "): buf[" << i << "] (=" << buf[i] << ") == " << pl.getValue() << std::endl; + pl.getHits().insert(doc); + break; + } + } + } + delete [] buf; +} + +void +SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const 
vespalib::string & term, QueryTermSimple::SearchTerm termType) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + switch (termType) { + case QueryTermSimple::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break; + case QueryTermSimple::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break; + default: + buffer[p++] = ParseItem::ITEM_TERM; + break; + } + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + +template <typename V, typename T> +SearchContextPtr +SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), termType); + + return (dynamic_cast<const AttributeVector &>(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + +ResultSetPtr +SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs) +{ + HitCollector hc(numDocs, numDocs, 0); + sb.initFullRange(); + // assume strict toplevel search object located at start + for (sb.seek(1u); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +template <typename V, typename T> +ResultSetPtr +SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + TermFieldMatchData dummy; + SearchContextPtr sc = getSearch(vec, term, termType); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, vec.getNumDocs()); + return rs; +} + +template <typename V> +void +SearchContextTest::performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ +#if 0 + std::cout << "performSearch[" << term << "]: {"; + std::copy(expected.begin(), expected.end(), std::ostream_iterator<uint32_t>(std::cout, ", ")); + std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl; +#endif + { // strict search iterator + ResultSetPtr rs = performSearch(vec, term, termType); + checkResultSet(*rs, expected, false); + } +} + +void +SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector) +{ + EXPECT_EQUAL(rs.getNumHits(), expected.size()); + if (bitVector) { + const BitVector * vec = rs.getBitOverflow(); + if (expected.size() != 0) { + ASSERT_TRUE(vec != NULL); + for (const auto & expect : expected) { + EXPECT_TRUE(vec->testBit(expect)); + } + } + } else { + const RankedHit * array = rs.getArray(); + if (expected.size() != 0) { + ASSERT_TRUE(array != NULL); + uint32_t i = 0; + for (DocSet::const_iterator iter = expected.begin(); + iter != expected.end(); ++iter, ++i) + { + EXPECT_TRUE(array[i]._docId == *iter); + } + } + } +} + + +//----------------------------------------------------------------------------- +// Test search functionality +//----------------------------------------------------------------------------- +template <typename V, typename T> +void +SearchContextTest::testFind(const 
PostingList<V, T> & pl) +{ + { // strict search iterator + SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue()); + sc->fetchPostings(true); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, pl.getAttribute().getNumDocs()); + checkResultSet(*rs, pl.getHits(), false); + } +} + +template <typename V, typename T> +void +SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values) +{ + LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values", + attribute.getName().c_str(), numDocs, static_cast<unsigned long>(values.size())); + + // fill attribute vectors + addDocs(attribute, numDocs); + + std::vector<PostingList<V, T> > lists; + + // fill posting lists + ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0); + uint32_t hitCount = attribute.getNumDocs() / values.size(); + for (uint32_t i = 0; i < values.size(); ++i) { + // for each value a range with hitCount documents will hit on that value + lists.push_back(PostingList<V, T>(attribute, values[i])); + fillPostingList(lists.back(), DocRange(i * hitCount + 1, (i + 1) * hitCount + 1)); + } + + // test find() + for (const auto & list : lists) { + testFind(list); + } +} + +template <typename V, typename T> +void +SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & values) +{ + std::vector<PostingList<V, T> > lists; + + // fill posting lists based on attribute content + for (const T & value : values) { + lists.push_back(PostingList<V, T>(vec, value)); + fillPostingList(lists.back()); + } + + // test find() + for (const auto & list : lists) { + //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue() + // << ", hit count = " << lists[i].getHitCount() << std::endl; + testFind(list); + } +} + +template <typename V, typename T> +void +SearchContextTest::testMultiValueSearch(V & first, V & second, const std::vector<T> & values) +{ + 
addDocs(first, second.getNumDocs()); + LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values", + first.getName().c_str(), first.getNumDocs(), static_cast<unsigned long>(values.size())); + + fillAttribute(first, values); + + testMultiValueSearchHelper(first, values); + + ASSERT_TRUE(first.saveAs(second.getBaseFileName())); + ASSERT_TRUE(second.load()); + + testMultiValueSearchHelper(second, values); + + size_t sz = values.size(); + ASSERT_TRUE(sz > 2); + std::vector<T> subset; + // values[sz - 2] is not used -> 0 hits + // values[sz - 1] is used once -> 1 hit + for (size_t i = 0; i < sz - 2; ++i) { + subset.push_back(values[i]); + } + + fillAttribute(first, subset); + + ASSERT_TRUE(1u < first.getNumDocs()); + EXPECT_TRUE(first.append(1u, values[sz - 1], 1)); + first.commit(true); + + testMultiValueSearchHelper(first, values); + + ASSERT_TRUE(first.saveAs(second.getBaseFileName())); + ASSERT_TRUE(second.load()); + + testMultiValueSearchHelper(second, values); +} + +template<typename T, typename A> +void SearchContextTest::testSearch(const ConfigMap & cfgs) { + uint32_t numDocs = 100; + uint32_t numUniques = 20; + std::vector<T> values; + fillVector(values, numUniques); + for (const auto & cfg : cfgs) { + AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second); + testSearch(*(dynamic_cast<A *>(second.get())), numDocs, values); + if (second->hasMultiValue()) { + AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second); + testMultiValueSearch(*(dynamic_cast<A *>(first.get())), + *(dynamic_cast<A *>(second.get())), values); + } + } +} + +using search::test::InitRangeVerifier; + +template<typename T, typename A> +void SearchContextTest::testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs) { + InitRangeVerifier ir; + for (const auto & cfg : cfgs) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-initrange", cfg.second); + 
addDocs(*attribute, ir.getDocIdLimit()); + for (uint32_t doc : ir.getExpectedDocIds()) { + EXPECT_TRUE(nullptr != dynamic_cast<A *>(attribute.get())); + EXPECT_TRUE(dynamic_cast<A *>(attribute.get())->update(doc, key)); + } + attribute->commit(true); + SearchContextPtr sc = getSearch(*attribute, keyAsString); + ASSERT_TRUE(sc->valid()); + sc->fetchPostings(true); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ir.verify(*sb); + } +} + +void SearchContextTest::testInitRange() { + testInitRange<AttributeVector::largeint_t, IntegerAttribute>(42, "42", _integerCfg); + testInitRange<double, FloatingPointAttribute>(42.42, "42.42", _floatCfg); + testInitRange<vespalib::string, StringAttribute>("any-key", "any-key", _stringCfg); +} + +void +SearchContextTest::testSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numUniques = 20; + + { // IntegerAttribute + for (const auto & cfg : _integerCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + + + { // CollectionType::ARRAY Flags. 
+ std::vector<AttributeVector::largeint_t> values; + fillVector(values, numUniques); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg); + testSearch(*(dynamic_cast<IntegerAttribute *>(second.get())), numDocs, values); + AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg); + testMultiValueSearch(*(dynamic_cast<IntegerAttribute *>(first.get())), + *(dynamic_cast<IntegerAttribute *>(second.get())), values); + } + } + + { // FloatingPointAttribute + for (const auto & cfg : _floatCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "7.3"); + ASSERT_TRUE( sc->valid() ); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + } + + testSearch<AttributeVector::largeint_t, IntegerAttribute>(_integerCfg); + testSearch<double, FloatingPointAttribute>(_floatCfg); + testSearch<vespalib::string, StringAttribute>(_stringCfg); +} + +//----------------------------------------------------------------------------- +// Test search iterator functionality +//----------------------------------------------------------------------------- +void +SearchContextTest::testStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with 3 hits + threeHits.fetchPostings(true); + SearchBasePtr sb = threeHits.createIterator(&dummy, true); + sb->initFullRange(); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->getDocId() == 1u); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + 
EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->isAtEnd()); + } + + { // search for value with no hits + noHits.fetchPostings(true); + SearchBasePtr sb = noHits.createIterator(&dummy, true); + sb->initFullRange(); + ASSERT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_TRUE(sb->isAtEnd()); + } +} + +void +SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with three hits + threeHits.fetchPostings(false); + SearchBasePtr sb = threeHits.createIterator(&dummy, false); + sb->initFullRange(); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd()); + } + { // search for value with no hits + noHits.fetchPostings(false); + SearchBasePtr sb = noHits.createIterator(&dummy, false); + sb->initFullRange(); + + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_NOT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_NOT_EQUAL(sb->getDocId(), 6u); + } +} + +void +SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 10); + ia->update(2, 20); + ia->update(3, 10); + ia->update(4, 20); + ia->update(5, 10); + ia->commit(true); +} + +void 
+SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 1); + ia->update(2, 2); + ia->update(3, 1); + ia->update(4, 2); + ia->update(5, 1); + ia->commit(true); +} + +void +SearchContextTest::testSearchIterator() +{ + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg); + fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::UINT2, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg); + fillForSemiNibbleSearchIteratorTest(dynamic_cast<IntegerAttribute *> + (ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 1); + SearchContextPtr noHits = getSearch(*ptr.get(), 3); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 1); + noHits = getSearch(*ptr.get(), 3); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg); + fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = 
AttributeFactory::createAttribute("sfs-string", cfg); + StringAttribute * sa = dynamic_cast<StringAttribute *>(ptr.get()); + addReservedDoc(*ptr); + ptr->addDocs(5); + sa->update(1, "three"); + sa->update(2, "two"); + sa->update(3, "three"); + sa->update(4, "two"); + sa->update(5, "three"); + ptr->commit(true); + + SearchContextPtr threeHits = getSearch(*ptr.get(), "three"); + SearchContextPtr noHits = getSearch(*ptr.get(), "none"); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + FlagAttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } +} + + + +//----------------------------------------------------------------------------- +// Test search iterator unpacking +//----------------------------------------------------------------------------- +void +SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, + bool extra) +{ + addReservedDoc(*ia); + ia->addDocs(3); + if (ia->getCollectionType() == CollectionType::SINGLE) { + ia->update(1, 10); + ia->update(2, 10); + ia->update(3, 10); + } else if (ia->getCollectionType() == CollectionType::ARRAY) { + ia->append(1, 10, 1); + ia->append(2, 10, 1); + ia->append(2, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + } else { // WEIGHTED SET + ia->append(1, 10, -50); + ia->append(2, 10, 0); + ia->append(3, 10, 50); + } + ia->commit(true); + if (!extra) + return; + ia->addDocs(20); + for (uint32_t d = 4; d < 
24; ++d) { + if (ia->getCollectionType() == CollectionType::SINGLE) + ia->update(d, 10); + else + ia->append(d, 10, 1); + } + ia->commit(true); +} + +void +SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, + SearchContext & sc, + bool extra, + bool strict) +{ + LOG(info, + "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str()); + + TermFieldMatchData md; + md.reset(100); + + TermFieldMatchDataPosition pos; + pos.setElementWeight(100); + md.appendPosition(pos); + + sc.fetchPostings(strict); + SearchBasePtr sb = sc.createIterator(&md, strict); + sb->initFullRange(); + + std::vector<int32_t> weights(3); + if (attr->getCollectionType() == CollectionType::SINGLE || + (attr->getCollectionType() == CollectionType::ARRAY && attr->getBasicType() == BasicType::INT8)) + { + weights[0] = 1; + weights[1] = 1; + weights[2] = 1; + } else if (attr->getCollectionType() == CollectionType::ARRAY) { + weights[0] = 1; + weights[1] = 2; + weights[2] = 3; + } else { + weights[0] = -50; + weights[1] = 0; + weights[2] = 50; + } + + // unpack and check weights + sb->unpack(1); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_EQUAL(md.getDocId(), 1u); + EXPECT_EQUAL(md.getWeight(), weights[0]); + + sb->unpack(2); + EXPECT_EQUAL(sb->getDocId(), 2u); + EXPECT_EQUAL(md.getDocId(), 2u); + EXPECT_EQUAL(md.getWeight(), weights[1]); + + sb->unpack(3); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_EQUAL(md.getDocId(), 3u); + EXPECT_EQUAL(md.getWeight(), weights[2]); + if (extra) { + sb->unpack(4); + EXPECT_EQUAL(sb->getDocId(), 4u); + EXPECT_EQUAL(md.getDocId(), 4u); + EXPECT_EQUAL(md.getWeight(), 1); + } +} + +void +SearchContextTest::testSearchIteratorUnpacking() +{ + std::vector<std::pair<vespalib::string, Config> > config; + + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + config.emplace_back("s-int32", cfg); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + config.emplace_back("s-uint4", cfg); + } + { + Config cfg(BasicType::INT32, 
CollectionType::ARRAY); + config.emplace_back("a-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + config.emplace_back("w-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + config.emplace_back("sfs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("afs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + config.emplace_back("wfs-int32", cfg); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("flags", cfg); + } + + for (const auto & cfg : config) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(ptr.get()), false); + SearchContextPtr sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, true); + sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, false); + if (cfg.second.fastSearch()) { + AttributePtr ptr2 = AttributeFactory::createAttribute(cfg.first + "-extra", cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(ptr2.get()), true); + SearchContextPtr sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, true); + sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, false); + } + } +} + + + +//----------------------------------------------------------------------------- +// Test range search +//----------------------------------------------------------------------------- + +template <typename VectorType> +void +SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +template <typename VectorType, typename ValueType> +void 
+SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values) +{ + LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str()); + + VectorType & vec = dynamic_cast<VectorType &>(*ptr.get()); + + addDocs(vec, numDocs); + + std::map<ValueType, DocSet> postingList; + + uint32_t docCnt = 0; + for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) { + //std::cout << "postingList[" << values[i] << "]: {"; + for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) { + EXPECT_TRUE(vec.update(docCnt + 1u, values[i])); + postingList[values[i]].insert(docCnt + 1u); + //std::cout << docCnt << ", "; + } + //std::cout << "}" << std::endl; + } + ptr->commit(true); + uint32_t smallHits = 0; + ValueType zeroValue = 0; + bool smallUInt = isUnsignedSmallIntAttribute(vec); + if (smallUInt) { + for (uint32_t i = docCnt ; i < numDocs; ++i) { + postingList[zeroValue].insert(i + 1u); + ++smallHits; + } + } + + // test less than ("<a") + for (uint32_t i = 0; i < values.size(); ++i) { + vespalib::asciistream ss; + ss << "<" << values[i]; + DocSet expected; + if (smallUInt) { + expected.insert(postingList[zeroValue].begin(), + postingList[zeroValue].end()); + } + for (uint32_t j = 0; j < i; ++j) { + expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + + // test greater than (">a") + for (uint32_t i = 0; i < values.size(); ++i) { + vespalib::asciistream ss; + ss << ">" << values[i]; + DocSet expected; + for (uint32_t j = i + 1; j < values.size(); ++j) { + expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + + // test range ("[a;b]") + for (uint32_t i = 0; i < values.size(); ++i) { + for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i + vespalib::asciistream ss; + ss << "[" << values[i] << ";" << values[j] << "]"; + DocSet 
expected; + for (uint32_t k = i; k < j + 1; ++k) { + expected.insert(postingList[values[k]].begin(), postingList[values[k]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + } + + { // test large range + vespalib::asciistream ss; + ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]"; + DocSet expected; + for (uint32_t doc = 0; doc < numDocs; ++doc) { + expected.insert(doc + 1); + } + performRangeSearch(vec, ss.str(), expected); + } +} + +void +SearchContextTest::testRangeSearchLimited() +{ + largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 }; + std::vector<largeint_t> values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0])); + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg); + IntegerAttribute & vec = dynamic_cast<IntegerAttribute &>(*ptr); + addDocs(vec, values.size()); + for (size_t i(1); i < values.size(); i++) { + EXPECT_TRUE(vec.update(i, values[i])); + } + ptr->commit(true); + + DocSet expected; + for (size_t i(1); i < 12; i++) { + expected.put(i); + } + performRangeSearch(vec, "[1;9]", expected); + performRangeSearch(vec, "[1;9;100]", expected); + performRangeSearch(vec, "[1;9;-100]", expected); + expected.clear(); + expected.put(3); + performRangeSearch(vec, "<1;3>", expected); + expected.put(4); + performRangeSearch(vec, "<1;3]", expected); + expected.clear(); + expected.put(1).put(2).put(3); + performRangeSearch(vec, "[1;3>", expected); + expected.put(4); + performRangeSearch(vec, "[1;3]", expected); + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[1;9;1]", expected); + performRangeSearch(vec, "[1;9;2]", expected); + expected.put(3); + performRangeSearch(vec, "[1;9;3]", expected); + expected.clear(); + expected.put(10).put(11); + performRangeSearch(vec, "[1;9;-1]", expected); + performRangeSearch(vec, "[1;9;-2]", expected); + expected.put(9); + performRangeSearch(vec, "[1;9;-3]", 
expected); + performRangeSearch(vec, "[1;9;-3]", expected); + + expected.clear(); + for (size_t i(1); i < 13; i++) { + expected.put(i); + } + performRangeSearch(vec, "[;;100]", expected); + performRangeSearch(vec, "[;;-100]", expected); + + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[;;1]", expected); + expected.clear(); + expected.put(12); + performRangeSearch(vec, "[;;-1]", expected); +} + +void +SearchContextTest::testRangeSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numValues = 20; + const uint32_t numNibbleValues = 9; + + { // IntegerAttribute + std::vector<largeint_t> values; + std::vector<largeint_t> nibbleValues; + largeint_t start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + for (uint32_t i = 0; i < numNibbleValues; ++i) { + nibbleValues.push_back(start + i); + } + + for (const auto & cfg : _integerCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, values); + } + { // CollectionType::ARRAY Flags. 
+ Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, values); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint4", cfg); + testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, nibbleValues); + } + } + + { // FloatingPointAttribute + std::vector<double> values; + double start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + + for (const auto & cfg : _floatCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch<FloatingPointAttribute, double>(ptr, numDocs, values); + } + } +} + + +//----------------------------------------------------------------------------- +// Test case insensitive search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +void +SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr) +{ + LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 5 * 5; + addDocs(*ptr.get(), numDocs); + + const char * terms[][5] = { + {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower + {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper + {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper + {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase + {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase + }; + + uint32_t doc = 1; + for (uint32_t j = 0; j 
< 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc++, terms[i][j])); + } + } + + ptr->commit(true); + + const char * buffer[1]; + doc = 1; + for (uint32_t j = 0; j < 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1)); + EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j])); + } + } + + DocSet empty; + for (uint32_t j = 0; j < 5; ++j) { + DocSet expected; + for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) { + expected.insert(doc); + } + // for non-posting attributes only lower case search terms should give hits + performCaseInsensitiveSearch(vec, terms[0][j], expected); + + if (ptr->getConfig().fastSearch()) { + for (uint32_t i = 1; i < 5; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], expected); + } + } else { + for (uint32_t i = 1; i < 4; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], empty); + } + } + } + performCaseInsensitiveSearch(vec, "none", empty); + performCaseInsensitiveSearch(vec, "NONE", empty); + performCaseInsensitiveSearch(vec, "None", empty); +} + +void +SearchContextTest::testRegexSearch(const AttributePtr & ptr) +{ + LOG(info, "testRegexSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"}; + std::vector<const char *> terms = { "abc", "bc2de" }; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector<DocSet> expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.push_back(DocSet(docs, docs + 6)); // "abc" + } + { + uint32_t docs[] = {2, 3}; + expected.push_back(DocSet(docs, docs + 2)); // "bc2de" + } + + for (uint32_t i = 
0; i < terms.size(); ++i) { + performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP); + performSearch(vec, terms[i], empty, QueryTermSimple::WORD); + } +} + + +void +SearchContextTest::testCaseInsensitiveSearch() +{ + for (const auto & cfg : _stringCfg) { + testCaseInsensitiveSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +void +SearchContextTest::testRegexSearch() +{ + for (const auto & cfg : _stringCfg) { + testRegexSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + + +//----------------------------------------------------------------------------- +// Test prefix search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ + performSearch(vec, term, expected, termType); +} + +void +SearchContextTest::testPrefixSearch(const AttributePtr & ptr) +{ + LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"prefixsearch", "PREFIXSEARCH", "PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"}; + const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"}, + {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}}; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector<DocSet> expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.push_back(DocSet(docs, docs + 6)); // "pre" + } + { + uint32_t docs[] = {1, 2, 3}; + expected.push_back(DocSet(docs, docs + 3)); // "pref" + } + { + uint32_t docs[] = {4, 5, 6}; + expected.push_back(DocSet(docs, docs + 3)); // "prec" + } + 
expected.push_back(DocSet()); // "prex" + + for (uint32_t i = 0; i < 4; ++i) { + for (uint32_t j = 0; j < 3; ++j) { + if (j == 0 || ptr->getConfig().fastSearch()) { + performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } else { + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } + } + } +} + + +void +SearchContextTest::testPrefixSearch() +{ + for (const auto & cfg : _stringCfg) { + testPrefixSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(4); + VectorType & v = dynamic_cast<VectorType &>(*a); + resetAttribute(v, startValue); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(4u, rs->getNumHits()); + ASSERT_TRUE(4u == rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(1u, array[0]._docId); + EXPECT_EQUAL(2u, array[1]._docId); + EXPECT_EQUAL(3u, array[2]._docId); + EXPECT_EQUAL(4u, array[3]._docId); + } + a->clearDoc(1); + a->clearDoc(3); + a->commit(true); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(2u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(2u, array[0]._docId); + EXPECT_EQUAL(4u, array[1]._docId); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(cfg.first, cfg.second, 10, 
"10"); + requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(cfg.first, cfg.second, 10, "<11"); + } + + for (const auto & cfg : _floatCfg) { + requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(cfg.first, cfg.second, 10.5, "10.5"); + requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(cfg.first, cfg.second, 10.5, "<10.6"); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterClearDoc<StringAttribute>(cfg.first, cfg.second, "start", "start"); + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + ValueType defaultValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(15); + VectorType & va = dynamic_cast<VectorType &>(*a); + resetAttribute(va, startValue); // triggers vector vector in posting list (count 15) + AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg); + EXPECT_TRUE(a->saveAs(b->getBaseFileName())); + EXPECT_TRUE(b->load()); + b->clearDoc(6); // goes from vector vector to single vector with count 14 + b->commit(true); + { + ResultSetPtr rs = performSearch(dynamic_cast<VectorType &>(*b), term); + EXPECT_EQUAL(14u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + for (uint32_t i = 0; i < 14; ++i) { + if (i < 5) { + EXPECT_EQUAL(i + 1, array[i]._docId); + } else + EXPECT_EQUAL(i + 2, array[i]._docId); + } + } + ValueType buf; + if (cfg.collectionType().isMultiValue()) { + EXPECT_EQUAL(0u, b->get(6, &buf, 1)); + } else { + EXPECT_EQUAL(1u, b->get(6, &buf, 1)); + EXPECT_EQUAL(defaultValue, buf); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc() +{ + { + int64_t value = 10; 
+ int64_t defValue = search::attribute::getUndefined<int32_t>(); + requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>("s-fs-int32", _integerCfg["s-fs-int32"], + value, defValue, "10"); + requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>("a-fs-int32", _integerCfg["a-fs-int32"], + value, defValue, "10"); + } + { + vespalib::string value = "foo"; + vespalib::string defValue = ""; + requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>("s-fs-str", _stringCfg["s-fs-str"], + value, defValue, value); + requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>("a-fs-str", _stringCfg["a-fs-str"], + value, defValue, value); + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + VectorType & va = dynamic_cast<VectorType &>(*a); + LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(2); + va.update(1, value1); + va.commit(true); + va.update(2, value1); + va.update(2, value2); + va.commit(true); + { + ResultSetPtr rs = performSearch(va, value1); + EXPECT_EQUAL(1u, rs->getNumHits()); // doc 1 should not have this value + } + { + ResultSetPtr rs = performSearch(va, value2); + EXPECT_EQUAL(1u, rs->getNumHits()); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterUpdates<IntegerAttribute>(cfg.first, cfg.second, 10, 20); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterUpdates<StringAttribute>(cfg.first, cfg.second, "foo", "bar"); + } +} + +void +SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded() +{ + LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()"); + Config 
cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + { + cfg.setGrowStrategy(GrowStrategy(1, 0, 1)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a); + addReservedDoc(fa); + fa.addDocs(1); + fa.append(1, 10, 1); + fa.append(1, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(2, 20, 1); + fa.append(2, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(3, 30, 1); + fa.append(3, 26, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(4, 40, 1); + fa.append(4, 24, 1); + fa.commit(true); + { + ResultSetPtr rs = performSearch(fa, "<24"); + EXPECT_EQUAL(2u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + } + { + ResultSetPtr rs = performSearch(fa, "24"); + EXPECT_EQUAL(3u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + EXPECT_EQUAL(4u, rs->getArray()[2]._docId); + } + } + { + cfg.setGrowStrategy(GrowStrategy(4, 0, 4)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a); + std::vector<uint32_t> exp50; + std::vector<uint32_t> exp60; + addReservedDoc(fa); + for (uint32_t i = 0; i < 200; ++i) { + uint32_t docId; + EXPECT_TRUE(fa.addDoc(docId)); + if (i % 2 == 0) { + fa.append(docId, 50, 1); + exp50.push_back(docId); + } else { + fa.append(docId, 60, 1); + exp60.push_back(docId); + } + fa.commit(true); + { + ResultSetPtr rs1 = performSearch(fa, "50"); + ResultSetPtr rs2 = performSearch(fa, "<51"); + EXPECT_EQUAL(exp50.size(), rs1->getNumHits()); + EXPECT_EQUAL(exp50.size(), rs2->getNumHits()); + for (size_t j = 0; j < exp50.size(); ++j) { + EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId); + EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId); + } + } + { + ResultSetPtr rs = performSearch(fa, "60"); + EXPECT_EQUAL(exp60.size(), rs->getNumHits()); + for (size_t j = 0; j 
< exp60.size(); ++j) { + EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId); + } + } + } + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + VectorType & va = dynamic_cast<VectorType &>(*a); + LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(1); + va.update(1, value); + va.commit(true); + ResultSetPtr rs = performSearch(va, "foo"); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + requireThatInvalidSearchTermGivesZeroHits<IntegerAttribute>(cfg.first, cfg.second, 10); + } + for (const auto & cfg : _floatCfg) { + requireThatInvalidSearchTermGivesZeroHits<FloatingPointAttribute>(cfg.first, cfg.second, 10); + } +} + +void +SearchContextTest::requireThatFlagAttributeHandlesTheByteRange() +{ + LOG(info, "requireThatFlagAttributeHandlesTheByteRange()"); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a); + addReservedDoc(fa); + fa.addDocs(5); + fa.append(1, -128, 1); + fa.append(2, -64, 1); + fa.append(2, -8, 1); + fa.append(3, 0, 1); + fa.append(3, 8, 1); + fa.append(4, 64, 1); + fa.append(4, 24, 1); + fa.append(5, 127, 1); + fa.commit(true); + + performSearch(fa, "-128", DocSet().put(1), QueryTermSimple::WORD); + performSearch(fa, "127", DocSet().put(5), QueryTermSimple::WORD); + performSearch(fa, ">-128", DocSet().put(2).put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "<127", DocSet().put(1).put(2).put(3).put(4), QueryTermSimple::WORD); + performSearch(fa, "[-128;-8]", DocSet().put(1).put(2), 
QueryTermSimple::WORD); + performSearch(fa, "[-8;8]", DocSet().put(2).put(3), QueryTermSimple::WORD); + performSearch(fa, "[8;127]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "[-129;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); + performSearch(fa, "[8;128]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + IntegerAttribute &ia = dynamic_cast<IntegerAttribute &>(*a); + addReservedDoc(*a); + a->addDocs(1); + ia.update(1, maxValue); + ia.commit(true); + vespalib::string term = vespalib::make_string("%" PRIu64 "", (int64_t) maxValue + 1); + LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str()); + ResultSetPtr rs = performSearch(ia, term); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + int32_t maxValue = std::numeric_limits<int32_t>::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits(cfg.first, cfg.second, maxValue); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + int8_t maxValue = std::numeric_limits<int8_t>::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits("flags", cfg, maxValue); + } +} + + +void +SearchContextTest::initIntegerConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::INT32, CollectionType::SINGLE); + _integerCfg["s-int32"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + _integerCfg["s-fs-int32"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::INT32, CollectionType::ARRAY); + _integerCfg["a-int32"] = cfg; + } + { // CollectionType::ARRAY && 
fastSearch + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + _integerCfg["a-fs-int32"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::INT32, CollectionType::WSET); + _integerCfg["w-int32"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + _integerCfg["w-fs-int32"] = cfg; + } +} + +void +SearchContextTest::initFloatConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + _floatCfg["s-float"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + _floatCfg["s-fs-float"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + _floatCfg["a-float"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + cfg.setFastSearch(true); + _floatCfg["a-fs-float"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::FLOAT, CollectionType::WSET); + _floatCfg["w-float"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + _floatCfg["w-fs-float"] = cfg; + } +} + +void +SearchContextTest::initStringConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::STRING, CollectionType::SINGLE); + _stringCfg["s-str"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::STRING, CollectionType::ARRAY); + _stringCfg["a-str"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::STRING, CollectionType::WSET); + _stringCfg["w-str"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + _stringCfg["s-fs-str"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::STRING, CollectionType::ARRAY); + 
cfg.setFastSearch(true); + _stringCfg["a-fs-str"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + _stringCfg["w-fs-str"] = cfg; + } +} + +SearchContextTest::SearchContextTest() : + _integerCfg(), + _floatCfg(), + _stringCfg() +{ + initIntegerConfig(); + initFloatConfig(); + initStringConfig(); +} + +int +SearchContextTest::Main() +{ + TEST_INIT("searchcontext_test"); + EXPECT_TRUE(true); + + testSearch(); + testInitRange(); + testRangeSearch(); + testRangeSearchLimited(); + testCaseInsensitiveSearch(); + testRegexSearch(); + testPrefixSearch(); + testSearchIterator(); + testSearchIteratorUnpacking(); + TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterUpdates()); + TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()); + TEST_DO(requireThatInvalidSearchTermGivesZeroHits()); + TEST_DO(requireThatFlagAttributeHandlesTheByteRange()); + TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::SearchContextTest); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh new file mode 100755 index 00000000000..3aae4bfe4d5 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +$VALGRIND ./searchlib_searchcontext_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight diff --git a/searchlib/src/tests/attribute/sourceselector/.gitignore b/searchlib/src/tests/attribute/sourceselector/.gitignore new file mode 100644 index 00000000000..265c856fd01 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +sourceselector_test +searchlib_sourceselector_test_app diff --git a/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt 
b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt new file mode 100644 index 00000000000..24b7a75dd07 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sourceselector_test_app + SOURCES + sourceselector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sourceselector_test_app COMMAND searchlib_sourceselector_test_app) diff --git a/searchlib/src/tests/attribute/sourceselector/DESC b/searchlib/src/tests/attribute/sourceselector/DESC new file mode 100644 index 00000000000..7568f5de080 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/DESC @@ -0,0 +1 @@ +This is a test of the sourceselector interface. diff --git a/searchlib/src/tests/attribute/sourceselector/FILES b/searchlib/src/tests/attribute/sourceselector/FILES new file mode 100644 index 00000000000..0d2803e762d --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/FILES @@ -0,0 +1 @@ +sourceselector.cpp diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp new file mode 100644 index 00000000000..a3595f8724d --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp @@ -0,0 +1,216 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for sourceselector. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("sourceselector_test"); + +#include <vespa/searchlib/attribute/fixedsourceselector.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/vespalib/testkit/testapp.h> + +using std::unique_ptr; +using std::string; +using namespace search; +using namespace search::queryeval; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; + +namespace { +template <typename T, size_t N> size_t arraysize(const T (&)[N]) { return N; } + +const uint32_t maxDocId = 4096; +struct DocSource { uint32_t docId; uint8_t source; }; +const DocSource docs[] = { {0,1}, {1, 0}, {2, 2}, {4, 3}, {8, 9}, {16, 178}, + {32, 1}, {64, 2}, {128, 3}, {256,4}, {512, 2}, + {1024, 1}, {2048,5}, {maxDocId,1} }; +const string index_dir = "test_data"; +const string base_file_name = "test_data/sourcelist"; +const string base_file_name2 = "test_data/sourcelist2"; +const uint32_t default_source = 7; +const uint32_t base_id = 42; + +class Test : public vespalib::TestApp +{ +public: + int Main(); +private: + void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector); + void testFixed(const DocSource *docSource, size_t sz); + template <typename SelectorType> + void requireThatSelectorCanCloneAndSubtract(); + void requireThatSelectorCanCloneAndSubtract(); + template <typename SelectorType> + void requireThatSelectorCanSaveAndLoad(); + void requireThatSelectorCanSaveAndLoad(); + template <typename SelectorType> + void requireThatCompleteSourceRangeIsHandled(); + void requireThatCompleteSourceRangeIsHandled(); + template <typename SelectorType> + void requireThatSourcesAreCountedCorrectly(); + void requireThatSourcesAreCountedCorrectly(); +}; + +int +Test::Main() +{ + TEST_INIT("sourceselector_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + testFixed(docs, arraysize(docs)); + 
TEST_DO(requireThatSelectorCanCloneAndSubtract()); + TEST_DO(requireThatSelectorCanSaveAndLoad()); + TEST_DO(requireThatCompleteSourceRangeIsHandled()); + TEST_DO(requireThatSourcesAreCountedCorrectly()); + + TEST_DONE(); +} + +void setSources(ISourceSelector &selector) { + for (size_t i = 0; i < arraysize(docs); ++i) { + selector.setSource(docs[i].docId, docs[i].source); + } +} + +void Test::testFixed(const DocSource *docSource, size_t sz) +{ + FixedSourceSelector selector(default_source, base_file_name, 10); + EXPECT_EQUAL(default_source, selector.getDefaultSource()); + EXPECT_EQUAL(10u, selector.getDocIdLimit()); +// EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1)); + setSources(selector); + testSourceSelector(docSource, sz, selector.getDefaultSource(), selector); + EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit()); +} + +void Test::testSourceSelector(const DocSource *docSource, size_t sz, + uint8_t defaultSource, ISourceSelector &selector) +{ + { + ISourceSelector::Iterator::UP it(selector.createIterator()); + for (size_t i = 0; i < sz; ++i) { + EXPECT_EQUAL(docSource[i].source, it->getSource(docSource[i].docId)); + } + } + { + ISourceSelector::Iterator::UP it(selector.createIterator()); + for (size_t i = 0, j = 0; i <= docSource[sz - 1].docId; ++i) { + if (i != docSource[j].docId) { + EXPECT_EQUAL(defaultSource, it->getSource(i)); + } else { + EXPECT_EQUAL(docSource[j].source, it->getSource(i)); + ++j; + } + } + } +} + +template <typename SelectorType> +void +Test::requireThatSelectorCanCloneAndSubtract() +{ + SelectorType selector(default_source, base_file_name); + setSources(selector); + selector.setBaseId(base_id); + + const uint32_t diff = 3; + typename SelectorType::UP + new_selector(selector.cloneAndSubtract(base_file_name2, diff)); + EXPECT_EQUAL(default_source - diff, new_selector->getDefaultSource()); + EXPECT_EQUAL(base_id + diff, new_selector->getBaseId()); + EXPECT_EQUAL(maxDocId+1, new_selector->getDocIdLimit()); + 
+ ISourceSelector::Iterator::UP it(new_selector->createIterator()); + for(size_t i = 0; i < arraysize(docs); ++i) { + if (docs[i].source > diff) { + EXPECT_EQUAL(docs[i].source - diff, it->getSource(docs[i].docId)); + } else { + EXPECT_EQUAL(0, it->getSource(docs[i].docId)); + } + } +} + +void +Test::requireThatSelectorCanCloneAndSubtract() +{ + requireThatSelectorCanCloneAndSubtract<FixedSourceSelector>(); +} + +template <typename SelectorType> +void +Test::requireThatSelectorCanSaveAndLoad() +{ + SelectorType selector(default_source, base_file_name2); + setSources(selector); + selector.setBaseId(base_id); + selector.setSource(maxDocId + 1, default_source); + + FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); + FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str()); + + SourceSelector::SaveInfo::UP save_info = + selector.extractSaveInfo(base_file_name); + save_info->save(TuneFileAttributes(), DummyFileHeaderContext()); + typename SelectorType::UP + selector2(SelectorType::load(base_file_name)); + testSourceSelector(docs, arraysize(docs), default_source, *selector2); + EXPECT_EQUAL(base_id, selector2->getBaseId()); + EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit()); + + FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); +} + +void +Test::requireThatSelectorCanSaveAndLoad() +{ + requireThatSelectorCanSaveAndLoad<FixedSourceSelector>(); +} + +template <typename SelectorType> +void +Test::requireThatCompleteSourceRangeIsHandled() +{ + SelectorType selector(default_source, base_file_name); + for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) { + selector.setSource(i, i); + } + ISourceSelector::Iterator::UP itr = selector.createIterator(); + for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) { + EXPECT_EQUAL((queryeval::Source)i, itr->getSource(i)); + } +} + +void +Test::requireThatCompleteSourceRangeIsHandled() +{ + requireThatCompleteSourceRangeIsHandled<FixedSourceSelector>(); +} + +template 
<typename SelectorType> +void +Test::requireThatSourcesAreCountedCorrectly() +{ + SelectorType selector(default_source, base_file_name); + for (uint32_t i = 0; i < 256; ++i) { + selector.setSource(i, i%16); + } + SourceSelector::Histogram hist = selector.getDistribution(); + for (uint32_t i = 0; i < 16; ++i) { + EXPECT_EQUAL(16u, hist[i]); + } + for (uint32_t i = 16; i < 256; ++i) { + EXPECT_EQUAL(0u, hist[i]); + } +} + +void +Test::requireThatSourcesAreCountedCorrectly() +{ + requireThatSourcesAreCountedCorrectly<FixedSourceSelector>(); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/stringattribute/.gitignore b/searchlib/src/tests/attribute/stringattribute/.gitignore new file mode 100644 index 00000000000..0e8a04bc19d --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +stringattribute_test +searchlib_stringattribute_test_app diff --git a/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt new file mode 100644 index 00000000000..032ce9cac4e --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_stringattribute_test_app + SOURCES + stringattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stringattribute_test_app COMMAND sh stringattribute_test.sh) diff --git a/searchlib/src/tests/attribute/stringattribute/DESC b/searchlib/src/tests/attribute/stringattribute/DESC new file mode 100644 index 00000000000..5d94ab94325 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/DESC @@ -0,0 +1 @@ +Unit tests for SingleValueStringAttribute and MultiValueStringAttribute. 
diff --git a/searchlib/src/tests/attribute/stringattribute/FILES b/searchlib/src/tests/attribute/stringattribute/FILES new file mode 100644 index 00000000000..e68ef57177d --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/FILES @@ -0,0 +1 @@ +stringattribute.cpp diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp new file mode 100644 index 00000000000..154340ba408 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -0,0 +1,453 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("stringattribute_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/enumstore.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/singlestringpostattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/multistringpostattribute.h> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/singlestringpostattribute.hpp> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/multistringpostattribute.hpp> + +namespace search { + +using attribute::CollectionType; +using attribute::IAttributeVector; + +class StringAttributeTest : public vespalib::TestApp +{ +private: + typedef ArrayStringAttribute ArrayStr; + typedef WeightedSetStringAttribute WeightedSetStr; + typedef ArrayStringPostingAttribute ArrayStrPosting; + typedef WeightedSetStringPostingAttribute WeightedSetStrPosting; + typedef attribute::Config Config; + typedef attribute::BasicType BasicType; + + template <typename Attribute> + void addDocs(Attribute & vec, 
uint32_t numDocs); + template <typename Attribute> + void checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const vespalib::string & value); + void testMultiValue(); + template <typename Attribute> + void testMultiValue(Attribute & attr, uint32_t numDocs); + void testMultiValueMultipleClearDocBetweenCommit(); + void testMultiValueRemove(); + void testSingleValue(); + void testDefaultValueOnAddDoc(AttributeVector & v); + template <typename Attribute> + void testSingleValue(Attribute & svsa, Config &cfg); + +public: + int Main(); +}; + +template <typename Attribute> +void +StringAttributeTest::addDocs(Attribute & vec, uint32_t numDocs) +{ + for (uint32_t i = 0; i < numDocs; ++i) { + typename Attribute::DocId doc; + EXPECT_TRUE(vec.addDoc(doc)); + EXPECT_TRUE(doc == i); + EXPECT_TRUE(vec.getNumDocs() == i + 1); + EXPECT_TRUE(vec.getValueCount(doc) == 0); + } + EXPECT_TRUE(vec.getNumDocs() == numDocs); +} + +template <typename Attribute> +void +StringAttributeTest::checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const vespalib::string & value) +{ + std::vector<vespalib::string> buffer(valueCount); + EXPECT_TRUE(static_cast<uint32_t>(vec.getValueCount(doc)) == valueCount); + EXPECT_TRUE(vec.get(doc, &buffer[0], buffer.size()) == valueCount); + EXPECT_TRUE(std::count(buffer.begin(), buffer.end(), value) == numValues); +} + + +void +StringAttributeTest::testMultiValue() +{ + uint32_t numDocs = ArrayStr::MultiValueMapping::maxValues() + 1; + + { // Array String Attribute + ASSERT_TRUE(ArrayStr::MultiValueMapping::maxValues() == numDocs - 1); + ArrayStr attr("a-string"); + testMultiValue(attr, numDocs); + } + { // Weighted Set String Attribute + ASSERT_TRUE(WeightedSetStr::MultiValueMapping::maxValues() == numDocs - 1); + WeightedSetStr attr("ws-string", + Config(BasicType::STRING, CollectionType::WSET)); + testMultiValue(attr, numDocs); + } + { // Array String Posting Attribute + 
ASSERT_TRUE(ArrayStrPosting::MultiValueMapping::maxValues() == numDocs - 1); + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + ArrayStrPosting attr("a-fs-string", cfg); + testMultiValue(attr, numDocs); + } + { // Weighted Set String Posting Attribute + ASSERT_TRUE(WeightedSetStrPosting::MultiValueMapping::maxValues() == numDocs - 1); + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + WeightedSetStrPosting attr("ws-fs-string", cfg); + testMultiValue(attr, numDocs); + } + +} + + +template <typename Attribute> +void +StringAttributeTest::testMultiValue(Attribute & attr, uint32_t numDocs) +{ + EXPECT_TRUE(attr.getNumDocs() == 0); + + // generate two sets of unique strings + std::vector<vespalib::string> uniqueStrings; + uniqueStrings.reserve(numDocs - 1); + for (uint32_t i = 0; i < numDocs - 1; ++i) { + char unique[16]; + sprintf(unique, i < 10 ? "enum0%u" : "enum%u", i); + uniqueStrings.push_back(vespalib::string(unique)); + } + std::vector<vespalib::string> newUniques; + newUniques.reserve(numDocs - 1); + for (uint32_t i = 0; i < numDocs - 1; ++i) { + char unique[16]; + sprintf(unique, i < 10 ? 
"unique0%u" : "unique%u", i); + newUniques.push_back(vespalib::string(unique)); + } + + // add docs + addDocs(attr, numDocs); + + // insert values + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(attr.append(doc, uniqueStrings[j], 1)); + } + attr.commit(); + } + + //attr.getEnumStore().printCurrentContent(); + + // check values and enums + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = attr.getValueCount(doc); + EXPECT_TRUE(valueCount == doc); + + // test get first + if (valueCount == 0) { + EXPECT_TRUE(attr.get(doc) == NULL); + EXPECT_TRUE(attr.getEnum(doc) == std::numeric_limits<uint32_t>::max()); + } else { + EXPECT_TRUE(strcmp(attr.get(doc), uniqueStrings[0].c_str()) == 0); + uint32_t e; + EXPECT_TRUE(attr.findEnum(uniqueStrings[0].c_str(), e)); + EXPECT_TRUE(attr.getEnum(doc) == e); + } + + // test get all + std::vector<vespalib::string> values(valueCount); + EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + + std::vector<uint32_t> enums(valueCount); + EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount); + + for (uint32_t j = 0; j < valueCount; ++j) { + //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str()); + EXPECT_TRUE(values[j] == uniqueStrings[j]); + uint32_t e = 100; + EXPECT_TRUE(attr.findEnum(values[j].c_str(), e)); + EXPECT_TRUE(enums[j] == e); + } + } + + // check for correct refcounts + for (uint32_t i = 0; i < uniqueStrings.size(); ++i) { + typename Attribute::EnumStore::Index idx; + EXPECT_TRUE(attr.getEnumStore().findIndex(uniqueStrings[i].c_str(), idx)); + uint32_t expectedUsers = numDocs - 1 - i; + EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx)); + } + + typename Attribute::Histogram remaining = attr.getMultiValueMapping().getRemaining(); + for (typename Attribute::Histogram::const_iterator it(remaining.begin()), 
mt(remaining.end()); it != mt; ++it) { + EXPECT_TRUE(it->second == 0); + } + + // clear and insert new unique strings + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t oldValueCount = doc; + uint32_t valueCount = numDocs - 1 - doc; + //LOG(info, "clear and insert: doc = %u, valueCount = %u", doc, valueCount); + EXPECT_TRUE(attr.clearDoc(doc) == oldValueCount); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(attr.append(doc, newUniques[j], 1)); + } + attr.commit(); + + //attr.getEnumStore().printCurrentContent(); + } + + // check values and enums + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = attr.getValueCount(doc); + uint32_t expectedValueCount = numDocs - 1 - doc; + EXPECT_TRUE(valueCount == expectedValueCount); + + // test get all + std::vector<vespalib::string> values(valueCount); + EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + + std::vector<uint32_t> enums(valueCount); + EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount); + + for (uint32_t j = 0; j < valueCount; ++j) { + //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str()); + EXPECT_TRUE(values[j] == newUniques[j]); + uint32_t e = 100; + EXPECT_TRUE(attr.findEnum(values[j].c_str(), e)); + EXPECT_TRUE(enums[j] == e); + } + } + + // check that enumXX strings are removed + for (uint32_t i = 0; i < uniqueStrings.size(); ++i) { + uint32_t e; + EXPECT_TRUE(!attr.findEnum(uniqueStrings[i].c_str(), e)); + } + + // check for correct refcounts + for (uint32_t i = 0; i < newUniques.size(); ++i) { + typename Attribute::EnumStore::Index idx; + EXPECT_TRUE(attr.getEnumStore().findIndex(newUniques[i].c_str(), idx)); + uint32_t expectedUsers = numDocs - 1 - i; + EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx)); + } +} + +void +StringAttributeTest::testMultiValueMultipleClearDocBetweenCommit() +{ + // This is also tested for all array attributes in attribute unit test + 
ArrayStr mvsa("a-string"); + uint32_t numDocs = 50; + addDocs(mvsa, numDocs); + std::vector<vespalib::string> buffer(numDocs); + + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + EXPECT_TRUE(mvsa.clearDoc(doc) == 0); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(mvsa.append(doc, "first", 1)); + } + EXPECT_TRUE(mvsa.clearDoc(doc) == 0); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(mvsa.append(doc, "second", 1)); + } + mvsa.commit(); + + // check for correct values + checkCount(mvsa, doc, valueCount, valueCount, "second"); + } +} + + +void +StringAttributeTest::testMultiValueRemove() +{ + // This is also tested for all array attributes in attribute unit test + ArrayStr mvsa("a-string"); + uint32_t numDocs = 50; + addDocs(mvsa, numDocs); + std::vector<vespalib::string> buffer(9); + + for (uint32_t doc = 0; doc < numDocs; ++doc) { + EXPECT_TRUE(mvsa.append(doc, "one", 1)); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_TRUE(mvsa.append(doc, "three", 1)); + } + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(mvsa.append(doc, "five", 1)); + } + + mvsa.commit(); + checkCount(mvsa, doc, 9, 1, "one"); + checkCount(mvsa, doc, 9, 3, "three"); + checkCount(mvsa, doc, 9, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "zero", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 9, 1, "one"); + checkCount(mvsa, doc, 9, 3, "three"); + checkCount(mvsa, doc, 9, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "one", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 8, 0, "one"); + checkCount(mvsa, doc, 8, 3, "three"); + checkCount(mvsa, doc, 8, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "five", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 3, 0, "one"); + checkCount(mvsa, doc, 3, 3, "three"); + checkCount(mvsa, doc, 3, 0, "five"); + } +} + +void +StringAttributeTest::testSingleValue() +{ + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + SingleValueStringAttribute svsa("svsa", cfg); + const IAttributeVector * ia = &svsa; + 
EXPECT_TRUE(dynamic_cast<const SingleValueEnumAttributeBase *>(ia) != nullptr); + testSingleValue(svsa, cfg); + + SingleValueStringAttribute svsb("svsa", cfg); + testDefaultValueOnAddDoc(svsb); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + SingleValueStringPostingAttribute svsa("svspb", cfg); + testSingleValue(svsa, cfg); + + SingleValueStringPostingAttribute svsb("svspb", cfg); + testDefaultValueOnAddDoc(svsb); + } +} + +void StringAttributeTest::testDefaultValueOnAddDoc(AttributeVector & v) +{ + EXPECT_EQUAL(0u, v.getNumDocs()); + v.addReservedDoc(); + EXPECT_EQUAL(1u, v.getNumDocs()); + EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(0)).valid() ); + uint32_t doc(7); + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_EQUAL(1u, doc); + EXPECT_EQUAL(2u, v.getNumDocs()); + EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(doc)).valid() ); + EXPECT_EQUAL(0u, strlen(v.getString(doc, NULL, 0))); +} + +template <typename Attribute> +void +StringAttributeTest::testSingleValue(Attribute & svsa, Config &cfg) +{ + StringAttribute & v = svsa; + const char * t = "not defined"; + uint32_t doc = 2000; + uint32_t e1 = 2000; + uint32_t e2 = 2000; + uint32_t numDocs = 1000; + char tmp[32]; + + // add docs + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_TRUE( doc == i ); + EXPECT_TRUE( v.getNumDocs() == i + 1 ); + EXPECT_TRUE( v.getValueCount(doc) == 1 ); + EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(doc)).valid() ); + } + + std::map<vespalib::string, uint32_t> enums; + // 10 unique strings + for (uint32_t i = 0; i < numDocs; ++i) { + sprintf(tmp, "enum%u", i % 10); + EXPECT_TRUE( v.update(i, tmp) ); + EXPECT_TRUE( v.getValueCount(i) == 1 ); + EXPECT_TRUE( ! 
EnumStoreBase::Index(v.getEnum(i)).valid() ); + if ((i % 10) == 9) { + v.commit(); + for (uint32_t j = i - 9; j <= i; ++j) { + sprintf(tmp, "enum%u", j % 10); + EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + e1 = v.getEnum(j); + EXPECT_TRUE( v.findEnum(t, e2) ); + EXPECT_TRUE( e1 == e2 ); + if (enums.count(vespalib::string(t)) == 0) { + enums[vespalib::string(t)] = e1; + } else { + EXPECT_TRUE( e1 == enums[vespalib::string(t)]); + EXPECT_TRUE( e2 == enums[vespalib::string(t)]); + } + } + } + } + + //svsa.printBuffers(); + + // 1000 unique strings + for (uint32_t i = 0; i < numDocs; ++i) { + sprintf(tmp, "unique%u", i); + EXPECT_TRUE( v.update(i, tmp) ); + sprintf(tmp, "enum%u", i % 10); + EXPECT_TRUE( strcmp(v.get(i), tmp) == 0 ); + if ((i % 10) == 9) { + //LOG(info, "commit: i = %u", i); + v.commit(); + for (uint32_t j = i - 9; j <= i; ++j) { + sprintf(tmp, "unique%u", j); + EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + e1 = v.getEnum(j); + EXPECT_TRUE( v.findEnum(t, e2) ); + EXPECT_TRUE( e1 == e2 ); + } + //svsa.printBuffers(); + } + } + //svsa.printBuffers(); + + // check that enumX strings are removed ( + for (uint32_t i = 0; i < 10; ++i) { + sprintf(tmp, "enum%u", i); + EXPECT_TRUE( !v.findEnum(tmp, e1) ); + } + + + Attribute load("load", cfg); + svsa.saveAs(load.getBaseFileName()); + load.load(); +} + + + +int +StringAttributeTest::Main() +{ + TEST_INIT("stringattribute_test"); + + testMultiValue(); + + testMultiValueMultipleClearDocBetweenCommit(); + + testMultiValueRemove(); + + testSingleValue(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::StringAttributeTest); diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh new file mode 100755 index 00000000000..d7ac263c1c9 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_stringattribute_test_app +rm -rf *.dat diff 
--git a/searchlib/src/tests/attribute/tensorattribute/.gitignore b/searchlib/src/tests/attribute/tensorattribute/.gitignore new file mode 100644 index 00000000000..08519fe7ae8 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/.gitignore @@ -0,0 +1 @@ +searchlib_tensorattribute_test_app diff --git a/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt new file mode 100644 index 00000000000..ec16b4363eb --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensorattribute_test_app + SOURCES + tensorattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensorattribute_test_app COMMAND sh tensorattribute_test.sh) diff --git a/searchlib/src/tests/attribute/tensorattribute/DESC b/searchlib/src/tests/attribute/tensorattribute/DESC new file mode 100644 index 00000000000..1cd9aa7cf14 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/DESC @@ -0,0 +1 @@ +Unit tests for TensorAttribute. diff --git a/searchlib/src/tests/attribute/tensorattribute/FILES b/searchlib/src/tests/attribute/tensorattribute/FILES new file mode 100644 index 00000000000..1c8480ffde7 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/FILES @@ -0,0 +1 @@ +tensorattribute.cpp diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp new file mode 100644 index 00000000000..137f93bcffe --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -0,0 +1,217 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("tensorattribute_test");
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/vespalib/tensor/tensor_factory.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <vespa/vespalib/tensor/simple/simple_tensor_builder.h>
+
+using search::attribute::TensorAttribute;
+using search::AttributeGuard;
+using search::AttributeVector;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::TensorCells;
+using vespalib::tensor::TensorDimensions;
+using vespalib::tensor::TensorFactory;
+using vespalib::tensor::TensorType;
+using vespalib::tensor::SimpleTensorBuilder;
+
+namespace vespalib {
+namespace tensor {
+
+// Equality for tensors, delegating to Tensor::equals(); needed so that
+// EXPECT_EQUAL can compare two Tensor objects below.
+static bool operator==(const Tensor &lhs, const Tensor &rhs)
+{
+    return lhs.equals(rhs);
+}
+
+}
+}
+
+
+/**
+ * Test fixture owning a single-value TensorAttribute named "test".
+ *
+ * The same object is reachable through two pointers: _tensorAttr (typed
+ * access to the tensor specific API) and _attr (generic AttributeVector
+ * API). The constructor adds the reserved document, so a fresh fixture
+ * reports one document.
+ */
+struct Fixture
+{
+    using BasicType = search::attribute::BasicType;
+    using CollectionType = search::attribute::CollectionType;
+    using Config = search::attribute::Config;
+
+    Config _cfg;
+    vespalib::string _name;
+    std::shared_ptr<TensorAttribute> _tensorAttr;
+    std::shared_ptr<AttributeVector> _attr;
+    vespalib::tensor::DefaultTensor::builder _builder;
+
+    /**
+     * @param typeSpec tensor type specification (e.g. "tensor(x{}, y{})")
+     *                 stored in the attribute config.
+     */
+    Fixture(const vespalib::string &typeSpec)
+        : _cfg(BasicType::TENSOR, CollectionType::SINGLE),
+          _name("test"),
+          _tensorAttr(),
+          _attr()
+    {
+        _cfg.setTensorType(TensorType::fromSpec(typeSpec));
+        _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg);
+        _attr = _tensorAttr;
+        _attr->addReservedDoc();
+    }
+
+    // Builds a tensor from the given cells using the fixture's builder.
+    Tensor::UP createTensor(const TensorCells &cells) {
+        return TensorFactory::create(cells, _builder);
+    }
+    // Builds a tensor from the given cells and an explicit dimension list.
+    Tensor::UP createTensor(const TensorCells &cells,
+                            const TensorDimensions &dimensions) {
+        return TensorFactory::create(cells, dimensions, _builder);
+    }
+
+    // Adds documents (committing after each one) until docId is below
+    // getNumDocs(), i.e. until docId can be addressed.
+    void ensureSpace(uint32_t docId) {
+        while (_attr->getNumDocs() <= docId) {
+            uint32_t newDocId = 0u;
+            _attr->addDoc(newDocId);
+            _attr->commit();
+        }
+    }
+
+    // Makes sure docId exists, clears its value and commits.
+    void clearTensor(uint32_t docId) {
+        ensureSpace(docId);
+        _tensorAttr->clearDoc(docId);
+        _attr->commit();
+    }
+
+    // Makes sure docId exists, stores the tensor for it and commits.
+    void setTensor(uint32_t docId, const Tensor &tensor) {
+        ensureSpace(docId);
+        _tensorAttr->setTensor(docId, tensor);
+        _attr->commit();
+    }
+
+    // Commits with the flag set to true and returns the attribute status.
+    // NOTE(review): the flag presumably forces the status to be refreshed;
+    // confirm against AttributeVector::commit.
+    search::attribute::Status getStatus() {
+        _attr->commit(true);
+        return _attr->getStatus();
+    }
+
+    // Expects that no tensor is stored for docId.
+    void
+    assertGetNoTensor(uint32_t docId) {
+        AttributeGuard guard(_attr);
+        Tensor::UP actTensor = _tensorAttr->getTensor(docId);
+        EXPECT_FALSE(actTensor);
+    }
+
+    // Expects that a tensor is stored for docId and that it equals expTensor.
+    void
+    assertGetTensor(const Tensor &expTensor, uint32_t docId)
+    {
+        AttributeGuard guard(_attr);
+        Tensor::UP actTensor = _tensorAttr->getTensor(docId);
+        EXPECT_TRUE(static_cast<bool>(actTensor));
+        // EXPECT_TRUE is used as a non-fatal check here, so only dereference
+        // when a tensor was actually returned; a missing tensor is then
+        // reported as a test failure instead of crashing the test binary.
+        if (actTensor) {
+            EXPECT_EQUAL(expTensor, *actTensor);
+        }
+    }
+
+    // Convenience overload: builds the expected tensor from cells and
+    // dimensions before comparing with the stored one.
+    void
+    assertGetTensor(const TensorCells &expCells,
+                    const TensorDimensions &expDimensions,
+                    uint32_t docId)
+    {
+        Tensor::UP expTensor = createTensor(expCells, expDimensions);
+        assertGetTensor(*expTensor, docId);
+    }
+
+    // Saves the attribute and expects the save to succeed.
+    void save() {
+        bool saveok = _attr->save();
+        EXPECT_TRUE(saveok);
+    }
+
+    // Replaces the attribute with a fresh instance (same name and config)
+    // and expects loading it to succeed.
+    void load() {
+        _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg);
+        _attr = _tensorAttr;
+        bool loadok = _attr->load();
+        EXPECT_TRUE(loadok);
+    }
+};
+
+
+// A fresh attribute only contains the reserved document.
+TEST_F("Test empty tensor attribute", Fixture("tensor()"))
+{
+    EXPECT_EQUAL(1u, f._attr->getNumDocs());
+    EXPECT_EQUAL(1u, f._attr->getCommittedDocIdLimit());
+}
+
+
+// Set, read back and clear tensors for individual documents.
+TEST_F("Test setting tensor value", Fixture("tensor(x{}, y{})"))
+{
+    f.ensureSpace(4);
+    EXPECT_EQUAL(5u, f._attr->getNumDocs());
+    EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit());
+    TEST_DO(f.assertGetNoTensor(4));
+    f.setTensor(4, *f.createTensor({}, {}));
+    TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4));
+    f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"}));
+    TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3));
+    TEST_DO(f.assertGetNoTensor(2));
+    TEST_DO(f.clearTensor(3));
+    TEST_DO(f.assertGetNoTensor(3));
+}
+
+
+// Values must survive a save followed by a load into a new instance.
+TEST_F("Test saving / loading tensor attribute", Fixture("tensor(x{}, y{})"))
+{
+    f.ensureSpace(4);
+    f.setTensor(4, *f.createTensor({}, {}));
+    f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"}));
+    TEST_DO(f.save());
+    TEST_DO(f.load());
+    EXPECT_EQUAL(5u, f._attr->getNumDocs());
+    EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit());
+    TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3));
+    TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4));
+    TEST_DO(f.assertGetNoTensor(2));
+}
+
+
+// Repeatedly clears and re-sets one document until the reported used memory
+// drops, which must happen before iterLimit iterations; afterwards all
+// stored tensors must still be readable.
+TEST_F("Test compaction of tensor attribute", Fixture("tensor(x{}, y{})"))
+{
+    f.ensureSpace(4);
+    Tensor::UP emptytensor = f.createTensor({}, {});
+    Tensor::UP emptyxytensor = f.createTensor({}, {"x", "y"});
+    Tensor::UP simpletensor = f.createTensor({ {{}, 3} }, { "x", "y"});
+    Tensor::UP filltensor = f.createTensor({ {{}, 5} }, { "x", "y"});
+    f.setTensor(4, *emptytensor);
+    f.setTensor(3, *simpletensor);
+    f.setTensor(2, *filltensor);
+    f.clearTensor(2);
+    f.setTensor(2, *filltensor);
+    search::attribute::Status oldStatus = f.getStatus();
+    search::attribute::Status newStatus = oldStatus;
+    uint64_t iter = 0;
+    uint64_t iterLimit = 100000;
+    for (; iter < iterLimit; ++iter) {
+        f.clearTensor(2);
+        f.setTensor(2, *filltensor);
+        newStatus = f.getStatus();
+        // Stop at the first iteration where used memory went down.
+        if (newStatus.getUsed() < oldStatus.getUsed()) {
+            break;
+        }
+        oldStatus = newStatus;
+    }
+    EXPECT_GREATER(iterLimit, iter);
+    LOG(info,
+        "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64,
+        iter, oldStatus.getUsed(), newStatus.getUsed());
+    TEST_DO(f.assertGetNoTensor(1));
+    TEST_DO(f.assertGetTensor(*filltensor, 2));
+    TEST_DO(f.assertGetTensor(*simpletensor, 3));
+    TEST_DO(f.assertGetTensor(*emptyxytensor, 4));
+}
+
+// The saved file header must carry the tensor type spec in a "tensortype" tag.
+TEST_F("Test tensortype file header tag", Fixture("tensor(x[10])"))
+{
+    f.ensureSpace(4);
+    TEST_DO(f.save());
+
+    vespalib::FileHeader header;
+    FastOS_File file;
+    const bool opened = file.OpenReadOnly("test.dat");
+    EXPECT_TRUE(opened);
+    // Only read from (and close) the file when it was actually opened; the
+    // tag expectations below then report the failure if it was not.
+    if (opened) {
+        (void) header.readFile(file);
+        file.Close();
+    }
+    EXPECT_TRUE(header.hasTag("tensortype"));
+    EXPECT_EQUAL("tensor(x[10])", header.getTag("tensortype").asString());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh
new file mode 100644
index 00000000000..2e940d5d99a
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+$VALGRIND ./searchlib_tensorattribute_test_app
+rm -rf *.dat
|