aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/attribute
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib/src/tests/attribute')
-rw-r--r--searchlib/src/tests/attribute/.gitignore11
-rw-r--r--searchlib/src/tests/attribute/CMakeLists.txt29
-rw-r--r--searchlib/src/tests/attribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/FILES2
-rw-r--r--searchlib/src/tests/attribute/attribute_test.cpp2200
-rw-r--r--searchlib/src/tests/attribute/attribute_test.sh7
-rw-r--r--searchlib/src/tests/attribute/attributebenchmark.cpp678
-rw-r--r--searchlib/src/tests/attribute/attributebenchmark.rb22
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp116
-rw-r--r--searchlib/src/tests/attribute/attributeguard.cpp32
-rw-r--r--searchlib/src/tests/attribute/attributeguard_test.sh7
-rw-r--r--searchlib/src/tests/attribute/attributemanager/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/attributemanager/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp422
-rw-r--r--searchlib/src/tests/attribute/attributesearcher.h265
-rw-r--r--searchlib/src/tests/attribute/attributeupdater.h299
-rw-r--r--searchlib/src/tests/attribute/benchmarkplotter.rb134
-rw-r--r--searchlib/src/tests/attribute/bitvector/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/bitvector/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/bitvector/bitvector_test.cpp632
-rw-r--r--searchlib/src/tests/attribute/changevector_test.cpp92
-rw-r--r--searchlib/src/tests/attribute/changevector_test.sh7
-rw-r--r--searchlib/src/tests/attribute/comparator/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/comparator/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/comparator/DESC1
-rw-r--r--searchlib/src/tests/attribute/comparator/FILES1
-rw-r--r--searchlib/src/tests/attribute/comparator/comparator_test.cpp169
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/FILES1
-rw-r--r--searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp189
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/.gitignore127
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp944
-rw-r--r--searchlib/src/tests/attribute/enumstore/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/enumstore/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/enumstore/DESC1
-rw-r--r--searchlib/src/tests/attribute/enumstore/FILES1
-rw-r--r--searchlib/src/tests/attribute/enumstore/enumstore_test.cpp879
-rw-r--r--searchlib/src/tests/attribute/extendattributes/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/extendattributes/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/extendattributes/DESC1
-rw-r--r--searchlib/src/tests/attribute/extendattributes/FILES1
-rw-r--r--searchlib/src/tests/attribute/extendattributes/extendattribute.cpp176
-rwxr-xr-xsearchlib/src/tests/attribute/extendattributes/extendattribute_test.sh3
-rw-r--r--searchlib/src/tests/attribute/gidmapattribute/.gitignore0
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/DESC1
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/FILES1
-rw-r--r--searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp836
-rw-r--r--searchlib/src/tests/attribute/postinglist/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/postinglist/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/postinglist/DESC1
-rw-r--r--searchlib/src/tests/attribute/postinglist/FILES1
-rw-r--r--searchlib/src/tests/attribute/postinglist/postinglist.cpp707
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/FILES1
-rw-r--r--searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp1021
-rwxr-xr-xsearchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh5
-rw-r--r--searchlib/src/tests/attribute/runnable.h43
-rw-r--r--searchlib/src/tests/attribute/searchable/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/searchable/CMakeLists.txt22
-rw-r--r--searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp689
-rwxr-xr-xsearchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh4
-rw-r--r--searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp231
-rw-r--r--searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp240
-rw-r--r--searchlib/src/tests/attribute/searchcontext/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/searchcontext/CMakeLists.txt9
-rw-r--r--searchlib/src/tests/attribute/searchcontext/DESC1
-rw-r--r--searchlib/src/tests/attribute/searchcontext/FILES1
-rw-r--r--searchlib/src/tests/attribute/searchcontext/searchcontext.cpp1900
-rwxr-xr-xsearchlib/src/tests/attribute/searchcontext/searchcontext_test.sh5
-rw-r--r--searchlib/src/tests/attribute/sourceselector/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/sourceselector/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/sourceselector/DESC1
-rw-r--r--searchlib/src/tests/attribute/sourceselector/FILES1
-rw-r--r--searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp216
-rw-r--r--searchlib/src/tests/attribute/stringattribute/.gitignore4
-rw-r--r--searchlib/src/tests/attribute/stringattribute/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/stringattribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/stringattribute/FILES1
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp453
-rwxr-xr-xsearchlib/src/tests/attribute/stringattribute/stringattribute_test.sh3
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/.gitignore1
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt8
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/DESC1
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/FILES1
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp217
-rw-r--r--searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh3
94 files changed, 14230 insertions, 0 deletions
diff --git a/searchlib/src/tests/attribute/.gitignore b/searchlib/src/tests/attribute/.gitignore
new file mode 100644
index 00000000000..732912ab981
--- /dev/null
+++ b/searchlib/src/tests/attribute/.gitignore
@@ -0,0 +1,11 @@
+*.dat
+*.idx
+*.weight
+.depend
+Makefile
+attribute_test
+attributebenchmark
+searchlib_attribute_test_app
+searchlib_attributeguard_test_app
+searchlib_changevector_test_app
+searchlib_attributebenchmark_app
diff --git a/searchlib/src/tests/attribute/CMakeLists.txt b/searchlib/src/tests/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..0598b5776a8
--- /dev/null
+++ b/searchlib/src/tests/attribute/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributeguard_test_app
+ SOURCES
+ attributeguard.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributeguard_test_app COMMAND sh attributeguard_test.sh)
+vespa_add_executable(searchlib_attribute_test_app
+ SOURCES
+ attribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_test_app COMMAND sh attribute_test.sh)
+vespa_add_executable(searchlib_changevector_test_app
+ SOURCES
+ changevector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_changevector_test_app COMMAND sh changevector_test.sh)
+vespa_add_executable(searchlib_attributebenchmark_app
+ SOURCES
+ attributebenchmark.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributebenchmark_app COMMAND searchlib_attributebenchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/attribute/DESC b/searchlib/src/tests/attribute/DESC
new file mode 100644
index 00000000000..6a9215b1a3b
--- /dev/null
+++ b/searchlib/src/tests/attribute/DESC
@@ -0,0 +1 @@
+Unit tests for attribute use.
diff --git a/searchlib/src/tests/attribute/FILES b/searchlib/src/tests/attribute/FILES
new file mode 100644
index 00000000000..b742644b750
--- /dev/null
+++ b/searchlib/src/tests/attribute/FILES
@@ -0,0 +1,2 @@
+attribute_test.cpp
+attributebenchmark.cpp
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
new file mode 100644
index 00000000000..b1d4e675e23
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -0,0 +1,2200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_test");
+
+
+using namespace document;
+using std::shared_ptr;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using search::attribute::BasicType;
+using search::attribute::IAttributeVector;
+
+namespace
+{
+
+
+vespalib::string empty; // default-constructed string; expectZero() for strings compares against it
+vespalib::string clstmp("clstmp"); // NOTE(review): not referenced in the visible part of this file; presumably a scratch name - confirm
+vespalib::string asuDir("asutmp"); // NOTE(review): not referenced in the visible part of this file; presumably a scratch directory name - confirm
+
+// Returns true when the basic type is one of UINT1, UINT2 or UINT4,
+// and false for every other basic type.
+bool
+isUnsignedSmallIntAttribute(const BasicType::Type &type)
+{
+    return (type == BasicType::UINT1) ||
+           (type == BasicType::UINT2) ||
+           (type == BasicType::UINT4);
+}
+
+// Convenience overload: classifies an attribute vector by its basic type.
+bool
+isUnsignedSmallIntAttribute(const AttributeVector &a)
+{
+    return isUnsignedSmallIntAttribute(a.getBasicType());
+}
+
+template <typename BufferType> // generic case: the value is expected to compare equal to 0
+void
+expectZero(const BufferType &b)
+{
+ EXPECT_EQUAL(0, b);
+}
+
+template <> // string specialization: "zero" means equal to the file-level `empty` string
+void
+expectZero(const vespalib::string &b)
+{
+ EXPECT_EQUAL(empty, b);
+}
+
+// Returns the size of the named file as reported by FastOS_File::Stat.
+// A failing stat is recorded through EXPECT_TRUE and yields a size of 0.
+uint64_t
+statSize(const vespalib::string &fileName)
+{
+    FastOS_StatInfo statInfo;
+    const bool statOk = EXPECT_TRUE(FastOS_File::Stat(fileName.c_str(), &statInfo));
+    if (!statOk) {
+        return 0u;
+    }
+    return statInfo._size;
+}
+
+// Sums the sizes of the files belonging to a saved attribute vector:
+// always "<base>.dat", plus ".idx" for multi-value attributes, ".weight"
+// for weighted sets and ".udat" when the attribute has an enum store and
+// uses enumerated save.
+uint64_t
+statSize(const AttributeVector &a)
+{
+    const vespalib::string base = a.getBaseFileName();
+    // Size of one file that shares the attribute's base file name.
+    auto sizeOf = [&base](const char *suffix) {
+        return statSize(base + suffix);
+    };
+
+    uint64_t total = sizeOf(".dat");
+    if (a.hasMultiValue()) {
+        total += sizeOf(".idx");
+    }
+    if (a.hasWeightedSetType()) {
+        total += sizeOf(".weight");
+    }
+    if (a.hasEnum() && a.getEnumeratedSave()) {
+        total += sizeOf(".udat");
+    }
+    return total;
+}
+
+
+// Tells whether the estimated save size of `a` is expected to match the
+// on-disk size exactly. Only string attributes saved without enumerated
+// save are treated as imprecise. For string attributes the presence of an
+// enum store is checked with EXPECT_TRUE along the way.
+bool
+preciseEstimatedSize(const AttributeVector &a)
+{
+    if (!(a.getBasicType() == BasicType::STRING)) {
+        return true;
+    }
+    const bool hasEnum = EXPECT_TRUE(a.hasEnum());
+    if (!hasEnum) {
+        return true;
+    }
+    // Without enumerated save the estimate uses the average of string
+    // lengths and can be somewhat off.
+    return a.getEnumeratedSave();
+}
+
+}
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::Config;
+
+// Test application for attribute vectors. The individual test cases are
+// private member functions; Main() is the public entry point.
+class AttributeTest : public vespalib::TestApp
+{
+private:
+    using AttributePtr = AttributeVector::SP;
+
+    // Shared helpers for building and comparing attribute vectors.
+    void addDocs(const AttributePtr & v, size_t sz);
+    template <typename VectorType>
+    void populate(VectorType & ptr, unsigned seed);
+    template <typename VectorType, typename BufferType>
+    void compare(VectorType & a, VectorType & b);
+    void commit(const AttributePtr & ptr);
+
+    // Save / load round trips.
+    void testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs);
+    void testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs);
+    template <typename VectorType, typename BufferType>
+    void testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c);
+    void testReload();
+    void testHasLoadData();
+
+    // Saving through a memory save target.
+    void testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs);
+    void testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs);
+    template <typename VectorType, typename BufferType>
+    void testMemorySaver(const AttributePtr & a, const AttributePtr & b);
+    void testMemorySaver();
+
+    // Value generation and content checks.
+    template <typename T>
+    void fillNumeric(std::vector<T> & values, uint32_t numValues);
+    void fillString(std::vector<vespalib::string> & values, uint32_t numValues);
+    template <typename VectorType, typename BufferType>
+    bool appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount,
+                        const std::vector<BufferType> & values);
+    template <typename BufferType>
+    bool checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                    uint32_t numValues, const BufferType & value);
+    template <typename BufferType>
+    bool checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                      uint32_t range, const std::vector<BufferType> & values);
+
+    // CollectionType::SINGLE
+    template <typename VectorType, typename BufferType, typename BaseType>
+    void testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values);
+    void testSingle();
+
+    // CollectionType::ARRAY
+    template <typename BufferType>
+    void printArray(const AttributePtr & ptr);
+    template <typename VectorType, typename BufferType>
+    void testArray(const AttributePtr & ptr, const std::vector<BufferType> & values);
+    void testArray();
+
+    // CollectionType::WSET
+    template <typename BufferType>
+    void printWeightedSet(const AttributePtr & ptr);
+    template <typename VectorType, typename BufferType>
+    void testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values);
+    void testWeightedSet();
+
+    // Base file name parsing.
+    void testBaseName();
+
+    // Value updates.
+    template <typename VectorType, typename BufferType>
+    void testArithmeticValueUpdate(const AttributePtr & ptr);
+    void testArithmeticValueUpdate();
+
+    template <typename VectorType, typename BaseType, typename BufferType>
+    void testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after);
+    void testArithmeticWithUndefinedValue();
+
+    template <typename VectorType, typename BufferType>
+    void testMapValueUpdate(const AttributePtr & ptr, BufferType initValue,
+                            const FieldValue & initFieldValue, const FieldValue & nonExistant,
+                            bool removeIfZero, bool createIfNonExistant);
+    void testMapValueUpdate();
+
+    // Status, null handling, generations and serial numbers.
+    void testStatus();
+    void testNullProtection();
+    void testGeneration(const AttributePtr & attr, bool exactStatus);
+    void testGeneration();
+    void testCreateSerialNum();
+
+    // Lid space compaction.
+    template <typename VectorType, typename BufferType>
+    void testCompactLidSpace(const Config &config, bool fs, bool es);
+    template <typename VectorType, typename BufferType>
+    void testCompactLidSpace(const Config &config);
+    void testCompactLidSpace(const Config &config);
+    void testCompactLidSpace();
+
+    // Address space usage reporting.
+    template <typename AttributeType>
+    void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch);
+    template <typename AttributeType>
+    void requireThatAddressSpaceUsageIsReported(const Config &config);
+    void requireThatAddressSpaceUsageIsReported();
+
+public:
+    AttributeTest() { }
+    int Main();
+};
+
+// Verifies how AttributeVector::BaseName splits a base file name into its
+// attribute name, snapshot name, index name and directory name, for a bare
+// name and for relative and absolute paths of increasing depth.
+void AttributeTest::testBaseName()
+{
+    // Checks all four components of `name`; "" means the component is
+    // expected to be empty.
+    auto expectParts = [](AttributeVector::BaseName &name,
+                          const char *attributeName,
+                          const char *snapshotName,
+                          const char *indexName,
+                          const char *dirName) {
+        EXPECT_EQUAL(name.getAttributeName(), attributeName);
+        EXPECT_EQUAL(name.getSnapshotName(), snapshotName);
+        EXPECT_EQUAL(name.getIndexName(), indexName);
+        EXPECT_EQUAL(name.getDirName(), dirName);
+    };
+
+    AttributeVector::BaseName v("attr1");
+    expectParts(v, "attr1", "", "", "");
+
+    v = "attribute/attr1/attr1";
+    expectParts(v, "attr1", "", "", "attribute/attr1");
+
+    v = "attribute/attr1/snapshot-X/attr1";
+    expectParts(v, "attr1", "snapshot-X", "", "attribute/attr1/snapshot-X");
+
+    v = "/attribute/attr1/snapshot-X/attr1";
+    expectParts(v, "attr1", "snapshot-X", "", "/attribute/attr1/snapshot-X");
+
+    v = "index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+    expectParts(v, "attr1", "snapshot-X", "index.1",
+                "index.1/1.ready/attribute/attr1/snapshot-X");
+
+    v = "/index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+    expectParts(v, "attr1", "snapshot-X", "index.1",
+                "/index.1/1.ready/attribute/attr1/snapshot-X");
+
+    v = "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+    expectParts(v, "attr1", "snapshot-X", "index.1",
+                "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X");
+}
+
+// Adds `sz` documents to `v`, checks the last assigned document id and the
+// resulting document count, then commits. Does nothing when `sz` is 0.
+// The id and count checks assume `v` held no documents before the call.
+void AttributeTest::addDocs(const AttributePtr & v, size_t sz)
+{
+    if (sz == 0) {
+        return;
+    }
+    // Initialised so that a failing addDoc() that does not assign the id
+    // cannot leave the check below reading an indeterminate value.
+    AttributeVector::DocId docId = 0;
+    for (size_t i = 0; i < sz; ++i) {
+        EXPECT_TRUE( v->addDoc(docId) );
+    }
+    EXPECT_TRUE( docId+1 == sz );
+    EXPECT_TRUE( v->getNumDocs() == sz );
+    commit(v);
+}
+
+
+// Refills every document of an integer attribute with values drawn from
+// rand() after seeding with `seed`, then commits. Multi-value attributes
+// get doc + 1 values for document `doc`; weighted sets additionally draw
+// one weight in [-128, 127] per document.
+template <>
+void AttributeTest::populate(IntegerAttribute & v, unsigned seed)
+{
+    srand(seed);
+    int weight = 1; // only re-drawn for weighted sets
+    const size_t numDocs = v.getNumDocs();
+    for (size_t doc = 0; doc < numDocs; ++doc) {
+        v.clearDoc(doc);
+        if (!v.hasMultiValue()) {
+            EXPECT_TRUE( v.update(doc, rand()) );
+            continue;
+        }
+        if (v.hasWeightedSetType()) {
+            weight = (rand() % 256) - 128;
+        }
+        for (size_t n = 0; n <= doc; ++n) {
+            EXPECT_TRUE( v.append(doc, rand(), weight) );
+        }
+    }
+    v.commit();
+}
+
+// Refills every document of a floating point attribute with values
+// rand() * 1.25 after seeding with `seed`, then commits. Multi-value
+// attributes get doc + 1 values for document `doc`; weighted sets
+// additionally draw one weight in [-128, 127] per document.
+template <>
+void AttributeTest::populate(FloatingPointAttribute & v, unsigned seed)
+{
+    srand(seed);
+    int weight = 1; // only re-drawn for weighted sets
+    const size_t numDocs = v.getNumDocs();
+    for (size_t doc = 0; doc < numDocs; ++doc) {
+        v.clearDoc(doc);
+        if (!v.hasMultiValue()) {
+            EXPECT_TRUE( v.update(doc, rand() * 1.25) );
+            continue;
+        }
+        if (v.hasWeightedSetType()) {
+            weight = (rand() % 256) - 128;
+        }
+        for (size_t n = 0; n <= doc; ++n) {
+            EXPECT_TRUE( v.append(doc, rand() * 1.25, weight) );
+        }
+    }
+    v.commit();
+}
+
+// Refills every document of a string attribute with strings produced by
+// RandomGenerator::getRandomString(2, 50), using a generator constructed
+// from `seed`, then commits. Multi-value attributes get doc + 1 values for
+// document `doc`; weighted sets additionally draw one weight per document
+// as rnd.rand(0, 256) - 128.
+template <>
+void AttributeTest::populate(StringAttribute & v, unsigned seed)
+{
+    RandomGenerator rnd(seed);
+    int weight = 1; // only re-drawn for weighted sets
+    const size_t numDocs = v.getNumDocs();
+    for (size_t doc = 0; doc < numDocs; ++doc) {
+        v.clearDoc(doc);
+        if (!v.hasMultiValue()) {
+            EXPECT_TRUE( v.update(doc, rnd.getRandomString(2, 50)) );
+            continue;
+        }
+        if (v.hasWeightedSetType()) {
+            weight = rnd.rand(0, 256) - 128;
+        }
+        for (size_t n = 0; n <= doc; ++n) {
+            EXPECT_TRUE( v.append(doc, rnd.getRandomString(2, 50), weight) );
+        }
+    }
+    v.commit();
+}
+
+// Checks that two attribute vectors hold the same content: same number of
+// documents, and for every document the same value count and pairwise
+// equal values (compared with operator== on BufferType).
+//
+// The value buffers are owned by std::unique_ptr so they are released on
+// every exit path, including when one of the ASSERT_TRUE checks leaves the
+// function early.
+template <typename VectorType, typename BufferType>
+void AttributeTest::compare(VectorType & a, VectorType & b)
+{
+    EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs());
+    ASSERT_TRUE(a.getNumDocs() == b.getNumDocs());
+    // Buffers are sized by the largest value count of each vector so one
+    // allocation serves every document.
+    uint32_t asz(a.getMaxValueCount());
+    uint32_t bsz(b.getMaxValueCount());
+    std::unique_ptr<BufferType[]> av(new BufferType[asz]);
+    std::unique_ptr<BufferType[]> bv(new BufferType[bsz]);
+
+    for (size_t i(0), m(a.getNumDocs()); i < m; i++) {
+        ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i)));
+        ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i)));
+        EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i));
+        ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i));
+        // Read through the AttributeVector base interface.
+        EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av.get(), asz), static_cast<uint32_t>(a.getValueCount(i)));
+        EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv.get(), bsz), static_cast<uint32_t>(b.getValueCount(i)));
+        for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); j < k; j++) {
+            EXPECT_TRUE(av[j] == bv[j]);
+        }
+    }
+}
+
+// Adds `numDocs` documents to `a` and `b`, fills both with the same
+// seed-17 integer content and runs the save/load round trip, using
+// WeightedInt buffers for weighted sets and largeint_t buffers otherwise.
+void AttributeTest::testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs)
+{
+    addDocs(a, numDocs);
+    addDocs(b, numDocs);
+
+    IntegerAttribute &first = static_cast<IntegerAttribute &>(*a);
+    IntegerAttribute &second = static_cast<IntegerAttribute &>(*b);
+    populate(first, 17);
+    populate(second, 17);
+
+    if (!a->hasWeightedSetType()) {
+        testReload<IntegerAttribute, IntegerAttribute::largeint_t>(a, b, c);
+        return;
+    }
+    testReload<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b, c);
+}
+
+
+// Adds `numDocs` documents to `a` and `b`, fills both with the same
+// seed-17 string content and runs the save/load round trip, using
+// WeightedString buffers for weighted sets and vespalib::string otherwise.
+void AttributeTest::testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs)
+{
+    addDocs(a, numDocs);
+    addDocs(b, numDocs);
+
+    StringAttribute &first = static_cast<StringAttribute &>(*a);
+    StringAttribute &second = static_cast<StringAttribute &>(*b);
+    populate(first, 17);
+    populate(second, 17);
+
+    if (!a->hasWeightedSetType()) {
+        testReload<StringAttribute, vespalib::string>(a, b, c);
+        return;
+    }
+    testReload<StringAttribute, StringAttribute::WeightedString>(a, b, c);
+}
+
+// Save/load round trip for one attribute configuration.
+//
+// `a` and `b` must already hold equal content. `a` is saved under the base
+// file names of `b` and `c`, both are loaded and compared against `a`, and
+// the saved size is checked against a's estimated save size (exactly, or
+// within 30% when the estimate is not precise). Except for the unsigned
+// small int types, `b` and `c` are then refilled in memory, and c's file
+// is copied record by record onto b's file - once without and once with
+// direct I/O enabled - after which `b` must load content equal to `a`.
+template <typename VectorType, typename BufferType>
+void AttributeTest::testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c)
+{
+    LOG(info, "testReload: vector '%s'", a->getName().c_str());
+
+    // Typed views of the three vectors, used for populate() and compare().
+    VectorType &va = static_cast<VectorType &>(*a);
+    VectorType &vb = static_cast<VectorType &>(*b);
+    VectorType &vc = static_cast<VectorType &>(*c);
+
+    compare<VectorType, BufferType>(va, vb);
+    a->setCreateSerialNum(43u);
+
+    // Save as b and check the size on disk against the estimate.
+    EXPECT_TRUE( a->saveAs(b->getBaseFileName()) );
+    if (preciseEstimatedSize(*a)) {
+        EXPECT_EQUAL(statSize(*b), a->getEstimatedSaveByteSize());
+    } else {
+        double estSize = a->getEstimatedSaveByteSize();
+        double actSize = statSize(*b);
+        EXPECT_LESS_EQUAL(actSize * 1.0, estSize * 1.3);
+        EXPECT_GREATER_EQUAL(actSize * 1.0, estSize * 0.7);
+    }
+
+    // Save as c; only the precise case is checked here.
+    EXPECT_TRUE( a->saveAs(c->getBaseFileName()) );
+    if (preciseEstimatedSize(*a)) {
+        EXPECT_EQUAL(statSize(*c), a->getEstimatedSaveByteSize());
+    }
+
+    // Both copies must load back to a's content, and b must carry the
+    // serial number set before saving.
+    EXPECT_TRUE( b->load() );
+    EXPECT_EQUAL(43u, b->getCreateSerialNum());
+    compare<VectorType, BufferType>(va, vb);
+    EXPECT_TRUE( c->load() );
+    compare<VectorType, BufferType>(va, vc);
+
+    if (isUnsignedSmallIntAttribute(*a)) {
+        return;
+    }
+
+    // Refill b and c in memory with identical seed-700 content.
+    populate(vb, 700);
+    populate(vc, 700);
+    compare<VectorType, BufferType>(vb, vc);
+
+    // Pass 0 copies the records without direct I/O, pass 1 with it.
+    for (int pass = 0; pass < 2; ++pass) {
+        const bool directIO = (pass == 1);
+        {
+            // Scoped so both files are destroyed before b is loaded.
+            ReadAttributeFile readC(c->getBaseFileName(), c->getConfig());
+            WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(),
+                                      DummyFileHeaderContext(),
+                                      c->getNumDocs());
+            if (directIO) {
+                readC.enableDirectIO();
+                writeC.enableDirectIO();
+            }
+            std::unique_ptr<AttributeFile::Record> record(readC.getRecord());
+            ASSERT_TRUE(record.get());
+            for (size_t i(0), m(c->getNumDocs()); i < m; i++) {
+                EXPECT_TRUE(readC.read(*record));
+                EXPECT_TRUE(writeC.write(*record));
+            }
+            // Exactly getNumDocs() records are expected; one more read must fail.
+            EXPECT_TRUE( ! readC.read(*record));
+        }
+        EXPECT_TRUE( b->load() );
+        compare<VectorType, BufferType>(va, vb);
+    }
+}
+
+
+// Runs the save/load round trip for integer and string attributes across
+// the SINGLE, ARRAY and WSET collection types, with and without fast
+// search. Each case creates three attributes named "<prefix>_1",
+// "<prefix>_2" and "<prefix>_3" and runs the round trip first with 0 and
+// then with 100 documents.
+void AttributeTest::testReload()
+{
+    using ReloadFn = void (AttributeTest::*)(const AttributePtr &, const AttributePtr &,
+                                             const AttributePtr &, size_t);
+    const ReloadFn reloadInt = &AttributeTest::testReloadInt;
+    const ReloadFn reloadString = &AttributeTest::testReloadString;
+
+    // One test case: three attributes sharing a config, reloaded twice.
+    auto runCase = [this](const char *prefix, const Config &cfg, ReloadFn reload) {
+        const vespalib::string base(prefix);
+        AttributePtr iv1 = AttributeFactory::createAttribute(base + "_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute(base + "_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute(base + "_3", cfg);
+        (this->*reload)(iv1, iv2, iv3, 0);
+        (this->*reload)(iv1, iv2, iv3, 100);
+    };
+    // Returns a copy of `cfg` with fast search switched on.
+    auto fastSearch = [](Config cfg) -> Config {
+        cfg.setFastSearch(true);
+        return cfg;
+    };
+
+    // IntegerAttribute
+    // CollectionType::SINGLE
+    runCase("sint32", Config(BasicType::INT32, CollectionType::SINGLE), reloadInt);
+    runCase("suint4", Config(BasicType::UINT4, CollectionType::SINGLE), reloadInt);
+    runCase("suint2", Config(BasicType::UINT2, CollectionType::SINGLE), reloadInt);
+    runCase("suint1", Config(BasicType::UINT1, CollectionType::SINGLE), reloadInt);
+    runCase("sfsint32", fastSearch(Config(BasicType::INT32, CollectionType::SINGLE)), reloadInt);
+    // CollectionType::ARRAY
+    runCase("flag", fastSearch(Config(BasicType::INT8, CollectionType::ARRAY)), reloadInt);
+    runCase("aint32", Config(BasicType::INT32, CollectionType::ARRAY), reloadInt);
+    runCase("afsint32", fastSearch(Config(BasicType::INT32, CollectionType::ARRAY)), reloadInt);
+    // CollectionType::WSET
+    runCase("wint32", Config(BasicType::INT32, CollectionType::WSET), reloadInt);
+    runCase("wfsint32", fastSearch(Config(BasicType::INT32, CollectionType::WSET)), reloadInt);
+
+    // StringAttribute
+    runCase("sstring", Config(BasicType::STRING, CollectionType::SINGLE), reloadString);
+    runCase("astring", Config(BasicType::STRING, CollectionType::ARRAY), reloadString);
+    runCase("wstring", Config(BasicType::STRING, CollectionType::WSET), reloadString);
+    runCase("sfsstring", fastSearch(Config(BasicType::STRING, CollectionType::SINGLE)), reloadString);
+    runCase("afsstring", fastSearch(Config(BasicType::STRING, CollectionType::ARRAY)), reloadString);
+    runCase("wsfsstring", fastSearch(Config(BasicType::STRING, CollectionType::WSET)), reloadString);
+}
+
+// Checks hasLoadData() for single value, array and weighted set int32
+// attributes. Each case also saves under the name the next case starts
+// with, and the next case (with a different collection type) still
+// expects hasLoadData() to be false for that name.
+void AttributeTest::testHasLoadData()
+{
+    // One case: a fresh attribute has no load data until it is saved; an
+    // attribute created under a name that was just saved with the same
+    // config has load data. When `leftoverName` is non-null the second
+    // attribute is finally saved under that name as well.
+    auto checkCase = [](const Config &cfg,
+                        const char *firstName,
+                        const char *secondName,
+                        const char *leftoverName) {
+        AttributePtr av = AttributeFactory::createAttribute(firstName, cfg);
+        EXPECT_TRUE(!av->hasLoadData());
+        av->save();
+        EXPECT_TRUE(av->hasLoadData());
+        av->saveAs(secondName);
+        av = AttributeFactory::createAttribute(secondName, cfg);
+        EXPECT_TRUE(av->hasLoadData());
+        if (leftoverName != nullptr) {
+            av->saveAs(leftoverName);
+        }
+    };
+
+    // single value
+    checkCase(Config(BasicType::INT32), "loaddata1", "loaddata2", "loaddata3");
+    // array
+    checkCase(Config(BasicType::INT32, CollectionType::ARRAY), "loaddata3", "loaddata4", "loaddata5");
+    // wset
+    checkCase(Config(BasicType::INT32, CollectionType::WSET), "loaddata5", "loaddata6", nullptr);
+}
+
+ // Adds numDocs documents to integer attribute 'a', populates it, and runs the
+ // memory-saver round trip into 'b' with the buffer type that matches the
+ // collection type of 'a' (weighted set vs. everything else).
+ void
+ AttributeTest::testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs)
+ {
+     addDocs(a, numDocs);
+     IntegerAttribute & source = static_cast<IntegerAttribute &>(*a.get());
+     populate(source, 21);
+     if (!a->hasWeightedSetType()) {
+         testMemorySaver<IntegerAttribute, IntegerAttribute::largeint_t>(a, b);
+         return;
+     }
+     testMemorySaver<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b);
+ }
+
+ // Adds numDocs documents to string attribute 'a', populates it, and runs the
+ // memory-saver round trip into 'b' with the buffer type that matches the
+ // collection type of 'a' (weighted set vs. everything else).
+ void
+ AttributeTest::testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs)
+ {
+     addDocs(a, numDocs);
+     StringAttribute & source = static_cast<StringAttribute &>(*a.get());
+     populate(source, 21);
+     if (!a->hasWeightedSetType()) {
+         testMemorySaver<StringAttribute, vespalib::string>(a, b);
+         return;
+     }
+     testMemorySaver<StringAttribute, StringAttribute::WeightedString>(a, b);
+ }
+
+ // Saves attribute 'a' under the base file name of 'b' through an in-memory
+ // save target, checks that the .dat file only shows up once the target is
+ // written to file, then loads 'b' and compares its content against 'a'.
+ template <typename VectorType, typename BufferType>
+ void
+ AttributeTest::testMemorySaver(const AttributePtr & a, const AttributePtr & b)
+ {
+     LOG(info, "testMemorySaver: vector '%s'", a->getName().c_str());
+
+     AttributeMemorySaveTarget memTarget;
+     EXPECT_TRUE(a->saveAs(b->getBaseFileName(), memTarget));
+
+     const vespalib::string datPath = vespalib::make_string("%s.dat", b->getBaseFileName().c_str());
+     FastOS_StatInfo info;
+     // Saving to the memory target must not have produced a file yet.
+     EXPECT_TRUE(!FastOS_File::Stat(datPath.c_str(), &info));
+     EXPECT_TRUE(memTarget.writeToFile(TuneFileAttributes(), DummyFileHeaderContext()));
+     EXPECT_TRUE(FastOS_File::Stat(datPath.c_str(), &info));
+
+     EXPECT_TRUE(b->load());
+     VectorType & saved = *static_cast<VectorType *>(a.get());
+     VectorType & loaded = *static_cast<VectorType *>(b.get());
+     compare<VectorType, BufferType>(saved, loaded);
+ }
+
+ // Runs the memory-saver round trip for int32, uint4 and string attributes
+ // across the single, array and weighted set collection types, 100 documents
+ // each.  The "_1ms" attribute is the source, the "_2ms" one the load target.
+ void
+ AttributeTest::testMemorySaver()
+ {
+     // CollectionType::SINGLE
+     {
+         const Config cfg(BasicType::INT32, CollectionType::SINGLE);
+         AttributePtr src = AttributeFactory::createAttribute("sint32_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("sint32_2ms", cfg);
+         testMemorySaverInt(src, dst, 100);
+     }
+     {
+         const Config cfg(BasicType::UINT4, CollectionType::SINGLE);
+         AttributePtr src = AttributeFactory::createAttribute("suint4_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("suint4_2ms", cfg);
+         testMemorySaverInt(src, dst, 100);
+     }
+     {
+         const Config cfg(BasicType::STRING, CollectionType::SINGLE);
+         AttributePtr src = AttributeFactory::createAttribute("sstr_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("sstr_2ms", cfg);
+         testMemorySaverString(src, dst, 100);
+     }
+     // CollectionType::ARRAY
+     {
+         const Config cfg(BasicType::INT32, CollectionType::ARRAY);
+         AttributePtr src = AttributeFactory::createAttribute("aint32_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("aint32_2ms", cfg);
+         testMemorySaverInt(src, dst, 100);
+     }
+     {
+         const Config cfg(BasicType::STRING, CollectionType::ARRAY);
+         AttributePtr src = AttributeFactory::createAttribute("astr_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("astr_2ms", cfg);
+         testMemorySaverString(src, dst, 100);
+     }
+     // CollectionType::WSET
+     {
+         const Config cfg(BasicType::INT32, CollectionType::WSET);
+         AttributePtr src = AttributeFactory::createAttribute("wint32_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("wint32_2ms", cfg);
+         testMemorySaverInt(src, dst, 100);
+     }
+     {
+         const Config cfg(BasicType::STRING, CollectionType::WSET);
+         AttributePtr src = AttributeFactory::createAttribute("wstr_1ms", cfg);
+         AttributePtr dst = AttributeFactory::createAttribute("wstr_2ms", cfg);
+         testMemorySaverString(src, dst, 100);
+     }
+ }
+
+
+ // Replaces the content of 'values' with 0, 1, ..., numValues - 1, each
+ // converted to T.
+ template <typename T>
+ void
+ AttributeTest::fillNumeric(std::vector<T> & values, uint32_t numValues)
+ {
+     values.clear();
+     values.reserve(numValues);
+     uint32_t next = 0;
+     while (next != numValues) {
+         values.push_back(static_cast<T>(next));
+         ++next;
+     }
+ }
+
+ // Replaces the content of 'values' with "string00", "string01", ...; a
+ // single leading zero is inserted for indexes below 10 only.
+ void
+ AttributeTest::fillString(std::vector<vespalib::string> & values, uint32_t numValues)
+ {
+     values.clear();
+     values.reserve(numValues);
+     for (uint32_t idx = 0; idx != numValues; ++idx) {
+         vespalib::asciistream name;
+         name << "string";
+         if (idx < 10) {
+             name << "0";
+         }
+         name << idx;
+         values.push_back(name.str());
+     }
+ }
+
+ // Appends values[0 .. valueCount) to document 'doc' with weight 1.
+ // After the first failed append no further appends are attempted, but the
+ // expectation is still evaluated (and fails) for every remaining value.
+ // Returns true only if every append succeeded.
+ template <typename VectorType, typename BufferType>
+ bool
+ AttributeTest::appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount,
+                               const std::vector<BufferType> & values)
+ {
+     bool allAppended = true;
+     for (uint32_t idx = 0; idx < valueCount; ++idx) {
+         if (allAppended) {
+             allAppended = v.append(doc, values[idx], 1);
+         }
+         EXPECT_TRUE(allAppended);
+     }
+     return allAppended;
+ }
+
+ // Checks that document 'doc' holds exactly 'valueCount' values and that
+ // 'value' occurs exactly 'numValues' times among them.
+ // Returns false as soon as one of the three expectations fails.
+ template <typename BufferType>
+ bool
+ AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                           uint32_t numValues, const BufferType & value)
+ {
+     std::vector<BufferType> buffer(valueCount);
+     if (!EXPECT_EQUAL(valueCount, ptr->getValueCount(doc))) return false;
+     // data() instead of &buffer[0]: callers pass valueCount == 0 (empty
+     // documents), and indexing an empty vector is undefined behaviour.
+     if (!EXPECT_EQUAL(valueCount, ptr->get(doc, buffer.data(), buffer.size()))) return false;
+     if (!EXPECT_EQUAL(numValues, static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), value)))) return false;
+     return true;
+ }
+
+ // Checks that document 'doc' holds exactly 'valueCount' values and that
+ // value number i equals values[i % range], i.e. the first 'range' entries of
+ // 'values' repeated in order.
+ // Every expectation is evaluated, but once one has failed the remaining
+ // checks are short-circuited and reported as failures too.
+ // 'range' is only used when valueCount > 0 and must then be non-zero.
+ template <typename BufferType>
+ bool
+ AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                             uint32_t range, const std::vector<BufferType> & values)
+ {
+     std::vector<BufferType> buffer(valueCount);
+     bool retval = true;
+     EXPECT_TRUE((retval = retval && (static_cast<uint32_t>(ptr->getValueCount(doc)) == valueCount)));
+     // data() instead of &buffer[0]: valueCount is 0 for empty documents, and
+     // indexing an empty vector is undefined behaviour.
+     EXPECT_TRUE((retval = retval && (ptr->get(doc, buffer.data(), buffer.size()) == valueCount)));
+     for (uint32_t i = 0; i < valueCount; ++i) {
+         EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range])));
+     }
+     return retval;
+ }
+
+
+//-----------------------------------------------------------------------------
+// CollectionType::SINGLE
+//-----------------------------------------------------------------------------
+
+ // Exercises update(), append(), remove() and clearDoc() on a single-value
+ // attribute that already contains its documents.
+ //   VectorType - concrete attribute class 'ptr' is cast to
+ //   BufferType - type used when reading values back with get()
+ //   BaseType   - type passed to attribute::isUndefined<>() for cleared docs
+ // 'values' supplies the values to cycle through and must not be empty.
+ template <typename VectorType, typename BufferType, typename BaseType>
+ void
+ AttributeTest::testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values)
+ {
+     LOG(info, "testSingle: vector '%s' with %u documents and %lu values",
+         ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size()));
+
+     VectorType & v = *(static_cast<VectorType *>(ptr.get()));
+     uint32_t numUniques = values.size();
+     // values[0] and 'doc % numUniques' below both need at least one value.
+     ASSERT_TRUE(numUniques > 0);
+     std::vector<BufferType> buffer(1);
+
+     // test update()
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         EXPECT_TRUE(ptr->getValueCount(doc) == 1);
+         uint32_t i = doc % numUniques;
+         uint32_t j = (doc + 1) % numUniques;
+
+         EXPECT_TRUE(v.update(doc, values[i]));
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i]));
+
+         // a second update replaces the first value
+         EXPECT_TRUE(v.update(doc, values[j]));
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[j]));
+     }
+     // updating the first out-of-range docid is expected to fail
+     EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0]));
+
+     // test append(): expected to fail for every document of a single-value attribute
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         EXPECT_TRUE(!v.append(doc, values[0], 1));
+     }
+     EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1));
+
+     // test remove(): expected to fail for every document of a single-value attribute
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         EXPECT_TRUE(!v.remove(doc, values[0], 1));
+     }
+     EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1));
+
+     bool smallUInt = isUnsignedSmallIntAttribute(*ptr);
+     // test clearDoc()
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         uint32_t i = (doc + 2) % numUniques;
+
+         EXPECT_TRUE(v.update(doc, values[i]));
+         if (doc % 2 == 0) { // alternate clearing
+             ptr->clearDoc(doc);
+         }
+         ptr->commit();
+         EXPECT_EQUAL(1u, ptr->get(doc, &buffer[0], buffer.size()));
+         if (doc % 2 == 0) {
+             // cleared document: small unsigned ints are expected to read
+             // back as zero, every other type as its undefined value
+             if (smallUInt) {
+                 expectZero(buffer[0]);
+             } else {
+                 EXPECT_TRUE(attribute::isUndefined<BaseType>(buffer[0]));
+             }
+         } else {
+             EXPECT_TRUE(!attribute::isUndefined<BaseType>(buffer[0]));
+             EXPECT_EQUAL(values[i], buffer[0]);
+         }
+     }
+     EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs()));
+ }
+
+ // Runs the single-value test over integer (int32, uint4, fast-search int32),
+ // float (plain and fast-search) and string (plain and fast-search)
+ // attributes with 1000 documents each.  The uint4 attribute uses its own,
+ // smaller value set of 9 unique values; all others use 50.
+ void
+ AttributeTest::testSingle()
+ {
+     const uint32_t docCount = 1000;
+     const uint32_t uniqueCount = 50;
+     const uint32_t nibbleUniqueCount = 9;
+     { // IntegerAttribute
+         std::vector<AttributeVector::largeint_t> intValues;
+         fillNumeric(intValues, uniqueCount);
+         std::vector<AttributeVector::largeint_t> nibbles;
+         fillNumeric(nibbles, nibbleUniqueCount);
+         {
+             const Config cfg(BasicType::INT32, CollectionType::SINGLE);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-int32", cfg);
+             addDocs(attr, docCount);
+             testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(attr, intValues);
+         }
+         {
+             const Config cfg(BasicType::UINT4, CollectionType::SINGLE);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-uint4", cfg);
+             addDocs(attr, docCount);
+             testSingle<IntegerAttribute, AttributeVector::largeint_t, int8_t>(attr, nibbles);
+         }
+         {
+             Config fastCfg(BasicType::INT32, CollectionType::SINGLE);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-post-int32", fastCfg);
+             addDocs(attr, docCount);
+             testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(attr, intValues);
+         }
+     }
+     { // FloatingPointAttribute
+         std::vector<double> floatValues;
+         fillNumeric(floatValues, uniqueCount);
+         {
+             const Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-float", cfg);
+             addDocs(attr, docCount);
+             testSingle<FloatingPointAttribute, double, float>(attr, floatValues);
+         }
+         {
+             Config fastCfg(BasicType::FLOAT, CollectionType::SINGLE);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-post-float", fastCfg);
+             addDocs(attr, docCount);
+             testSingle<FloatingPointAttribute, double, float>(attr, floatValues);
+         }
+     }
+     { // StringAttribute
+         std::vector<vespalib::string> stringValues;
+         fillString(stringValues, uniqueCount);
+         {
+             const Config cfg(BasicType::STRING, CollectionType::SINGLE);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-string", cfg);
+             addDocs(attr, docCount);
+             testSingle<StringAttribute, vespalib::string, vespalib::string>(attr, stringValues);
+         }
+         {
+             Config fastCfg(BasicType::STRING, CollectionType::SINGLE);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("sv-fs-string", fastCfg);
+             addDocs(attr, docCount);
+             testSingle<StringAttribute, vespalib::string, vespalib::string>(attr, stringValues);
+         }
+     }
+ }
+
+
+//-----------------------------------------------------------------------------
+// CollectionType::ARRAY
+//-----------------------------------------------------------------------------
+
+ // Exercises update(), append(), remove() and clearDoc() on an array
+ // attribute that already contains its documents, and checks the update
+ // counters kept in the attribute status after the update() phase.
+ //   VectorType - concrete attribute class 'ptr' is cast to
+ //   BufferType - type used when reading values back with get()
+ // 'values' must hold at least 6 entries (values[5] is used below).
+ template <typename VectorType, typename BufferType>
+ void
+ AttributeTest::testArray(const AttributePtr & ptr, const std::vector<BufferType> & values)
+ {
+     // getNumDocs() is logged with %u, matching testSingle/testWeightedSet.
+     LOG(info, "testArray: vector '%s' with %u documents and %lu values",
+         ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size()));
+
+     VectorType & v = *(static_cast<VectorType *>(ptr.get()));
+     uint32_t numUniques = values.size();
+     ASSERT_TRUE(numUniques >= 6);
+
+
+     // test update()
+     EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u);
+     EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+     size_t sumAppends(0);
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         uint32_t valueCount = doc % numUniques;
+         ptr->clearDoc(doc);
+
+         EXPECT_TRUE(appendToVector(v, doc, valueCount, values));
+         ptr->commit();
+         sumAppends += valueCount;
+
+         // update() replaces whatever was appended with a single value
+         uint32_t i = doc % numUniques;
+         EXPECT_TRUE(v.update(doc, values[i]));
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i]));
+     }
+     EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0]));
+     // expected: 3 counted updates per document plus one per appended value;
+     // only the appends are expected to count as non-idempotent
+     EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), (1 + 2)*ptr->getNumDocs() + sumAppends);
+     EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), sumAppends);
+
+
+     // test append()
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         uint32_t valueCount = doc % numUniques;
+         ptr->clearDoc(doc);
+
+         // append unique values
+         EXPECT_TRUE(appendToVector(v, doc, valueCount, values));
+         ptr->commit();
+         EXPECT_TRUE(checkContent(ptr, doc, valueCount, valueCount, values));
+
+         // append duplicates
+         EXPECT_TRUE(appendToVector(v, doc, valueCount, values));
+         ptr->commit();
+         EXPECT_TRUE(checkContent(ptr, doc, valueCount * 2, valueCount, values));
+     }
+     EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1));
+
+
+     // test remove()
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         ptr->clearDoc(doc);
+
+         // one values[1], three values[3] and five values[5]
+         EXPECT_TRUE(v.append(doc, values[1], 1));
+         for (uint32_t i = 0; i < 3; ++i) {
+             EXPECT_TRUE(v.append(doc, values[3], 1));
+         }
+         for (uint32_t i = 0; i < 5; ++i) {
+             EXPECT_TRUE(v.append(doc, values[5], 1));
+         }
+
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1]));
+         EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3]));
+         EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5]));
+
+         // removing a value that is not present succeeds and changes nothing
+         EXPECT_TRUE(v.remove(doc, values[0], 1));
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1]));
+         EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3]));
+         EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5]));
+
+         EXPECT_TRUE(v.remove(doc, values[1], 1));
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 8, 0, values[1]));
+         EXPECT_TRUE(checkCount(ptr, doc, 8, 3, values[3]));
+         EXPECT_TRUE(checkCount(ptr, doc, 8, 5, values[5]));
+
+         // a single remove() drops all five occurrences of values[5]
+         EXPECT_TRUE(v.remove(doc, values[5], 1));
+         ptr->commit();
+         EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[1]));
+         EXPECT_TRUE(checkCount(ptr, doc, 3, 3, values[3]));
+         EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[5]));
+     }
+     EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1));
+
+
+     // test clearDoc()
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         uint32_t valueCount = doc % numUniques;
+
+         ptr->clearDoc(doc);
+         for (uint32_t j = 0; j < valueCount; ++j) {
+             EXPECT_TRUE(v.append(doc, values[0], 1));
+         }
+         // clearing within the same commit discards the values[0] appends
+         ptr->clearDoc(doc);
+         for (uint32_t j = 0; j < valueCount; ++j) {
+             EXPECT_TRUE(v.append(doc, values[1], 1));
+         }
+         ptr->commit();
+
+         EXPECT_TRUE(checkCount(ptr, doc, valueCount, valueCount, values[1]));
+     }
+     EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs()));
+ }
+
+ // Debug helper: prints every value of every document of an array attribute
+ // to stdout as "doc[<doc>][<index>] = <value>".
+ template <typename BufferType>
+ void
+ AttributeTest::printArray(const AttributePtr & ptr)
+ {
+     uint32_t bufferSize = ptr->getMaxValueCount();
+     std::vector<BufferType> buffer(bufferSize);
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         // data() instead of &buffer[0]: the buffer is empty when the maximum
+         // value count is 0, and indexing an empty vector is undefined.
+         uint32_t valueCount = ptr->get(doc, buffer.data(), buffer.size());
+         for (uint32_t i = 0; i < valueCount; ++i) {
+             std::cout << "doc[" << doc << "][" << i << "] = " << buffer[i]
+                       << std::endl;
+         }
+     }
+ }
+
+ // Runs the array test over integer (int32, fast-search int8 named "flags",
+ // fast-search int32), float (plain and fast-search) and string (plain and
+ // fast-search) attributes, 100 documents and 50 unique values each.
+ void
+ AttributeTest::testArray()
+ {
+     const uint32_t docCount = 100;
+     const uint32_t uniqueCount = 50;
+     { // IntegerAttribute
+         std::vector<AttributeVector::largeint_t> intValues;
+         fillNumeric(intValues, uniqueCount);
+         {
+             const Config cfg(BasicType::INT32, CollectionType::ARRAY);
+             AttributePtr attr = AttributeFactory::createAttribute("a-int32", cfg);
+             addDocs(attr, docCount);
+             testArray<IntegerAttribute, AttributeVector::largeint_t>(attr, intValues);
+         }
+         {
+             Config fastCfg(BasicType::INT8, CollectionType::ARRAY);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("flags", fastCfg);
+             addDocs(attr, docCount);
+             testArray<IntegerAttribute, AttributeVector::largeint_t>(attr, intValues);
+         }
+         {
+             Config fastCfg(BasicType::INT32, CollectionType::ARRAY);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("a-fs-int32", fastCfg);
+             addDocs(attr, docCount);
+             testArray<IntegerAttribute, AttributeVector::largeint_t>(attr, intValues);
+         }
+     }
+     { // FloatingPointAttribute
+         std::vector<double> floatValues;
+         fillNumeric(floatValues, uniqueCount);
+         {
+             const Config cfg(BasicType::FLOAT, CollectionType::ARRAY);
+             AttributePtr attr = AttributeFactory::createAttribute("a-float", cfg);
+             addDocs(attr, docCount);
+             testArray<FloatingPointAttribute, double>(attr, floatValues);
+         }
+         {
+             Config fastCfg(BasicType::FLOAT, CollectionType::ARRAY);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("a-fs-float", fastCfg);
+             addDocs(attr, docCount);
+             testArray<FloatingPointAttribute, double>(attr, floatValues);
+         }
+     }
+     { // StringAttribute
+         std::vector<vespalib::string> stringValues;
+         fillString(stringValues, uniqueCount);
+         {
+             const Config cfg(BasicType::STRING, CollectionType::ARRAY);
+             AttributePtr attr = AttributeFactory::createAttribute("a-string", cfg);
+             addDocs(attr, docCount);
+             testArray<StringAttribute, vespalib::string>(attr, stringValues);
+         }
+         {
+             Config fastCfg(BasicType::STRING, CollectionType::ARRAY);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("afs-string", fastCfg);
+             addDocs(attr, docCount);
+             testArray<StringAttribute, vespalib::string>(attr, stringValues);
+         }
+     }
+ }
+
+
+//-----------------------------------------------------------------------------
+// CollectionType::WSET
+//-----------------------------------------------------------------------------
+
+ // Debug helper: prints every element of every document of a weighted set
+ // attribute to stdout as "doc[<doc>][<index>] = {<value>, <weight>}".
+ template <typename BufferType>
+ void
+ AttributeTest::printWeightedSet(const AttributePtr & ptr)
+ {
+     std::vector<BufferType> buffer(ptr->getMaxValueCount());
+     for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) {
+         // data() instead of &buffer[0]: the buffer is empty when the maximum
+         // value count is 0, and indexing an empty vector is undefined.
+         uint32_t valueCount = ptr->get(doc, buffer.data(), buffer.size());
+         for (uint32_t i = 0; i < valueCount; ++i) {
+             std::cout << "doc[" << doc << "][" << i << "] = {" << buffer[i].getValue()
+                       << ", " << buffer[i].getWeight() << "}" << std::endl;
+         }
+     }
+ }
+
+ // Exercises fill, append() and remove() on a weighted set attribute that
+ // already contains its documents, and checks the update counters in the
+ // attribute status after each phase.
+ // VectorType is the concrete attribute class 'ptr' is cast to; BufferType is
+ // the weighted element type used both for 'values' and for reading back.
+ // 'values' must hold at least getNumDocs() + 10 entries.
+ template <typename VectorType, typename BufferType>
+ void
+ AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values)
+ {
+ LOG(info, "testWeightedSet: vector '%s' with %u documents and %lu values",
+ ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size()));
+
+ VectorType & v = *(static_cast<VectorType *>(ptr.get()));
+ uint32_t numDocs = v.getNumDocs();
+ ASSERT_TRUE(values.size() >= numDocs + 10);
+ uint32_t bufferSize = numDocs + 10;
+ std::vector<BufferType> buffer(bufferSize);
+
+ // fill and check
+ // document 'doc' receives the first 'doc' entries of 'values'
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+ v.clearDoc(doc);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight()));
+ }
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount);
+ for (uint32_t j = 0; j < valueCount; ++j) {
+ EXPECT_TRUE(buffer[j].getValue() == values[j].getValue());
+ EXPECT_TRUE(buffer[j].getWeight() == values[j].getWeight());
+ }
+ }
+ // expected: one clearDoc per document plus 0 + 1 + ... + (numDocs - 1) appends
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+
+ // test append()
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+
+ // append non-existent value
+ EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight()));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue());
+ EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight());
+
+ // append existent value
+ // the value count stays the same and the weight is expected to be replaced
+ EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue());
+ EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight() + 10);
+
+ // append non-existent value two times
+ // both appends go into one commit; the weight of the second one is expected to win
+ EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight()));
+ EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2);
+ EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue());
+ EXPECT_TRUE(buffer[doc + 1].getWeight() == values[doc + 1].getWeight() + 10);
+ }
+ // four appends per document were added in this phase
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+
+ // test remove()
+ for (uint32_t doc = 0; doc < numDocs; ++doc) {
+ uint32_t valueCount = doc;
+
+ // remove non-existent value
+ EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2);
+ EXPECT_TRUE(v.remove(doc, values[doc + 2].getValue(), 0));
+ commit(ptr);
+ EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2);
+
+ // remove existent value
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2);
+ EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue());
+ EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0));
+ commit(ptr);
+ EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1);
+ for (uint32_t i = 0; i < valueCount + 1; ++i) {
+ EXPECT_TRUE(buffer[i].getValue() != values[doc + 1].getValue());
+ }
+ }
+ // two removes per document were added in this phase
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ }
+
+ // Runs the weighted set test over integer, float and string attributes, each
+ // in a plain and a fast-search variant, with 100 documents and 110 values.
+ // Value number i gets weight i + 110.  After each fast-search run the test
+ // additionally expects findEnum() to locate one of the stored values.
+ void
+ AttributeTest::testWeightedSet()
+ {
+     const uint32_t docCount = 100;
+     const uint32_t valueTotal = docCount + 10;
+     { // IntegerAttribute
+         std::vector<AttributeVector::WeightedInt> weighted;
+         weighted.reserve(valueTotal);
+         for (uint32_t idx = 0; idx < valueTotal; ++idx) {
+             weighted.push_back(AttributeVector::WeightedInt(idx, idx + valueTotal));
+         }
+
+         {
+             const Config cfg(BasicType::INT32, CollectionType::WSET);
+             AttributePtr attr = AttributeFactory::createAttribute("wsint32", cfg);
+             addDocs(attr, docCount);
+             testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(attr, weighted);
+         }
+         {
+             Config fastCfg(BasicType::INT32, CollectionType::WSET);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("ws-fs-int32", fastCfg);
+             addDocs(attr, docCount);
+             testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(attr, weighted);
+             IAttributeVector::EnumHandle handle;
+             EXPECT_TRUE(attr->findEnum("1", handle));
+         }
+     }
+     { // FloatingPointAttribute
+         std::vector<AttributeVector::WeightedFloat> weighted;
+         weighted.reserve(valueTotal);
+         for (uint32_t idx = 0; idx < valueTotal; ++idx) {
+             weighted.push_back(AttributeVector::WeightedFloat(idx, idx + valueTotal));
+         }
+
+         {
+             const Config cfg(BasicType::FLOAT, CollectionType::WSET);
+             AttributePtr attr = AttributeFactory::createAttribute("ws-float", cfg);
+             addDocs(attr, docCount);
+             testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(attr, weighted);
+         }
+         {
+             Config fastCfg(BasicType::FLOAT, CollectionType::WSET);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("ws-fs-float", fastCfg);
+             addDocs(attr, docCount);
+             testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(attr, weighted);
+             IAttributeVector::EnumHandle handle;
+             EXPECT_TRUE(attr->findEnum("1", handle));
+         }
+     }
+     { // StringAttribute
+         std::vector<AttributeVector::WeightedString> weighted;
+         weighted.reserve(valueTotal);
+         for (uint32_t idx = 0; idx < valueTotal; ++idx) {
+             // "string00" .. "string09", then "string10", "string11", ...
+             vespalib::asciistream name;
+             name << "string";
+             if (idx < 10) {
+                 name << "0";
+             }
+             name << idx;
+             weighted.push_back(AttributeVector::WeightedString(name.str(), idx + valueTotal));
+         }
+
+         {
+             const Config cfg(BasicType::STRING, CollectionType::WSET);
+             AttributePtr attr = AttributeFactory::createAttribute("wsstr", cfg);
+             addDocs(attr, docCount);
+             testWeightedSet<StringAttribute, AttributeVector::WeightedString>(attr, weighted);
+         }
+         {
+             Config fastCfg(BasicType::STRING, CollectionType::WSET);
+             fastCfg.setFastSearch(true);
+             AttributePtr attr = AttributeFactory::createAttribute("wsfsstr", fastCfg);
+             addDocs(attr, docCount);
+             testWeightedSet<StringAttribute, AttributeVector::WeightedString>(attr, weighted);
+             IAttributeVector::EnumHandle handle;
+             EXPECT_TRUE(attr->findEnum("string00", handle));
+         }
+     }
+ }
+
+ // Applies arithmetic value updates (add, sub, mul, div) to a single-value
+ // numeric attribute and checks both the resulting values and the update
+ // counters kept in the attribute status.
+ // VectorType is the concrete attribute class 'ptr' is cast to; BufferType is
+ // the type used when reading values back with get().
+ // The function adds its own 13 documents to 'ptr'; the counter expectations
+ // start from 0, so 'ptr' is expected to have no earlier updates.
+ template <typename VectorType, typename BufferType>
+ void
+ AttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr)
+ {
+ LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str());
+
+ typedef document::ArithmeticValueUpdate Arith;
+ VectorType & vec = static_cast<VectorType &>(*ptr.get());
+ addDocs(ptr, 13);
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ // every document starts at 100
+ for (uint32_t doc = 0; doc < 13; ++doc) {
+ ASSERT_TRUE(vec.update(doc, 100));
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+ ptr->commit();
+
+ // one arithmetic operation per document: integer operands for docs 0-7,
+ // fractional operands for docs 8-12
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Add, -10)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Sub, 10)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Sub, -10)));
+ EXPECT_TRUE(vec.apply(4, Arith(Arith::Mul, 10)));
+ EXPECT_TRUE(vec.apply(5, Arith(Arith::Mul, -10)));
+ EXPECT_TRUE(vec.apply(6, Arith(Arith::Div, 10)));
+ EXPECT_TRUE(vec.apply(7, Arith(Arith::Div, -10)));
+ EXPECT_TRUE(vec.apply(8, Arith(Arith::Add, 10.5)));
+ EXPECT_TRUE(vec.apply(9, Arith(Arith::Sub, 10.5)));
+ EXPECT_TRUE(vec.apply(10, Arith(Arith::Mul, 1.2)));
+ EXPECT_TRUE(vec.apply(11, Arith(Arith::Mul, 0.8)));
+ EXPECT_TRUE(vec.apply(12, Arith(Arith::Div, 0.8)));
+ // each of the 13 applies is expected to count as an update and as non-idempotent
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 26u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u);
+ ptr->commit();
+
+ std::vector<BufferType> buf(1);
+ ptr->get(0, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110);
+ ptr->get(1, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 90);
+ ptr->get(2, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 90);
+ ptr->get(3, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110);
+ ptr->get(4, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 1000);
+ ptr->get(5, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], -1000);
+ ptr->get(6, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 10);
+ ptr->get(7, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], -10);
+ // docs 8 and 9 used the operand 10.5: the INT32 expectations correspond to
+ // an operand of 10, the float/double ones keep the fraction
+ if (ptr->getBasicType() == BasicType::INT32) {
+ ptr->get(8, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110);
+ ptr->get(9, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 90);
+ } else if (ptr->getBasicType() == BasicType::FLOAT ||
+ ptr->getBasicType() == BasicType::DOUBLE)
+ {
+ ptr->get(8, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 110.5);
+ ptr->get(9, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 89.5);
+ } else {
+ // only INT32, FLOAT and DOUBLE attributes are supported by this test
+ ASSERT_TRUE(false);
+ }
+ ptr->get(10, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 120);
+ ptr->get(11, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 80);
+ ptr->get(12, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 125);
+
+
+ // try several arithmetic operations on the same document in a single commit
+ ASSERT_TRUE(vec.update(0, 1100));
+ ASSERT_TRUE(vec.update(1, 1100));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 28u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u);
+ for (uint32_t i = 0; i < 10; ++i) {
+ ASSERT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+ ASSERT_TRUE(vec.apply(1, Arith(Arith::Add, 10)));
+ }
+ // 20 more applies: 28 + 20 updates, 13 + 20 non-idempotent
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 48u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u);
+ ptr->commit();
+ ptr->get(0, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 1200);
+ ptr->get(1, &buf[0], 1);
+ EXPECT_EQUAL(buf[0], 1200);
+
+ ASSERT_TRUE(vec.update(0, 10));
+ ASSERT_TRUE(vec.update(1, 10));
+ ASSERT_TRUE(vec.update(2, 10));
+ ASSERT_TRUE(vec.update(3, 10));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 52u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u);
+ // 8 rounds of 4 multiplications, committed after every round; only the
+ // counters are checked here, not the resulting values
+ for (uint32_t i = 0; i < 8; ++i) {
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Mul, 1.2)));
+ EXPECT_TRUE(vec.apply(1, Arith(Arith::Mul, 2.3)));
+ EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 3.4)));
+ EXPECT_TRUE(vec.apply(3, Arith(Arith::Mul, 5.6)));
+ ptr->commit();
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 84u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u);
+
+
+ // try divide by zero
+ ASSERT_TRUE(vec.update(0, 100));
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0)));
+ ptr->commit();
+ if (ptr->getClass().inherits(FloatingPointAttribute::classId)) {
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 86u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 66u);
+ } else { // does not apply for integer attributes
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u);
+ }
+ ptr->get(0, &buf[0], 1);
+ // the stored value is only checked for INT32, where it is expected to stay at 100
+ if (ptr->getBasicType() == BasicType::INT32) {
+ EXPECT_EQUAL(buf[0], 100);
+ }
+
+ // try divide by zero with empty change vector
+ EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0)));
+ ptr->commit();
+ if (ptr->getClass().inherits(FloatingPointAttribute::classId)) {
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 87u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 67u);
+ } else { // does not apply for integer attributes
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u);
+ }
+ }
+
+ // Runs the arithmetic value update test on single-value attributes: plain
+ // int32 and float first, then fast-search int32, float and double.
+ void
+ AttributeTest::testArithmeticValueUpdate()
+ {
+     {
+         const Config cfg(BasicType::INT32, CollectionType::SINGLE);
+         AttributePtr attr = AttributeFactory::createAttribute("sint32", cfg);
+         testArithmeticValueUpdate<IntegerAttribute, IntegerAttribute::largeint_t>(attr);
+     }
+     {
+         const Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+         AttributePtr attr = AttributeFactory::createAttribute("sfloat", cfg);
+         testArithmeticValueUpdate<FloatingPointAttribute, double>(attr);
+     }
+     {
+         Config fastCfg(BasicType::INT32, CollectionType::SINGLE);
+         fastCfg.setFastSearch(true);
+         AttributePtr attr = AttributeFactory::createAttribute("sfsint32", fastCfg);
+         testArithmeticValueUpdate<IntegerAttribute, IntegerAttribute::largeint_t>(attr);
+     }
+     {
+         Config fastCfg(BasicType::FLOAT, CollectionType::SINGLE);
+         fastCfg.setFastSearch(true);
+         AttributePtr attr = AttributeFactory::createAttribute("sfsfloat", fastCfg);
+         testArithmeticValueUpdate<FloatingPointAttribute, double>(attr);
+     }
+     {
+         Config fastCfg(BasicType::DOUBLE, CollectionType::SINGLE);
+         fastCfg.setFastSearch(true);
+         AttributePtr attr = AttributeFactory::createAttribute("sfsdouble", fastCfg);
+         testArithmeticValueUpdate<FloatingPointAttribute, double>(attr);
+     }
+ }
+
+
+ // Stores 'before' in a single new document, applies "add 10" to it and
+ // checks the outcome: floating point attributes are expected to read back
+ // NaN, all other attributes are expected to read back 'after'.
+ template <typename VectorType, typename BaseType, typename BufferType>
+ void
+ AttributeTest::testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after)
+ {
+     LOG(info, "testArithmeticWithUndefinedValue: vector '%s'", ptr->getName().c_str());
+
+     VectorType & attr = static_cast<VectorType &>(*ptr.get());
+     addDocs(ptr, 1);
+     ASSERT_TRUE(attr.update(0, before));
+     ptr->commit();
+
+     const document::ArithmeticValueUpdate addTen(document::ArithmeticValueUpdate::Add, 10);
+     EXPECT_TRUE(attr.apply(0, addTen));
+     ptr->commit();
+
+     std::vector<BufferType> readBack(1);
+     ptr->get(0, &readBack[0], 1);
+
+     const bool isFloatingPoint = ptr->getClass().inherits(FloatingPointAttribute::classId);
+     if (!isFloatingPoint) {
+         EXPECT_EQUAL(readBack[0], after);
+         return;
+     }
+     EXPECT_TRUE(std::isnan(readBack[0]));
+ }
+
+void
+AttributeTest::testArithmeticWithUndefinedValue()
+{
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE));
+ testArithmeticWithUndefinedValue<IntegerAttribute, int32_t, IntegerAttribute::largeint_t>
+ (ptr, std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::min());
+ }
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE));
+ testArithmeticWithUndefinedValue<FloatingPointAttribute, float, double>
+ (ptr, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN());
+ }
+ {
+ AttributePtr ptr = AttributeFactory::createAttribute("sdouble", Config(BasicType::DOUBLE, CollectionType::SINGLE));
+ testArithmeticWithUndefinedValue<FloatingPointAttribute, double, double>
+ (ptr, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN());
+ }
+}
+
+
+// Exercises map value updates (an arithmetic operation applied to the weight
+// of one entry) on a multi-value attribute whose entries carry weights.
+//
+// ptr                 - attribute under test, accessed as VectorType.
+// initValue           - weighted value; its getValue() is appended to every document.
+// initFieldValue      - key passed to the map updates that target the appended entry.
+// nonExistant         - key passed to the map update on document 5.
+// removeIfZero        - when true, an entry whose weight reaches 0 is expected to be gone.
+// createIfNonExistant - when true, the update with the nonExistant key is expected
+//                       to leave a second entry in the document.
+//
+// The update counters from getStatus() are checked cumulatively, so the order
+// of the steps below matters.
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testMapValueUpdate(const AttributePtr & ptr, BufferType initValue,
+ const FieldValue & initFieldValue, const FieldValue & nonExistant,
+ bool removeIfZero, bool createIfNonExistant)
+{
+ LOG(info, "testMapValueUpdate: vector '%s'", ptr->getName().c_str());
+ typedef MapValueUpdate MapVU;
+ typedef ArithmeticValueUpdate ArithVU;
+ VectorType & vec = static_cast<VectorType &>(*ptr.get());
+
+ // Six documents, each holding the initial value with weight 100.
+ addDocs(ptr, 6);
+ for (uint32_t doc = 0; doc < 6; ++doc) {
+ ASSERT_TRUE(vec.append(doc, initValue.getValue(), 100));
+ }
+ // One update is counted per append, none of them as non-idempotent.
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 6u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u);
+
+ // Add, subtract, multiply and divide the weight by 10 in documents 0-3.
+ EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue,
+ ArithVU(ArithVU::Add, 10))));
+ EXPECT_TRUE(ptr->apply(1, MapVU(initFieldValue,
+ ArithVU(ArithVU::Sub, 10))));
+ EXPECT_TRUE(ptr->apply(2, MapVU(initFieldValue,
+ ArithVU(ArithVU::Mul, 10))));
+ EXPECT_TRUE(ptr->apply(3, MapVU(initFieldValue,
+ ArithVU(ArithVU::Div, 10))));
+ ptr->commit();
+ // Each of the four map updates is counted both as an update and as a non-idempotent update.
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 10u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 4u);
+
+ // Expected weights: 100+10, 100-10, 100*10 and 100/10.
+ std::vector<BufferType> buf(2);
+ ptr->get(0, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 110);
+ ptr->get(1, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 90);
+ ptr->get(2, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 1000);
+ ptr->get(3, &buf[0], 2);
+ EXPECT_EQUAL(buf[0].getWeight(), 10);
+
+ // Remove-if-zero: subtracting 100 brings the weight in document 4 down to 0.
+ EXPECT_TRUE(ptr->apply(4, MapVU(initFieldValue,
+ ArithVU(ArithVU::Sub, 100))));
+ ptr->commit();
+ if (removeIfZero) {
+ // The entry is gone, so no values are returned.
+ EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(0));
+ } else {
+ // The entry is kept with weight 0.
+ EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(1));
+ EXPECT_EQUAL(buf[0].getWeight(), 0);
+ }
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 11u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 5u);
+
+ // Create-if-nonexistent: update document 5 using the nonExistant key.
+ EXPECT_TRUE(ptr->apply(5, MapVU(nonExistant,
+ ArithVU(ArithVU::Add, 10))));
+ ptr->commit();
+ if (createIfNonExistant) {
+ // A second entry with weight 10 shows up next to the original one.
+ EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(2));
+ EXPECT_EQUAL(buf[0].getWeight(), 100);
+ EXPECT_EQUAL(buf[1].getWeight(), 10);
+ } else {
+ // Only the original entry is present and its weight is untouched.
+ EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(1));
+ EXPECT_EQUAL(buf[0].getWeight(), 100);
+ }
+ // The update is counted in both modes.
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 12u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u);
+
+
+ // Divide by zero must be ignored: apply() still returns true, but neither
+ // update counter moves and the weight read back is unchanged.
+ vec.clearDoc(0);
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u);
+ ASSERT_TRUE(vec.append(0, initValue.getValue(), 12345));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u);
+ EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, ArithVU(ArithVU::Div, 0))));
+ EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u);
+ EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u);
+ ptr->commit();
+ ptr->get(0, &buf[0], 1);
+ EXPECT_EQUAL(buf[0].getWeight(), 12345);
+}
+
+// Runs the map value update check on weighted set attributes of type int32
+// and string, each in three flavours: plain, remove-if-zero and
+// create-if-nonexistent. The string flavours are then repeated with fast
+// search switched on.
+void
+AttributeTest::testMapValueUpdate()
+{
+    // Creates a fresh int32 weighted set from 'cfg' and runs the check on it.
+    auto checkInt = [this](const Config & cfg, bool removeIfZero, bool createIfNonExistant) {
+        AttributePtr ptr = AttributeFactory::createAttribute("wsint32", cfg);
+        this->testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt>
+            (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64),
+             IntFieldValue(32), removeIfZero, createIfNonExistant);
+    };
+    // Creates a fresh string weighted set called 'name' from 'cfg' and runs the check on it.
+    auto checkString = [this](const char * name, const Config & cfg, bool removeIfZero, bool createIfNonExistant) {
+        AttributePtr ptr = AttributeFactory::createAttribute(name, cfg);
+        this->testMapValueUpdate<StringAttribute, AttributeVector::WeightedString>
+            (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"),
+             StringFieldValue("second"), removeIfZero, createIfNonExistant);
+    };
+
+    // int32: regular set, remove if zero, create if nonexistent
+    checkInt(Config(BasicType::INT32, CollectionType::WSET), false, false);
+    checkInt(Config(BasicType::INT32, CollectionType(CollectionType::WSET, true, false)), true, false);
+    checkInt(Config(BasicType::INT32, CollectionType(CollectionType::WSET, false, true)), false, true);
+
+    Config setCfg(BasicType::STRING, CollectionType::WSET);
+    Config setRemoveCfg(BasicType::STRING, CollectionType(CollectionType::WSET, true, false));
+    Config setCreateCfg(BasicType::STRING, CollectionType(CollectionType::WSET, false, true));
+
+    // string: regular set, remove if zero, create if nonexistent
+    checkString("wsstr", setCfg, false, false);
+    checkString("wsstr", setRemoveCfg, true, false);
+    checkString("wsstr", setCreateCfg, false, true);
+
+    // fast-search - posting lists (same three flavours)
+    setCfg.setFastSearch(true);
+    checkString("wsfsstr", setCfg, false, false);
+    setRemoveCfg.setFastSearch(true);
+    checkString("wsfsstr", setRemoveCfg, true, false);
+    setCreateCfg.setFastSearch(true);
+    checkString("wsfsstr", setCreateCfg, false, true);
+}
+
+
+
+// Calls commit() on the attribute vector that 'ptr' refers to.
+void
+AttributeTest::commit(const AttributePtr & ptr)
+{
+    AttributeVector & attribute = *ptr;
+    attribute.commit();
+}
+
+
+// Checks the figures reported by getStatus() for string array attributes.
+// Document, value and unique value counts must match exactly, while used and
+// allocated memory only have to reach a hand-computed lower bound (expUsed).
+void
+AttributeTest::testStatus()
+{
+ std::vector<vespalib::string> values;
+ fillString(values, 16);
+ uint32_t numDocs = 100;
+ // No posting list
+ // Building blocks for the lower bound on memory usage computed in each case below.
+ static constexpr size_t LeafNodeSize =
+ 4 + sizeof(EnumStoreBase::Index) * EnumTreeTraits::LEAF_SLOTS;
+ static constexpr size_t InternalNodeSize =
+ 8 + (sizeof(EnumStoreBase::Index) +
+ sizeof(btree::EntryRef)) * EnumTreeTraits::INTERNAL_SLOTS;
+ static constexpr size_t NestedVectorSize = 24; // sizeof(vespalib::Array)
+
+ // Case 1: one value per document, a single unique value overall.
+ {
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ AttributePtr ptr = AttributeFactory::createAttribute("as", cfg);
+ addDocs(ptr, numDocs);
+ StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get()));
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ EXPECT_TRUE(appendToVector(sa, i, 1, values));
+ }
+ ptr->commit(true);
+ EXPECT_EQUAL(ptr->getStatus().getNumDocs(), 100u);
+ EXPECT_EQUAL(ptr->getStatus().getNumValues(), 100u);
+ EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), 1u);
+ size_t expUsed = 0;
+ expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree
+ expUsed += 1 * 32; // enum store (uniquevalues * bytes per entry)
+ // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex))
+ // NOTE(review): 100 and 4 are hard-coded here, while the second case below
+ // spells the same terms as numDocs and sizeof(EnumStoreBase::Index).
+ expUsed += 100 * sizeof(search::multivalue::Index32) + 100 * 4;
+ // Only a lower bound is checked; the attribute may use and allocate more.
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed);
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed);
+ }
+
+ // Case 2: 16 values per document, 16 unique values overall.
+ {
+ Config cfg(BasicType::STRING, CollectionType::ARRAY);
+ AttributePtr ptr = AttributeFactory::createAttribute("as", cfg);
+ addDocs(ptr, numDocs);
+ StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get()));
+ const size_t numUniq(16);
+ const size_t numValuesPerDoc(16);
+ for (uint32_t i = 0; i < numDocs; ++i) {
+ EXPECT_TRUE(appendToVector(sa, i, numValuesPerDoc, values));
+ }
+ ptr->commit(true);
+ EXPECT_EQUAL(ptr->getStatus().getNumDocs(), numDocs);
+ EXPECT_EQUAL(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc);
+ EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), numUniq);
+ size_t expUsed = 0;
+ expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree
+ expUsed += numUniq * 32; // enum store (16 unique values, 32 bytes per entry)
+ // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex) +
+ // numdocs * sizeof(Array<EnumIndex>) (due to vector vector))
+ // The NestedVectorSize term is only added when numValuesPerDoc exceeds Index32::maxValues().
+ expUsed += numDocs * sizeof(search::multivalue::Index32) + numDocs * numValuesPerDoc * sizeof(EnumStoreBase::Index) + ((numValuesPerDoc > search::multivalue::Index32::maxValues()) ? numDocs * NestedVectorSize : 0);
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed);
+ EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed);
+ }
+}
+
+// Feeds a string with an embedded '\0' ("evil\0string") into single, array
+// and weighted set string attributes and expects only the part in front of
+// the terminator ("evil") to come back out. For the weighted set the same
+// string is also used to remove the entry again.
+void
+AttributeTest::testNullProtection()
+{
+    const size_t len1 = strlen("evil");
+    const size_t len2 = strlen("string");
+    const size_t len = len1 + 1 + len2;
+    vespalib::string good("good");
+    vespalib::string evil("evil string");
+    vespalib::string pureEvil("evil");
+
+    // Sanity check the fixture before and after planting the terminator.
+    EXPECT_EQUAL(strlen(evil.data()), len);
+    EXPECT_EQUAL(strlen(evil.c_str()), len);
+    evil[len1] = '\0'; // replace space with '\0'
+    EXPECT_EQUAL(strlen(evil.data()), len1);
+    EXPECT_EQUAL(strlen(evil.c_str()), len1);
+    EXPECT_EQUAL(strlen(evil.data() + len1), 0u);
+    EXPECT_EQUAL(strlen(evil.c_str() + len1), 0u);
+    EXPECT_EQUAL(strlen(evil.data() + len1 + 1), len2);
+    EXPECT_EQUAL(strlen(evil.c_str() + len1 + 1), len2);
+    EXPECT_EQUAL(evil.size(), len);
+
+    { // string
+        AttributeVector::DocId docId;
+        std::vector<vespalib::string> values(16);
+        AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::SINGLE));
+        StringAttribute & strAttr = static_cast<StringAttribute &>(*attr);
+        EXPECT_TRUE(strAttr.addDoc(docId));
+        EXPECT_TRUE(strAttr.update(docId, evil));
+        strAttr.commit();
+        // Read back through the const AttributeVector interface.
+        const AttributeVector & reader = strAttr;
+        size_t count = reader.get(docId, &values[0], values.size());
+        EXPECT_EQUAL(count, 1u);
+        EXPECT_EQUAL(values[0], pureEvil);
+    }
+    { // string array
+        AttributeVector::DocId docId;
+        std::vector<vespalib::string> values(16);
+        AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::ARRAY));
+        StringAttribute & strAttr = static_cast<StringAttribute &>(*attr);
+        EXPECT_TRUE(strAttr.addDoc(docId));
+        EXPECT_TRUE(strAttr.append(0, good, 1));
+        EXPECT_TRUE(strAttr.append(0, evil, 1));
+        EXPECT_TRUE(strAttr.append(0, good, 1));
+        strAttr.commit();
+        const AttributeVector & reader = strAttr;
+        size_t count = reader.get(0, &values[0], values.size());
+        EXPECT_EQUAL(count, 3u);
+        EXPECT_EQUAL(values[0], good);
+        EXPECT_EQUAL(values[1], pureEvil);
+        EXPECT_EQUAL(values[2], good);
+    }
+    { // string set
+        AttributeVector::DocId docId;
+        std::vector<StringAttribute::WeightedString> entries(16);
+        AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::WSET));
+        StringAttribute & strAttr = static_cast<StringAttribute &>(*attr);
+        EXPECT_TRUE(strAttr.addDoc(docId));
+        EXPECT_TRUE(strAttr.append(0, good, 10));
+        EXPECT_TRUE(strAttr.append(0, evil, 20));
+        strAttr.commit();
+        const AttributeVector & reader = strAttr;
+        size_t count = reader.get(0, &entries[0], entries.size());
+        EXPECT_EQUAL(count, 2u);
+        // The two entries may come back in either order; put "good" first.
+        if (entries[0].getValue() != good) {
+            std::swap(entries[0], entries[1]);
+        }
+        EXPECT_EQUAL(entries[0].getValue(), good);
+        EXPECT_EQUAL(entries[0].getWeight(), 10);
+        EXPECT_EQUAL(entries[1].getValue(), pureEvil);
+        EXPECT_EQUAL(entries[1].getWeight(), 20);
+
+        // remove
+        EXPECT_TRUE(strAttr.remove(0, evil, 20));
+        strAttr.commit();
+        count = reader.get(0, &entries[0], entries.size());
+        EXPECT_EQUAL(count, 1u);
+        EXPECT_EQUAL(entries[0].getValue(), good);
+        EXPECT_EQUAL(entries[0].getWeight(), 10);
+    }
+}
+
+// Walks an integer attribute through a sequence of addDoc() / commit() calls,
+// some of them while an AttributeGuard is alive, and checks the generation
+// counter plus the allocated / on-hold figures from getStatus() after each step.
+//
+// attr        - attribute under test; accessed as IntegerAttribute.
+// exactStatus - when true the allocated / on-hold numbers are compared with
+//               exact constants; otherwise only their movement relative to
+//               the previous step (grows, unchanged, shrinks) is checked.
+void
+AttributeTest::testGeneration(const AttributePtr & attr, bool exactStatus)
+{
+    LOG(info, "testGeneration(%s)", attr->getName().c_str());
+    IntegerAttribute & ia = static_cast<IntegerAttribute &>(*attr.get());
+    // add docs to trigger inc generation when data vector is full
+    AttributeVector::DocId docId;
+    EXPECT_EQUAL(0u, ia.getCurrentGeneration());
+    EXPECT_TRUE(ia.addDoc(docId));
+    EXPECT_EQUAL(0u, ia.getCurrentGeneration());
+    EXPECT_TRUE(ia.addDoc(docId));
+    EXPECT_EQUAL(0u, ia.getCurrentGeneration());
+    ia.commit(true);
+    EXPECT_EQUAL(1u, ia.getCurrentGeneration());
+    // Figures from the previous step, used only when exactStatus is false.
+    // Initialized so they never hold an indeterminate value on the
+    // exactStatus path, where they are not assigned.
+    uint64_t lastAllocated = 0;
+    uint64_t lastOnHold = 0;
+    if (exactStatus) {
+        EXPECT_EQUAL(2u, ia.getStatus().getAllocated());
+        EXPECT_EQUAL(0u, ia.getStatus().getOnHold());
+    } else {
+        EXPECT_LESS(0u, ia.getStatus().getAllocated());
+        EXPECT_EQUAL(0u, ia.getStatus().getOnHold());
+        lastAllocated = ia.getStatus().getAllocated();
+        lastOnHold = ia.getStatus().getOnHold();
+    }
+    {
+        AttributeGuard ag(attr); // guard on generation 1
+        EXPECT_TRUE(ia.addDoc(docId)); // inc gen
+        EXPECT_EQUAL(2u, ia.getCurrentGeneration());
+        ia.commit(true);
+        EXPECT_EQUAL(3u, ia.getCurrentGeneration());
+        if (exactStatus) {
+            EXPECT_EQUAL(4u, ia.getStatus().getAllocated());
+            EXPECT_EQUAL(2u, ia.getStatus().getOnHold()); // no cleanup due to guard
+        } else {
+            EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated());
+            EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold());
+            lastAllocated = ia.getStatus().getAllocated();
+            lastOnHold = ia.getStatus().getOnHold();
+        }
+    }
+    // The guard is gone here; this addDoc() does not bump the generation.
+    EXPECT_TRUE(ia.addDoc(docId));
+    EXPECT_EQUAL(3u, ia.getCurrentGeneration());
+    {
+        AttributeGuard ag(attr); // guard on generation 3
+        ia.commit(true);
+        EXPECT_EQUAL(4u, ia.getCurrentGeneration());
+        if (exactStatus) {
+            EXPECT_EQUAL(4u, ia.getStatus().getAllocated());
+            EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of addDoc()
+        } else {
+            EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated());
+            EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold());
+            lastAllocated = ia.getStatus().getAllocated();
+            lastOnHold = ia.getStatus().getOnHold();
+        }
+    }
+    {
+        AttributeGuard ag(attr); // guard on generation 4
+        EXPECT_TRUE(ia.addDoc(docId)); // inc gen
+        EXPECT_EQUAL(5u, ia.getCurrentGeneration());
+        ia.commit();
+        EXPECT_EQUAL(6u, ia.getCurrentGeneration());
+        if (exactStatus) {
+            EXPECT_EQUAL(6u, ia.getStatus().getAllocated());
+            EXPECT_EQUAL(4u, ia.getStatus().getOnHold()); // no cleanup due to guard
+        } else {
+            EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated());
+            EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold());
+            lastAllocated = ia.getStatus().getAllocated();
+            lastOnHold = ia.getStatus().getOnHold();
+        }
+    }
+    // Final commit without any guard alive.
+    ia.commit(true);
+    EXPECT_EQUAL(7u, ia.getCurrentGeneration());
+    if (exactStatus) {
+        EXPECT_EQUAL(6u, ia.getStatus().getAllocated());
+        EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of commit()
+    } else {
+        EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated());
+        EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold());
+    }
+}
+
+// Runs the generation check on four int8 attributes: single value and array,
+// each with and without fast search. Only the plain single value attribute
+// is checked against exact status numbers.
+void
+AttributeTest::testGeneration()
+{
+    // Applies the settings shared by all cases, creates the attribute and
+    // runs the check on it.
+    auto runCase = [this](Config cfg, bool fastSearch, const char * name, bool exactStatus) {
+        if (fastSearch) {
+            cfg.setFastSearch(true);
+        }
+        cfg.setGrowStrategy(GrowStrategy(2, 0, 2));
+        AttributePtr attr = AttributeFactory::createAttribute(name, cfg);
+        this->testGeneration(attr, exactStatus);
+    };
+
+    // single value attribute
+    runCase(Config(BasicType::INT8), false, "int8", true);
+    // enum attribute (with fast search)
+    runCase(Config(BasicType::INT8), true, "faint8", false);
+    // multi value attribute
+    runCase(Config(BasicType::INT8, CollectionType::ARRAY), false, "aint8", false);
+    // multi value enum attribute (with fast search)
+    runCase(Config(BasicType::INT8, CollectionType::ARRAY), true, "faaint8", false);
+}
+
+
+// Sets a create serial number on an int32 attribute, saves it, and expects a
+// second attribute with the same name and config to report that number after
+// load().
+void
+AttributeTest::testCreateSerialNum()
+{
+    Config cfg(BasicType::INT32);
+
+    AttributePtr saved = AttributeFactory::createAttribute("int32", cfg);
+    saved->setCreateSerialNum(42u);
+    EXPECT_TRUE(saved->save());
+
+    AttributePtr loaded = AttributeFactory::createAttribute("int32", cfg);
+    EXPECT_TRUE(loaded->load());
+    EXPECT_EQUAL(42u, loaded->getCreateSerialNum());
+}
+
+
+// Checks compactLidSpace() / shrinkLidSpace() for one attribute configuration.
+//
+// A "large" attribute with 100 documents is compacted to 30, saved and loaded
+// into a third attribute, which is compared with a reference attribute that
+// was populated with 30 documents from the start. After shrinkLidSpace() the
+// large attribute itself is compared with the loaded one.
+//
+// config - basic type and collection type to test.
+// fs     - value passed to setFastSearch().
+// es     - value passed to enableEnumeratedSave().
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testCompactLidSpace(const Config &config,
+                                   bool fs,
+                                   bool es)
+{
+    uint32_t highDocs = 100;
+    uint32_t trimmedDocs = 30;
+
+    Config cfg = config;
+    cfg.setFastSearch(fs);
+
+    // The attribute name doubles as a file name below the clstmp directory.
+    vespalib::string basicTypeName = config.basicType().asString();
+    vespalib::string collectionTypeName = config.collectionType().asString();
+    vespalib::string fastSearchSuffix = fs ? "-fs" : "";
+    vespalib::string enumSaveSuffix = es ? "-es" : "";
+    vespalib::string name = clstmp + "/" + basicTypeName + "-" + collectionTypeName + fastSearchSuffix + enumSaveSuffix;
+    LOG(info, "testCompactLidSpace(%s)", name.c_str());
+
+    // Large attribute that is going to be compacted.
+    AttributePtr large = AttributeFactory::createAttribute(name, cfg);
+    VectorType &largeVec = static_cast<VectorType &>(*large.get());
+    large->enableEnumeratedSave(es);
+    large->addDocs(highDocs);
+    populate(largeVec, 17);
+
+    // Reference attribute that only ever holds the trimmed number of documents.
+    AttributePtr reference = AttributeFactory::createAttribute(name, cfg);
+    VectorType &referenceVec = static_cast<VectorType &>(*reference.get());
+    reference->enableEnumeratedSave(es);
+    reference->addDocs(trimmedDocs);
+    populate(referenceVec, 17);
+
+    EXPECT_EQUAL(trimmedDocs, reference->getNumDocs());
+    EXPECT_EQUAL(trimmedDocs, reference->getCommittedDocIdLimit());
+    EXPECT_EQUAL(highDocs, large->getNumDocs());
+    EXPECT_EQUAL(highDocs, large->getCommittedDocIdLimit());
+
+    // Compaction lowers the committed doc id limit but keeps the doc count.
+    large->compactLidSpace(trimmedDocs);
+    EXPECT_EQUAL(highDocs, large->getNumDocs());
+    EXPECT_EQUAL(trimmedDocs, large->getCommittedDocIdLimit());
+
+    // Saving must not change either figure.
+    EXPECT_TRUE(large->save());
+    EXPECT_EQUAL(highDocs, large->getNumDocs());
+    EXPECT_EQUAL(trimmedDocs, large->getCommittedDocIdLimit());
+
+    // What was saved must load as the trimmed attribute.
+    AttributePtr reloaded = AttributeFactory::createAttribute(name, cfg);
+    EXPECT_TRUE(reloaded->load());
+    EXPECT_EQUAL(trimmedDocs, reloaded->getNumDocs());
+    EXPECT_EQUAL(trimmedDocs, reloaded->getCommittedDocIdLimit());
+    VectorType &reloadedVec = static_cast<VectorType &>(*reloaded.get());
+    compare<VectorType, BufferType>(referenceVec, reloadedVec);
+
+    // Shrinking finally brings the doc count down as well.
+    large->shrinkLidSpace();
+    EXPECT_EQUAL(trimmedDocs, large->getNumDocs());
+    EXPECT_EQUAL(trimmedDocs, large->getCommittedDocIdLimit());
+    compare<VectorType, BufferType>(largeVec, reloadedVec);
+}
+
+
+// Runs the lid space compaction check without fast search, with enumerated
+// save off and on. The two fast search runs are skipped for the unsigned
+// small integer types.
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testCompactLidSpace(const Config &config)
+{
+    testCompactLidSpace<VectorType, BufferType>(config, false, false);
+    testCompactLidSpace<VectorType, BufferType>(config, false, true);
+    if (!isUnsignedSmallIntAttribute(config.basicType().type())) {
+        testCompactLidSpace<VectorType, BufferType>(config, true, false);
+        testCompactLidSpace<VectorType, BufferType>(config, true, true);
+    }
+}
+
+
+// Picks the attribute class and read buffer type that match the basic type
+// and collection type in 'config' and runs the lid space compaction check
+// with them. Weighted sets are read through the weighted buffer types.
+// Aborts on a basic type that is not listed.
+void
+AttributeTest::testCompactLidSpace(const Config &config)
+{
+    const bool weightedSet = (config.collectionType() == CollectionType::WSET);
+
+    switch (config.basicType().type()) {
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+    case BasicType::INT8:
+    case BasicType::INT16:
+    case BasicType::INT32:
+    case BasicType::INT64:
+        if (weightedSet) {
+            testCompactLidSpace<IntegerAttribute, IntegerAttribute::WeightedInt>(config);
+        } else {
+            testCompactLidSpace<IntegerAttribute, IntegerAttribute::largeint_t>(config);
+        }
+        break;
+    case BasicType::FLOAT:
+    case BasicType::DOUBLE:
+        if (weightedSet) {
+            testCompactLidSpace<FloatingPointAttribute, FloatingPointAttribute::WeightedFloat>(config);
+        } else {
+            testCompactLidSpace<FloatingPointAttribute, double>(config);
+        }
+        break;
+    case BasicType::STRING:
+        if (weightedSet) {
+            testCompactLidSpace<StringAttribute, StringAttribute::WeightedString>(config);
+        } else {
+            testCompactLidSpace<StringAttribute, vespalib::string>(config);
+        }
+        break;
+    default:
+        abort();
+    }
+}
+
+
+// Runs the lid space compaction check for every supported combination of
+// basic type and collection type. The clstmp directory is recreated up front
+// and removed again at the end.
+void
+AttributeTest::testCompactLidSpace()
+{
+    vespalib::rmdir(clstmp, true);
+    vespalib::mkdir(clstmp);
+
+    // small unsigned integer types: single value only
+    TEST_DO(testCompactLidSpace(Config(BasicType::UINT1, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::UINT2, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::UINT4, CollectionType::SINGLE)));
+
+    // signed integer types
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT8, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT8, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT8, CollectionType::WSET)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT16, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT16, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT16, CollectionType::WSET)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT32, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT32, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT32, CollectionType::WSET)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT64, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT64, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::INT64, CollectionType::WSET)));
+
+    // floating point types
+    TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, CollectionType::WSET)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, CollectionType::WSET)));
+
+    // strings
+    TEST_DO(testCompactLidSpace(Config(BasicType::STRING, CollectionType::SINGLE)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::STRING, CollectionType::ARRAY)));
+    TEST_DO(testCompactLidSpace(Config(BasicType::STRING, CollectionType::WSET)));
+
+    vespalib::rmdir(clstmp, true);
+}
+
+// Compares the address space usage reported by an attribute before and after
+// it is populated.
+//
+// Attributes with an enum store / multi-value mapping must start at zero
+// usage, grow when populated and keep a fixed limit. Attributes without them
+// must keep reporting the default usage.
+//
+// config     - basic type and collection type to test.
+// fastSearch - value passed to setFastSearch().
+template <typename AttributeType>
+void
+AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch)
+{
+    uint32_t numDocs = 10;
+    Config cfg = config;
+    cfg.setFastSearch(fastSearch);
+
+    const char * fastSearchSuffix = fastSearch ? "-fs" : "";
+    vespalib::string attrName = asuDir + "/" + config.basicType().asString() + "-" +
+                                config.collectionType().asString() + fastSearchSuffix;
+
+    AttributePtr attrPtr = AttributeFactory::createAttribute(attrName, cfg);
+    addDocs(attrPtr, numDocs);
+    AddressSpaceUsage before = attrPtr->getAddressSpaceUsage();
+    populate(static_cast<AttributeType &>(*attrPtr.get()), 5);
+    AddressSpaceUsage after = attrPtr->getAddressSpaceUsage();
+
+    // enum store
+    if (!attrPtr->hasEnum()) {
+        LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str());
+        EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
+        EXPECT_EQUAL(after.enumStoreUsage(), before.enumStoreUsage());
+        EXPECT_EQUAL(AddressSpaceUsage::defaultEnumStoreUsage(), after.enumStoreUsage());
+    } else {
+        LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str());
+        EXPECT_EQUAL(before.enumStoreUsage().used(), 0u);
+        EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used());
+        EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit());
+        // 2^35
+        EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize()
+    }
+
+    // multi-value mapping
+    if (!attrPtr->hasMultiValue()) {
+        LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT multi-value", attrName.c_str());
+        EXPECT_EQUAL(before.multiValueUsage().used(), 0u);
+        EXPECT_EQUAL(after.multiValueUsage(), before.multiValueUsage());
+        EXPECT_EQUAL(AddressSpaceUsage::defaultMultiValueUsage(), after.multiValueUsage());
+    } else {
+        LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has multi-value", attrName.c_str());
+        EXPECT_EQUAL(before.multiValueUsage().used(), 0u);
+        EXPECT_GREATER(after.multiValueUsage().used(), before.multiValueUsage().used());
+        EXPECT_EQUAL(after.multiValueUsage().limit(), before.multiValueUsage().limit());
+        // 2^27
+        EXPECT_EQUAL(134217728u, after.multiValueUsage().limit()); // multivalue::Index32::offsetSize()
+    }
+}
+
+// Runs the address space usage check for 'config' twice: first without and
+// then with fast search.
+template <typename AttributeType>
+void
+AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config)
+{
+    for (const bool fastSearch : {false, true}) {
+        requireThatAddressSpaceUsageIsReported<AttributeType>(config, fastSearch);
+    }
+}
+
+// Runs the address space usage check on single value and array attributes of
+// type int32, float and string. The asuDir directory is recreated up front
+// and removed again at the end, the same way testCompactLidSpace() handles
+// its clstmp directory, so the test leaves nothing behind.
+void
+AttributeTest::requireThatAddressSpaceUsageIsReported()
+{
+    vespalib::rmdir(asuDir, true);
+    vespalib::mkdir(asuDir);
+    TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::SINGLE)));
+    TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::ARRAY)));
+    TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::SINGLE)));
+    TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::ARRAY)));
+    TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::SINGLE)));
+    TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::ARRAY)));
+    // Clean up the scratch directory created above.
+    vespalib::rmdir(asuDir, true);
+}
+
+// Test driver: initializes the test kit, passes the program name (when one
+// is available) to DummyFileHeaderContext::setCreator() and then runs every
+// test in a fixed order.
+int AttributeTest::Main()
+{
+    TEST_INIT("attribute_test");
+
+    const bool haveProgramName = (_argc > 0);
+    if (haveProgramName) {
+        DummyFileHeaderContext::setCreator(_argv[0]);
+    }
+
+    // naming, loading and saving
+    testBaseName();
+    testReload();
+    testHasLoadData();
+    testMemorySaver();
+
+    // collection types
+    testSingle();
+    testArray();
+    testWeightedSet();
+
+    // updates, status and generation handling
+    testArithmeticValueUpdate();
+    testArithmeticWithUndefinedValue();
+    testMapValueUpdate();
+    testStatus();
+    testNullProtection();
+    testGeneration();
+    testCreateSerialNum();
+
+    // lid space and address space
+    TEST_DO(testCompactLidSpace());
+    TEST_DO(requireThatAddressSpaceUsageIsReported());
+
+    TEST_DONE();
+}
+
+}
+
+
+// NOTE(review): presumably this macro generates the program entry point that
+// runs search::AttributeTest::Main() - confirm against the test kit.
+TEST_APPHOOK(search::AttributeTest);
diff --git a/searchlib/src/tests/attribute/attribute_test.sh b/searchlib/src/tests/attribute/attribute_test.sh
new file mode 100644
index 00000000000..89c52129b74
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+$VALGRIND ./searchlib_attribute_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
+rm -rf clstmp
+rm -rf alstmp
diff --git a/searchlib/src/tests/attribute/attributebenchmark.cpp b/searchlib/src/tests/attribute/attributebenchmark.cpp
new file mode 100644
index 00000000000..88446ef71f7
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributebenchmark.cpp
@@ -0,0 +1,678 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "attributesearcher.h"
+#include "attributeupdater.h"
+#include <vespa/searchlib/util/randomgenerator.h>
+#include "runnable.h"
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/vespalib/util/sync.h>
+#include <iostream>
+#include <fstream>
+#include <vespa/log/log.h>
+
+LOG_SETUP("attributebenchmark");
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+using vespalib::Monitor;
+using vespalib::MonitorGuard;
+using std::shared_ptr;
+
+typedef std::vector<uint32_t> NumVector;
+typedef std::vector<vespalib::string> StringVector;
+typedef AttributeVector::SP AttributePtr;
+typedef AttributeVector::DocId DocId;
+typedef search::attribute::Config AttrConfig;
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+
+namespace search {
+
+/**
+ * Command line application that benchmarks populate, update and search
+ * operations on one attribute vector and prints the results as XML on stdout.
+ */
+class AttributeBenchmark : public FastOS_Application
+{
+private:
+    /** Benchmark parameters. All counters default to 0 and all flags to false, except _searchersOnly. */
+    class Config {
+    public:
+        vespalib::string _attribute;
+        uint32_t _numDocs;
+        uint32_t _numUpdates;
+        uint32_t _numValues;
+        uint32_t _numSearchers;
+        uint32_t _numQueries;
+        bool _searchersOnly;
+        bool _validate;
+        uint32_t _populateRuns;
+        uint32_t _updateRuns;
+        uint32_t _commitFreq;
+        uint32_t _minValueCount;
+        uint32_t _maxValueCount;
+        uint32_t _minStringLen;
+        uint32_t _maxStringLen;
+        uint32_t _seed;
+        bool _writeAttribute;
+        int64_t _rangeStart;
+        int64_t _rangeEnd;
+        int64_t _rangeDelta;
+        bool _rangeSearch;
+        uint32_t _prefixLength;
+        bool _prefixSearch;
+
+        Config()
+            : _attribute(""), _numDocs(0), _numUpdates(0), _numValues(0),
+              _numSearchers(0), _numQueries(0), _searchersOnly(true), _validate(false),
+              _populateRuns(0), _updateRuns(0), _commitFreq(0),
+              _minValueCount(0), _maxValueCount(0), _minStringLen(0), _maxStringLen(0), _seed(0),
+              _writeAttribute(false), _rangeStart(0), _rangeEnd(0), _rangeDelta(0), _rangeSearch(false),
+              _prefixLength(0), _prefixSearch(false)
+        {}
+        /** Prints the configuration as a <config> XML element on stdout. */
+        void printXML() const;
+    };
+
+    /** Records getrusage() samples as differences relative to the last reset() point. */
+    class Resource {
+    private:
+        std::vector<struct rusage> _usages;
+        struct rusage _reset;
+
+    public:
+        Resource() : _usages(), _reset() { reset(); }
+        /** Samples the current resource usage as the new baseline. */
+        void reset() {
+            getrusage(0, &_reset);
+        }
+        /** Stores the usage accumulated since the baseline. */
+        void saveUsage() {
+            struct rusage now;
+            getrusage(0, &now);
+            struct rusage usage = computeDifference(_reset, now);
+            _usages.push_back(usage);
+        }
+        /**
+         * Prints the most recently saved usage (times in milliseconds).
+         * Does nothing when no usage has been saved yet; calling back() on an
+         * empty vector would be undefined behaviour.
+         */
+        void printLastXML(uint32_t opCount) {
+            (void) opCount;
+            if (_usages.empty()) {
+                return;
+            }
+            struct rusage & usage = _usages.back();
+            std::cout << "<ru_utime>" << usage.ru_utime.tv_sec * 1000 + usage.ru_utime.tv_usec / 1000
+                      << "</ru_utime>" << std::endl;
+            std::cout << "<ru_stime>" << usage.ru_stime.tv_sec * 1000 + usage.ru_stime.tv_usec / 1000
+                      << "</ru_stime>" << std::endl;
+            std::cout << "<ru_nvcsw>" << usage.ru_nvcsw << "</ru_nvcsw>" << std::endl;
+            std::cout << "<ru_nivcsw>" << usage.ru_nivcsw << "</ru_nivcsw>" << std::endl;
+        }
+        static struct rusage computeDifference(struct rusage & first, struct rusage & second);
+    };
+
+    FastOS_ThreadPool * _threadPool;   // owned; created in Main(), released in the destructor
+    Config _config;
+    RandomGenerator _rndGen;
+
+    void init(const Config & config);
+    void usage();
+
+    // benchmark helper methods
+    void addDocs(const AttributePtr & ptr, uint32_t numDocs);
+    template <typename Vector, typename T, typename BT>
+    void benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id);
+    template <typename Vector, typename T, typename BT>
+    void benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id);
+
+    template <typename T>
+    std::vector<vespalib::string> prepareForPrefixSearch(const std::vector<T> & values) const;
+    template <typename T>
+    void benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values);
+    template <typename Vector, typename T, typename BT>
+    void benchmarkSearchWithUpdater(const AttributePtr & ptr,
+                                    const std::vector<T> & values);
+
+    template <typename Vector, typename T, typename BT>
+    void benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values);
+
+    // Numeric Attribute
+    void benchmarkNumeric(const AttributePtr & ptr);
+
+    // String Attribute
+    void benchmarkString(const AttributePtr & ptr);
+
+
+public:
+    AttributeBenchmark() : _threadPool(NULL), _config(), _rndGen() {}
+    ~AttributeBenchmark() {
+        delete _threadPool;   // deleting a null pointer is a no-op
+    }
+    int Main();
+};
+
+
+/**
+ * Prints every benchmark parameter except _writeAttribute as one child
+ * element of a <config> element on stdout. Booleans are written as
+ * "true"/"false".
+ */
+void
+AttributeBenchmark::Config::printXML() const
+{
+    std::cout << "<config>" << std::endl;
+    std::cout << "<attribute>" << _attribute << "</attribute>" << std::endl;
+    std::cout << "<num-docs>" << _numDocs << "</num-docs>" << std::endl;
+    std::cout << "<num-updates>" << _numUpdates << "</num-updates>" << std::endl;
+    std::cout << "<num-values>" << _numValues << "</num-values>" << std::endl;
+    std::cout << "<num-searchers>" << _numSearchers << "</num-searchers>" << std::endl;
+    std::cout << "<num-queries>" << _numQueries << "</num-queries>" << std::endl;
+    std::cout << "<searchers-only>" << (_searchersOnly ? "true" : "false") << "</searchers-only>" << std::endl;
+    std::cout << "<validate>" << (_validate ? "true" : "false") << "</validate>" << std::endl;
+    std::cout << "<populate-runs>" << _populateRuns << "</populate-runs>" << std::endl;
+    std::cout << "<update-runs>" << _updateRuns << "</update-runs>" << std::endl;
+    std::cout << "<commit-freq>" << _commitFreq << "</commit-freq>" << std::endl;
+    std::cout << "<min-value-count>" << _minValueCount << "</min-value-count>" << std::endl;
+    std::cout << "<max-value-count>" << _maxValueCount << "</max-value-count>" << std::endl;
+    std::cout << "<min-string-len>" << _minStringLen << "</min-string-len>" << std::endl;
+    std::cout << "<max-string-len>" << _maxStringLen << "</max-string-len>" << std::endl;
+    std::cout << "<seed>" << _seed << "</seed>" << std::endl;
+    std::cout << "<range-start>" << _rangeStart << "</range-start>" << std::endl;
+    std::cout << "<range-end>" << _rangeEnd << "</range-end>" << std::endl;
+    std::cout << "<range-delta>" << _rangeDelta << "</range-delta>" << std::endl;
+    std::cout << "<range-search>" << (_rangeSearch ? "true" : "false") << "</range-search>" << std::endl;
+    // The closing tag must match the opening tag, otherwise the output is not well-formed XML.
+    std::cout << "<prefix-length>" << _prefixLength << "</prefix-length>" << std::endl;
+    std::cout << "<prefix-search>" << (_prefixSearch ? "true" : "false") << "</prefix-search>" << std::endl;
+    std::cout << "</config>" << std::endl;
+}
+
+/** Adopts the given configuration and seeds the random generator with its seed. */
+void
+AttributeBenchmark::init(const Config & config)
+{
+    _config = config;
+    _rndGen.srand(config._seed);
+}
+
+
+//-----------------------------------------------------------------------------
+// Benchmark helper methods
+//-----------------------------------------------------------------------------
+/**
+ * Adds numDocs documents to the attribute and asserts that they were
+ * assigned the doc ids [0, numDocs).
+ */
+void
+AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs)
+{
+    DocId firstDocId;
+    DocId lastDocId;
+    const bool added = ptr->addDocs(firstDocId, lastDocId, numDocs);
+    (void) added;   // only read by the assert below
+    assert(added);
+    assert(firstDocId == 0);
+    assert(lastDocId + 1 == numDocs);
+    assert(ptr->getNumDocs() == numDocs);
+}
+
+/**
+ * Runs one populate pass over the attribute with an AttributeUpdater and
+ * prints its status inside a <populate id='...'> element. When validation
+ * is enabled the number of passed asserts is printed as an XML comment.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id)
+{
+    typedef AttributeUpdater<Vector, T, BT> Updater;
+
+    std::cout << "<!-- Populate " << _config._numDocs << " documents -->" << std::endl;
+    Updater populator(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+                      _config._minValueCount, _config._maxValueCount);
+    populator.populate();
+
+    std::cout << "<populate id='" << id << "'>" << std::endl;
+    populator.getStatus().printXML();
+    std::cout << "</populate>" << std::endl;
+
+    if (!_config._validate) {
+        return;
+    }
+    std::cout << "<!-- All " << populator.getValidator().getTotalCnt()
+              << " asserts passed -->" << std::endl;
+}
+
+/**
+ * Applies the configured number of updates with an AttributeUpdater and
+ * prints its status inside an <update id='...'> element. When validation
+ * is enabled the number of passed asserts is printed as an XML comment.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id)
+{
+    typedef AttributeUpdater<Vector, T, BT> Updater;
+
+    std::cout << "<!-- Apply " << _config._numUpdates << " updates -->" << std::endl;
+    Updater modifier(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+                     _config._minValueCount, _config._maxValueCount);
+    modifier.update(_config._numUpdates);
+
+    std::cout << "<update id='" << id << "'>" << std::endl;
+    modifier.getStatus().printXML();
+    std::cout << "</update>" << std::endl;
+
+    if (!_config._validate) {
+        return;
+    }
+    std::cout << "<!-- All " << modifier.getValidator().getTotalCnt()
+              << " asserts passed -->" << std::endl;
+}
+
+/** Generic case: value types without a specialization yield no prefix strings. */
+template <typename T>
+std::vector<vespalib::string>
+AttributeBenchmark::prepareForPrefixSearch(const std::vector<T> & values) const
+{
+    (void) values;
+    std::vector<vespalib::string> noPrefixes;
+    return noPrefixes;
+}
+
+/**
+ * String case: returns, for every value, its leading substring of at most
+ * _config._prefixLength characters, in the same order as the input.
+ */
+template <>
+std::vector<vespalib::string>
+AttributeBenchmark::prepareForPrefixSearch(const std::vector<AttributeVector::WeightedString> & values) const
+{
+    std::vector<vespalib::string> prefixes;
+    prefixes.reserve(values.size());
+    for (const AttributeVector::WeightedString & value : values) {
+        prefixes.push_back(value.getValue().substr(0, _config._prefixLength));
+    }
+    return prefixes;
+}
+
+/**
+ * Starts _config._numSearchers searcher threads (range, prefix or find
+ * searchers depending on the configuration), waits for all of them and
+ * prints a summary per searcher followed by the merged total.
+ * Does nothing when no searchers are configured.
+ */
+template <typename T>
+void
+AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values)
+{
+    std::vector<AttributeSearcher *> searchers;
+    if (_config._numSearchers == 0) {
+        return;
+    }
+
+    std::cout << "<!-- Starting " << _config._numSearchers << " searcher threads with "
+              << _config._numQueries << " queries each -->" << std::endl;
+
+    std::vector<vespalib::string> prefixStrings = prepareForPrefixSearch(values);
+
+    for (uint32_t id = 0; id < _config._numSearchers; ++id) {
+        AttributeSearcher * searcher = NULL;
+        if (_config._rangeSearch) {
+            RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta);
+            searcher = new AttributeRangeSearcher(id, ptr, spec, _config._numQueries);
+        } else if (_config._prefixSearch) {
+            searcher = new AttributePrefixSearcher(id, ptr, prefixStrings, _config._numQueries);
+        } else {
+            searcher = new AttributeFindSearcher<T>(id, ptr, values, _config._numQueries);
+        }
+        searchers.push_back(searcher);
+        _threadPool->NewThread(searcher);
+    }
+
+    // Wait for every searcher before reporting anything.
+    for (uint32_t id = 0; id < searchers.size(); ++id) {
+        searchers[id]->join();
+    }
+
+    AttributeSearcherStatus totalStatus;
+    for (uint32_t id = 0; id < searchers.size(); ++id) {
+        AttributeSearcher * searcher = searchers[id];
+        std::cout << "<searcher-summary id='" << id << "'>" << std::endl;
+        searcher->getStatus().printXML();
+        std::cout << "</searcher-summary>" << std::endl;
+        totalStatus.merge(searcher->getStatus());
+        delete searcher;
+    }
+    std::cout << "<total-searcher-summary>" << std::endl;
+    totalStatus.printXML();
+    std::cout << "</total-searcher-summary>" << std::endl;
+}
+
+/**
+ * Runs the search benchmark while one updater thread is modifying the
+ * attribute, then stops the updater and prints its summary.
+ * Does nothing when no searchers are configured.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr,
+                                               const std::vector<T> & values)
+{
+    if (_config._numSearchers == 0) {
+        return;
+    }
+
+    typedef AttributeUpdaterThread<Vector, T, BT> UpdaterThread;
+
+    std::cout << "<!-- Starting 1 updater thread -->" << std::endl;
+    UpdaterThread background(ptr, values, _rndGen, _config._validate, _config._commitFreq,
+                             _config._minValueCount, _config._maxValueCount);
+    _threadPool->NewThread(&background);
+    benchmarkSearch(ptr, values);
+    background.stop();
+    background.join();
+
+    std::cout << "<updater-summary>" << std::endl;
+    background.getStatus().printXML();
+    std::cout << "</updater-summary>" << std::endl;
+    if (_config._validate) {
+        std::cout << "<!-- All " << background.getValidator().getTotalCnt()
+                  << " asserts passed -->" << std::endl;
+    }
+}
+
+/**
+ * Full benchmark for one attribute: add the documents, run the populate
+ * passes, run the update passes (skipped when no updates are configured),
+ * run the search benchmark with or without a concurrent updater, and
+ * finally close the thread pool.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values)
+{
+    addDocs(ptr, _config._numDocs);
+
+    for (uint32_t run = 0; run < _config._populateRuns; ++run) {
+        benchmarkPopulate<Vector, T, BT>(ptr, values, run);
+    }
+
+    const uint32_t updateRuns = (_config._numUpdates > 0) ? _config._updateRuns : 0;
+    for (uint32_t run = 0; run < updateRuns; ++run) {
+        benchmarkUpdate<Vector, T, BT>(ptr, values, run);
+    }
+
+    if (!_config._searchersOnly) {
+        benchmarkSearchWithUpdater<Vector, T, BT>(ptr, values);
+    } else {
+        benchmarkSearch(ptr, values);
+    }
+
+    _threadPool->Close();
+}
+
+
+//-----------------------------------------------------------------------------
+// Numeric Attribute
+//-----------------------------------------------------------------------------
+/**
+ * Builds the value set for an integer attribute and benchmarks it.
+ * For range search the values are 0.._numValues-1, otherwise random
+ * integers. Weights are random and only attached when the attribute is
+ * a weighted set.
+ */
+void
+AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr)
+{
+    typedef AttributeVector::WeightedInt WeightedInt;
+
+    NumVector values;
+    if (!_config._rangeSearch) {
+        _rndGen.fillRandomIntegers(values, _config._numValues);
+    } else {
+        values.reserve(_config._numValues);
+        for (uint32_t value = 0; value < _config._numValues; ++value) {
+            values.push_back(value);
+        }
+    }
+
+    std::vector<int32_t> weights;
+    _rndGen.fillRandomIntegers(weights, _config._numValues);
+
+    std::vector<WeightedInt> weightedVector;
+    weightedVector.reserve(values.size());
+    for (size_t idx = 0; idx < values.size(); ++idx) {
+        if (ptr->hasWeightedSetType()) {
+            weightedVector.push_back(WeightedInt(values[idx], weights[idx]));
+        } else {
+            weightedVector.push_back(WeightedInt(values[idx]));
+        }
+    }
+    benchmarkAttribute<IntegerAttribute, WeightedInt, WeightedInt>(ptr, weightedVector);
+}
+
+
+//-----------------------------------------------------------------------------
+// String Attribute
+//-----------------------------------------------------------------------------
+/**
+ * Builds the value set for a string attribute and benchmarks it.
+ * Strings are random with a length between the configured minimum and
+ * maximum. Weights are random and only attached when the attribute is a
+ * weighted set.
+ */
+void
+AttributeBenchmark::benchmarkString(const AttributePtr & ptr)
+{
+    typedef AttributeVector::WeightedString WeightedString;
+
+    StringVector strings;
+    _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen);
+
+    std::vector<int32_t> weights;
+    _rndGen.fillRandomIntegers(weights, _config._numValues);
+
+    std::vector<WeightedString> weightedVector;
+    weightedVector.reserve(strings.size());
+    for (size_t idx = 0; idx < strings.size(); ++idx) {
+        if (ptr->hasWeightedSetType()) {
+            weightedVector.push_back(WeightedString(strings[idx], weights[idx]));
+        } else {
+            weightedVector.push_back(WeightedString(strings[idx]));
+        }
+    }
+    benchmarkAttribute<StringAttribute, WeightedString, WeightedString>(ptr, weightedVector);
+}
+
+
+//-----------------------------------------------------------------------------
+// Resource utilization
+//-----------------------------------------------------------------------------
+/**
+ * Converts a timeval to microseconds using 64-bit arithmetic, so the
+ * multiplication cannot overflow on platforms where tv_sec is 32 bits wide.
+ */
+static uint64_t
+timevalToMicros(const struct timeval & tv)
+{
+    return static_cast<uint64_t>(tv.tv_sec) * 1000000u + static_cast<uint64_t>(tv.tv_usec);
+}
+
+/**
+ * Computes the resource usage accumulated between two getrusage() samples.
+ *
+ * @param first  the earlier sample
+ * @param second the later sample
+ * @return CPU times and event counters as (second - first); the memory
+ *         size fields (ru_maxrss, ru_ixrss, ru_idrss, ru_isrss) are copied
+ *         from the later sample unchanged.
+ */
+struct rusage
+AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second)
+{
+    // Zero-initialize so that any member not assigned below has a defined value.
+    struct rusage result = {};
+
+    // utime
+    uint64_t resultutime = timevalToMicros(second.ru_utime) - timevalToMicros(first.ru_utime);
+    result.ru_utime.tv_sec = resultutime / 1000000;
+    result.ru_utime.tv_usec = resultutime % 1000000;
+
+    // stime
+    uint64_t resultstime = timevalToMicros(second.ru_stime) - timevalToMicros(first.ru_stime);
+    result.ru_stime.tv_sec = resultstime / 1000000;
+    result.ru_stime.tv_usec = resultstime % 1000000;
+
+    // Sizes are reported as absolute values of the later sample.
+    result.ru_maxrss = second.ru_maxrss;
+    result.ru_ixrss = second.ru_ixrss;
+    result.ru_idrss = second.ru_idrss;
+    result.ru_isrss = second.ru_isrss;
+
+    // Event counters are reported as deltas.
+    result.ru_minflt = second.ru_minflt - first.ru_minflt;
+    result.ru_majflt = second.ru_majflt - first.ru_majflt;
+    result.ru_nswap = second.ru_nswap - first.ru_nswap;
+    result.ru_inblock = second.ru_inblock - first.ru_inblock;
+    result.ru_oublock = second.ru_oublock - first.ru_oublock;
+    result.ru_msgsnd = second.ru_msgsnd - first.ru_msgsnd;
+    result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv;
+    result.ru_nsignals = second.ru_nsignals - first.ru_nsignals;
+    result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw;
+    result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw;
+
+    return result;
+}
+
+
+/**
+ * Prints the command line synopsis on stdout. The listed attribute names
+ * are exactly the ones Main() recognizes.
+ */
+void
+AttributeBenchmark::usage()
+{
+    std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl;
+    std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl;
+    std::cout << " [-c commitFrequency] [-l minValueCount] [-h maxValueCount]" << std::endl;
+    std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl;
+    std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl;
+    std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl;
+    std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl;
+    std::cout << " <attribute>" << std::endl;
+    std::cout << " <attribute> : s-int32, a-int32, ws-int32" << std::endl;
+    std::cout << " s-fs-int32, a-fs-int32, ws-fs-int32" << std::endl;
+    std::cout << " s-string, a-string, ws-string" << std::endl;
+    std::cout << " s-fs-string, a-fs-string, ws-fs-string" << std::endl;
+}
+
+/**
+ * Application entry point: parses the command line, creates the requested
+ * attribute and runs the benchmark on it.
+ *
+ * @return 0 on success; -1 (after printing usage) when the options are
+ *         invalid, the attribute argument is missing, or the attribute
+ *         name is not one of the supported ones.
+ */
+int
+AttributeBenchmark::Main()
+{
+    // Defaults; each one can be overridden by a command line option below.
+    Config dc;
+    dc._numDocs = 50000;
+    dc._numUpdates = 50000;
+    dc._numValues = 1000;
+    dc._numSearchers = 0;
+    dc._numQueries = 1000;
+    dc._searchersOnly = true;
+    dc._validate = false;
+    dc._populateRuns = 1;
+    dc._updateRuns = 1;
+    dc._commitFreq = 1000;
+    dc._minValueCount = 0;
+    dc._maxValueCount = 20;
+    dc._minStringLen = 1;
+    dc._maxStringLen = 50;
+    dc._seed = 555;
+    dc._writeAttribute = false;
+    dc._rangeStart = 0;
+    dc._rangeEnd = 1000;
+    dc._rangeDelta = 10;
+    dc._rangeSearch = false;
+    dc._prefixLength = 2;
+    dc._prefixSearch = false;
+
+    int idx = 1;
+    char opt;
+    const char * arg;
+    bool optError = false;
+    while ((opt = GetOpt("n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw", arg, idx)) != -1) {
+        switch (opt) {
+        case 'n':
+            dc._numDocs = atoi(arg);
+            break;
+        case 'u':
+            dc._numUpdates = atoi(arg);
+            break;
+        case 'v':
+            dc._numValues = atoi(arg);
+            break;
+        case 's':
+            dc._numSearchers = atoi(arg);
+            break;
+        case 'q':
+            dc._numQueries = atoi(arg);
+            break;
+        case 'p':
+            dc._populateRuns = atoi(arg);
+            break;
+        case 'r':
+            dc._updateRuns = atoi(arg);
+            break;
+        case 'c':
+            dc._commitFreq = atoi(arg);
+            break;
+        case 'l':
+            dc._minValueCount = atoi(arg);
+            break;
+        case 'h':
+            dc._maxValueCount = atoi(arg);
+            break;
+        case 'i':
+            dc._minStringLen = atoi(arg);
+            break;
+        case 'a':
+            dc._maxStringLen = atoi(arg);
+            break;
+        case 'e':
+            dc._seed = atoi(arg);
+            break;
+        case 'S':
+            dc._rangeStart = strtoll(arg, NULL, 10);
+            break;
+        case 'E':
+            dc._rangeEnd = strtoll(arg, NULL, 10);
+            break;
+        case 'D':
+            dc._rangeDelta = strtoll(arg, NULL, 10);
+            break;
+        case 'L':
+            dc._prefixLength = atoi(arg);
+            break;
+        case 'b':
+            dc._searchersOnly = false;
+            break;
+        case 'R':
+            dc._rangeSearch = true;
+            break;
+        case 'P':
+            dc._prefixSearch = true;
+            break;
+        case 't':
+            dc._validate = true;
+            break;
+        case 'w':
+            dc._writeAttribute = true;
+            break;
+        default:
+            optError = true;
+            break;
+        }
+    }
+
+    // Exactly one positional argument (the attribute name) must follow the options.
+    if (_argc != (idx + 1) || optError) {
+        usage();
+        return -1;
+    }
+
+    dc._attribute = vespalib::string(_argv[idx]);
+
+    // The supported attributes: name, text printed in the XML comment, and how to configure them.
+    struct AttributeSpec {
+        const char * _name;
+        const char * _description;
+        decltype(BasicType::INT32) _basicType;
+        decltype(CollectionType::SINGLE) _collectionType;
+        bool _fastSearch;
+    };
+    static const AttributeSpec specs[] = {
+        { "s-int32",      "SingleValueNumericAttribute<int32_t>",                   BasicType::INT32,  CollectionType::SINGLE, false },
+        { "a-int32",      "MultiValueNumericAttribute<int32_t> (array)",            BasicType::INT32,  CollectionType::ARRAY,  false },
+        { "ws-int32",     "MultiValueNumericAttribute<int32_t> (wset)",             BasicType::INT32,  CollectionType::WSET,   false },
+        { "s-fs-int32",   "SingleValueNumericPostingAttribute<int32_t>",            BasicType::INT32,  CollectionType::SINGLE, true  },
+        { "a-fs-int32",   "MultiValueNumericPostingAttribute<int32_t> (array)",     BasicType::INT32,  CollectionType::ARRAY,  true  },
+        { "ws-fs-int32",  "MultiValueNumericPostingAttribute<int32_t> (wset)",      BasicType::INT32,  CollectionType::WSET,   true  },
+        { "s-string",     "SingleValueStringAttribute",                             BasicType::STRING, CollectionType::SINGLE, false },
+        { "a-string",     "ArrayStringAttribute (array)",                           BasicType::STRING, CollectionType::ARRAY,  false },
+        { "ws-string",    "WeightedSetStringAttribute (wset)",                      BasicType::STRING, CollectionType::WSET,   false },
+        { "s-fs-string",  "SingleValueStringPostingAttribute (single fast search)", BasicType::STRING, CollectionType::SINGLE, true  },
+        { "a-fs-string",  "ArrayStringPostingAttribute (array fast search)",        BasicType::STRING, CollectionType::ARRAY,  true  },
+        { "ws-fs-string", "WeightedSetStringPostingAttribute (wset fast search)",   BasicType::STRING, CollectionType::WSET,   true  }
+    };
+
+    const AttributeSpec * spec = NULL;
+    for (size_t i = 0; i < sizeof(specs) / sizeof(specs[0]); ++i) {
+        if (dc._attribute == specs[i]._name) {
+            spec = &specs[i];
+            break;
+        }
+    }
+    // Reject unknown names up front: without an attribute there is nothing to
+    // benchmark, and the -w option would dereference a null attribute pointer.
+    if (spec == NULL) {
+        std::cerr << "unknown attribute '" << dc._attribute << "'" << std::endl;
+        usage();
+        return -1;
+    }
+
+    _threadPool = new FastOS_ThreadPool(256000);
+
+    std::cout << "<attribute-benchmark>" << std::endl;
+    init(dc);
+    _config.printXML();
+
+    std::cout << "<!-- Benchmark " << spec->_description << " -->" << std::endl;
+    AttrConfig cfg(spec->_basicType, spec->_collectionType);
+    if (spec->_fastSearch) {
+        cfg.setFastSearch(true);
+    }
+    AttributePtr ptr = AttributeFactory::createAttribute(spec->_name, cfg);
+    if (spec->_basicType == BasicType::STRING) {
+        benchmarkString(ptr);
+    } else {
+        benchmarkNumeric(ptr);
+    }
+
+    if (dc._writeAttribute) {
+        std::cout << "<!-- Writing attribute to disk -->" << std::endl;
+        ptr->saveAs(ptr->getBaseFileName());
+    }
+
+    std::cout << "</attribute-benchmark>" << std::endl;
+
+    return 0;
+}
+}
+
+/** Runs the benchmark application and returns its result as the process exit code. */
+int main(int argc, char ** argv)
+{
+    search::AttributeBenchmark benchmark;
+    const int exitCode = benchmark.Entry(argc, argv);
+    return exitCode;
+}
+
diff --git a/searchlib/src/tests/attribute/attributebenchmark.rb b/searchlib/src/tests/attribute/attributebenchmark.rb
new file mode 100644
index 00000000000..44b08ec4389
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributebenchmark.rb
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vectors = ["sv-num-new", "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"]
+num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000] # document counts passed as -n
+unique_percent = [0.001, 0.01, 0.05, 0.20, 0.50] # fraction of the document count used as number of unique values
+
+vectors.each do |vector| # NOTE(review): these names do not look like the attribute names attributebenchmark.cpp accepts (e.g. s-int32) - confirm
+ num_docs.each do |num|
+ unique_percent.each do |percent|
+ unique = num * percent # a Float, so it is interpolated as e.g. "500.0" below
+ command = "./attributebenchmark -n #{num} -u 1000000 -v #{unique} -p 2 -r 1 -s 1 -q 1000 #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-p2-r1-s1-q1000.log 2>&1"
+ puts command
+ `#{command}`
+ s = 1 # number of searcher threads for the runs below (-s)
+ 5.times do # runs with 1, 2, 4, 8 and 16 searchers
+ command = "./attributebenchmark -n #{num} -v #{unique} -p 1 -r 0 -s #{s} -q 100 -b #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-s#{s}-q100-b.log 2>&1"
+ puts command
+ `#{command}`
+ s = s*2;
+ end
+ end
+ end
+end
diff --git a/searchlib/src/tests/attribute/attributefilewriter/.gitignore b/searchlib/src/tests/attribute/attributefilewriter/.gitignore
new file mode 100644
index 00000000000..ea6a0e03bf2
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributefilewriter/.gitignore
@@ -0,0 +1 @@
+searchlib_attributefilewriter_test_app
diff --git a/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt
new file mode 100644
index 00000000000..a1d859bbfb9
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributefilewriter_test_app
+ SOURCES
+ attributefilewriter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributefilewriter_test_app COMMAND searchlib_attributefilewriter_test_app)
diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
new file mode 100644
index 00000000000..acf61cd58bb
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp
@@ -0,0 +1,116 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributefilewriter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/searchlib/attribute/attributefilewriter.h>
+#include <vespa/searchlib/attribute/attributefilebufferwriter.h>
+#include <vespa/searchlib/util/fileutil.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <vespa/searchlib/common/tunefileinfo.h>
+#include <vespa/searchlib/common/fileheadercontext.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+
+using search::index::DummyFileHeaderContext;
+
+namespace search
+{
+
+namespace
+{
+
+vespalib::string testFileName("test.dat");
+vespalib::string hello("Hello world");
+
+/** Deletes the test file; the result of the delete is ignored. */
+void removeTestFile()
+{
+    FastOS_File::Delete(testFileName.c_str());
+}
+
+/**
+ * Test fixture holding an AttributeFileWriter together with the objects it
+ * is constructed from. The test file is removed both when the fixture is
+ * created and when it is destroyed.
+ */
+struct Fixture {
+    TuneFileAttributes           _tuneFileAttributes;
+    DummyFileHeaderContext       _fileHeaderContext;
+    IAttributeSaveTarget::Config _cfg;
+    const vespalib::string       _desc;
+    AttributeFileWriter          _writer;
+
+    Fixture()
+        : _tuneFileAttributes(),
+          _fileHeaderContext(),
+          _cfg(),
+          _desc("Attribute file sample description"),
+          _writer(_tuneFileAttributes, _fileHeaderContext, _cfg, _desc)
+    {
+        removeTestFile();
+    }
+
+    ~Fixture() { removeTestFile(); }
+};
+
+}
+
+
+// Opening and closing the writer without writing anything must give an empty file.
+TEST_F("Test that we can write empty attribute file", Fixture)
+{
+    EXPECT_TRUE(f._writer.open(testFileName));
+    f._writer.close();
+
+    FileUtil::LoadedBuffer::UP fileContent(FileUtil::loadFile(testFileName));
+    EXPECT_EQUAL(0u, fileContent->size());
+}
+
+
+// The writer is opened but never closed; the fixture destructor then destroys it.
+TEST_F("Test that we destroy writer without calling close", Fixture)
+{
+    const bool opened = f._writer.open(testFileName);
+    EXPECT_TRUE(opened);
+}
+
+
+// Writes more random data than fits in one writer buffer through a
+// BufferWriter and checks that the file contains exactly those bytes.
+TEST_F("Test that buffer writer passes on written data", Fixture)
+{
+    std::vector<int> values;
+    const size_t mysize = 3000000;
+    const size_t writerBufferSize = AttributeFileBufferWriter::BUFFER_SIZE;
+    EXPECT_GREATER(mysize * sizeof(int), writerBufferSize);
+
+    values.reserve(mysize);
+    search::Rand48 rnd;
+    while (values.size() < mysize) {
+        values.emplace_back(rnd.lrand48());
+    }
+    const size_t numBytes = values.size() * sizeof(int);
+
+    EXPECT_TRUE(f._writer.open(testFileName));
+    std::unique_ptr<BufferWriter> writer(f._writer.allocBufferWriter());
+    writer->write(&values[0], numBytes);
+    writer->flush();
+    writer.reset();
+    f._writer.close();
+
+    FileUtil::LoadedBuffer::UP fileContent(FileUtil::loadFile(testFileName));
+    EXPECT_EQUAL(numBytes, fileContent->size());
+    EXPECT_TRUE(0 == memcmp(&values[0], fileContent->buffer(), fileContent->size()));
+}
+
+
+// A buffer allocated from the writer and filled before open() must end up
+// in the file unchanged when it is handed back with writeBuf().
+TEST_F("Test that we can pass buffer directly", Fixture)
+{
+    using Buffer = IAttributeFileWriter::Buffer;
+
+    Buffer payload = f._writer.allocBuf(hello.size());
+    payload->writeBytes(hello.c_str(), hello.size());
+
+    EXPECT_TRUE(f._writer.open(testFileName));
+    f._writer.writeBuf(std::move(payload));
+    f._writer.close();
+
+    FileUtil::LoadedBuffer::UP fileContent(FileUtil::loadFile(testFileName));
+    EXPECT_EQUAL(hello.size(), fileContent->size());
+    EXPECT_TRUE(0 == memcmp(hello.c_str(), fileContent->buffer(), fileContent->size()));
+}
+
+
+}
+
+
+// Test program entry point: runs every test registered in this file.
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/attributeguard.cpp b/searchlib/src/tests/attribute/attributeguard.cpp
new file mode 100644
index 00000000000..5c90caa094b
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributeguard.cpp
@@ -0,0 +1,32 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributeguard_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+
+namespace search {
+
+/** Test application for the attribute guard classes. */
+class AttributeGuardTest : public vespalib::TestApp
+{
+public:
+    /** Runs the test and returns the test application result. */
+    int Main();
+};
+
+/** Checks that an enum guard taken on a single-value string ext attribute is valid. */
+int
+AttributeGuardTest::Main()
+{
+    TEST_INIT("attributeguard_test");
+
+    AttributeVector::SP stringAttribute(new SingleStringExtAttribute("ss1"));
+    AttributeEnumGuard enumGuard(stringAttribute);
+    EXPECT_TRUE(enumGuard.valid());
+
+    TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::AttributeGuardTest);
diff --git a/searchlib/src/tests/attribute/attributeguard_test.sh b/searchlib/src/tests/attribute/attributeguard_test.sh
new file mode 100644
index 00000000000..6a9557e7da7
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributeguard_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bahs
+$VALGRIND ./searchlib_attributeguard_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
+rm -rf clstmp
+rm -rf alstmp
diff --git a/searchlib/src/tests/attribute/attributemanager/.gitignore b/searchlib/src/tests/attribute/attributemanager/.gitignore
new file mode 100644
index 00000000000..6fa89f09572
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributemanager/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+attributemanager_test
+searchlib_attributemanager_test_app
diff --git a/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt
new file mode 100644
index 00000000000..ed3eeee1065
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_attributemanager_test_app
+ SOURCES
+ attributemanager_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attributemanager_test_app COMMAND searchlib_attributemanager_test_app)
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
new file mode 100644
index 00000000000..bf247668843
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
@@ -0,0 +1,422 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_test");
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributemanager.h>
+#include <vespa/searchlib/attribute/configconverter.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.hpp>
+#include <vespa/searchlib/attribute/stringattribute.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <algorithm>
+
+using namespace config;
+using namespace vespa::config::search;
+using namespace search;
+using namespace search::attribute;
+using vespalib::tensor::TensorType;
+using std::shared_ptr;
+
+// Short aliases for the attribute basic type, collection type and shared
+// attribute vector pointer used by the tests below.
+typedef BasicType BT;
+typedef CollectionType CT;
+typedef AttributeVector::SP AVSP;
+
+namespace search {
+
+/**
+ * Test application covering attribute load/save, attribute guards,
+ * conversion of attribute config and attribute contexts.
+ */
+class AttributeManagerTest : public vespalib::TestApp
+{
+public:
+    AttributeManagerTest() {}
+    int Main();
+
+private:
+    // individual test cases, run from Main()
+    void testLoad();
+    void testGuards();
+    void testConfigConvert();
+    void testContext();
+
+    // helper used by testLoad()
+    void verifyLoad(AttributeVector & v);
+
+    // helpers used by testConfigConvert(); both return the result of the comparison
+    bool assertDataType(BT::Type exp, AttributesConfig::Attribute::Datatype in);
+    bool assertCollectionType(CollectionType exp,
+                              AttributesConfig::Attribute::Collectiontype in,
+                              bool removeIfZ = false,
+                              bool createIfNe = false);
+};
+
+
+// Multi-value int32 attribute type (unweighted values, 32-bit multi-value index)
+// used both directly and as base class of TestAttribute.
+using TestAttributeBase =
+    MultiValueNumericAttribute<IntegerAttributeTemplate<int32_t>,
+                               multivalue::MVMTemplateArg<multivalue::Value<int32_t>,
+                                                          multivalue::Index32> >;
+
+/**
+ * Attribute that makes the generation bookkeeping of its base class
+ * reachable under short names for the guard test.
+ */
+class TestAttribute : public TestAttributeBase
+{
+public:
+    TestAttribute(const std::string &name) : TestAttributeBase(name) {}
+
+    /** Current generation of the attribute. */
+    generation_t getGen() const { return getCurrentGeneration(); }
+
+    /** Reference count registered for generation @p gen. */
+    uint32_t getRefCount(generation_t gen) const { return getGenerationRefCount(gen); }
+
+    /** Steps the attribute to its next generation. */
+    void incGen() { incGeneration(); }
+
+    /** Asks the attribute to recompute its first used generation. */
+    void updateFirstUsedGen(void) { updateFirstUsedGeneration(); }
+
+    /** First generation the attribute considers in use. */
+    generation_t getFirstUsedGen() const { return getFirstUsedGeneration(); }
+};
+
+
+/**
+ * Verifies how AttributeGuard instances show up in the generation bookkeeping
+ * of an attribute: the per-generation reference count while guards are alive,
+ * and when the first used generation moves forward.
+ */
+void
+AttributeManagerTest::testGuards()
+{
+ AttributeVector::SP vec(new TestAttribute("mvint") );
+ TestAttribute * v = static_cast<TestAttribute *> (vec.get());
+ // fresh attribute: generation 0 with no references
+ EXPECT_EQUAL(v->getGen(), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ {
+ // nested guards on the same generation are counted, and released as their scopes end
+ AttributeGuard g0(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ {
+ AttributeGuard g1(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(2));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0));
+
+ // with no guards held, the first used generation is expected to follow the bump to 1
+ v->incGen();
+ EXPECT_EQUAL(v->getGen(), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ {
+ AttributeGuard g0(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ {
+ // generation is bumped while g0 still holds generation 1; g1 is then taken on generation 2
+ v->incGen();
+ AttributeGuard g1(vec);
+ EXPECT_EQUAL(v->getGen(), unsigned(2));
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(2), unsigned(1));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
+ EXPECT_EQUAL(v->getRefCount(2), unsigned(0));
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ }
+ EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
+ EXPECT_EQUAL(v->getRefCount(2), unsigned(0));
+ // releasing the last guard does not by itself move the first used generation;
+ // it is expected to reach the current generation only after the explicit update
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1));
+ v->updateFirstUsedGeneration();
+ EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2));
+ EXPECT_EQUAL(v->getGen(), unsigned(2));
+}
+
+
+/**
+ * Expects that the given attribute starts out unloaded, can be loaded,
+ * and holds 100 documents afterwards.
+ */
+void AttributeManagerTest::verifyLoad(AttributeVector & v)
+{
+    EXPECT_TRUE(!v.isLoaded());
+    EXPECT_TRUE(v.load());
+    EXPECT_TRUE(v.isLoaded());
+    EXPECT_EQUAL(v.getNumDocs(), size_t(100));
+}
+
+
+/**
+ * Populates and saves an attribute named "mvint", then checks that attributes
+ * created afterwards under the same name can be loaded with the same number
+ * of documents, both directly and through an AttributeManager.
+ */
+void
+AttributeManagerTest::testLoad()
+{
+ {
+ // step 1: create 100 documents; document i gets i values (0 .. i-1, weight 1), committed per document
+ TestAttributeBase v("mvint");
+ EXPECT_TRUE(!v.isLoaded());
+ for(size_t i(0); i < 100; i++) {
+ AttributeVector::DocId doc;
+ EXPECT_TRUE( v.addDoc(doc) );
+ EXPECT_TRUE( doc == i);
+ }
+ EXPECT_TRUE( v.getNumDocs() == 100);
+ for(size_t i(0); i < 100; i++) {
+ for(size_t j(0); j < i; j++) {
+ EXPECT_TRUE( v.append(i, j, 1) );
+ }
+ v.commit();
+ EXPECT_TRUE(size_t(v.getValueCount(i)) == i);
+ // the expected max value count is at least 1, also before any value has been appended
+ EXPECT_EQUAL(v.getMaxValueCount(), std::max(size_t(1), i));
+ }
+ EXPECT_TRUE(v.isLoaded());
+ EXPECT_TRUE(v.save());
+ EXPECT_TRUE(v.isLoaded());
+ }
+ {
+ // step 2: a new attribute with the same name and default config
+ TestAttributeBase v("mvint");
+ verifyLoad(v);
+ }
+ {
+ // step 3: same, with an explicit int32 array config
+ AttributeVector::Config config(BT::INT32,
+ CollectionType::ARRAY);
+ TestAttributeBase v("mvint", config);
+ verifyLoad(v);
+ }
+ {
+ // step 4: an attribute added to a manager by name is expected to be reported as loaded
+ AttributeManager manager;
+ AttributeVector::Config config(BT::INT32,
+ CollectionType::ARRAY);
+ EXPECT_TRUE(manager.addVector("mvint", config));
+ AttributeManager::AttributeList list;
+ manager.getAttributeList(list);
+ EXPECT_TRUE(list.size() == 1);
+ EXPECT_TRUE( list[0]->isLoaded());
+ AttributeGuard::UP attrG(manager.getAttribute("mvint"));
+ EXPECT_TRUE( attrG->valid() );
+ }
+}
+
+
+/**
+ * Converts an attribute config holding only the given data type and compares
+ * the basic type of the result with @p exp.
+ *
+ * @return the outcome of the comparison
+ */
+bool AttributeManagerTest::assertDataType(BT::Type exp, AttributesConfig::Attribute::Datatype in)
+{
+    AttributesConfig::Attribute a;
+    a.datatype = in;
+    const bool matches = EXPECT_EQUAL(exp, ConfigConverter::convert(a).basicType().type());
+    return matches;
+}
+
+
+/**
+ * Converts an attribute config with the given collection settings and compares
+ * type, removeIfZero and createIfNonExistant of the result with @p exp.
+ * Checking stops at the first mismatch.
+ *
+ * @return true if all three properties match
+ */
+bool AttributeManagerTest::assertCollectionType(CollectionType exp,
+                                                AttributesConfig::Attribute::Collectiontype in,
+                                                bool removeIfZ,
+                                                bool createIfNe)
+{
+    AttributesConfig::Attribute attrCfg;
+    attrCfg.collectiontype = in;
+    attrCfg.removeifzero = removeIfZ;
+    attrCfg.createifnonexistent = createIfNe;
+    AttributeVector::Config out = ConfigConverter::convert(attrCfg);
+
+    if (!EXPECT_EQUAL(exp.type(), out.collectionType().type())) {
+        return false;
+    }
+    if (!EXPECT_EQUAL(exp.removeIfZero(), out.collectionType().removeIfZero())) {
+        return false;
+    }
+    return EXPECT_EQUAL(exp.createIfNonExistant(),
+                        out.collectionType().createIfNonExistant());
+}
+
+
+/**
+ * Checks that ConfigConverter maps attribute config onto the matching
+ * attribute vector config: data types, collection types and flags,
+ * fastsearch, huge, fastaccess and the tensor type spec.
+ */
+void
+AttributeManagerTest::testConfigConvert()
+{
+ // typedef AttributeVector::Config AVC;
+ typedef BT AVBT;
+ typedef CollectionType AVCT;
+ typedef AttributesConfig::Attribute CACA;
+ typedef ConfigConverter CC;
+
+ // every config data type maps to the basic type of the same name
+ EXPECT_TRUE(assertDataType(AVBT::STRING, CACA::STRING));
+ EXPECT_TRUE(assertDataType(AVBT::INT8, CACA::INT8));
+ EXPECT_TRUE(assertDataType(AVBT::INT16, CACA::INT16));
+ EXPECT_TRUE(assertDataType(AVBT::INT32, CACA::INT32));
+ EXPECT_TRUE(assertDataType(AVBT::INT64, CACA::INT64));
+ EXPECT_TRUE(assertDataType(AVBT::FLOAT, CACA::FLOAT));
+ EXPECT_TRUE(assertDataType(AVBT::DOUBLE, CACA::DOUBLE));
+ EXPECT_TRUE(assertDataType(AVBT::PREDICATE, CACA::PREDICATE));
+ EXPECT_TRUE(assertDataType(AVBT::TENSOR, CACA::TENSOR));
+ EXPECT_TRUE(assertDataType(AVBT::NONE, CACA::NONE));
+
+ // collection types, then the removeifzero / createifnonexistent flags one at a time
+ EXPECT_TRUE(assertCollectionType(AVCT::SINGLE, CACA::SINGLE));
+ EXPECT_TRUE(assertCollectionType(AVCT::ARRAY, CACA::ARRAY));
+ EXPECT_TRUE(assertCollectionType(AVCT::WSET, CACA::WEIGHTEDSET));
+ EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, true, false),
+ CACA::SINGLE, true, false));
+ EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, false, true),
+ CACA::SINGLE, false, true));
+
+ // each boolean setting is expected to be off for a default config and on once set
+ { // fastsearch
+ CACA a;
+ EXPECT_TRUE(!CC::convert(a).fastSearch());
+ a.fastsearch = true;
+ EXPECT_TRUE(CC::convert(a).fastSearch());
+ }
+ { // huge
+ CACA a;
+ EXPECT_TRUE(!CC::convert(a).huge());
+ a.huge = true;
+ EXPECT_TRUE(CC::convert(a).huge());
+ }
+ { // fastAccess
+ CACA a;
+ EXPECT_TRUE(!CC::convert(a).fastAccess());
+ a.fastaccess = true;
+ EXPECT_TRUE(CC::convert(a).fastAccess());
+ }
+ { // tensor: the type spec string is expected to survive conversion unchanged
+ CACA a;
+ a.datatype = CACA::TENSOR;
+ a.tensortype = "tensor(x[5])";
+ AttributeVector::Config out = ConfigConverter::convert(a);
+ EXPECT_EQUAL("tensor(x[5])", out.tensorType().toSpec());
+ }
+}
+
+/**
+ * Sort predicate ordering attribute vectors ascending by name
+ * (it compares with '<' even though the identifier says "gt").
+ */
+bool gt_attribute(const attribute::IAttributeVector * a, const attribute::IAttributeVector * b) {
+    const auto & lhsName = a->getName();
+    const auto & rhsName = b->getName();
+    return lhsName < rhsName;
+}
+
+/**
+ * Verifies that attribute contexts created by the manager take generation
+ * guards on the attributes that are asked for, release them when the context
+ * goes out of scope, and can list all attributes of the manager.
+ *
+ * Index layout of attrs: 0 = sint32, 1 = aint32, 2 = wsint32, 3 = dontcare.
+ */
+void
+AttributeManagerTest::testContext()
+{
+ std::vector<AVSP> attrs;
+ // create various attributes vectors
+ attrs.push_back(AttributeFactory::createAttribute("sint32",
+ Config(BT::INT32, CT::SINGLE)));
+ attrs.push_back(AttributeFactory::createAttribute("aint32",
+ Config(BT::INT32, CT::ARRAY)));
+ attrs.push_back(AttributeFactory::createAttribute("wsint32",
+ Config(BT::INT32, CT::WSET)));
+ attrs.push_back(AttributeFactory::createAttribute("dontcare",
+ Config(BT::INT32, CT::SINGLE)));
+
+ // add docs
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ attrs[i]->addDocs(64);
+ }
+
+ // commit all attributes (current generation -> 1);
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ attrs[i]->commit();
+ }
+
+ AttributeManager manager;
+ // add to manager
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ manager.add(attrs[i]);
+ }
+
+ {
+ IAttributeContext::UP first = manager.createContext();
+
+ // no generation guards taken yet
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u);
+ }
+
+ // every attribute is asked for twice; the counts checked below show that
+ // repeated requests through the same context do not add guards
+ for (uint32_t i = 0; i < 2; ++i) {
+ EXPECT_TRUE(first->getAttribute("sint32") != NULL);
+ EXPECT_TRUE(first->getAttribute("aint32") != NULL);
+ EXPECT_TRUE(first->getAttribute("wsint32") != NULL);
+ EXPECT_TRUE(first->getAttributeStableEnum("wsint32") != NULL);
+ }
+ EXPECT_TRUE(first->getAttribute("foo") == NULL);
+ EXPECT_TRUE(first->getAttribute("bar") == NULL);
+
+ // one generation guard taken per attribute asked for
+ // (wsint32 is asked for in two ways and is expected to hold two; dontcare is never asked for)
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u),
+ (i < 3) ? (i == 2 ? 2u : 1u) : 0u);
+ }
+
+ {
+ IAttributeContext::UP second = manager.createContext();
+
+ EXPECT_TRUE(second->getAttribute("sint32") != NULL);
+ EXPECT_TRUE(second->getAttribute("aint32") != NULL);
+ EXPECT_TRUE(second->getAttribute("wsint32") != NULL);
+ EXPECT_TRUE(second->getAttributeStableEnum("wsint32") != NULL);
+
+ // two generation guards taken per attribute asked for
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u),
+ (i < 3) ? (i == 2 ? 4u : 2u) : 0u);
+ }
+ }
+
+ // one generation guard taken per attribute asked for
+ // (the guards of the second context are gone again)
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u),
+ (i < 3) ? (i == 2 ? 2u : 1u) : 0u);
+ }
+ }
+
+ // no generation guards taken
+ for (uint32_t i = 0; i < attrs.size(); ++i) {
+ EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u);
+ EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u);
+ }
+
+ {
+ // the attribute list of a context holds all four attributes; it is sorted by name
+ // here before the names are compared
+ IAttributeContext::UP ctx = manager.createContext();
+ std::vector<const attribute::IAttributeVector *> all;
+ ctx->getAttributeList(all);
+ EXPECT_EQUAL(4u, all.size());
+ std::sort(all.begin(), all.end(), gt_attribute);
+ EXPECT_EQUAL("aint32", all[0]->getName());
+ EXPECT_EQUAL("dontcare", all[1]->getName());
+ EXPECT_EQUAL("sint32", all[2]->getName());
+ EXPECT_EQUAL("wsint32", all[3]->getName());
+ }
+}
+
+/** Test entry point: runs the test cases in a fixed order. */
+int
+AttributeManagerTest::Main()
+{
+    TEST_INIT("attributemanager_test");
+
+    testLoad();
+    testGuards();
+    testConfigConvert();
+    testContext();
+
+    TEST_DONE();
+}
+
+} // namespace search
+
+
+TEST_APPHOOK(search::AttributeManagerTest);
diff --git a/searchlib/src/tests/attribute/attributesearcher.h b/searchlib/src/tests/attribute/attributesearcher.h
new file mode 100644
index 00000000000..7456d22f306
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributesearcher.h
@@ -0,0 +1,265 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "runnable.h"
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/util/compress.h>
+
+namespace search {
+
+/**
+ * Runs the given search iterator to its end and collects all hits.
+ *
+ * Iteration starts by seeking to document id 1 and continues by seeking to the
+ * id following each hit; every hit is added to the collector with the value 0.0.
+ *
+ * Declared inline because this header defines the function: without it, including
+ * the header from more than one translation unit gives multiple definitions.
+ *
+ * @param sb      search iterator to run
+ * @param numDocs passed as the first two arguments to the hit collector
+ * @return the result set produced by the hit collector
+ */
+inline std::unique_ptr<ResultSet>
+performSearch(queryeval::SearchIterator & sb, uint32_t numDocs)
+{
+    queryeval::HitCollector hc(numDocs, numDocs, 0);
+    // assume strict toplevel search object located at start
+    for (sb.seek(1); ! sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+        hc.addHit(sb.getDocId(), 0.0);
+    }
+    return hc.getResultSet();
+}
+
+/**
+ * Accumulated statistics for one or more attribute search clients.
+ * Search times are in milliseconds (see the unit notes in printXML()).
+ */
+class AttributeSearcherStatus
+{
+public:
+    double   _totalSearchTime;
+    uint64_t _totalHitCount;
+    uint64_t _numQueries;
+    uint64_t _numClients;
+
+    AttributeSearcherStatus()
+        : _totalSearchTime(0),
+          _totalHitCount(0),
+          _numQueries(0),
+          _numClients(0)
+    {
+    }
+
+    /** Adds every counter of @p status to the counters of this object. */
+    void merge(const AttributeSearcherStatus & status) {
+        _totalSearchTime += status._totalSearchTime;
+        _totalHitCount   += status._totalHitCount;
+        _numQueries      += status._numQueries;
+        _numClients      += status._numClients;
+    }
+
+    /** Total search time divided by the number of queries. */
+    double avgSearchTime() const {
+        return _totalSearchTime / _numQueries;
+    }
+
+    /** Queries per second; the product is computed in integers before the division. */
+    double searchThroughout() const {
+        const uint64_t scaledQueries = _numClients * 1000 * _numQueries;
+        return scaledQueries / _totalSearchTime;
+    }
+
+    /** Total hit count divided by the number of queries. */
+    double avgHitCount() const {
+        return _totalHitCount / static_cast<double>(_numQueries);
+    }
+
+    /** Writes the totals and derived values to stdout, one XML element per line. */
+    void printXML() const {
+        std::cout << "<total-search-time>" << _totalSearchTime << "</total-search-time>" << std::endl  // ms
+                  << "<avg-search-time>" << avgSearchTime() << "</avg-search-time>" << std::endl       // ms
+                  << "<search-throughput>" << searchThroughout() << "</search-throughput>" << std::endl // per/sec
+                  << "<total-hit-count>" << _totalHitCount << "</total-hit-count>" << std::endl
+                  << "<avg-hit-count>" << avgHitCount() << "</avg-hit-count>" << std::endl;
+    }
+};
+
+
+/**
+ * Base class for benchmark search clients that run queries against one attribute vector.
+ * Only a reference to the attribute pointer is stored, so the pointer object passed
+ * to the constructor must stay alive for as long as the searcher is used.
+ */
+class AttributeSearcher : public Runnable
+{
+protected:
+    using AttributePtr = AttributeVector::SP;
+
+    const AttributePtr & _attrPtr;
+    FastOS_Time _timer;
+    AttributeSearcherStatus _status;
+
+public:
+    /** Creates a searcher with the given runnable id; the status starts with one client. */
+    AttributeSearcher(uint32_t id, const AttributePtr & attrPtr)
+        : Runnable(id),
+          _attrPtr(attrPtr),
+          _timer(),
+          _status()
+    {
+        _status._numClients = 1;
+    }
+
+    /** Runs the queries of this client; implemented by the concrete searchers. */
+    virtual void doRun() = 0;
+
+    AttributeSearcherStatus & getStatus() { return _status; }
+
+    /** Serializes a (prefix) term query for @p index into @p buffer. */
+    void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix = false);
+};
+
+/**
+ * Serializes a single term query into @p buffer, replacing its previous content.
+ *
+ * Layout written, in order: compressed item type, compressed length of the index
+ * name, the index name bytes, compressed length of the term, the term bytes.
+ * The buffer is resized to exactly the size of that packet.
+ *
+ * Declared inline because this header defines the function: without it, including
+ * the header from more than one translation unit gives multiple definitions.
+ *
+ * @param buffer receives the serialized query
+ * @param index  name written as the index of the term
+ * @param term   zero-terminated term text
+ * @param prefix selects ParseItem::ITEM_PREFIXTERM instead of ParseItem::ITEM_TERM
+ */
+inline void
+AttributeSearcher::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix)
+{
+    uint32_t indexLen = index.size();
+    uint32_t termLen = strlen(term);
+    uint32_t termIdx = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+    uint32_t queryPacketSize = vespalib::compress::Integer::compressedPositiveLength(termIdx)
+                             + vespalib::compress::Integer::compressedPositiveLength(indexLen)
+                             + vespalib::compress::Integer::compressedPositiveLength(termLen)
+                             + indexLen + termLen;
+    buffer.resize(queryPacketSize);
+    char * p = &buffer[0];
+    p += vespalib::compress::Integer::compressPositive(termIdx, p);
+    p += vespalib::compress::Integer::compressPositive(indexLen, p);
+    memcpy(p, index.c_str(), indexLen);
+    p += indexLen;
+    p += vespalib::compress::Integer::compressPositive(termLen, p);
+    memcpy(p, term, termLen);
+    p += termLen;
+    // the write position must end exactly at the end of the buffer
+    assert(p == (&buffer[0] + buffer.size()));
+}
+
+
+/**
+ * Search client issuing exact-match term queries, taking the terms from a list of values.
+ * The value list is stored by reference and must outlive the searcher.
+ */
+template <typename T>
+class AttributeFindSearcher : public AttributeSearcher
+{
+private:
+    const std::vector<T> & _values;
+    std::vector<char>      _query;
+
+public:
+    AttributeFindSearcher(uint32_t id, const AttributePtr & attrPtr,
+                          const std::vector<T> & values, uint32_t numQueries)
+        : AttributeSearcher(id, attrPtr),
+          _values(values),
+          _query()
+    {
+        _status._numQueries = numQueries;
+    }
+
+    virtual void doRun();
+};
+
+/**
+ * Runs the configured number of term queries, cycling through the value list,
+ * and adds the hit counts and the elapsed time (milliseconds) to the status.
+ */
+template <typename T>
+void
+AttributeFindSearcher<T>::doRun()
+{
+    _timer.SetNow();
+    for (uint32_t queryNo = 0; queryNo < _status._numQueries; ++queryNo) {
+        // build simple term query from the next value
+        vespalib::asciistream termText;
+        termText << _values[queryNo % _values.size()].getValue();
+        this->buildTermQuery(_query, _attrPtr->getName(), termText.str().c_str());
+
+        // the guard is held while the query is evaluated
+        AttributeGuard guard(_attrPtr);
+        std::unique_ptr<AttributeVector::SearchContext> searchContext =
+            _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()),
+                                AttributeVector::SearchContext::Params());
+        searchContext->fetchPostings(true);
+
+        std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true);
+        std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs());
+        _status._totalHitCount += results->getNumHits();
+    }
+    _status._totalSearchTime += _timer.MilliSecsToNow();
+}
+
+
+/**
+ * Describes range queries: a value interval [min, max] and the width of each
+ * range taken from it. The constructor asserts min < max and range <= max - min.
+ */
+class RangeSpec
+{
+public:
+    int64_t _min;
+    int64_t _max;
+    int64_t _range;
+
+    RangeSpec(int64_t min, int64_t max, int64_t range)
+        : _min(min),
+          _max(max),
+          _range(range)
+    {
+        assert(_min < _max);
+        assert(_range <= (_max - _min));
+    }
+};
+
+/**
+ * Steps a window [a, b] of the spec's range width through the spec's interval.
+ * The window starts at the minimum and wraps back to it when the next step
+ * would move b past the maximum.
+ */
+class RangeIterator
+{
+private:
+    RangeSpec _spec;
+    int64_t   _a;
+    int64_t   _b;
+
+public:
+    RangeIterator(const RangeSpec & spec)
+        : _spec(spec),
+          _a(spec._min),
+          _b(spec._min + _spec._range)
+    {
+    }
+
+    /** Moves the window one range width forward, or back to the start on overrun. */
+    RangeIterator & operator++() {
+        const int64_t step = _spec._range;
+        const int64_t nextB = _b + step;
+        if (nextB > _spec._max) {
+            _a = _spec._min;
+            _b = _spec._min + step;
+        } else {
+            _a += step;
+            _b = nextB;
+        }
+        return *this;
+    }
+
+    int64_t a() const { return _a; }
+    int64_t b() const { return _b; }
+};
+
+/** Search client issuing range queries whose bounds are stepped through a RangeSpec. */
+class AttributeRangeSearcher : public AttributeSearcher
+{
+private:
+    RangeSpec         _spec;
+    std::vector<char> _query;
+
+public:
+    AttributeRangeSearcher(uint32_t id, const AttributePtr & attrPtr,
+                           const RangeSpec & spec, uint32_t numQueries)
+        : AttributeSearcher(id, attrPtr),
+          _spec(spec),
+          _query()
+    {
+        _status._numQueries = numQueries;
+    }
+
+    virtual void doRun();
+};
+
+/**
+ * Runs the configured number of range queries. Each query uses the term
+ * "[a;b]" taken from a RangeIterator that is advanced once per query; hit
+ * counts and the elapsed time (milliseconds) are added to the status.
+ *
+ * Declared inline because this header defines the function: without it, including
+ * the header from more than one translation unit gives multiple definitions.
+ */
+inline void
+AttributeRangeSearcher::doRun()
+{
+    _timer.SetNow();
+    RangeIterator iter(_spec);
+    for (uint32_t i = 0; i < _status._numQueries; ++i, ++iter) {
+        // build simple range term query
+        vespalib::asciistream ss;
+        ss << "[" << iter.a() << ";" << iter.b() << "]";
+        buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str());
+
+        // the guard is held while the query is evaluated
+        AttributeGuard guard(_attrPtr);
+        std::unique_ptr<AttributeVector::SearchContext> searchContext =
+            _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()),
+                                AttributeVector::SearchContext::Params());
+
+        searchContext->fetchPostings(true);
+        std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true);
+        std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs());
+
+        _status._totalHitCount += results->getNumHits();
+    }
+    _status._totalSearchTime += _timer.MilliSecsToNow();
+}
+
+
+/**
+ * Search client issuing prefix term queries, taking the prefixes from a list of strings.
+ * The list is stored by reference and must outlive the searcher.
+ */
+class AttributePrefixSearcher : public AttributeSearcher
+{
+private:
+    const std::vector<vespalib::string> & _values;
+    std::vector<char>                     _query;
+
+public:
+    AttributePrefixSearcher(uint32_t id, const AttributePtr & attrPtr,
+                            const std::vector<vespalib::string> & values, uint32_t numQueries)
+        : AttributeSearcher(id, attrPtr),
+          _values(values),
+          _query()
+    {
+        _status._numQueries = numQueries;
+    }
+
+    virtual void doRun();
+};
+
+/**
+ * Runs the configured number of prefix queries, cycling through the prefix
+ * list, and adds the hit counts and the elapsed time (milliseconds) to the status.
+ *
+ * Declared inline because this header defines the function: without it, including
+ * the header from more than one translation unit gives multiple definitions.
+ */
+inline void
+AttributePrefixSearcher::doRun()
+{
+    _timer.SetNow();
+    for (uint32_t i = 0; i < _status._numQueries; ++i) {
+        // build simple prefix term query
+        buildTermQuery(_query, _attrPtr->getName(), _values[i % _values.size()].c_str(), true);
+
+        // the guard is held while the query is evaluated
+        AttributeGuard guard(_attrPtr);
+        std::unique_ptr<AttributeVector::SearchContext> searchContext =
+            _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()),
+                                AttributeVector::SearchContext::Params());
+
+        searchContext->fetchPostings(true);
+        std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true);
+        std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs());
+
+        _status._totalHitCount += results->getNumHits();
+    }
+    _status._totalSearchTime += _timer.MilliSecsToNow();
+}
+
+
+
+} // search
+
diff --git a/searchlib/src/tests/attribute/attributeupdater.h b/searchlib/src/tests/attribute/attributeupdater.h
new file mode 100644
index 00000000000..5193ca0f873
--- /dev/null
+++ b/searchlib/src/tests/attribute/attributeupdater.h
@@ -0,0 +1,299 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/util/randomgenerator.h>
+#include "runnable.h"
+#include <vespa/searchlib/attribute/attribute.h>
+
+// Helper macros meant to be invoked as members of an AttributeValidator
+// (e.g. _validator.VALIDATOR_ASSERT(cond)): they expand to a reportAssert /
+// reportAssertEqual call that also receives the call site's file, line and
+// the stringified expression(s).
+#define VALIDATOR_STR(str) #str
+#define VALIDATOR_ASSERT(rc) reportAssert(rc, __FILE__, __LINE__, VALIDATOR_STR(rc))
+#define VALIDATOR_ASSERT_EQUAL(a, b) reportAssertEqual(__FILE__, __LINE__, VALIDATOR_STR(a), VALIDATOR_STR(b), a, b)
+
+namespace search {
+
+/**
+ * Counts validation checks and aborts the process on the first failing one,
+ * after printing what failed and where to stdout.
+ */
+class AttributeValidator
+{
+private:
+    uint32_t _totalCnt;
+
+public:
+    AttributeValidator() : _totalCnt(0) {}
+
+    /** Number of checks reported so far. */
+    uint32_t getTotalCnt() const { return _totalCnt; }
+
+    /**
+     * Counts one check; prints a message and aborts if @p rc is false.
+     * @return true (whenever the function returns)
+     */
+    bool reportAssert(bool rc, const vespalib::string & file, uint32_t line, const vespalib::string & str) {
+        ++_totalCnt;
+        if (rc) {
+            return true;
+        }
+        std::cout << "Assert " << _totalCnt << " failed: \"" << str << "\" ("
+                  << file << ":" << line << ")" << std::endl;
+        abort();
+        return true;
+    }
+
+    /**
+     * Counts one check; prints both expressions with their values and aborts
+     * unless a == b.
+     * @return true (whenever the function returns)
+     */
+    template <class A, class B>
+    bool reportAssertEqual(const vespalib::string & file, uint32_t line,
+                           const vespalib::string & aStr, const vespalib::string & bStr,
+                           const A & a, const B & b) {
+        ++_totalCnt;
+        if (a == b) {
+            return true;
+        }
+        std::cout << "Assert equal failed: " << std::endl;
+        std::cout << aStr << ": " << a << std::endl;
+        std::cout << bStr << ": " << b << std::endl;
+        std::cout << "(" << file << ":" << line << ")" << std::endl;
+        abort();
+        return true;
+    }
+};
+
+/**
+ * Accumulated statistics for attribute updates: total time spent and the
+ * number of documents and values updated. The throughput values scale the
+ * counts by 1000 before dividing by the total time.
+ */
+class AttributeUpdaterStatus
+{
+public:
+    double   _totalUpdateTime;
+    uint64_t _numDocumentUpdates;
+    uint64_t _numValueUpdates;
+
+    AttributeUpdaterStatus()
+        : _totalUpdateTime(0),
+          _numDocumentUpdates(0),
+          _numValueUpdates(0)
+    {
+    }
+
+    /** Sets all counters back to zero. */
+    void reset() {
+        _totalUpdateTime = 0;
+        _numDocumentUpdates = 0;
+        _numValueUpdates = 0;
+    }
+
+    double documentUpdateThroughput() const {
+        const uint64_t scaledDocs = _numDocumentUpdates * 1000;
+        return scaledDocs / _totalUpdateTime;
+    }
+
+    double avgDocumentUpdateTime() const {
+        return _totalUpdateTime / _numDocumentUpdates;
+    }
+
+    double valueUpdateThroughput() const {
+        const uint64_t scaledValues = _numValueUpdates * 1000;
+        return scaledValues / _totalUpdateTime;
+    }
+
+    double avgValueUpdateTime() const {
+        return _totalUpdateTime / _numValueUpdates;
+    }
+
+    /** Writes the totals and derived values to stdout, one XML element per line. */
+    void printXML() const {
+        std::cout << "<total-update-time>" << _totalUpdateTime << "</total-update-time>" << std::endl
+                  << "<documents-updated>" << _numDocumentUpdates << "</documents-updated>" << std::endl
+                  << "<document-update-throughput>" << documentUpdateThroughput() << "</document-update-throughput>" << std::endl
+                  << "<avg-document-update-time>" << avgDocumentUpdateTime() << "</avg-document-update-time>" << std::endl
+                  << "<values-updated>" << _numValueUpdates << "</values-updated>" << std::endl
+                  << "<value-update-throughput>" << valueUpdateThroughput() << "</value-update-throughput>" << std::endl
+                  << "<avg-value-update-time>" << avgValueUpdateTime() << "</avg-value-update-time>" << std::endl;
+    }
+};
+
+/**
+ * Applies random updates to one attribute vector and optionally validates the
+ * committed content against the values it wrote.
+ *
+ * Template parameters: Vector = concrete attribute vector class,
+ * T = type of the values written, BT = element type of the buffer used when
+ * reading values back.
+ *
+ * The attribute pointer, the value list and the random generator are stored by
+ * reference and must outlive the updater.
+ */
+template <typename Vector, typename T, typename BT>
+class AttributeUpdater
+{
+protected:
+    using AttributePtr    = AttributeVector::SP;
+    using AttributeCommit = std::map<uint32_t, std::vector<T> >;
+
+    const AttributePtr &   _attrPtr;
+    Vector &               _attrVec;
+    const std::vector<T> & _values;
+    std::vector<T>         _buffer;
+    std::vector<BT>        _getBuffer;
+    RandomGenerator &      _rndGen;
+    AttributeCommit        _expected;
+    FastOS_Time            _timer;
+    AttributeUpdaterStatus _status;
+    AttributeValidator     _validator;
+
+    // config
+    bool     _validate;
+    uint32_t _commitFreq;
+    uint32_t _minValueCount;
+    uint32_t _maxValueCount;
+
+    /** Random value count between the configured min and max value count. */
+    uint32_t getRandomCount() {
+        return _rndGen.rand(_minValueCount, _maxValueCount);
+    }
+    /** Random document id between 0 and the number of documents minus one. */
+    uint32_t getRandomDoc() {
+        return _rndGen.rand(0, _attrPtr->getNumDocs() - 1);
+    }
+    /** Random element of the value list. */
+    const T & getRandomValue() {
+        return _values[_rndGen.rand(0, _values.size() - 1)];
+    }
+    void updateValues(uint32_t doc);
+    void commit();
+
+public:
+    AttributeUpdater(const AttributePtr & attrPtr, const std::vector<T> & values,
+                     RandomGenerator & rndGen, bool validate, uint32_t commitFreq,
+                     uint32_t minValueCount, uint32_t maxValueCount)
+        : _attrPtr(attrPtr),
+          _attrVec(*(static_cast<Vector *>(attrPtr.get()))),
+          _values(values),
+          _buffer(),
+          _getBuffer(),
+          _rndGen(rndGen),
+          _expected(),
+          _timer(),
+          _status(),
+          _validator(),
+          _validate(validate),
+          _commitFreq(commitFreq),
+          _minValueCount(minValueCount),
+          _maxValueCount(maxValueCount)
+    {
+    }
+
+    void resetStatus() { _status.reset(); }
+    const AttributeUpdaterStatus & getStatus() const { return _status; }
+    const AttributeValidator & getValidator() const { return _validator; }
+
+    /** Updates every document of the attribute once, in document id order. */
+    void populate();
+    /** Updates @p numUpdates randomly chosen documents. */
+    void update(uint32_t numUpdates);
+};
+
+/** AttributeUpdater that can be run as a Runnable (created with runnable id 0). */
+template <typename Vector, typename T, typename BT>
+class AttributeUpdaterThread : public AttributeUpdater<Vector, T, BT>, public Runnable
+{
+private:
+    using AttributePtr = AttributeVector::SP;
+
+public:
+    AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector<T> & values,
+                           RandomGenerator & rndGen, bool validate, uint32_t commitFreq,
+                           uint32_t minValueCount, uint32_t maxValueCount)
+        : AttributeUpdater<Vector, T, BT>(attrPtr, values, rndGen, validate, commitFreq,
+                                          minValueCount, maxValueCount),
+          Runnable(0)
+    {
+    }
+
+    virtual void doRun();
+};
+
+
+/**
+ * Replaces the content of one document with random values.
+ *
+ * Multi-value attributes are cleared and get a random number of values appended;
+ * single-value attributes get one new value. With validation enabled, the values
+ * expected after the next commit are recorded for the document. For weighted
+ * sets, a value drawn more than once is recorded once, with the last weight drawn.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::updateValues(uint32_t doc)
+{
+    // drawn for single-value attributes as well, where only the status update below could use it
+    const uint32_t valueCount = getRandomCount();
+
+    if (_validate) {
+        _buffer.clear();
+    }
+    if (_attrPtr->hasMultiValue()) {
+        _attrPtr->clearDoc(doc);
+        for (uint32_t n = 0; n < valueCount; ++n) {
+            T value = getRandomValue();
+            if (_validate) {
+                bool replaced = false;
+                if (_attrPtr->hasWeightedSetType()) {
+                    for (T & seen : _buffer) {
+                        if (seen.getValue() == value.getValue()) {
+                            seen.setWeight(value.getWeight());
+                            replaced = true;
+                            break;
+                        }
+                    }
+                }
+                if (!replaced) {
+                    _buffer.push_back(value);
+                }
+            }
+            _attrVec.append(doc, value.getValue(), value.getWeight());
+        }
+    } else if (_validate) {
+        _buffer.push_back(getRandomValue());
+        _attrVec.update(doc, _buffer.back().getValue());
+    } else {
+        _attrVec.update(doc, getRandomValue().getValue());
+    }
+    if (_validate) {
+        _expected[doc] = _buffer;
+    }
+
+    _status._numDocumentUpdates++;
+    _status._numValueUpdates += (_attrPtr->hasMultiValue() ? valueCount : 1);
+}
+
+/**
+ * Commits the attribute while holding a guard on it. With validation enabled,
+ * every document recorded since the previous commit is read back and compared
+ * with the expected values (count, values and weights); a mismatch aborts via
+ * the validator. The recorded expectations are cleared afterwards.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::commit()
+{
+    AttributeGuard guard(this->_attrPtr);
+    _attrPtr->commit();
+    if (!_validate) {
+        return;
+    }
+
+    _getBuffer.resize(_maxValueCount);
+    for (typename AttributeCommit::iterator iter = _expected.begin();
+         iter != _expected.end(); ++iter)
+    {
+        const std::vector<T> & wanted = iter->second;
+        uint32_t valueCount = _attrPtr->get(iter->first, &_getBuffer[0], _getBuffer.size());
+        _validator.VALIDATOR_ASSERT(_minValueCount <= valueCount && valueCount <= _maxValueCount);
+        if (valueCount != wanted.size()) {
+            // dump both sides before the equality check below aborts
+            std::cout << "validate(" << iter->first << ")" << std::endl;
+            std::cout << "expected(" << wanted.size() << ")" << std::endl;
+            for (size_t n = 0; n < wanted.size(); ++n) {
+                std::cout << " [" << wanted[n].getValue() << ", " << wanted[n].getWeight() << "]" << std::endl;
+            }
+            std::cout << "actual(" << valueCount << ")" << std::endl;
+            for (size_t n = 0; n < valueCount; ++n) {
+                std::cout << " [" << _getBuffer[n].getValue() << ", " << _getBuffer[n].getWeight() << "]" << std::endl;
+            }
+        }
+        _validator.VALIDATOR_ASSERT_EQUAL(valueCount, iter->second.size());
+        for (uint32_t i = 0; i < valueCount; ++i) {
+            _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getValue(), iter->second[i].getValue());
+            _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getWeight(), iter->second[i].getWeight());
+        }
+    }
+    _expected.clear();
+}
+
+/**
+ * Updates every document once in document id order, committing after each
+ * _commitFreq-th document and once more at the end. The elapsed time
+ * (milliseconds) is added to the status.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::populate()
+{
+    _timer.SetNow();
+    uint32_t doc = 0;
+    while (doc < _attrPtr->getNumDocs()) {
+        updateValues(doc);
+        const bool commitNow = (doc % _commitFreq) == (_commitFreq - 1);
+        if (commitNow) {
+            commit();
+        }
+        ++doc;
+    }
+    commit();
+    _status._totalUpdateTime += _timer.MilliSecsToNow();
+}
+
+
+/**
+ * Updates numUpdates randomly chosen documents, committing after each
+ * _commitFreq-th update and once more at the end. The elapsed time
+ * (milliseconds) is added to the status.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdater<Vector, T, BT>::update(uint32_t numUpdates)
+{
+    _timer.SetNow();
+    uint32_t updateNo = 0;
+    while (updateNo < numUpdates) {
+        updateValues(getRandomDoc());
+        const bool commitNow = (updateNo % _commitFreq) == (_commitFreq - 1);
+        if (commitNow) {
+            commit();
+        }
+        ++updateNo;
+    }
+    commit();
+    _status._totalUpdateTime += _timer.MilliSecsToNow();
+}
+
+
+/**
+ * Updates randomly chosen documents until _done is set, committing whenever the
+ * number of updated documents reaches the commit frequency pattern, and once
+ * more at the end. The elapsed time (milliseconds) is added to the status.
+ */
+template <typename Vector, typename T, typename BT>
+void
+AttributeUpdaterThread<Vector, T, BT>::doRun()
+{
+    this->_timer.SetNow();
+    while (!_done) {
+        this->updateValues(this->getRandomDoc());
+        const bool commitNow =
+            (this->_status._numDocumentUpdates % this->_commitFreq) == (this->_commitFreq - 1);
+        if (commitNow) {
+            this->commit();
+        }
+    }
+    this->commit();
+    this->_status._totalUpdateTime += this->_timer.MilliSecsToNow();
+}
+
+
+} // search
+
diff --git a/searchlib/src/tests/attribute/benchmarkplotter.rb b/searchlib/src/tests/attribute/benchmarkplotter.rb
new file mode 100644
index 00000000000..d77c92c8acd
--- /dev/null
+++ b/searchlib/src/tests/attribute/benchmarkplotter.rb
@@ -0,0 +1,134 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+require 'rexml/document'
+
+def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles)
+ plot_cmd = "";
+ plot_cmd += "set terminal png\n"
+ plot_cmd += "set output \"#{plot_png}\"\n"
+ plot_cmd += "set title \"#{title}\"\n"
+ plot_cmd += "set xlabel \"#{xlabel}\"\n"
+ plot_cmd += "set ylabel \"#{ylabel}\"\n"
+ c = 2
+ plots = []
+ plot_cmd += "plot "
+ graph_titles.each do |title|
+ plots.push("\"#{plot_data}\" using 1:#{c} title \"#{title}\" with linespoints")
+ c += 1
+ end
+ plot_cmd += plots.join(", ")
+
+ plot_cmd_file = File.open("plot_graph.cmd", "w")
+ plot_cmd_file.write(plot_cmd);
+ plot_cmd_file.close
+ cmd = "gnuplot plot_graph.cmd"
+ puts cmd
+ puts `#{cmd}`
+end
+
+def extract_alpha(num_docs, percentages, input, output, xml_getter)
+ plot_data = File.open(output, "w");
+ num_docs.each do |num|
+ data_line = "#{num} "
+ percentages.each do |prc|
+ unique = num * prc
+ filename = input.sub("#N", "#{num}").sub("#V", "#{unique}")
+ value = 0
+ begin
+ xml_root = REXML::Document.new(File.open(filename)).root
+ value = send(xml_getter, xml_root)
+ rescue REXML::ParseException
+ puts "Could not parse file: #{filename}"
+ end
+ data_line += "#{value} "
+ end
+ plot_data.write(data_line + "\n")
+ end
+ plot_data.close
+end
+
+def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter)
+ plot_data = File.open(output, "w");
+ num_docs.each do |num|
+ data_line = "#{num} "
+ unique = num * percentage
+ num_threads.each do |thread|
+ filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}")
+ value = 0
+ begin
+ xml_root = REXML::Document.new(File.open(filename)).root
+ value = send(xml_getter, xml_root)
+ rescue REXML::ParseException
+ puts "Could not parse file: #{filename}"
+ end
+ data_line += "#{value} "
+ end
+ plot_data.write(data_line + "\n")
+ end
+ plot_data.close
+end
+
+def xml_getter_update_0_throughput(xml_root)
+ return xml_root.elements["update[@id='0']"].elements["throughput"].text
+end
+
+def xml_getter_search_throughput(xml_root)
+ return xml_root.elements["total-searcher-summary"].elements["search-throughput"].text
+end
+
+def xml_getter_updater_thread_throughput(xml_root)
+ return throughput = xml_root.elements["updater-summary"].elements["throughput"].text
+end
+
+
+# Benchmark dimensions. The trailing comment on the vectors line lists other
+# attribute vector names that are currently disabled.
+vectors = ["mv-num-new"]#, "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"]
+num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000]
+unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50]
+num_threads = [1, 2, 4, 8, 16]
+
+# Log file name templates. Placeholders substituted later:
+# #AV = vector name, #N = document count, #V = unique value count, #S = thread count.
+inputs = ["03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log",
+ "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"]
+# graph_titles[0]: one title per unique percentage; graph_titles[1]: one per thread count.
+graph_titles = [[], []]
+unique_percentages.each do |percentage|
+ graph_titles[0].push("#{percentage * 100} % uniques")
+end
+num_threads.each do |thread|
+ graph_titles[1].push("#{thread} searcher thread(s)")
+end
+
+# For each vector, extract four data files from the benchmark logs and plot
+# each of them to a PNG with the same base name.
+vectors.each do |vector|
+ # Update throughput per document count, one graph per unique percentage.
+ extract_alpha(num_docs, unique_percentages,
+ inputs[0].sub("#AV", vector),
+ "#{vector}-update-speed.dat",
+ :xml_getter_update_0_throughput)
+ plot_graph("#{vector}-update-speed.dat",
+ "#{vector}-update-speed.png",
+ "Update speed when applying 1M updates",
+ "Number of documents", "Updates per/sec", graph_titles[0])
+
+ # Search throughput per document count, one graph per unique percentage.
+ extract_alpha(num_docs, unique_percentages,
+ inputs[0].sub("#AV", vector),
+ "#{vector}-search-speed.dat",
+ :xml_getter_search_throughput)
+ plot_graph("#{vector}-search-speed.dat",
+ "#{vector}-search-speed.png",
+ "Search speed with 1 searcher thread",
+ "Number of documents", "Queries per/sec", graph_titles[0])
+
+ # Search throughput at a fixed 0.01 unique fraction, one graph per thread count.
+ extract_beta(num_docs, 0.01, num_threads,
+ inputs[1].sub("#AV", vector),
+ "#{vector}-search-speed-multiple.dat",
+ :xml_getter_search_throughput)
+ plot_graph("#{vector}-search-speed-multiple.dat",
+ "#{vector}-search-speed-multiple.png",
+ "Search speed with 1 update thread and X searcher threads",
+ "Number of documents", "Queries per/sec", graph_titles[1])
+
+ # Updater thread throughput at a fixed 0.01 unique fraction, one graph per thread count.
+ extract_beta(num_docs, 0.01, num_threads,
+ inputs[1].sub("#AV", vector),
+ "#{vector}-update-speed-multiple.dat",
+ :xml_getter_updater_thread_throughput)
+ plot_graph("#{vector}-update-speed-multiple.dat",
+ "#{vector}-update-speed-multiple.png",
+ "Update speed with 1 update thread and X searcher threads",
+ "Number of documents", "Updates per/sec", graph_titles[1])
+end
diff --git a/searchlib/src/tests/attribute/bitvector/.gitignore b/searchlib/src/tests/attribute/bitvector/.gitignore
new file mode 100644
index 00000000000..05ec0a4df59
--- /dev/null
+++ b/searchlib/src/tests/attribute/bitvector/.gitignore
@@ -0,0 +1 @@
+searchlib_bitvector_test_app
diff --git a/searchlib/src/tests/attribute/bitvector/CMakeLists.txt b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt
new file mode 100644
index 00000000000..bc65fc04dc4
--- /dev/null
+++ b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from bitvector_test.cpp, depending on searchlib and
+# searchlib_test, and registered as a test that runs the binary directly.
+vespa_add_executable(searchlib_bitvector_test_app
+ SOURCES
+ bitvector_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_bitvector_test_app COMMAND searchlib_bitvector_test_app)
diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
new file mode 100644
index 00000000000..85f83d217eb
--- /dev/null
+++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp
@@ -0,0 +1,632 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using search::attribute::Config;
+using search::AttributeFactory;
+using search::FloatingPointAttribute;
+using search::IntegerAttribute;
+using search::StringAttribute;
+using search::AttributeVector;
+using search::ParseItem;
+using search::fef::TermFieldMatchData;
+using search::BitVector;
+using search::BitVectorIterator;
+using search::queryeval::SearchIterator;
+
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr;
+
+/**
+ * Test fixture with helpers for creating attribute vectors under different
+ * fast-search / bitvector / filter settings, populating them with a known
+ * value, and verifying what a search for that value returns.
+ */
+struct BitVectorTest
+{
+    typedef AttributeVector::SP AttributePtr;
+
+    BitVectorTest() { }
+
+    ~BitVectorTest() { }
+
+    /** Checked downcast of the attribute held by v to VectorType. */
+    template <typename VectorType>
+    VectorType & as(AttributePtr &v);
+    IntegerAttribute & asInt(AttributePtr &v);
+    StringAttribute & asString(AttributePtr &v);
+    FloatingPointAttribute & asFloat(AttributePtr &v);
+
+    /** Applies the four flags to cfg and creates an attribute named pref. */
+    AttributePtr
+    make(Config cfg,
+         const vespalib::string &pref,
+         bool fastSearch,
+         bool enableBitVectors,
+         bool enableOnlyBitVector,
+         bool filter);
+
+    /** Adds cleared documents until the attribute holds sz documents. */
+    void
+    addDocs(const AttributePtr &v, size_t sz);
+
+    /** Sets (set == true) or clears every fifth document in [low, high). */
+    template <typename VectorType>
+    void populate(VectorType &v,
+                  uint32_t low,
+                  uint32_t high,
+                  bool set);
+
+    /** Sets (set == true) or clears every document in [low, high). */
+    template <typename VectorType>
+    void populateAll(VectorType &v,
+                     uint32_t low,
+                     uint32_t high,
+                     bool set);
+
+    /** Serializes a single term (or prefix term) query into buffer. */
+    void
+    buildTermQuery(std::vector<char> & buffer,
+                   const vespalib::string & index,
+                   const vespalib::string & term, bool prefix);
+
+    /** Term string that matches the value written by populate() for V. */
+    template <typename V>
+    vespalib::string
+    getSearchStr();
+
+    /** Creates a search context on vec for the given term. */
+    template <typename V, typename T>
+    SearchContextPtr
+    getSearch(const V & vec, const T & term, bool prefix, bool useBitVector);
+
+    /** Creates a search context on vec for the populated value. */
+    template <typename V>
+    SearchContextPtr
+    getSearch(const V & vec, bool useBitVector);
+
+    /**
+     * Walks the iterator over all hits and checks first/last document id,
+     * hit count and unpacked weights.
+     */
+    void
+    checkSearch(AttributePtr v,
+                SearchBasePtr sb,
+                TermFieldMatchData &md,
+                uint32_t expFirstDocId,
+                uint32_t expLastDocId,  // was misspelled expFastDocId; the definition uses expLastDocId
+                uint32_t expDocFreq,
+                bool weights,
+                bool checkStride);
+
+    /** Same check, creating the iterator from a search context first. */
+    void
+    checkSearch(AttributePtr v,
+                SearchContextPtr sc,
+                uint32_t expFirstDocId,
+                uint32_t expLastDocId,
+                uint32_t expDocFreq,
+                bool weights,
+                bool checkStride);
+
+    /** Runs the full scenario for one combination of the four flags. */
+    template <typename VectorType, typename BufferType>
+    void
+    test(BasicType bt, CollectionType ct, const vespalib::string &pref,
+         bool fastSearch,
+         bool enableBitVectors,
+         bool enableOnlyBitVector,
+         bool filter);
+
+    /** Runs the scenario for a fixed list of flag combinations. */
+    template <typename VectorType, typename BufferType>
+    void
+    test(BasicType bt, CollectionType ct, const vespalib::string &pref);
+};
+
+
+/**
+ * Downcasts the attribute held by v to VectorType, asserting that the
+ * dynamic type matches.
+ */
+template <typename VectorType>
+VectorType &
+BitVectorTest::as(AttributePtr &v)
+{
+    VectorType *typed = dynamic_cast<VectorType *>(v.get());
+    assert(typed != nullptr);
+    return *typed;
+}
+
+
+/** Views the attribute as an IntegerAttribute. */
+IntegerAttribute &
+BitVectorTest::asInt(AttributePtr &v)
+{
+    IntegerAttribute &typed = as<IntegerAttribute>(v);
+    return typed;
+}
+
+
+/** Views the attribute as a StringAttribute. */
+StringAttribute &
+BitVectorTest::asString(AttributePtr &v)
+{
+    StringAttribute &typed = as<StringAttribute>(v);
+    return typed;
+}
+
+
+/** Views the attribute as a FloatingPointAttribute. */
+FloatingPointAttribute &
+BitVectorTest::asFloat(AttributePtr &v)
+{
+    FloatingPointAttribute &typed = as<FloatingPointAttribute>(v);
+    return typed;
+}
+
+
+/**
+ * Serializes a one-term query into buffer: an item type byte, then the
+ * compressed length and bytes of the index name, then the compressed length
+ * and bytes of the term. The buffer is shrunk to the bytes actually written.
+ *
+ * @param buffer receives the serialized query (previous content is replaced).
+ * @param index  name of the index (attribute) to search.
+ * @param term   term text.
+ * @param prefix true to emit a prefix term item, false for a plain term item.
+ */
+void
+BitVectorTest::buildTermQuery(std::vector<char> &buffer,
+                              const vespalib::string &index,
+                              const vespalib::string &term,
+                              bool prefix)
+{
+    const uint32_t indexLen = index.size();
+    const uint32_t termLen = term.size();
+    // Reserve room up front: 1 type byte, 4 bytes for each of the two
+    // compressed lengths, and both payloads.
+    const uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen;
+    buffer.resize(queryPacketSize);
+
+    uint32_t pos = 0;
+    buffer[pos] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+    ++pos;
+
+    pos += vespalib::compress::Integer::compressPositive(indexLen, &buffer[pos]);
+    memcpy(&buffer[pos], index.c_str(), indexLen);
+    pos += indexLen;
+
+    pos += vespalib::compress::Integer::compressPositive(termLen, &buffer[pos]);
+    memcpy(&buffer[pos], term.c_str(), termLen);
+    pos += termLen;
+
+    // Drop the unused tail of the reservation.
+    buffer.resize(pos);
+}
+
+
+/** Lookup string for integer attributes; same text as used by getSearch<IntegerAttribute>(). */
+template <>
+vespalib::string
+BitVectorTest::getSearchStr<IntegerAttribute>()
+{
+    return vespalib::string("[-42;-42]");
+}
+
+/** Lookup string for floating point attributes; same text as used by getSearch<FloatingPointAttribute>(). */
+template <>
+vespalib::string
+BitVectorTest::getSearchStr<FloatingPointAttribute>()
+{
+    return vespalib::string("[-42.0;-42.0]");
+}
+
+/** Lookup string for string attributes; same text as used by getSearch<StringAttribute>(). */
+template <>
+vespalib::string
+BitVectorTest::getSearchStr<StringAttribute>()
+{
+    return vespalib::string("foo");
+}
+
+
+/**
+ * Builds a term query for vec's name and the textual form of term, and asks
+ * the attribute (through its AttributeVector interface) for a search context.
+ *
+ * @param vec          attribute to search.
+ * @param term         term; streamed into text with operator<<.
+ * @param prefix       true for a prefix term query.
+ * @param useBitVector forwarded to the search context parameters.
+ */
+template <typename V, typename T>
+SearchContextPtr
+BitVectorTest::getSearch(const V &vec, const T &term, bool prefix,
+                         bool useBitVector)
+{
+    vespalib::asciistream termText;
+    termText << term;
+
+    std::vector<char> query;
+    buildTermQuery(query, vec.getName(), termText.str(), prefix);
+
+    // Call getSearch() through the AttributeVector base interface.
+    const AttributeVector &attr = static_cast<const AttributeVector &>(vec);
+    return attr.getSearch(vespalib::stringref(&query[0], query.size()),
+                          AttributeVector::SearchContext::Params().useBitVector(useBitVector));
+}
+
+
+/** Search context for the integer value written by populate(). */
+template <>
+SearchContextPtr
+BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v,
+                                           bool useBitVector)
+{
+    const bool prefix = false;
+    return getSearch<IntegerAttribute>(v, "[-42;-42]", prefix, useBitVector);
+}
+
+/** Search context for the floating point value written by populate(). */
+template <>
+SearchContextPtr
+BitVectorTest::
+getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v,
+                                  bool useBitVector)
+{
+    const bool prefix = false;
+    return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", prefix,
+                                             useBitVector);
+}
+
+/** Search context for the string value written by populate(). */
+template <>
+SearchContextPtr
+BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v,
+                                          bool useBitVector)
+{
+    const bool prefix = false;
+    return getSearch<StringAttribute, const vespalib::string &>
+        (v, "foo", prefix, useBitVector);
+}
+
+
+/**
+ * Creates an attribute named pref from cfg after applying the four flags.
+ *
+ * @param cfg                 base config (taken by value, modified locally).
+ * @param pref                name passed to the attribute factory.
+ * @param fastSearch          value for Config::setFastSearch.
+ * @param enableBitVectors    value for Config::setEnableBitVectors.
+ * @param enableOnlyBitVector value for Config::setEnableOnlyBitVector.
+ * @param filter              value for Config::setIsFilter.
+ * @return the attribute returned by AttributeFactory::createAttribute.
+ */
+BitVectorTest::AttributePtr
+BitVectorTest::make(Config cfg,
+                    const vespalib::string &pref,
+                    bool fastSearch,
+                    bool enableBitVectors,
+                    bool enableOnlyBitVector,
+                    bool filter)
+{
+    cfg.setFastSearch(fastSearch);
+    cfg.setEnableBitVectors(enableBitVectors);
+    cfg.setEnableOnlyBitVector(enableOnlyBitVector);
+    cfg.setIsFilter(filter);
+    return AttributeFactory::createAttribute(pref, cfg);
+}
+
+
+/**
+ * Grows the attribute one document at a time, clearing each new document,
+ * until it holds sz documents, then commits with commit(true).
+ */
+void
+BitVectorTest::addDocs(const AttributePtr &v, size_t sz)
+{
+    for (;;) {
+        if (v->getNumDocs() >= sz) {
+            break;
+        }
+        AttributeVector::DocId docId = 0;
+        EXPECT_TRUE(v->addDoc(docId));
+        v->clearDoc(docId);
+    }
+    EXPECT_TRUE(v->getNumDocs() == sz);
+    v->commit(true);
+}
+
+
+/**
+ * Integer variant: for every fifth document in [low, high), either clears it
+ * (set == false) or writes -42 (multi-value: appends -42/27, -43/14, -42/-3),
+ * then commits.
+ */
+template <>
+void
+BitVectorTest::populate(IntegerAttribute &v,
+                        uint32_t low, uint32_t high,
+                        bool set)
+{
+    for (size_t i = low; i < high; i += 5) {
+        if (set) {
+            if (v.hasMultiValue()) {
+                v.append(i, -42, 27);
+                v.append(i, -43, 14);
+                v.append(i, -42, -3);
+            } else {
+                EXPECT_TRUE(v.update(i, -42));
+            }
+        } else {
+            v.clearDoc(i);
+        }
+    }
+    v.commit();
+}
+
+
+/**
+ * Floating point variant: for every fifth document in [low, high), either
+ * clears it (set == false) or writes -42.0 (multi-value: appends -42.0/27,
+ * -43.0/14, -42.0/-3), then commits.
+ */
+template <>
+void
+BitVectorTest::populate(FloatingPointAttribute &v,
+                        uint32_t low, uint32_t high,
+                        bool set)
+{
+    for (size_t i = low; i < high; i += 5) {
+        if (set) {
+            if (v.hasMultiValue()) {
+                v.append(i, -42.0, 27);
+                v.append(i, -43.0, 14);
+                v.append(i, -42.0, -3);
+            } else {
+                EXPECT_TRUE(v.update(i, -42.0));
+            }
+        } else {
+            v.clearDoc(i);
+        }
+    }
+    v.commit();
+}
+
+
+/**
+ * String variant: for every fifth document in [low, high), either clears it
+ * (set == false) or writes "foo" (multi-value: appends "foo"/27, "bar"/14,
+ * "foO"/-3), then commits.
+ */
+template <>
+void
+BitVectorTest::populate(StringAttribute &v,
+                        uint32_t low, uint32_t high,
+                        bool set)
+{
+    for (size_t i = low; i < high; i += 5) {
+        if (set) {
+            if (v.hasMultiValue()) {
+                v.append(i, "foo", 27);
+                v.append(i, "bar", 14);
+                v.append(i, "foO", -3);
+            } else {
+                EXPECT_TRUE(v.update(i, "foo"));
+            }
+        } else {
+            v.clearDoc(i);
+        }
+    }
+    v.commit();
+}
+
+/**
+ * Integer variant: for every document in [low, high), either clears it
+ * (set == false) or writes -42. Multi-value documents are cleared first and
+ * then get -42/27, -43/14, -42/-3 appended. Commits at the end.
+ */
+template <>
+void
+BitVectorTest::populateAll(IntegerAttribute &v,
+                           uint32_t low, uint32_t high,
+                           bool set)
+{
+    for (size_t i = low; i < high; ++i) {
+        if (set) {
+            if (v.hasMultiValue()) {
+                v.clearDoc(i);
+                v.append(i, -42, 27);
+                v.append(i, -43, 14);
+                v.append(i, -42, -3);
+            } else {
+                EXPECT_TRUE(v.update(i, -42));
+            }
+        } else {
+            v.clearDoc(i);
+        }
+    }
+    v.commit();
+}
+
+
+/**
+ * Floating point variant: for every document in [low, high), either clears
+ * it (set == false) or writes -42.0. Multi-value documents are cleared first
+ * and then get -42.0/27, -43.0/14, -42.0/-3 appended. Commits at the end.
+ */
+template <>
+void
+BitVectorTest::populateAll(FloatingPointAttribute &v,
+                           uint32_t low, uint32_t high,
+                           bool set)
+{
+    for (size_t i = low; i < high; ++i) {
+        if (set) {
+            if (v.hasMultiValue()) {
+                v.clearDoc(i);
+                v.append(i, -42.0, 27);
+                v.append(i, -43.0, 14);
+                v.append(i, -42.0, -3);
+            } else {
+                EXPECT_TRUE(v.update(i, -42.0));
+            }
+        } else {
+            v.clearDoc(i);
+        }
+    }
+    v.commit();
+}
+
+
+/**
+ * String variant: for every document in [low, high), either clears it
+ * (set == false) or writes "foo". Multi-value documents are cleared first
+ * and then get "foo"/27, "bar"/14, "foO"/-3 appended. Commits at the end.
+ */
+template <>
+void
+BitVectorTest::populateAll(StringAttribute &v,
+                           uint32_t low, uint32_t high,
+                           bool set)
+{
+    for (size_t i = low; i < high; ++i) {
+        if (set) {
+            if (v.hasMultiValue()) {
+                v.clearDoc(i);
+                v.append(i, "foo", 27);
+                v.append(i, "bar", 14);
+                v.append(i, "foO", -3);
+            } else {
+                EXPECT_TRUE(v.update(i, "foo"));
+            }
+        } else {
+            v.clearDoc(i);
+        }
+    }
+    v.commit();
+}
+
+
+/**
+ * Walks the iterator over every hit from document 1 onwards and verifies the
+ * first and last hit, the number of hits, and the weight unpacked for each.
+ *
+ * @param v             attribute searched (used for collection/basic type).
+ * @param sb            iterator to walk.
+ * @param md            match data the iterator unpacks into.
+ * @param expFirstDocId expected first hit.
+ * @param expLastDocId  expected last hit.
+ * @param expDocFreq    expected number of hits.
+ * @param weights       false when every hit is expected to unpack weight 1.
+ * @param checkStride   true to assert that every hit satisfies docId % 5 == 2.
+ */
+void
+BitVectorTest::checkSearch(AttributePtr v,
+                           SearchBasePtr sb,
+                           TermFieldMatchData &md,
+                           uint32_t expFirstDocId,
+                           uint32_t expLastDocId,
+                           uint32_t expDocFreq,
+                           bool weights,
+                           bool checkStride)
+{
+    sb->initFullRange();
+    sb->seek(1u);
+    uint32_t docId = sb->getDocId();
+    uint32_t lastDocId = 0;
+    uint32_t docFreq = 0;
+    EXPECT_EQUAL(expFirstDocId, docId);
+    while (docId != search::endDocId) {
+        lastDocId = docId;
+        // Statement terminator fixed: this line previously ended in a comma,
+        // chaining it to the assert below via the comma operator.
+        ++docFreq;
+        assert(!checkStride || (docId % 5) == 2u);
+        sb->unpack(docId);
+        EXPECT_EQUAL(md.getDocId(), docId);
+        if (v->getCollectionType() == CollectionType::SINGLE ||
+            !weights) {
+            EXPECT_EQUAL(1, md.getWeight());
+        } else if (v->getCollectionType() == CollectionType::ARRAY) {
+            // populate() appends two entries for the searched value.
+            EXPECT_EQUAL(2, md.getWeight());
+        } else {
+            // Weighted set. NOTE(review): 24 looks like 27 + -3 for "foo"
+            // and "foO" matching together, -3 like the last weight written
+            // for the numeric key; confirm.
+            if (v->getBasicType() == BasicType::STRING) {
+                EXPECT_EQUAL(24, md.getWeight());
+            } else {
+                EXPECT_EQUAL(-3, md.getWeight());
+            }
+        }
+        sb->seek(docId + 1);
+        docId = sb->getDocId();
+    }
+    EXPECT_EQUAL(expLastDocId, lastDocId);
+    EXPECT_EQUAL(expDocFreq, docFreq);
+}
+
+
+/**
+ * Fetches postings on the search context, creates an iterator from it and
+ * delegates to the iterator-based checkSearch() with a fresh match data
+ * object.
+ */
+void
+BitVectorTest::checkSearch(AttributePtr v,
+                           SearchContextPtr sc,
+                           uint32_t expFirstDocId,
+                           uint32_t expLastDocId,
+                           uint32_t expDocFreq,
+                           bool weights,
+                           bool checkStride)
+{
+    TermFieldMatchData md;
+    sc->fetchPostings(true);
+    SearchBasePtr iterator = sc->createIterator(&md, true);
+    checkSearch(v,
+                std::move(iterator),
+                md,
+                expFirstDocId,
+                expLastDocId,
+                expDocFreq,
+                weights,
+                checkStride);
+}
+
+
+// Runs the complete populate/search scenario for one combination of flags.
+// The steps build on each other, so the expected first/last document ids and
+// hit counts below depend on the exact order of the populate calls.
+template <typename VectorType, typename BufferType>
+void
+BitVectorTest::test(BasicType bt,
+ CollectionType ct,
+ const vespalib::string &pref,
+ bool fastSearch,
+ bool enableBitVectors,
+ bool enableOnlyBitVector,
+ bool filter)
+{
+ Config cfg(bt, ct);
+ AttributePtr v = make(cfg, pref, fastSearch,
+ enableBitVectors, enableOnlyBitVector, filter);
+ addDocs(v, 1024);
+ VectorType &tv = as<VectorType>(v);
+ // Sets documents 2, 7, ..., 1022: 205 hits.
+ populate(tv, 2, 1023, true);
+
+ // Search with useBitVector == true, then with useBitVector == false.
+ SearchContextPtr sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter,
+ true);
+ sc = getSearch<VectorType>(tv, false);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector &&
+ !filter, true);
+ // If the attribute exposes the document weight interface, check the
+ // iterator built directly on it as well.
+ const search::IDocumentWeightAttribute *dwa =
+ v->asDocumentWeightAttribute();
+ if (dwa != NULL) {
+ search::IDocumentWeightAttribute::LookupResult lres =
+ dwa->lookup(getSearchStr<VectorType>());
+ typedef search::queryeval::DocumentWeightSearchIterator DWSI;
+ typedef search::queryeval::SearchIterator SI;
+ TermFieldMatchData md;
+ SI::UP dwsi(new DWSI(md, *dwa, lres));
+ if (!enableOnlyBitVector) {
+ checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true);
+ } else {
+ // With only-bitvector enabled the iterator is expected to be empty.
+ dwsi->initFullRange();
+ EXPECT_TRUE(dwsi->isAtEnd());
+ }
+ }
+ // Clears documents 2, 7, ..., 972; the 10 hits 977, ..., 1022 remain.
+ populate(tv, 2, 973, false);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector &&
+ !filter, true);
+ // Restores the cleared documents: back to 205 hits.
+ populate(tv, 2, 973, true);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter,
+ true);
+ // Growing the attribute to 15000 documents must not change the hits.
+ addDocs(v, 15000);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector &&
+ !filter, true);
+ // Sets every document in [10, 15000): 14990 hits plus documents 2 and 7
+ // gives 14992. The stride check is disabled because hits are now contiguous.
+ populateAll(tv, 10, 15000, true);
+ sc = getSearch<VectorType>(tv, true);
+ checkSearch(v, std::move(sc), 2, 14999, 14992,
+ !enableBitVectors && !filter,
+ false);
+}
+
+
+/**
+ * Runs the scenario for a fixed list of eight flag combinations, in the
+ * order listed in the table below.
+ */
+template <typename VectorType, typename BufferType>
+void
+BitVectorTest::test(BasicType bt,
+                    CollectionType ct,
+                    const vespalib::string &pref)
+{
+    LOG(info, "test run, pref is %s", pref.c_str());
+
+    struct Flags {
+        bool fastSearch;
+        bool enableBitVectors;
+        bool enableOnlyBitVector;
+        bool filter;
+    };
+    static const Flags combinations[] = {
+        { false, false, false, false },
+        { false, false, false, true  },
+        { true,  false, false, false },
+        { true,  false, false, true  },
+        { true,  true,  false, false },
+        { true,  true,  false, true  },
+        { true,  true,  true,  false },
+        { true,  true,  true,  true  },
+    };
+    for (const Flags &flags : combinations) {
+        test<VectorType, BufferType>(bt, ct, pref,
+                                     flags.fastSearch,
+                                     flags.enableBitVectors,
+                                     flags.enableOnlyBitVector,
+                                     flags.filter);
+    }
+}
+
+
+// Single-value int32 attribute: runs all flag combinations.
+TEST_F("Test bitvectors with single value int32", BitVectorTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::SINGLE,
+ "int32_sv");
+}
+
+// Array int32 attribute: runs all flag combinations.
+TEST_F("Test bitvectors with array value int32", BitVectorTest)
+{
+ f.template test<IntegerAttribute,
+ IntegerAttribute::largeint_t>(BasicType::INT32,
+ CollectionType::ARRAY,
+ "int32_a");
+}
+
+// Weighted set int32 attribute: runs all flag combinations.
+// The attribute name uses the "_ws" suffix like the double and string
+// weighted set tests; it previously reused the single-value name "int32_sv".
+TEST_F("Test bitvectors with weighted set value int32", BitVectorTest)
+{
+    f.template test<IntegerAttribute,
+                    IntegerAttribute::WeightedInt>(BasicType::INT32,
+                                                   CollectionType::WSET,
+                                                   "int32_ws");
+}
+
+// Single-value double attribute: runs all flag combinations.
+TEST_F("Test bitvectors with single value double", BitVectorTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::DOUBLE,
+ CollectionType::SINGLE,
+ "double_sv");
+}
+
+// Array double attribute: runs all flag combinations.
+TEST_F("Test bitvectors with array value double", BitVectorTest)
+{
+ f.template test<FloatingPointAttribute,
+ double>(BasicType::DOUBLE,
+ CollectionType::ARRAY,
+ "double_a");
+}
+
+// Weighted set double attribute: runs all flag combinations.
+TEST_F("Test bitvectors with weighted set value double", BitVectorTest)
+{
+ f.template test<FloatingPointAttribute,
+ FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE,
+ CollectionType::WSET,
+ "double_ws");
+}
+
+// Single-value string attribute: runs all flag combinations.
+TEST_F("Test bitvectors with single value string", BitVectorTest)
+{
+ f.template test<StringAttribute,
+ vespalib::string>(BasicType::STRING,
+ CollectionType::SINGLE,
+ "string_sv");
+}
+
+// Array string attribute: runs all flag combinations.
+TEST_F("Test bitvectors with array value string", BitVectorTest)
+{
+ f.template test<StringAttribute,
+ vespalib::string>(BasicType::STRING,
+ CollectionType::ARRAY,
+ "string_a");
+}
+
+// Weighted set string attribute: runs all flag combinations.
+TEST_F("Test bitvectors with weighted set value string", BitVectorTest)
+{
+ f.template test<StringAttribute,
+ StringAttribute::WeightedString>(BasicType::STRING,
+ CollectionType::WSET,
+ "string_ws");
+}
+
+// Builds a bit vector holding the verifier's expected document ids and runs
+// the verifier on bit vector iterators created with the last argument set to
+// false and then to true.
+TEST("Test bitvector iterators adheres to initRange") {
+    search::test::InitRangeVerifier initRangeTest;
+    BitVector::UP bv = BitVector::create(initRangeTest.getDocIdLimit());
+    for (uint32_t docId: initRangeTest.getExpectedDocIds()) {
+        bv->setBit(docId);
+    }
+    TermFieldMatchData tfmd;
+    initRangeTest.verify(
+        *BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, false));
+    initRangeTest.verify(
+        *BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, true));
+}
+
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/changevector_test.cpp b/searchlib/src/tests/attribute/changevector_test.cpp
new file mode 100644
index 00000000000..9f0a796fd3e
--- /dev/null
+++ b/searchlib/src/tests/attribute/changevector_test.cpp
@@ -0,0 +1,92 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("changevector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/attribute/changevector.h>
+
+using namespace search;
+
+/**
+ * Checks that iterating v yields changes whose data values are exactly
+ * 1, 2, 3, ... in that order, and that the number of changes iterated equals
+ * v.size().
+ */
+template <typename T>
+void verifyStrictOrdering(const T & v) {
+    long count = 0;
+    for (const auto & c : v) {
+        ++count;
+        EXPECT_EQUAL(count, c._data.get());
+    }
+    EXPECT_EQUAL(v.size(), size_t(count));
+}
+
+/**
+ * Forward accessor over a vector of longs, exposing size(), next(), value()
+ * and weight(), as consumed by the push_back(doc, accessor) call in the
+ * tests of this file. The current element is used both as value and as
+ * weight.
+ *
+ * The referenced vector must outlive the accessor. value() and weight() must
+ * not be called once next() has advanced past the last element.
+ */
+class Accessor {
+public:
+    /** Starts at the first element of v. */
+    explicit Accessor(const std::vector<long> & v) : _size(v.size()), _current(v.begin()) { }
+    /** Number of elements in the vector the accessor was created from. */
+    size_t size() const { return _size; }
+    /** Advances to the next element. */
+    void next() { ++_current; }
+    /** Current element as value. */
+    long value() const { return *_current; }
+    /** Current element as weight; the narrowing to int is intentional and explicit. */
+    int weight() const { return static_cast<int>(*_current); }
+private:
+    size_t _size;
+    std::vector<long>::const_iterator _current;
+};
+
+// Two changes pushed for the same document (7) must be iterated in the
+// order they were inserted (data values 1, 2).
+TEST("require insert ordering is preserved for same doc")
+{
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ a.push_back(Change(Change::NOOP, 7, 1));
+ EXPECT_EQUAL(1u, a.size());
+ a.push_back(Change(Change::NOOP, 7, 2));
+ EXPECT_EQUAL(2u, a.size());
+ verifyStrictOrdering(a);
+}
+
+// Changes pushed for distinct documents (7, 5, 6) must be iterated in
+// insertion order (data values 1, 2, 3), not in document id order.
+TEST("require insert ordering is preserved ")
+{
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ a.push_back(Change(Change::NOOP, 7, 1));
+ EXPECT_EQUAL(1u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 2));
+ EXPECT_EQUAL(2u, a.size());
+ a.push_back(Change(Change::NOOP, 6, 3));
+ EXPECT_EQUAL(3u, a.size());
+ verifyStrictOrdering(a);
+}
+
+// Mixes single pushes with a bulk push through an Accessor. The data values
+// are chosen so that the expected iteration order is 1..10: document 7 (1),
+// then every change for document 5 in insertion order (2..9), then
+// document 6 (10).
+TEST("require insert ordering is preserved with mix")
+{
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ a.push_back(Change(Change::NOOP, 7, 1));
+ EXPECT_EQUAL(1u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 2));
+ EXPECT_EQUAL(2u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 3));
+ EXPECT_EQUAL(3u, a.size());
+ a.push_back(Change(Change::NOOP, 6, 10));
+ EXPECT_EQUAL(4u, a.size());
+ // Bulk push of five values for document 5 grows the vector from 4 to 9.
+ std::vector<long> v({4,5,6,7,8});
+ Accessor ac(v);
+ a.push_back(5, ac);
+ EXPECT_EQUAL(9u, a.size());
+ a.push_back(Change(Change::NOOP, 5, 9));
+ EXPECT_EQUAL(10u, a.size());
+ verifyStrictOrdering(a);
+}
+
+// A bulk push through an Accessor over an empty vector must leave the
+// change vector empty.
+TEST("require that inserting empty vector does not affect the vector.") {
+ typedef ChangeTemplate<NumericChangeData<long>> Change;
+ typedef ChangeVectorT<Change> CV;
+ CV a;
+ std::vector<long> v;
+ Accessor ac(v);
+ a.push_back(1, ac);
+ EXPECT_EQUAL(0u, a.size());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/changevector_test.sh b/searchlib/src/tests/attribute/changevector_test.sh
new file mode 100644
index 00000000000..cb70f5465a4
--- /dev/null
+++ b/searchlib/src/tests/attribute/changevector_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Runs the changevector test binary, prefixed by the command in $VALGRIND
+# when that variable is set, then removes leftover files from the working
+# directory.
+$VALGRIND ./searchlib_changevector_test_app
+# Remember the test result: without this the script would exit with the
+# status of the last rm and hide a failing test.
+rc=$?
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
+rm -rf clstmp
+rm -rf alstmp
+exit $rc
diff --git a/searchlib/src/tests/attribute/comparator/.gitignore b/searchlib/src/tests/attribute/comparator/.gitignore
new file mode 100644
index 00000000000..51c5b5944c9
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+comparator_test
+searchlib_comparator_test_app
diff --git a/searchlib/src/tests/attribute/comparator/CMakeLists.txt b/searchlib/src/tests/attribute/comparator/CMakeLists.txt
new file mode 100644
index 00000000000..4a14181db3c
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from comparator_test.cpp, depending on searchlib,
+# and registered as a test that runs the binary directly.
+vespa_add_executable(searchlib_comparator_test_app
+ SOURCES
+ comparator_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_comparator_test_app COMMAND searchlib_comparator_test_app)
diff --git a/searchlib/src/tests/attribute/comparator/DESC b/searchlib/src/tests/attribute/comparator/DESC
new file mode 100644
index 00000000000..6b3ba01c89b
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/DESC
@@ -0,0 +1 @@
+Comparator test. See comparator_test.cpp for details.
diff --git a/searchlib/src/tests/attribute/comparator/FILES b/searchlib/src/tests/attribute/comparator/FILES
new file mode 100644
index 00000000000..b4c23c09022
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/FILES
@@ -0,0 +1 @@
+comparator_test.cpp
diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
new file mode 100644
index 00000000000..2a4c3c6fb87
--- /dev/null
+++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp
@@ -0,0 +1,169 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("comparator_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/enumcomparator.h>
+#include <vespa/searchlib/btree/btreeroot.h>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+
+namespace search {
+
+using namespace btree;
+
+// Enum store and comparator aliases for int32 values.
+using NumericEnumStore = EnumStoreT<NumericEntryType<int32_t>>;
+using NumericComparator = EnumStoreComparatorT<NumericEntryType<int32_t>>;
+
+// Enum store and comparator aliases for float values.
+using FloatEnumStore = EnumStoreT<NumericEntryType<float>>;
+using FloatComparator = EnumStoreComparatorT<NumericEntryType<float>>;
+
+// Enum store and comparator aliases for strings (regular and folded compare).
+using StringEnumStore = EnumStoreT<StringEntryType>;
+using StringComparator = EnumStoreComparatorT<StringEntryType>;
+using FoldedStringComparator = EnumStoreFoldedComparatorT<StringEntryType>;
+
+using EnumIndex = EnumStoreBase::Index;
+
+// B-tree keyed on enum indexes, without leaf data or aggregation, ordered
+// through a comparator wrapper.
+using TreeType = BTreeRoot<EnumIndex, BTreeNoLeafData,
+                           btree::NoAggregated,
+                           const EnumStoreComparatorWrapper>;
+using NodeAllocator = TreeType::NodeAllocatorType;
+
+/**
+ * Test application for the enum store comparators. Main() runs each of the
+ * private requireThat...() checks once.
+ */
+class Test : public vespalib::TestApp {
+public:
+    Test() {}
+    int Main();
+
+private:
+    void requireThatNumericComparatorIsWorking();
+    void requireThatFloatComparatorIsWorking();
+    void requireThatStringComparatorIsWorking();
+    void requireThatComparatorWithTreeIsWorking();
+    void requireThatFoldedComparatorIsWorking();
+};
+
+// Checks less-than ordering of two stored int32 values (10 and 30), and
+// comparison against a comparator constructed with the extra value 20.
+void
+Test::requireThatNumericComparatorIsWorking()
+{
+ NumericEnumStore es(1024, false);
+ EnumIndex e1, e2;
+ es.addEnum(10, e1);
+ es.addEnum(30, e2);
+ NumericComparator cmp1(es);
+ EXPECT_TRUE(cmp1(e1, e2));
+ EXPECT_TRUE(!cmp1(e2, e1));
+ EXPECT_TRUE(!cmp1(e1, e1));
+ // NOTE(review): a default-constructed EnumIndex presumably stands for the
+ // value (20) given to the comparator; confirm.
+ NumericComparator cmp2(es, 20);
+ EXPECT_TRUE(cmp2(EnumIndex(), e2));
+ EXPECT_TRUE(!cmp2(e2, EnumIndex()));
+}
+
+// Checks less-than ordering of stored float values, including that NaN
+// orders before a regular value and is not less than itself.
+void
+Test::requireThatFloatComparatorIsWorking()
+{
+ FloatEnumStore es(1024, false);
+ EnumIndex e1, e2, e3;
+ es.addEnum(10.5, e1);
+ es.addEnum(30.5, e2);
+ es.addEnum(std::numeric_limits<float>::quiet_NaN(), e3);
+ FloatComparator cmp1(es);
+ EXPECT_TRUE(cmp1(e1, e2));
+ EXPECT_TRUE(!cmp1(e2, e1));
+ EXPECT_TRUE(!cmp1(e1, e1));
+ EXPECT_TRUE(cmp1(e3, e1)); // nan
+ EXPECT_TRUE(!cmp1(e1, e3)); // nan
+ EXPECT_TRUE(!cmp1(e3, e3)); // nan
+ // NOTE(review): a default-constructed EnumIndex presumably stands for the
+ // value (20.5) given to the comparator; confirm.
+ FloatComparator cmp2(es, 20.5);
+ EXPECT_TRUE(cmp2(EnumIndex(), e2));
+ EXPECT_TRUE(!cmp2(e2, EnumIndex()));
+}
+
+// Checks string ordering: strings are compared folded first, and strings
+// that are equal when folded fall back to a regular comparison.
+void
+Test::requireThatStringComparatorIsWorking()
+{
+ StringEnumStore es(1024, false);
+ EnumIndex e1, e2, e3;
+ es.addEnum("Aa", e1);
+ es.addEnum("aa", e2);
+ es.addEnum("aB", e3);
+ StringComparator cmp1(es);
+ EXPECT_TRUE(cmp1(e1, e2)); // similar folded, fallback to regular
+ EXPECT_TRUE(!cmp1(e2, e1));
+ EXPECT_TRUE(!cmp1(e1, e1));
+ EXPECT_TRUE(cmp1(e2, e3)); // folded compare
+ // Contrast: a plain strcmp orders "aa" after "aB".
+ EXPECT_TRUE(strcmp("aa", "aB") > 0); // regular
+ // NOTE(review): a default-constructed EnumIndex presumably stands for the
+ // value ("AB") given to the comparator; confirm.
+ StringComparator cmp2(es, "AB");
+ EXPECT_TRUE(cmp2(EnumIndex(), e3));
+ EXPECT_TRUE(!cmp2(e3, EnumIndex()));
+}
+
+// Inserts the values 100 down to 1 into a B-tree ordered by the numeric
+// comparator and checks that iterating the tree yields 1..100 ascending.
+void
+Test::requireThatComparatorWithTreeIsWorking()
+{
+ NumericEnumStore es(2048, false);
+ vespalib::GenerationHandler g;
+ TreeType t;
+ NodeAllocator m;
+ EnumIndex ei;
+ for (int32_t v = 100; v > 0; --v) {
+ // The value must not be found before it is added to the store and tree.
+ NumericComparator cmp(es, v);
+ EXPECT_TRUE(!t.find(EnumIndex(), m, cmp).valid());
+ es.addEnum(v, ei);
+ t.insert(ei, BTreeNoLeafData(), m, cmp);
+ }
+ EXPECT_EQUAL(100u, t.size(m));
+ int32_t exp = 1;
+ for (TreeType::Iterator itr = t.begin(m); itr.valid(); ++itr) {
+ EXPECT_EQUAL(exp++, es.getValue(itr.getKey()));
+ }
+ EXPECT_EQUAL(101, exp);
+ // Tear down: clear the tree, then freeze the allocator and move its hold
+ // lists through a generation bump so they can be trimmed.
+ t.clear(m);
+ m.freeze();
+ m.transferHoldLists(g.getCurrentGeneration());
+ g.incGeneration();
+ m.trimHoldLists(g.getFirstUsedGeneration());
+}
+
+// Checks the folded string comparator: strings equal when folded compare as
+// neither less nor greater, and a comparator built with the prefix flag set
+// treats a stored string starting with the given text as equal to it.
+void
+Test::requireThatFoldedComparatorIsWorking()
+{
+ StringEnumStore es(1024, false);
+ EnumIndex e1, e2, e3, e4;
+ es.addEnum("Aa", e1);
+ es.addEnum("aa", e2);
+ es.addEnum("aB", e3);
+ es.addEnum("Folded", e4);
+ FoldedStringComparator cmp1(es);
+ EXPECT_TRUE(!cmp1(e1, e2)); // similar folded
+ EXPECT_TRUE(!cmp1(e2, e1)); // similar folded
+ EXPECT_TRUE(cmp1(e2, e3)); // folded compare
+ EXPECT_TRUE(!cmp1(e3, e2)); // folded compare
+ // Same text "fol", without (cmp2) and with (cmp3) prefix matching.
+ FoldedStringComparator cmp2(es, "fol", false);
+ FoldedStringComparator cmp3(es, "fol", true);
+ EXPECT_TRUE(cmp2(EnumIndex(), e4));
+ EXPECT_TRUE(!cmp2(e4, EnumIndex()));
+ EXPECT_TRUE(!cmp3(EnumIndex(), e4)); // similar when prefix
+ EXPECT_TRUE(!cmp3(e4, EnumIndex())); // similar when prefix
+}
+
+// Application entry point: runs every comparator check once, framed by the
+// testkit's TEST_INIT / TEST_DONE macros.
+int
+Test::Main()
+{
+ TEST_INIT("comparator_test");
+
+ requireThatNumericComparatorIsWorking();
+ requireThatFloatComparatorIsWorking();
+ requireThatStringComparatorIsWorking();
+ requireThatComparatorWithTreeIsWorking();
+ requireThatFoldedComparatorIsWorking();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::Test);
+
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore
new file mode 100644
index 00000000000..08cae9a48df
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore
@@ -0,0 +1 @@
+searchlib_document_weight_iterator_test_app
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt
new file mode 100644
index 00000000000..2a1b36a626d
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_document_weight_iterator_test_app
+ SOURCES
+ document_weight_iterator_test.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app)
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/FILES b/searchlib/src/tests/attribute/document_weight_iterator/FILES
new file mode 100644
index 00000000000..9bb94dc8770
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/FILES
@@ -0,0 +1 @@
+document_weight_iterator_test.cpp
diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp
new file mode 100644
index 00000000000..fbe62f80843
--- /dev/null
+++ b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp
@@ -0,0 +1,189 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchlib/attribute/i_document_weight_attribute.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/searchlib/attribute/multinumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <vespa/searchlib/queryeval/document_weight_search_iterator.h>
+
+using namespace search;
+using namespace search::attribute;
+
+// Creates an attribute named "my_attribute" with the given basic type,
+// collection type and fast-search setting.
+AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) {
+    Config attr_config(type, collection);
+    attr_config.setFastSearch(fast_search);
+    AttributeVector::SP created = AttributeFactory::createAttribute("my_attribute", attr_config);
+    return created;
+}
+
+// Adds 'limit' documents to the attribute, commits, and checks that the
+// last docid handed out by addDoc() is limit - 1.
+// With limit == 0 nothing is added and the docid check is skipped; the
+// original read an uninitialised docid and compared it to a wrapped
+// (limit - 1) in that case.
+void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) {
+    AttributeVector::DocId docid = 0;
+    for (size_t i = 0; i < limit; ++i) {
+        attr_ptr->addDoc(docid);
+    }
+    attr_ptr->commit();
+    if (limit > 0) {
+        ASSERT_EQUAL((limit - 1), docid);
+    }
+}
+
+// Replaces the content of 'docid' with the single (key, weight) entry and
+// commits the change.
+template <typename ATTR, typename KEY>
+void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) {
+    ATTR &target = *attr;
+    target.clearDoc(docid);
+    target.append(docid, key, weight);
+    target.commit();
+}
+
+// Stores the value 111 in docs 1, 5 and 7 with weights 20, 5 and 10.
+void populate_long(AttributeVector::SP attr_ptr) {
+    struct Entry { uint32_t docid; int32_t weight; };
+    const Entry entries[] = { {1, 20}, {5, 5}, {7, 10} };
+    IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get());
+    for (const Entry &entry : entries) {
+        set_doc(attr, entry.docid, int64_t(111), entry.weight);
+    }
+}
+
+// Stores the value "foo" in docs 1, 5 and 7 with weights 20, 5 and 10.
+void populate_string(AttributeVector::SP attr_ptr) {
+    struct Entry { uint32_t docid; int32_t weight; };
+    const Entry entries[] = { {1, 20}, {5, 5}, {7, 10} };
+    StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get());
+    for (const Entry &entry : entries) {
+        set_doc(attr, entry.docid, "foo", entry.weight);
+    }
+}
+
+// Fixture holding a fast-search INT64 weighted set attribute together with
+// its document weight interface. The constructor adds documents (default
+// limit of add_docs) and fills docs 1, 5 and 7 via populate_long().
+struct LongFixture {
+ AttributeVector::SP attr;
+ // Borrowed from 'attr'; asserted to be non-null in the constructor.
+ const IDocumentWeightAttribute *api;
+ LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)),
+ api(attr->asDocumentWeightAttribute())
+ {
+ ASSERT_TRUE(api != nullptr);
+ add_docs(attr);
+ populate_long(attr);
+ }
+};
+
+// Fixture holding a fast-search STRING weighted set attribute together with
+// its document weight interface. The constructor adds documents (default
+// limit of add_docs) and fills docs 1, 5 and 7 via populate_string().
+struct StringFixture {
+ AttributeVector::SP attr;
+ // Borrowed from 'attr'; asserted to be non-null in the constructor.
+ const IDocumentWeightAttribute *api;
+ StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)),
+ api(attr->asDocumentWeightAttribute())
+ {
+ ASSERT_TRUE(api != nullptr);
+ add_docs(attr);
+ populate_string(attr);
+ }
+};
+
+// Fast-search weighted sets of INT64 and STRING must expose the interface.
+TEST("require that appropriate attributes support the document weight attribute interface") {
+    AttributeVector::SP long_wset = make_attribute(BasicType::INT64, CollectionType::WSET, true);
+    EXPECT_TRUE(long_wset->asDocumentWeightAttribute() != nullptr);
+    AttributeVector::SP string_wset = make_attribute(BasicType::STRING, CollectionType::WSET, true);
+    EXPECT_TRUE(string_wset->asDocumentWeightAttribute() != nullptr);
+}
+
+// Every other combination tried here must report no document weight interface.
+TEST("require that inappropriate attributes do not support the document weight attribute interface") {
+    auto lacks_api = [](BasicType type, CollectionType collection, bool fast_search) {
+        return make_attribute(type, collection, fast_search)->asDocumentWeightAttribute() == nullptr;
+    };
+    // INT64: only the fast-search weighted set qualifies.
+    EXPECT_TRUE(lacks_api(BasicType::INT64, CollectionType::SINGLE, false));
+    EXPECT_TRUE(lacks_api(BasicType::INT64, CollectionType::ARRAY, false));
+    EXPECT_TRUE(lacks_api(BasicType::INT64, CollectionType::WSET, false));
+    EXPECT_TRUE(lacks_api(BasicType::INT64, CollectionType::SINGLE, true));
+    EXPECT_TRUE(lacks_api(BasicType::INT64, CollectionType::ARRAY, true));
+    // STRING: only the fast-search weighted set qualifies.
+    EXPECT_TRUE(lacks_api(BasicType::STRING, CollectionType::SINGLE, false));
+    EXPECT_TRUE(lacks_api(BasicType::STRING, CollectionType::ARRAY, false));
+    EXPECT_TRUE(lacks_api(BasicType::STRING, CollectionType::WSET, false));
+    EXPECT_TRUE(lacks_api(BasicType::STRING, CollectionType::SINGLE, true));
+    EXPECT_TRUE(lacks_api(BasicType::STRING, CollectionType::ARRAY, true));
+    // Other basic types do not qualify even as fast-search weighted sets.
+    EXPECT_TRUE(lacks_api(BasicType::INT32, CollectionType::WSET, true));
+    EXPECT_TRUE(lacks_api(BasicType::DOUBLE, CollectionType::WSET, true));
+}
+
+// Expects a hit with three postings whose weights span 5..20.
+void verify_valid_lookup(IDocumentWeightAttribute::LookupResult result) {
+    const bool has_posting = result.posting_idx.valid();
+    EXPECT_TRUE(has_posting);
+    EXPECT_EQUAL(3u, result.posting_size);
+    EXPECT_EQUAL(5, result.min_weight);
+    EXPECT_EQUAL(20, result.max_weight);
+}
+
+// Expects a miss: no posting list, zero size and zero weight bounds.
+void verify_invalid_lookup(IDocumentWeightAttribute::LookupResult result) {
+    const bool has_posting = result.posting_idx.valid();
+    EXPECT_FALSE(has_posting);
+    EXPECT_EQUAL(0u, result.posting_size);
+    EXPECT_EQUAL(0, result.min_weight);
+    EXPECT_EQUAL(0, result.max_weight);
+}
+
+// "111" is stored by the fixture, "222" is not.
+TEST_F("require that integer lookup works correctly", LongFixture) {
+    const IDocumentWeightAttribute &api = *f1.api;
+    verify_valid_lookup(api.lookup("111"));
+    verify_invalid_lookup(api.lookup("222"));
+}
+
+// "foo" is stored by the fixture, "bar" is not.
+TEST_F("require string lookup works correctly", StringFixture) {
+    const IDocumentWeightAttribute &api = *f1.api;
+    verify_valid_lookup(api.lookup("foo"));
+    verify_invalid_lookup(api.lookup("bar"));
+}
+
+// Looks up 'term', creates an iterator for its posting list and walks it
+// with linearSeek(), expecting (docid, weight) = (1, 20), (5, 5), (7, 10)
+// followed by exhaustion.
+void verify_posting(const IDocumentWeightAttribute &api, const char *term) {
+    auto dict_entry = api.lookup(term);
+    ASSERT_TRUE(dict_entry.posting_idx.valid());
+    std::vector<DocumentWeightIterator> iterators;
+    api.create(dict_entry.posting_idx, iterators);
+    ASSERT_EQUAL(1u, iterators.size());
+
+    DocumentWeightIterator &posting = iterators[0];
+    // Position on the first entry at or after docid 1.
+    if (posting.valid()) {
+        if (posting.getKey() < 1) {
+            posting.linearSeek(1);
+        }
+    }
+    ASSERT_TRUE(posting.valid());
+    EXPECT_EQUAL(1u, posting.getKey()); // docid
+    EXPECT_EQUAL(20, posting.getData()); // weight
+
+    posting.linearSeek(2);
+    ASSERT_TRUE(posting.valid());
+    EXPECT_EQUAL(5u, posting.getKey()); // docid
+    EXPECT_EQUAL(5, posting.getData()); // weight
+
+    posting.linearSeek(6);
+    ASSERT_TRUE(posting.valid());
+    EXPECT_EQUAL(7u, posting.getKey()); // docid
+    EXPECT_EQUAL(10, posting.getData()); // weight
+
+    // Nothing is stored at or after docid 8.
+    posting.linearSeek(8);
+    EXPECT_FALSE(posting.valid());
+}
+
+// Walks the posting list of the integer value stored by the fixture.
+TEST_F("require that integer iterators are created correctly", LongFixture) {
+    const IDocumentWeightAttribute &api = *f1.api;
+    verify_posting(api, "111");
+}
+
+// Walks the posting list of the string value stored by the fixture.
+TEST_F("require that string iterators are created correctly", StringFixture) {
+    const IDocumentWeightAttribute &api = *f1.api;
+    verify_posting(api, "foo");
+}
+
+// Builds an INT64 weighted set where exactly the verifier's expected docids
+// contain the value 123, then lets InitRangeVerifier check the iterator.
+TEST("verify init range for document weight search iterator") {
+    search::test::InitRangeVerifier verifier;
+    AttributeVector::SP attribute(make_attribute(BasicType::INT64, CollectionType::WSET, true));
+    add_docs(attribute, verifier.getDocIdLimit());
+
+    IntegerAttribute *typed_attribute = static_cast<IntegerAttribute *>(attribute.get());
+    auto expected_docids = verifier.getExpectedDocIds();
+    for (auto expected_docid : expected_docids) {
+        set_doc(typed_attribute, expected_docid, int64_t(123), 1);
+    }
+
+    const IDocumentWeightAttribute *weight_api(attribute->asDocumentWeightAttribute());
+    ASSERT_TRUE(weight_api != nullptr);
+    auto lookup_result = weight_api->lookup("123");
+    ASSERT_TRUE(lookup_result.posting_idx.valid());
+
+    fef::TermFieldMatchData match_data;
+    queryeval::DocumentWeightSearchIterator search_iterator(match_data, *weight_api, lookup_result);
+    verifier.verify(search_iterator);
+}
+
+// Program entry point supplied by the test kit macro; runs all TEST/TEST_F cases above.
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/enumeratedsave/.gitignore b/searchlib/src/tests/attribute/enumeratedsave/.gitignore
new file mode 100644
index 00000000000..a4680f95f72
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumeratedsave/.gitignore
@@ -0,0 +1,127 @@
+/double_a0_e.udat
+/double_a0_ee.udat
+/double_a1_e.udat
+/double_a1_ee.udat
+/double_a2_e.udat
+/double_a2_ee.udat
+/double_sv0_e.udat
+/double_sv0_ee.udat
+/double_sv1_e.udat
+/double_sv1_ee.udat
+/double_sv2_e.udat
+/double_sv2_ee.udat
+/double_ws0_e.udat
+/double_ws0_ee.udat
+/double_ws1_e.udat
+/double_ws1_ee.udat
+/double_ws2_e.udat
+/double_ws2_ee.udat
+/float_a0_e.udat
+/float_a0_ee.udat
+/float_a1_e.udat
+/float_a1_ee.udat
+/float_a2_e.udat
+/float_a2_ee.udat
+/float_sv0_e.udat
+/float_sv0_ee.udat
+/float_sv1_e.udat
+/float_sv1_ee.udat
+/float_sv2_e.udat
+/float_sv2_ee.udat
+/float_ws0_e.udat
+/float_ws0_ee.udat
+/float_ws1_e.udat
+/float_ws1_ee.udat
+/float_ws2_e.udat
+/float_ws2_ee.udat
+/int16_a0_e.udat
+/int16_a0_ee.udat
+/int16_a1_e.udat
+/int16_a1_ee.udat
+/int16_a2_e.udat
+/int16_a2_ee.udat
+/int16_sv0_e.udat
+/int16_sv0_ee.udat
+/int16_sv1_e.udat
+/int16_sv1_ee.udat
+/int16_sv2_e.udat
+/int16_sv2_ee.udat
+/int16_ws0_e.udat
+/int16_ws0_ee.udat
+/int16_ws1_e.udat
+/int16_ws1_ee.udat
+/int16_ws2_e.udat
+/int16_ws2_ee.udat
+/int32_a0_e.udat
+/int32_a0_ee.udat
+/int32_a1_e.udat
+/int32_a1_ee.udat
+/int32_a2_e.udat
+/int32_a2_ee.udat
+/int32_sv0_e.udat
+/int32_sv0_ee.udat
+/int32_sv1_e.udat
+/int32_sv1_ee.udat
+/int32_sv2_e.udat
+/int32_sv2_ee.udat
+/int32_ws0_e.udat
+/int32_ws0_ee.udat
+/int32_ws1_e.udat
+/int32_ws1_ee.udat
+/int32_ws2_e.udat
+/int32_ws2_ee.udat
+/int64_a0_e.udat
+/int64_a0_ee.udat
+/int64_a1_e.udat
+/int64_a1_ee.udat
+/int64_a2_e.udat
+/int64_a2_ee.udat
+/int64_sv0_e.udat
+/int64_sv0_ee.udat
+/int64_sv1_e.udat
+/int64_sv1_ee.udat
+/int64_sv2_e.udat
+/int64_sv2_ee.udat
+/int64_ws0_e.udat
+/int64_ws0_ee.udat
+/int64_ws1_e.udat
+/int64_ws1_ee.udat
+/int64_ws2_e.udat
+/int64_ws2_ee.udat
+/int8_a0_e.udat
+/int8_a0_ee.udat
+/int8_a1_e.udat
+/int8_a1_ee.udat
+/int8_a2_e.udat
+/int8_a2_ee.udat
+/int8_sv0_e.udat
+/int8_sv0_ee.udat
+/int8_sv1_e.udat
+/int8_sv1_ee.udat
+/int8_sv2_e.udat
+/int8_sv2_ee.udat
+/int8_ws0_e.udat
+/int8_ws0_ee.udat
+/int8_ws1_e.udat
+/int8_ws1_ee.udat
+/int8_ws2_e.udat
+/int8_ws2_ee.udat
+/str_a0_e.udat
+/str_a0_ee.udat
+/str_a1_e.udat
+/str_a1_ee.udat
+/str_a2_e.udat
+/str_a2_ee.udat
+/str_sv0_e.udat
+/str_sv0_ee.udat
+/str_sv1_e.udat
+/str_sv1_ee.udat
+/str_sv2_e.udat
+/str_sv2_ee.udat
+/str_ws0_e.udat
+/str_ws0_ee.udat
+/str_ws1_e.udat
+/str_ws1_ee.udat
+/str_ws2_e.udat
+/str_ws2_ee.udat
+searchlib_enumeratedsave_test_app
diff --git a/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt
new file mode 100644
index 00000000000..0dbb59043c1
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_enumeratedsave_test_app
+ SOURCES
+ enumeratedsave_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_enumeratedsave_test_app COMMAND searchlib_enumeratedsave_test_app)
diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
new file mode 100644
index 00000000000..312814eb55a
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp
@@ -0,0 +1,944 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/searchlib/attribute/attributefilesavetarget.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/searchlib/util/bufferwriter.h>
+#include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+#include <vespa/log/log.h>
+LOG_SETUP("enumeratedsave_test");
+#include <limits>
+#include <string>
+#include <iostream>
+
+
+using search::attribute::BasicType;
+using search::attribute::CollectionType;
+using search::attribute::Config;
+using search::AttributeFactory;
+using search::FloatingPointAttribute;
+using search::IntegerAttribute;
+using search::StringAttribute;
+using search::RandomGenerator;
+using search::ParseItem;
+using search::fef::TermFieldMatchData;
+using search::IAttributeFileWriter;
+using search::BufferWriter;
+using search::AttributeMemoryFileBufferWriter;
+
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr;
+
+// Definition of FastOS_UNIX_File::Sync() that reports success without doing
+// any work.
+// NOTE(review): presumably replaces the library implementation to keep the
+// test fast - confirm.
+bool
+FastOS_UNIX_File::Sync()
+{
+    // Intentionally empty; a disabled LOG(info, "Skip sync") used to sit here.
+    return true;
+}
+
+
+// In-memory IAttributeFileWriter: the first buffer written is adopted as the
+// backing buffer and later buffers are appended to it.
+class MemAttrFileWriter : public IAttributeFileWriter
+{
+    // Accumulated data; unset until the first writeBuf() or allocBufferWriter().
+    Buffer _buf;
+
+public:
+    MemAttrFileWriter() : _buf() {}
+
+    // Returns a new BufferBuf built from (size, 4096).
+    Buffer allocBuf(size_t size) override {
+        return std::make_unique<BufferBuf>(size, 4096);
+    }
+
+    // Adopts the first buffer; appends the bytes of every later one.
+    void writeBuf(Buffer buf_in) override {
+        if (_buf) {
+            _buf->writeBytes(buf_in->getData(), buf_in->getDataLen());
+            return;
+        }
+        _buf = std::move(buf_in);
+    }
+
+    // Read access to the accumulated buffer (may be unset).
+    const Buffer &buf() const { return _buf; }
+
+    std::unique_ptr<BufferWriter> allocBufferWriter() override;
+};
+
+// Makes sure a backing buffer exists, then returns a buffer writer that is
+// constructed on this file writer.
+std::unique_ptr<BufferWriter>
+MemAttrFileWriter::allocBufferWriter()
+{
+    if (_buf.get() == nullptr) {
+        _buf = allocBuf(1);
+    }
+    std::unique_ptr<BufferWriter> writer =
+        std::make_unique<AttributeMemoryFileBufferWriter>(*this);
+    return writer;
+}
+
+// In-memory save target: each of the four attribute files (dat, idx,
+// weight, udat) is captured by its own MemAttrFileWriter so that two saves
+// can be compared byte for byte.
+class MemAttr : public search::IAttributeSaveTarget
+{
+private:
+    MemAttrFileWriter _datWriter;
+    MemAttrFileWriter _idxWriter;
+    MemAttrFileWriter _weightWriter;
+    MemAttrFileWriter _udatWriter;
+
+public:
+    typedef std::shared_ptr<MemAttr> SP;
+
+    MemAttr()
+        : _datWriter(),
+          _idxWriter(),
+          _weightWriter(),
+          _udatWriter()
+    {
+    }
+
+    // Implements IAttributeSaveTarget.
+    // setup()/close() now carry 'override' like the writer accessors, so a
+    // signature drift in the interface is caught at compile time.
+    virtual bool setup() override { return true; }
+    virtual void close() override {}
+    virtual IAttributeFileWriter &datWriter() override { return _datWriter; }
+    virtual IAttributeFileWriter &idxWriter() override { return _idxWriter; }
+    virtual IAttributeFileWriter &weightWriter() override {
+        return _weightWriter;
+    }
+    virtual IAttributeFileWriter &udatWriter() override { return _udatWriter; }
+
+    // True when both buffers are unset, or both are set with equal content.
+    bool
+    bufEqual(const Buffer &lhs, const Buffer &rhs) const;
+
+    // True when all four captured buffers are pairwise equal.
+    bool
+    operator==(const MemAttr &rhs) const;
+};
+
+// Test fixture declaring the helpers used to populate, save, reload and
+// compare attribute vectors with and without enumerated save.
+class EnumeratedSaveTest
+{
+    using AttributePtr = AttributeVector::SP;
+
+    // Downcasts of a generic attribute to a concrete attribute family.
+    template <typename VectorType>
+    VectorType &as(AttributePtr &v);
+    IntegerAttribute &asInt(AttributePtr &v);
+    StringAttribute &asString(AttributePtr &v);
+    FloatingPointAttribute &asFloat(AttributePtr &v);
+
+    // Document creation and content generation.
+    void addDocs(const AttributePtr &v, size_t sz);
+
+    template <typename VectorType>
+    void populate(VectorType &v, unsigned seed, BasicType bt);
+
+    template <typename VectorType, typename BufferType>
+    void compare(VectorType &a, VectorType &b);
+
+    // Query construction and search context creation.
+    void buildTermQuery(std::vector<char> &buffer,
+                        const vespalib::string &index,
+                        const vespalib::string &term, bool prefix);
+
+    template <typename V, typename T>
+    SearchContextPtr getSearch(const V &vec, const T &term, bool prefix);
+
+    template <typename V>
+    SearchContextPtr getSearch(const V &vec);
+
+    // Saving to memory / disk and verifying saved images.
+    MemAttr::SP saveMem(AttributeVector &v);
+    void checkMem(AttributeVector &v, const MemAttr &e, bool enumerated);
+    MemAttr::SP saveBoth(AttributePtr v);
+
+    // Creation and loading of attributes.
+    AttributePtr make(Config cfg,
+                      const vespalib::string &pref,
+                      bool fastSearch = false);
+    void load(AttributePtr v, const vespalib::string &name);
+
+    template <typename VectorType, typename BufferType>
+    void checkLoad(AttributePtr v,
+                   const vespalib::string &name,
+                   AttributePtr ev);
+
+    template <typename VectorType, typename BufferType>
+    void testReload(AttributePtr v0,
+                    AttributePtr v1,
+                    AttributePtr v2,
+                    MemAttr::SP mv0,
+                    MemAttr::SP mv1,
+                    MemAttr::SP mv2,
+                    MemAttr::SP emv0,
+                    MemAttr::SP emv1,
+                    MemAttr::SP emv2,
+                    Config cfg,
+                    const vespalib::string &pref,
+                    bool fastSearch);
+
+public:
+    // Runs the full save/reload scenario for one basic/collection type pair.
+    template <typename VectorType, typename BufferType>
+    void test(BasicType bt, CollectionType ct, const vespalib::string &pref);
+
+    EnumeratedSaveTest() {}
+};
+
+
+// Compares two optional buffers: both unset is equal; otherwise both must
+// be set, have the same length and the same bytes. Each mismatch is
+// reported through EXPECT_TRUE.
+bool
+MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const
+{
+    const bool lhsSet = (lhs.get() != NULL);
+    const bool rhsSet = (rhs.get() != NULL);
+    if (!EXPECT_TRUE(lhsSet == rhsSet)) {
+        return false;
+    }
+    if (!lhsSet) {
+        return true;
+    }
+    if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) {
+        return false;
+    }
+    const bool sameBytes =
+        (memcmp(lhs->getData(), rhs->getData(), lhs->getDataLen()) == 0);
+    if (!EXPECT_TRUE(sameBytes)) {
+        return false;
+    }
+    return true;
+}
+
+// Two save images are equal when the dat, idx, weight and udat buffers are
+// pairwise equal; evaluation stops at the first mismatch.
+bool
+MemAttr::operator==(const MemAttr &rhs) const
+{
+    const MemAttr &other = rhs;
+    if (!EXPECT_TRUE(bufEqual(_datWriter.buf(), other._datWriter.buf()))) {
+        return false;
+    }
+    if (!EXPECT_TRUE(bufEqual(_idxWriter.buf(), other._idxWriter.buf()))) {
+        return false;
+    }
+    if (!EXPECT_TRUE(bufEqual(_weightWriter.buf(), other._weightWriter.buf()))) {
+        return false;
+    }
+    const bool udatEqual = bufEqual(_udatWriter.buf(), other._udatWriter.buf());
+    if (!EXPECT_TRUE(udatEqual)) {
+        return false;
+    }
+    return true;
+}
+
+
+// Adds 'sz' documents and commits; does nothing at all when sz is zero.
+void
+EnumeratedSaveTest::addDocs(const AttributePtr &v, size_t sz)
+{
+    if (sz == 0) {
+        return;
+    }
+    AttributeVector::DocId lastDocId;
+    for (size_t added = 0; added < sz; ++added) {
+        EXPECT_TRUE( v->addDoc(lastDocId) );
+    }
+    // The last docid handed out and the document count must both match sz.
+    EXPECT_TRUE( lastDocId+1 == sz );
+    EXPECT_TRUE( v->getNumDocs() == sz );
+    v->commit(true);
+}
+
+
+// Fills every document of an integer attribute with pseudo-random values.
+// Doc 9 is left cleared and doc 7 gets the fixed values -42 / -43 (the value
+// searched for by getSearch<IntegerAttribute>()).
+// All random draws now come from the generator seeded with 'seed'; the
+// original took weights from rand() and single values from the global
+// lrand48(), so that data ignored the seed and depended on global state.
+template <>
+void
+EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed,
+                             BasicType bt)
+{
+    search::Rand48 rnd;
+    IntegerAttribute::largeint_t mask(std::numeric_limits
+                                      <IntegerAttribute::largeint_t>::max());
+    // Keep generated values inside the range of the narrow basic types.
+    switch (bt.type()) {
+    case BasicType::INT8:
+        mask = 0x7f;
+        break;
+    case BasicType::INT16:
+        mask = 0x7fff;
+        break;
+    default:
+        ;
+    }
+    rnd.srand48(seed);
+    int weight = 1;
+    for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+        v.clearDoc(i);
+        if (i == 9)
+            continue;
+        if (i == 7) {
+            if (v.hasMultiValue()) {
+                v.append(i, -42, 27);
+                v.append(i, -43, 14);
+                v.append(i, -42, -3);
+            } else {
+                EXPECT_TRUE( v.update(i, -42) );
+            }
+            v.commit();
+            continue;
+        }
+        if (v.hasMultiValue()) {
+            if (v.hasWeightedSetType()) {
+                // One weight in [-128, 127] shared by all values of this doc.
+                weight = (rnd.lrand48() % 256) - 128;
+            }
+            // Doc i receives i + 1 appended values.
+            for (size_t j(0); j <= i; j++) {
+                EXPECT_TRUE( v.append(i, rnd.lrand48() & mask, weight) );
+            }
+            v.commit();
+            if (!v.hasWeightedSetType()) {
+                EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1);
+                ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) ==
+                            i + 1);
+            }
+        } else {
+            EXPECT_TRUE( v.update(i, rnd.lrand48() & mask) );
+        }
+    }
+    v.commit();
+}
+
+
+// Fills every document of a floating point attribute with pseudo-random
+// values. Doc 9 is left cleared and doc 7 gets the fixed values -42.0 /
+// -43.0 (the value searched for by getSearch<FloatingPointAttribute>()).
+// All random draws now come from the generator seeded with 'seed'; the
+// original took weights from rand() and single values from the global
+// lrand48(), so that data ignored the seed and depended on global state.
+template <>
+void
+EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed,
+                             BasicType bt)
+{
+    (void) bt;
+    search::Rand48 rnd;
+    rnd.srand48(seed);
+    int weight = 1;
+    for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+        v.clearDoc(i);
+        if (i == 9)
+            continue;
+        if (i == 7) {
+            if (v.hasMultiValue()) {
+                v.append(i, -42.0, 27);
+                v.append(i, -43.0, 14);
+                v.append(i, -42.0, -3);
+            } else {
+                EXPECT_TRUE( v.update(i, -42.0) );
+            }
+            v.commit();
+            continue;
+        }
+        if (v.hasMultiValue()) {
+            if (v.hasWeightedSetType()) {
+                // One weight in [-128, 127] shared by all values of this doc.
+                weight = (rnd.lrand48() % 256) - 128;
+            }
+            // Doc i receives i + 1 appended values.
+            for (size_t j(0); j <= i; j++) {
+                EXPECT_TRUE( v.append(i, rnd.lrand48(), weight) );
+            }
+            v.commit();
+            if (!v.hasWeightedSetType()) {
+                EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1);
+                ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) ==
+                            i + 1);
+            }
+        } else {
+            EXPECT_TRUE( v.update(i, rnd.lrand48()) );
+        }
+    }
+    v.commit();
+}
+
+
+// Fills every document of a string attribute with random strings from a
+// generator seeded with 'seed'. Doc 9 stays cleared and doc 7 gets the fixed
+// values "foo" / "bar" / "foO".
+template <>
+void
+EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed,
+                             BasicType bt)
+{
+    (void) bt;
+    RandomGenerator rnd(seed);
+    int weight = 1;
+    const size_t numDocs = v.getNumDocs();
+    for (size_t doc = 0; doc < numDocs; doc++) {
+        v.clearDoc(doc);
+        if (doc == 9) {
+            // Left empty on purpose.
+            continue;
+        }
+        if (doc == 7) {
+            // Fixed content, independent of the seed.
+            if (v.hasMultiValue()) {
+                v.append(doc, "foo", 27);
+                v.append(doc, "bar", 14);
+                v.append(doc, "foO", -3);
+            } else {
+                EXPECT_TRUE( v.update(doc, "foo") );
+            }
+            v.commit();
+        } else if (v.hasMultiValue()) {
+            if (v.hasWeightedSetType()) {
+                weight = rnd.rand(0, 256) - 128;
+            }
+            // Doc number 'doc' receives doc + 1 appended values.
+            for (size_t value = 0; value <= doc; value++) {
+                EXPECT_TRUE( v.append(doc, rnd.getRandomString(2, 50), weight) );
+            }
+            v.commit();
+            if (!v.hasWeightedSetType()) {
+                EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(doc)), doc + 1);
+            }
+        } else {
+            EXPECT_TRUE( v.update(doc, rnd.getRandomString(2, 50)) );
+        }
+    }
+    v.commit();
+}
+
+namespace
+{
+
+// Generic equality: plain operator==.
+template <typename T>
+inline bool
+equalsHelper(const T &lhs, const T &rhs)
+{
+    return lhs == rhs;
+}
+
+// float: two NaNs count as equal, a NaN never equals a number.
+template <>
+inline bool
+equalsHelper<float>(const float &lhs, const float &rhs)
+{
+    const bool lhsNan = std::isnan(lhs);
+    const bool rhsNan = std::isnan(rhs);
+    if (lhsNan || rhsNan) {
+        return lhsNan && rhsNan;
+    }
+    return lhs == rhs;
+}
+
+// double: two NaNs count as equal, a NaN never equals a number.
+template <>
+inline bool
+equalsHelper<double>(const double &lhs, const double &rhs)
+{
+    const bool lhsNan = std::isnan(lhs);
+    const bool rhsNan = std::isnan(rhs);
+    if (lhsNan || rhsNan) {
+        return lhsNan && rhsNan;
+    }
+    return lhs == rhs;
+}
+
+}
+
+// Checks that two attribute vectors hold the same content: same number of
+// docs, same committed docid limit and, per document, the same value count
+// and pairwise equal values (NaN-aware through equalsHelper).
+// The scratch buffers are owned by std::unique_ptr so they are released on
+// every exit path; the original new[]/delete[] pair leaked them whenever an
+// ASSERT_TRUE left the function before the deletes (presumably by
+// unwinding - confirm against the test kit).
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::compare(VectorType &a, VectorType &b)
+{
+    EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs());
+    ASSERT_TRUE(a.getNumDocs() == b.getNumDocs());
+    // EXPECT_EQUAL(a.getMaxValueCount(), b.getMaxValueCount());
+    EXPECT_EQUAL(a.getCommittedDocIdLimit(), b.getCommittedDocIdLimit());
+    uint32_t asz(a.getMaxValueCount());
+    uint32_t bsz(b.getMaxValueCount());
+    // Sized for the largest document of each vector.
+    std::unique_ptr<BufferType[]> av(new BufferType[asz]);
+    std::unique_ptr<BufferType[]> bv(new BufferType[bsz]);
+
+    for (size_t i(0), m(a.getNumDocs()); i < m; i++) {
+        ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i)));
+        ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i)));
+        EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i));
+        ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i));
+        // get() is called through the AttributeVector base, as in the
+        // original code.
+        EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av.get(), asz),
+                     static_cast<uint32_t>(a.getValueCount(i)));
+        EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv.get(), bsz),
+                     static_cast<uint32_t>(b.getValueCount(i)));
+        for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i)));
+            j < k; j++) {
+            EXPECT_TRUE(equalsHelper(av[j], bv[j]));
+        }
+    }
+}
+
+
+// Downcasts the attribute to VectorType with dynamic_cast; a failed cast
+// trips the assert.
+template <typename VectorType>
+VectorType &
+EnumeratedSaveTest::as(AttributePtr &v)
+{
+    VectorType *typed = dynamic_cast<VectorType *>(v.get());
+    assert(typed != NULL);
+    return *typed;
+}
+
+
+// Convenience wrapper around as<IntegerAttribute>().
+IntegerAttribute &
+EnumeratedSaveTest::asInt(AttributePtr &v)
+{
+    IntegerAttribute &typed = as<IntegerAttribute>(v);
+    return typed;
+}
+
+
+// Convenience wrapper around as<StringAttribute>().
+StringAttribute &
+EnumeratedSaveTest::asString(AttributePtr &v)
+{
+    StringAttribute &typed = as<StringAttribute>(v);
+    return typed;
+}
+
+
+// Convenience wrapper around as<FloatingPointAttribute>().
+FloatingPointAttribute &
+EnumeratedSaveTest::asFloat(AttributePtr &v)
+{
+    FloatingPointAttribute &typed = as<FloatingPointAttribute>(v);
+    return typed;
+}
+
+
+// Serialises a single term query into 'buffer': one item type byte
+// (prefix term or plain term), then the compressed length and bytes of the
+// index name, then the compressed length and bytes of the term. The buffer
+// is finally shrunk to the number of bytes actually written.
+void
+EnumeratedSaveTest::buildTermQuery(std::vector<char> &buffer,
+                                   const vespalib::string &index,
+                                   const vespalib::string &term,
+                                   bool prefix)
+{
+    const uint32_t indexLen = index.size();
+    const uint32_t termLen = term.size();
+    // Reserve the type byte, 4 bytes for each compressed length and both payloads.
+    buffer.resize(1 + 2 * 4 + indexLen + termLen);
+
+    uint32_t pos = 0;
+    if (prefix) {
+        buffer[pos] = ParseItem::ITEM_PREFIXTERM;
+    } else {
+        buffer[pos] = ParseItem::ITEM_TERM;
+    }
+    ++pos;
+
+    pos += vespalib::compress::Integer::compressPositive(indexLen, &buffer[pos]);
+    memcpy(&buffer[pos], index.c_str(), indexLen);
+    pos += indexLen;
+
+    pos += vespalib::compress::Integer::compressPositive(termLen, &buffer[pos]);
+    memcpy(&buffer[pos], term.c_str(), termLen);
+    pos += termLen;
+
+    buffer.resize(pos);
+}
+
+
+// Formats 'term' as text, builds a term query against the attribute's own
+// name and returns the search context created through the AttributeVector
+// base interface.
+template <typename V, typename T>
+SearchContextPtr
+EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix)
+{
+    vespalib::asciistream termText;
+    termText << term;
+    std::vector<char> packet;
+    buildTermQuery(packet, vec.getName(), termText.str(), prefix);
+
+    const AttributeVector &base = static_cast<const AttributeVector &>(vec);
+    return base.getSearch(vespalib::stringref(&packet[0], packet.size()),
+                          AttributeVector::SearchContext::Params());
+}
+
+
+// Non-prefix search for the term "[-42;-42]".
+template <>
+SearchContextPtr
+EnumeratedSaveTest::getSearch<IntegerAttribute>(const IntegerAttribute &v)
+{
+    SearchContextPtr context = getSearch<IntegerAttribute>(v, "[-42;-42]", false);
+    return context;
+}
+
+// Non-prefix search for the term "[-42.0;-42.0]".
+template <>
+SearchContextPtr
+EnumeratedSaveTest::getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v)
+{
+    SearchContextPtr context =
+        getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false);
+    return context;
+}
+
+// Non-prefix search for the term "foo".
+template <>
+SearchContextPtr
+EnumeratedSaveTest::getSearch<StringAttribute>(const StringAttribute &v)
+{
+    SearchContextPtr context =
+        getSearch<StringAttribute, const vespalib::string &>(v, "foo", false);
+    return context;
+}
+
+// Saves the attribute into a fresh in-memory target and returns that target.
+MemAttr::SP
+EnumeratedSaveTest::saveMem(AttributeVector &v)
+{
+    MemAttr::SP saved = std::make_shared<MemAttr>();
+    EXPECT_TRUE(v.save(*saved));
+    return saved;
+}
+
+
+// Saves 'v' to memory with enumerated save switched to 'enumerated',
+// switches enumerated save off again and requires the image to equal 'e'.
+void
+EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e,
+                             bool enumerated)
+{
+    MemAttr actual;
+    v.enableEnumeratedSave(enumerated);
+    EXPECT_TRUE(v.save(actual));
+    v.enableEnumeratedSave(false);
+    ASSERT_TRUE(actual == e);
+}
+
+
+// Saves 'v' to disk, reloads it into a fast-search copy and writes that
+// copy twice with enumerated save enabled: directly as "<base>_e" and via a
+// memory save target as "<base>_ee". Returns the in-memory image of the
+// copy (saved while enumerated save is still enabled).
+// The "_ee" step used to sit behind `if ((INT32 && WSET) || true)`, which
+// is always true; the vacuous condition has been removed.
+MemAttr::SP
+EnumeratedSaveTest::saveBoth(AttributePtr v)
+{
+    EXPECT_TRUE(v->save());
+    vespalib::string basename = v->getBaseFileName();
+    AttributePtr v2 = make(v->getConfig(), basename, true);
+    EXPECT_TRUE(v2->load());
+    v2->enableEnumeratedSave(true);
+    EXPECT_TRUE(v2->saveAs(basename + "_e"));
+    {
+        // Scoped so the memory target and its helpers are destroyed before
+        // the final saveMem(), exactly as inside the original if-body.
+        search::AttributeMemorySaveTarget ms;
+        search::TuneFileAttributes tune;
+        search::index::DummyFileHeaderContext fileHeaderContext;
+        EXPECT_TRUE(v2->saveAs(basename + "_ee", ms));
+        EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext));
+    }
+    return saveMem(*v2);
+}
+
+
+// Creates an attribute named 'pref' from 'cfg' with the fast-search flag
+// overridden by 'fastSearch'.
+EnumeratedSaveTest::AttributePtr
+EnumeratedSaveTest::make(Config cfg,
+                         const vespalib::string &pref,
+                         bool fastSearch)
+{
+    cfg.setFastSearch(fastSearch);
+    return AttributeFactory::createAttribute(pref, cfg);
+}
+
+
+// Points the attribute at base file 'name' and expects the load to succeed.
+void
+EnumeratedSaveTest::load(AttributePtr v, const vespalib::string &name)
+{
+    AttributeVector &attribute = *v;
+    attribute.setBaseFileName(name);
+    EXPECT_TRUE(attribute.load());
+}
+
+// Loads 'v' from base file 'name' and compares the loaded content with the
+// expected attribute 'ev'.
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::checkLoad(AttributePtr v, const vespalib::string &name,
+                              AttributePtr ev)
+{
+    v->setBaseFileName(name);
+    EXPECT_TRUE(v->load());
+    VectorType &loaded = as<VectorType>(v);
+    VectorType &expected = as<VectorType>(ev);
+    compare<VectorType, BufferType>(loaded, expected);
+}
+
+
+// Reloads one attribute instance from the plain files and then from the
+// "_e" files of the three reference attributes, checking after each load
+// that the content matches and that plain / enumerated in-memory saves
+// reproduce the reference images. Finally runs the fixed search and checks
+// the hit in doc 7 and its weight.
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::testReload(AttributePtr v0,
+                               AttributePtr v1,
+                               AttributePtr v2,
+                               MemAttr::SP mv0,
+                               MemAttr::SP mv1,
+                               MemAttr::SP mv2,
+                               MemAttr::SP emv0,
+                               MemAttr::SP emv1,
+                               MemAttr::SP emv2,
+                               Config cfg,
+                               const vespalib::string &pref,
+                               bool fastSearch)
+{
+    // Fast-search INT8 arrays are treated as a special "flag" case.
+    const bool flagAttr =
+        fastSearch &&
+        cfg.collectionType() == CollectionType::ARRAY &&
+        cfg.basicType() == BasicType::INT8;
+    const bool supportsEnumerated =
+        !flagAttr &&
+        (fastSearch || cfg.basicType() == BasicType::STRING);
+
+    // Expected image of an enumerated save; the plain image when enumerated
+    // save is not supported for this configuration.
+    const MemAttr &enumImage0 = supportsEnumerated ? *emv0 : *mv0;
+    const MemAttr &enumImage1 = supportsEnumerated ? *emv1 : *mv1;
+    const MemAttr &enumImage2 = supportsEnumerated ? *emv2 : *mv2;
+
+    AttributePtr v = make(cfg, pref, fastSearch);
+
+    // First pass uses the plain files, second pass the "_e" files.
+    const char *suffixes[] = { "", "_e" };
+    for (const char *suffix : suffixes) {
+        const vespalib::string name0 = pref + "0" + suffix;
+        const vespalib::string name1 = pref + "1" + suffix;
+        const vespalib::string name2 = pref + "2" + suffix;
+
+        // Reload back and forth between the differently sized attributes.
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name0, v0)));
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name1, v1)));
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name2, v2)));
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name1, v1)));
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name0, v0)));
+
+        // After each load both save variants must match the references.
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name0, v0)));
+        TEST_DO(checkMem(*v, *mv0, false));
+        TEST_DO(checkMem(*v, enumImage0, true));
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name1, v1)));
+        TEST_DO(checkMem(*v, *mv1, false));
+        TEST_DO(checkMem(*v, enumImage1, true));
+        TEST_DO((checkLoad<VectorType, BufferType>(v, name2, v2)));
+        TEST_DO(checkMem(*v, *mv2, false));
+        TEST_DO(checkMem(*v, enumImage2, true));
+    }
+
+    // The last load was attribute 2; search it for the fixed doc-7 value.
+    TermFieldMatchData md;
+    SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(v));
+    sc->fetchPostings(true);
+    SearchBasePtr sb = sc->createIterator(&md, true);
+    sb->initFullRange();
+    sb->seek(1u);
+    EXPECT_EQUAL(7u, sb->getDocId());
+    sb->unpack(7u);
+    EXPECT_EQUAL(md.getDocId(), 7u);
+
+    // Expected weight depends on the collection type (and, for weighted
+    // sets, on whether the values are strings).
+    int expectedWeight = -3;
+    if (v->getCollectionType() == CollectionType::SINGLE || flagAttr) {
+        expectedWeight = 1;
+    } else if (v->getCollectionType() == CollectionType::ARRAY) {
+        expectedWeight = 2;
+    } else if (cfg.basicType() == BasicType::STRING) {
+        expectedWeight = 24;
+    }
+    EXPECT_EQUAL(md.getWeight(), expectedWeight);
+}
+
+
+// Full scenario for one basic/collection type pair: build three reference
+// attributes with 0, 10 and 30 docs, capture their plain and enumerated
+// save images, verify the "_ee" files load correctly, then run testReload()
+// without and with fast-search.
+template <typename VectorType, typename BufferType>
+void
+EnumeratedSaveTest::test(BasicType bt, CollectionType ct,
+                         const vespalib::string &pref)
+{
+    Config cfg(bt, ct);
+    AttributePtr v0 = AttributeFactory::createAttribute(pref + "0", cfg);
+    AttributePtr v1 = AttributeFactory::createAttribute(pref + "1", cfg);
+    AttributePtr v2 = AttributeFactory::createAttribute(pref + "2", cfg);
+
+    // The doc count doubles as the seed for the generated content.
+    addDocs(v0, 0);
+    addDocs(v1, 10);
+    addDocs(v2, 30);
+    populate(as<VectorType>(v0), 0, bt);
+    populate(as<VectorType>(v1), 10, bt);
+    populate(as<VectorType>(v2), 30, bt);
+
+    // Plain in-memory images.
+    MemAttr::SP mv0 = saveMem(*v0);
+    MemAttr::SP mv1 = saveMem(*v1);
+    MemAttr::SP mv2 = saveMem(*v2);
+
+    // Images from saveBoth(), which also writes the "_e" / "_ee" files.
+    MemAttr::SP emv0 = saveBoth(v0);
+    MemAttr::SP emv1 = saveBoth(v1);
+    MemAttr::SP emv2 = saveBoth(v2);
+
+    {
+        // The files written through the memory save target must load too.
+        AttributePtr reloaded = make(cfg, pref, true);
+        checkLoad<VectorType, BufferType>(reloaded, pref + "0_ee", v0);
+        checkLoad<VectorType, BufferType>(reloaded, pref + "1_ee", v1);
+        checkLoad<VectorType, BufferType>(reloaded, pref + "2_ee", v2);
+    }
+
+    TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2,
+                                                mv0, mv1, mv2,
+                                                emv0, emv1, emv2,
+                                                cfg, pref,
+                                                false)));
+    TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2,
+                                                mv0, mv1, mv2,
+                                                emv0, emv1, emv2,
+                                                cfg, pref,
+                                                true)));
+}
+
+// Enumerated save tests for int8 attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value int8", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT8, CollectionType::SINGLE, "int8_sv");
+}
+
+TEST_F("Test enumerated save with array value int8", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT8, CollectionType::ARRAY, "int8_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int8", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::WeightedInt Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT8, CollectionType::WSET, "int8_ws");
+}
+
+// Enumerated save tests for int16 attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value int16", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT16, CollectionType::SINGLE, "int16_sv");
+}
+
+TEST_F("Test enumerated save with array value int16", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT16, CollectionType::ARRAY, "int16_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int16", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::WeightedInt Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT16, CollectionType::WSET, "int16_ws");
+}
+
+// Enumerated save tests for int32 attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value int32", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT32, CollectionType::SINGLE, "int32_sv");
+}
+
+TEST_F("Test enumerated save with array value int32", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT32, CollectionType::ARRAY, "int32_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int32", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::WeightedInt Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT32, CollectionType::WSET, "int32_ws");
+}
+
+// Enumerated save tests for int64 attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value int64", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT64, CollectionType::SINGLE, "int64_sv");
+}
+
+TEST_F("Test enumerated save with array value int64", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::largeint_t Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT64, CollectionType::ARRAY, "int64_a");
+}
+
+TEST_F("Test enumerated save with weighted set value int64", EnumeratedSaveTest)
+{
+    typedef IntegerAttribute::WeightedInt Buffer;
+    f.template test<IntegerAttribute, Buffer>(BasicType::INT64, CollectionType::WSET, "int64_ws");
+}
+
+// Enumerated save tests for float attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value float", EnumeratedSaveTest)
+{
+    typedef double Buffer;
+    f.template test<FloatingPointAttribute, Buffer>(BasicType::FLOAT, CollectionType::SINGLE, "float_sv");
+}
+
+TEST_F("Test enumerated save with array value float", EnumeratedSaveTest)
+{
+    typedef double Buffer;
+    f.template test<FloatingPointAttribute, Buffer>(BasicType::FLOAT, CollectionType::ARRAY, "float_a");
+}
+
+TEST_F("Test enumerated save with weighted set value float", EnumeratedSaveTest)
+{
+    typedef FloatingPointAttribute::WeightedFloat Buffer;
+    f.template test<FloatingPointAttribute, Buffer>(BasicType::FLOAT, CollectionType::WSET, "float_ws");
+}
+
+
+// Enumerated save tests for double attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value double", EnumeratedSaveTest)
+{
+    typedef double Buffer;
+    f.template test<FloatingPointAttribute, Buffer>(BasicType::DOUBLE, CollectionType::SINGLE, "double_sv");
+}
+
+TEST_F("Test enumerated save with array value double", EnumeratedSaveTest)
+{
+    typedef double Buffer;
+    f.template test<FloatingPointAttribute, Buffer>(BasicType::DOUBLE, CollectionType::ARRAY, "double_a");
+}
+
+TEST_F("Test enumerated save with weighted set value double", EnumeratedSaveTest)
+{
+    typedef FloatingPointAttribute::WeightedFloat Buffer;
+    f.template test<FloatingPointAttribute, Buffer>(BasicType::DOUBLE, CollectionType::WSET, "double_ws");
+}
+
+
+// Enumerated save tests for string attributes: single value, array and
+// weighted set.
+TEST_F("Test enumerated save with single value string", EnumeratedSaveTest)
+{
+    typedef vespalib::string Buffer;
+    f.template test<StringAttribute, Buffer>(BasicType::STRING, CollectionType::SINGLE, "str_sv");
+}
+
+TEST_F("Test enumerated save with array value string", EnumeratedSaveTest)
+{
+    typedef vespalib::string Buffer;
+    f.template test<StringAttribute, Buffer>(BasicType::STRING, CollectionType::ARRAY, "str_a");
+}
+
+TEST_F("Test enumerated save with weighted set value string", EnumeratedSaveTest)
+{
+    typedef StringAttribute::WeightedString Buffer;
+    f.template test<StringAttribute, Buffer>(BasicType::STRING, CollectionType::WSET, "str_ws");
+}
+
+TEST_MAIN()
+{
+ AttributeVector::enableEnumeratedLoad(); // switch on enumerated load before any test case runs
+ TEST_RUN_ALL();
+}
diff --git a/searchlib/src/tests/attribute/enumstore/.gitignore b/searchlib/src/tests/attribute/enumstore/.gitignore
new file mode 100644
index 00000000000..c58a018bbd9
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+enumstore_test
+searchlib_enumstore_test_app
diff --git a/searchlib/src/tests/attribute/enumstore/CMakeLists.txt b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt
new file mode 100644
index 00000000000..33190553747
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_enumstore_test_app
+ SOURCES
+ enumstore_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_enumstore_test_app COMMAND searchlib_enumstore_test_app)
diff --git a/searchlib/src/tests/attribute/enumstore/DESC b/searchlib/src/tests/attribute/enumstore/DESC
new file mode 100644
index 00000000000..514c9a47caf
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/DESC
@@ -0,0 +1 @@
+This is a test for the EnumStore class.
diff --git a/searchlib/src/tests/attribute/enumstore/FILES b/searchlib/src/tests/attribute/enumstore/FILES
new file mode 100644
index 00000000000..6fdb2381292
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/FILES
@@ -0,0 +1 @@
+enumstore_test.cpp
diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
new file mode 100644
index 00000000000..e63889bbeb8
--- /dev/null
+++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp
@@ -0,0 +1,879 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("enumstore_test");
+#include <vespa/vespalib/testkit/testapp.h>
+//#define LOG_ENUM_STORE
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <limits>
+#include <string>
+#include <iostream>
+
+namespace search {
+
+// Rounds size up to the nearest multiple of 16.
+size_t enumStoreAlign(size_t size)
+{
+    const uint64_t alignMask = UINT64_C(15);
+    return (size + alignMask) & ~alignMask;
+}
+
+// EnumStoreBase::Index(0,0) is reserved, thus 16 bytes are reserved in buffer 0; the size and dead-byte expectations in the tests below add this amount.
+const uint32_t RESERVED_BYTES = 16u;
+typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore; // enum store holding uint32_t values, used by the numeric tests and helpers
+
+class EnumStoreTest : public vespalib::TestApp // test application; Main() runs the test methods declared below
+{
+private:
+ typedef EnumStoreT<StringEntryType> StringEnumStore;
+ typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore;
+ typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore;
+
+ typedef EnumStoreBase::Index EnumIndex;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+ void testIndex();
+ void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+ const std::string & string); // writes a string entry into data via StringEnumStore::insertEntry
+ void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+ uint32_t value); // writes a numeric entry into data via NumericEnumStore::insertEntry
+ void testStringEntry();
+ void testNumericEntry();
+
+ template <typename EnumStoreType, typename T>
+ void testFloatEnumStore(EnumStoreType & es); // worker, run for both the float and the double store
+ void testFloatEnumStore();
+
+ void testAddEnum(); // runs the templated variant without and with postings
+ template <typename EnumStoreType>
+ void testAddEnum(bool hasPostings);
+
+ template <typename EnumStoreType, typename Dictionary>
+ void
+ testUniques(const EnumStoreType &ses,
+ const std::vector<std::string> &unique); // checks that the dictionary iterates the values in the order given by unique
+
+
+ void testCompaction(); // runs the templated variant for all four flag combinations
+ template <typename EnumStoreType>
+ void testCompaction(bool hasPostings, bool disableReEnumerate);
+
+ void testReset(); // runs the templated variant without and with postings
+ template <typename EnumStoreType>
+ void testReset(bool hasPostings);
+
+ void testHoldListAndGeneration();
+ void testMemoryUsage();
+ void requireThatAddressSpaceUsageIsReported();
+ void testBufferLimit(); // only run from Main() when _argc > 1
+
+ // helper methods
+ typedef std::vector<std::string> StringVector;
+ template <typename T>
+ T random(T low, T high); // returns low + rand() % (high - low), i.e. a value in [low, high)
+ std::string getRandomString(uint32_t minLen, uint32_t maxLen);
+ StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen);
+ StringVector sortRandomStrings(StringVector & strings); // sorts strings in place and returns the distinct values
+
+ struct StringEntry { // expected content of one string entry: enum value, reference count and string
+ StringEntry(uint32_t e, uint32_t r, const std::string & s) :
+ _enum(e), _refCount(r), _string(s) {}
+ uint32_t _enum;
+ uint32_t _refCount;
+ std::string _string;
+ };
+
+ struct Reader { // a set of indices together with the entries expected behind them, tagged with a generation
+ typedef StringEnumStore::Index Index;
+ typedef std::vector<Index> IndexVector;
+ typedef std::vector<StringEntry> ExpectedVector;
+ uint32_t _generation;
+ IndexVector _indices;
+ ExpectedVector _expected;
+ Reader(uint32_t generation, const IndexVector & indices,
+ const ExpectedVector & expected) :
+ _generation(generation), _indices(indices), _expected(expected) {}
+ };
+
+ void
+ checkReaders(const StringEnumStore &ses,
+ generation_t sesGen,
+ const std::vector<Reader> &readers); // checks enum value and string for every index held by the readers; sesGen is unused
+
+public:
+ EnumStoreTest() {}
+ int Main();
+};
+
+// Exercises StringEnumStore::Index: default construction, offset and
+// buffer id round trips for small and large offsets, equality, and the
+// number of buffers.
+void
+EnumStoreTest::testIndex()
+{
+    typedef StringEnumStore::Index Index;
+    {
+        // a default constructed index is invalid, with offset 0 in buffer 0
+        Index idx;
+        EXPECT_TRUE( ! idx.valid());
+        EXPECT_EQUAL(idx.offset(), 0u);
+        EXPECT_TRUE(idx.bufferId() == 0);
+    }
+    {
+        const size_t offset = enumStoreAlign(1000);
+        Index idx(offset, 0);
+        EXPECT_TRUE(idx.offset() == offset);
+        EXPECT_TRUE(idx.bufferId() == 0);
+    }
+    {
+        const uint64_t offset = (UINT64_C(1) << 31) - RESERVED_BYTES;
+        Index idx(offset, 1);
+        EXPECT_TRUE(idx.offset() == offset);
+        EXPECT_TRUE(idx.bufferId() == 1);
+    }
+    {
+        const uint64_t offset = (UINT64_C(1) << 33) - RESERVED_BYTES;
+        Index idx(offset, 1);
+        EXPECT_TRUE(idx.offset() == offset);
+        EXPECT_TRUE(idx.bufferId() == 1);
+    }
+    {
+        const uint64_t offset = (UINT64_C(1) << 35) - RESERVED_BYTES;
+        Index idx(offset, 1);
+        EXPECT_TRUE(idx.offset() == offset);
+        EXPECT_TRUE(idx.bufferId() == 1);
+    }
+    {
+        // NOTE: these offsets must be changed when the alignment changes.
+        Index first(48, 0);
+        Index other(80, 0);
+        Index sameAsFirst(48, 0);
+        EXPECT_TRUE(!(first == other));
+        EXPECT_TRUE(first == sameAsFirst);
+    }
+    {
+        EXPECT_TRUE(Index::numBuffers() == 2);
+    }
+}
+
+// Writes a string entry (enum value, reference count, string) into data
+// via StringEnumStore::insertEntry.
+void
+EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount,
+                              const std::string & string)
+{
+    const char * text = string.c_str();
+    StringEnumStore::insertEntry(data, enumValue, refCount, text);
+}
+
+// Writes a numeric entry (enum value, reference count, value) into data
+// via NumericEnumStore::insertEntry.
+void
+EnumStoreTest::fillDataBuffer(char * data,
+                              uint32_t enumValue,
+                              uint32_t refCount,
+                              uint32_t value)
+{
+    NumericEnumStore::insertEntry(data, enumValue, refCount, value);
+}
+
+// Checks entry size, enum value, reference count and value of
+// StringEnumStore::Entry on caller provided buffers, including one
+// incRefCount/decRefCount round trip.
+void
+EnumStoreTest::testStringEntry()
+{
+    typedef StringEnumStore::Entry Entry;
+    {
+        // empty string: 8 byte header + terminating '\0'
+        char data[9];
+        fillDataBuffer(data, 0, 0, "");
+        Entry e(data);
+        EXPECT_TRUE(StringEnumStore::getEntrySize("") ==
+                    StringEnumStore::alignEntrySize(8 + 1));
+
+        EXPECT_TRUE(e.getEnum() == 0);
+        EXPECT_TRUE(e.getRefCount() == 0);
+        EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
+
+        // one reference more ...
+        e.incRefCount();
+        EXPECT_TRUE(e.getEnum() == 0);
+        EXPECT_TRUE(e.getRefCount() == 1);
+        EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
+        // ... and back again
+        e.decRefCount();
+        EXPECT_TRUE(e.getEnum() == 0);
+        EXPECT_TRUE(e.getRefCount() == 0);
+        EXPECT_TRUE(strcmp(e.getValue(), "") == 0);
+    }
+    {
+        // "enumstore": 8 byte header + 9 characters + terminating '\0'
+        char data[18];
+        fillDataBuffer(data, 10, 5, "enumstore");
+        Entry e(data);
+        EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") ==
+                    StringEnumStore::alignEntrySize(8 + 1 + 9));
+
+        EXPECT_TRUE(e.getEnum() == 10);
+        EXPECT_TRUE(e.getRefCount() == 5);
+        EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
+
+        // one reference more ...
+        e.incRefCount();
+        EXPECT_TRUE(e.getEnum() == 10);
+        EXPECT_TRUE(e.getRefCount() == 6);
+        EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
+        // ... and back again
+        e.decRefCount();
+        EXPECT_TRUE(e.getEnum() == 10);
+        EXPECT_TRUE(e.getRefCount() == 5);
+        EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0);
+    }
+}
+
+// Checks entry size, enum value, reference count and value of
+// NumericEnumStore::Entry on a caller provided buffer, including one
+// incRefCount/decRefCount round trip.
+void
+EnumStoreTest::testNumericEntry()
+{
+    typedef NumericEnumStore::Entry Entry;
+    {
+        // 8 byte header + 4 byte value
+        char data[12];
+        fillDataBuffer(data, 10, 20, 30);
+        Entry e(data);
+        EXPECT_TRUE(NumericEnumStore::getEntrySize(30) ==
+                    NumericEnumStore::alignEntrySize(8 + 4));
+
+        EXPECT_TRUE(e.getEnum() == 10);
+        EXPECT_TRUE(e.getRefCount() == 20);
+        EXPECT_TRUE(e.getValue() == 30);
+
+        // one reference more ...
+        e.incRefCount();
+        EXPECT_TRUE(e.getEnum() == 10);
+        EXPECT_TRUE(e.getRefCount() == 21);
+        EXPECT_TRUE(e.getValue() == 30);
+        // ... and back again
+        e.decRefCount();
+        EXPECT_TRUE(e.getEnum() == 10);
+        EXPECT_TRUE(e.getRefCount() == 20);
+        EXPECT_TRUE(e.getValue() == 30);
+    }
+}
+
+// Adds five values to es and checks that exactly those are found, then adds
+// a quiet NaN and checks that it is found (twice) and that lookups of the
+// other values are unaffected.
+template <typename EnumStoreType, typename T>
+void
+EnumStoreTest::testFloatEnumStore(EnumStoreType & es)
+{
+    const uint32_t numValues = 5;
+    T present[numValues] = {-20.5f, -10.5f, -0.5f, 9.5f, 19.5f};
+    T absent[numValues] = {-25.5f, -15.5f, -5.5f, 4.5f, 14.5f};
+    EnumIndex idx;
+
+    for (uint32_t n = 0; n < numValues; ++n) {
+        es.addEnum(present[n], idx);
+    }
+
+    for (uint32_t n = 0; n < numValues; ++n) {
+        EXPECT_TRUE(es.findIndex(present[n], idx));
+        EXPECT_TRUE(!es.findIndex(absent[n], idx));
+    }
+
+    const T nan = std::numeric_limits<T>::quiet_NaN();
+    es.addEnum(nan, idx);
+    EXPECT_TRUE(es.findIndex(nan, idx));
+    EXPECT_TRUE(es.findIndex(nan, idx));
+
+    for (uint32_t n = 0; n < numValues; ++n) {
+        EXPECT_TRUE(es.findIndex(present[n], idx));
+        EXPECT_TRUE(!es.findIndex(absent[n], idx));
+    }
+}
+
+// Runs the floating point enum store test for a float and a double store.
+void
+EnumStoreTest::testFloatEnumStore()
+{
+    {
+        FloatEnumStore floatStore(1000, false);
+        testFloatEnumStore<FloatEnumStore, float>(floatStore);
+    }
+    {
+        DoubleEnumStore doubleStore(1000, false);
+        testFloatEnumStore<DoubleEnumStore, double>(doubleStore);
+    }
+}
+
+// Runs the add-enum test on a string store, first with hasPostings == false
+// and then with hasPostings == true.
+void
+EnumStoreTest::testAddEnum()
+{
+    for (int pass = 0; pass < 2; ++pass) {
+        testAddEnum<StringEnumStore>(pass == 1);
+    }
+}
+
+template <typename EnumStoreType>
+void
+EnumStoreTest::testAddEnum(bool hasPostings) // adds four strings and checks offsets, enum values, ref counts, lookups and dictionary order
+{
+ EnumStoreType ses(100, hasPostings);
+ EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES,
+ ses.getBuffer(0).capacity());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size()); // fresh store: only the reserved bytes are used ...
+ EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); // ... and they are accounted as dead
+
+ EnumIndex idx;
+ uint64_t offset = ses.getBuffer(0).size(); // offset expected for the next added entry
+ std::vector<EnumIndex> indices;
+ std::vector<std::string> unique;
+ unique.push_back("");
+ unique.push_back("add");
+ unique.push_back("enumstore");
+ unique.push_back("unique");
+
+ for (uint32_t i = 0; i < unique.size(); ++i) {
+ ses.addEnum(unique[i].c_str(), idx);
+ EXPECT_EQUAL(offset, idx.offset());
+ EXPECT_EQUAL(0u, idx.bufferId());
+ ses.incRefCount(idx);
+ EXPECT_EQUAL(1u, ses.getRefCount(idx));
+ indices.push_back(idx);
+ offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8); // string + '\0' + 8 byte header, aligned
+ EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
+ EXPECT_TRUE(ses.getLastEnum() == i); // enum values follow insertion order here
+ }
+ ses.freezeTree();
+
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ uint32_t e = ses.getEnum(indices[i]);
+ EXPECT_EQUAL(i, e);
+ EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e));
+ EXPECT_TRUE(ses.getEnum(btree::EntryRef(e)) == i);
+ EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx));
+ EXPECT_TRUE(idx == indices[i]);
+ EXPECT_EQUAL(1u, ses.getRefCount(indices[i]));
+ StringEntryType::Type value = 0;
+ EXPECT_TRUE(ses.getValue(indices[i], value));
+ EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0);
+ }
+
+ if (hasPostings) { // the dictionary type to inspect depends on hasPostings
+ testUniques<EnumStoreType, EnumPostingTree>(ses, unique);
+ } else {
+ testUniques<EnumStoreType, EnumTree>(ses, unique);
+ }
+}
+
+/**
+ * Checks that iterating the dictionary of ses yields exactly the values in
+ * unique, in the same order.
+ *
+ * @param ses    enum store whose dictionary is inspected; its dictionary
+ *               must be an EnumStoreDict<Dictionary> (asserted)
+ * @param unique expected values in dictionary iteration order
+ */
+template <typename EnumStoreType, typename Dictionary>
+void
+EnumStoreTest::testUniques
+(const EnumStoreType &ses, const std::vector<std::string> &unique)
+{
+    const EnumStoreDict<Dictionary> *enumDict =
+        dynamic_cast<const EnumStoreDict<Dictionary> *>
+        (&ses.getEnumStoreDict());
+    assert(enumDict != NULL);
+    const Dictionary &dict = enumDict->getDictionary();
+    uint32_t i = 0;
+    EnumIndex idx;
+    for (typename Dictionary::Iterator iter = dict.begin();
+         iter.valid(); ++iter, ++i) {
+        if (i >= unique.size()) {
+            // More dictionary entries than expected values: keep counting so
+            // that the size check below fails, but never read past the end
+            // of unique.
+            continue;
+        }
+        idx = iter.getKey();
+        EXPECT_TRUE(strcmp(unique[i].c_str(), ses.getValue(idx)) == 0);
+    }
+    EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i);
+}
+
+
+// Runs the compaction test on a string store for all combinations of
+// (hasPostings, disableReEnumerate), in the order
+// (false, false), (true, false), (false, true), (true, true).
+void
+EnumStoreTest::testCompaction()
+{
+    for (int disableReEnumerate = 0; disableReEnumerate < 2; ++disableReEnumerate) {
+        for (int hasPostings = 0; hasPostings < 2; ++hasPostings) {
+            testCompaction<StringEnumStore>(hasPostings != 0, disableReEnumerate != 0);
+        }
+    }
+}
+
+template <typename EnumStoreType>
+void
+EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) // fills buffer 0, frees one entry, compacts and checks sizes, enum values and the old -> new index mapping
+{
+ // entrySize = 15 before alignment: 8 byte header + 6 characters ("enum0x") + '\0'
+ uint32_t entrySize = EnumStoreType::alignEntrySize(15);
+ uint32_t bufferSize = entrySize * 5; // room for exactly five entries
+ EnumStoreType ses(bufferSize, hasPostings);
+ EnumIndex idx;
+ std::vector<EnumIndex> indices;
+ typename EnumStoreType::Type t = "foo";
+ std::vector<std::string> uniques;
+ uniques.push_back("enum00");
+ uniques.push_back("enum01");
+ uniques.push_back("enum02");
+ uniques.push_back("enum03");
+ uniques.push_back("enum04");
+
+ // fill with unique values
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(ses.getRemaining() == bufferSize - i * entrySize);
+ ses.addEnum(uniques[i].c_str(), idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+ indices.push_back(idx);
+ }
+ EXPECT_EQUAL(0u, ses.getRemaining());
+ EXPECT_EQUAL(0u, ses.getBuffer(0).remaining());
+ EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size());
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+ uint32_t failEntrySize = ses.getEntrySize("enum05");
+ EXPECT_TRUE(failEntrySize > ses.getRemaining()); // a sixth entry does not fit
+
+ // change from enum00 -> enum01
+ ses.decRefCount(indices[0]);
+ ses.incRefCount(indices[1]);
+ indices[0] = indices[1];
+
+ // check correct refcount
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
+ uint32_t refCount = ses.getRefCount(idx);
+ if (i == 0) {
+ EXPECT_TRUE(refCount == 0);
+ } else if (i == 1) {
+ EXPECT_TRUE(refCount == 2);
+ } else {
+ EXPECT_TRUE(refCount == 1);
+ }
+ }
+
+ // free unused enums
+ ses.freeUnusedEnums(true);
+ EXPECT_TRUE(!ses.findIndex("enum00", idx));
+ EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); // the freed enum00 entry is now accounted as dead
+
+ // perform compaction
+ if (disableReEnumerate) {
+ ses.disableReEnumerate();
+ }
+ EXPECT_TRUE(ses.performCompaction(3 * entrySize));
+ if (disableReEnumerate) {
+ ses.enableReEnumerate();
+ }
+ EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize);
+ EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize);
+ EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4); // the four live entries now sit in buffer 1
+ EXPECT_TRUE(ses.getBuffer(1)._deadElems == 0);
+
+ EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum()); // the last enum is one lower when re-enumeration was left enabled
+
+ // add new unique strings
+ ses.addEnum("enum05", idx);
+ EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx));
+ ses.addEnum("enum06", idx);
+ EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx));
+ ses.addEnum("enum00", idx);
+ EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx));
+
+ EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getLastEnum());
+
+ // compare old and new indices
+ for (uint32_t i = 0; i < indices.size(); ++i) {
+ EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx));
+ EXPECT_TRUE(indices[i].bufferId() == 0);
+ EXPECT_TRUE(idx.bufferId() == 1);
+ EXPECT_TRUE(ses.getValue(indices[i], t));
+ typename EnumStoreType::Type s = "bar";
+ EXPECT_TRUE(ses.getValue(idx, s));
+ EXPECT_TRUE(strcmp(t, s) == 0);
+ }
+ // EnumIndex(0,0) is reserved so old offsets in buffer 0 include RESERVED_BYTES (16) extra bytes; the new offsets in buffer 1 are expected to start at 0
+ EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx));
+ EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset()); // indices[0] was redirected to enum01 above
+ EXPECT_EQUAL(0u, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx));
+ EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset());
+ EXPECT_EQUAL(0u, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx));
+ EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset());
+ EXPECT_EQUAL(entrySize, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx));
+ EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset());
+ EXPECT_EQUAL(2 * entrySize, idx.offset());
+ EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx));
+ EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset());
+ EXPECT_EQUAL(3 * entrySize, idx.offset());
+}
+
+// Runs the reset test on a string store, first with hasPostings == false
+// and then with hasPostings == true.
+void
+EnumStoreTest::testReset()
+{
+    for (int pass = 0; pass < 2; ++pass) {
+        testReset<StringEnumStore>(pass == 1);
+    }
+}
+
+template <typename EnumStoreType>
+void
+EnumStoreTest::testReset(bool hasPostings) // fills the store with random strings, resets it from a builder holding new strings and checks that only the new ones remain
+{
+ uint32_t numUniques = 10000;
+ srand(123456789); // fixed seed: the size checks below rely on the generated strings being distinct
+ StringVector rndStrings = fillRandomStrings(numUniques, 10, 15);
+ EXPECT_EQUAL(rndStrings.size(), size_t(numUniques));
+ StringVector uniques = sortRandomStrings(rndStrings);
+ EXPECT_EQUAL(uniques.size(), size_t(numUniques));
+ // max entrySize = 25 before alignment
+ uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16);
+ EnumStoreType ses(numUniques * maxEntrySize, hasPostings);
+ EnumIndex idx;
+
+ uint32_t cnt = 0;
+ // add new unique strings
+ for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) { // inserted in reverse sorted order
+ ses.addEnum(iter->c_str(), idx);
+ EXPECT_EQUAL(ses.getNumUniques(), ++cnt);
+ }
+
+ // check for unique strings
+ for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
+ EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
+ }
+
+ EXPECT_EQUAL(ses.getNumUniques(), numUniques);
+ if (hasPostings) {
+ testUniques<EnumStoreType, EnumPostingTree>(ses, uniques);
+ } else {
+ testUniques<EnumStoreType, EnumTree>(ses, uniques);
+ }
+
+ rndStrings = fillRandomStrings(numUniques, 15, 20); // lengths 15..19, so none can equal a string of the first batch (lengths 10..14)
+ StringVector newUniques = sortRandomStrings(rndStrings);
+
+ typename EnumStoreType::Builder builder;
+ for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
+ builder.insert(iter->c_str());
+ }
+
+ ses.reset(builder);
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getRemaining());
+
+ // check for old unique strings
+ for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) {
+ EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx));
+ }
+
+ // check for new unique strings
+ for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) {
+ EXPECT_TRUE(ses.findIndex(iter->c_str(), idx));
+ }
+
+ EXPECT_EQUAL(ses.getNumUniques(), numUniques);
+ if (hasPostings) {
+ testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques);
+ } else {
+ testUniques<EnumStoreType, EnumTree>(ses, newUniques);
+ }
+}
+
+void
+EnumStoreTest::testHoldListAndGeneration() // checks that entries seen by readers stay readable across compaction and that a second compaction needs the hold lists trimmed first
+{
+ uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6); // header + '\0' + strlen("enumNN")
+ StringEnumStore ses(100 * entrySize, false);
+ StringEnumStore::Index idx;
+ StringVector uniques;
+ generation_t sesGen = 0u;
+ uniques.reserve(100);
+ for (uint32_t i = 0; i < 100; ++i) {
+ char tmp[16];
+ sprintf(tmp, i < 10 ? "enum0%u" : "enum%u", i); // zero padded to two digits: enum00 .. enum99
+ uniques.push_back(tmp);
+ }
+ StringVector newUniques;
+ newUniques.reserve(100);
+ for (uint32_t i = 0; i < 100; ++i) {
+ char tmp[16];
+ sprintf(tmp, i < 10 ? "unique0%u" : "unique%u", i); // zero padded to two digits: unique00 .. unique99
+ newUniques.push_back(tmp);
+ }
+ uint32_t generation = 0;
+ std::vector<Reader> readers;
+
+ // insert first batch of unique strings
+ for (uint32_t i = 0; i < 100; ++i) {
+ ses.addEnum(uniques[i].c_str(), idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+
+ // associate readers
+ if (i % 10 == 9) { // after every tenth insert, record a reader for the last ten entries
+ Reader::IndexVector indices;
+ Reader::ExpectedVector expected;
+ for (uint32_t j = i - 9; j <= i; ++j) {
+ EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx));
+ indices.push_back(idx);
+ StringEnumStore::Entry entry = ses.getEntry(idx);
+ EXPECT_TRUE(entry.getEnum() == j);
+ EXPECT_TRUE(entry.getRefCount() == 1);
+ EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0);
+ expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(),
+ std::string(entry.getValue())));
+ }
+ EXPECT_TRUE(indices.size() == 10);
+ EXPECT_TRUE(expected.size() == 10);
+ sesGen = generation++;
+ readers.push_back(Reader(sesGen, indices, expected));
+ checkReaders(ses, sesGen, readers);
+ }
+ }
+
+ EXPECT_EQUAL(0u, ses.getRemaining()); // the 100 entries fill the buffer completely
+ EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems);
+
+ // remove all uniques
+ for (uint32_t i = 0; i < 100; ++i) {
+ EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx));
+ ses.decRefCount(idx);
+ EXPECT_EQUAL(0u, ses.getRefCount(idx));
+ }
+ ses.freeUnusedEnums(true);
+ EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); // every entry in buffer 0 is now accounted as dead
+
+ // perform compaction
+ uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8); // header + '\0' + strlen("uniqueNN")
+ EXPECT_TRUE(ses.performCompaction(5 * newEntrySize));
+
+ // check readers again
+ checkReaders(ses, sesGen, readers); // old indices must still resolve to the recorded enum values and strings
+
+ // fill up buffer
+ uint32_t i = 0;
+ while (ses.getRemaining() >= newEntrySize) {
+ //LOG(info, "fill: %s", newUniques[i].c_str());
+ ses.addEnum(newUniques[i++].c_str(), idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+ }
+ EXPECT_LESS(ses.getRemaining(), newEntrySize);
+ // buffer on hold list
+ EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize)); // compaction is expected to fail at this point
+
+ checkReaders(ses, sesGen, readers);
+ ses.transferHoldLists(sesGen);
+ ses.trimHoldLists(sesGen + 1);
+
+ // buffer no longer on hold list
+ EXPECT_LESS(ses.getRemaining(), newEntrySize);
+ EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); // now compaction is expected to succeed
+ EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize);
+}
+
+void
+EnumStoreTest::testMemoryUsage() // tracks getMemoryUsage() through insert, free, compaction and hold list trimming
+{
+ StringEnumStore ses(200, false);
+ StringEnumStore::Index idx;
+ uint32_t num = 8;
+ std::vector<StringEnumStore::Index> indices;
+ std::vector<std::string> uniques;
+ for (uint32_t i = 0; i < num; ++i) {
+ std::stringstream ss;
+ ss << "enum" << i; // enum0 .. enum7
+ uniques.push_back(ss.str());
+ }
+ generation_t sesGen = 0u;
+ uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx")
+
+ // usage before inserting enums
+ MemoryUsage usage = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0));
+ EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes());
+ EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes());
+ EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
+ EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
+
+ for (uint32_t i = 0; i < num; ++i) {
+ ses.addEnum(uniques[i].c_str(), idx);
+ indices.push_back(idx);
+ ses.incRefCount(idx);
+ EXPECT_TRUE(ses.getRefCount(idx));
+ }
+
+ // usage after inserting enums
+ usage = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), num);
+ EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes());
+ EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes());
+ EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes());
+ EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
+
+ // assign new enum for num / 2 of indices
+ for (uint32_t i = 0; i < num / 2; ++i) {
+ ses.decRefCount(indices[i]);
+ EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx)); // the first half is redirected to the last unique value
+ ses.incRefCount(idx);
+ indices[i] = idx;
+ }
+ ses.freeUnusedEnums(true);
+
+ // usage after removing enums
+ usage = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), num / 2);
+ EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes());
+ EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); // freed entries still count as used ...
+ EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes()); // ... but are now reported as dead
+ EXPECT_EQUAL(0u, usage.allocatedBytesOnHold());
+
+ ses.performCompaction(400);
+
+ // usage after compaction
+ MemoryUsage usage2 = ses.getMemoryUsage();
+ EXPECT_EQUAL(ses.getNumUniques(), num / 2);
+ EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes()); // used bytes grow by the size of the remaining entries
+ EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes());
+ EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold());
+
+ ses.transferHoldLists(sesGen);
+ ses.trimHoldLists(sesGen + 1);
+
+ // usage after hold list trimming
+ MemoryUsage usage3 = ses.getMemoryUsage();
+ EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes()); // only the remaining entries are reported, without RESERVED_BYTES
+ EXPECT_EQUAL(0u, usage3.deadBytes());
+ EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold());
+}
+
+namespace {
+
+// Adds value to store, takes one reference on the resulting entry and
+// returns its index.
+NumericEnumStore::Index
+addEnum(NumericEnumStore &store, uint32_t value)
+{
+    NumericEnumStore::Index entryIdx;
+    store.addEnum(value, entryIdx);
+    store.incRefCount(entryIdx);
+    return entryIdx;
+}
+
+// Drops one reference on idx and then asks store to free unused enums.
+void
+decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx)
+{
+    store.decRefCount(idx);
+    store.freeUnusedEnums(false);
+}
+
+}
+
+// Checks that getAddressSpaceUsage() goes up by 16 for each added value and
+// back down again when the values are released.
+void
+EnumStoreTest::requireThatAddressSpaceUsageIsReported()
+{
+    // NumericEnumStore::DataStoreType::RefType::offsetSize()
+    const size_t ADDRESS_LIMIT = 34359738368;
+    NumericEnumStore store(200, false);
+
+    // empty store
+    EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+
+    // usage grows with each added value
+    NumericEnumStore::Index first = addEnum(store, 10);
+    EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+    NumericEnumStore::Index second = addEnum(store, 20);
+    EXPECT_EQUAL(AddressSpace(32, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+
+    // and shrinks again as the values are released
+    decRefCount(store, first);
+    EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+    decRefCount(store, second);
+    EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage());
+}
+
+// Returns the number of decimal digits needed to print num (1 for 0).
+size_t
+digits(size_t num)
+{
+    size_t count = 1;
+    for (; num >= 10; num /= 10) {
+        ++count;
+    }
+    return count;
+}
+
+/**
+ * Fills a string enum store sized to StringEnumStore::Index::offsetSize()
+ * with as many 64 KiB strings as fit, checking at regular intervals that
+ * offsets keep increasing and that every added string is counted as unique.
+ *
+ * Each string consists of the zero padded loop counter followed by 'X'
+ * characters, which makes all strings distinct but equally long.
+ */
+void
+EnumStoreTest::testBufferLimit()
+{
+    size_t enumSize = StringEnumStore::Index::offsetSize();
+    StringEnumStore es(enumSize, false);
+
+    // Heap allocated: a 64 KiB variable length array on the stack is not
+    // standard C++.
+    const size_t strLen = 65536;
+    std::vector<char> strBuf(strLen + 1, 'X');
+    strBuf[strLen] = 0;
+    char *str = &strBuf[0];
+
+    size_t entrySize = StringEnumStore::getEntrySize(str);
+    size_t numUniques = enumSize / entrySize;
+    size_t uniqDigits = digits(numUniques);
+    // Guard against a zero modulus should fewer than 32 entries ever fit.
+    const size_t reportInterval = (numUniques >= 32) ? (numUniques / 32) : 1;
+
+    EnumIndex idx;
+    EnumIndex lastIdx;
+    for (size_t i = 0; i < numUniques; ++i) {
+        snprintf(str, strBuf.size(), "%0*zu", (int)uniqDigits, i);
+        str[uniqDigits] = 'X'; // overwrite the terminator written by snprintf to restore the full length
+        es.addEnum(str, idx);
+        if (i % reportInterval == 1) {
+            EXPECT_TRUE(idx.offset() > lastIdx.offset());
+            EXPECT_EQUAL(i + 1, es.getNumUniques());
+            std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
+        }
+        lastIdx = idx;
+    }
+    EXPECT_EQUAL(idx.offset(), lastIdx.offset());
+    EXPECT_EQUAL(numUniques, es.getNumUniques());
+    std::cout << "idx.offset(" << idx.offset() << "), str(" << std::string(str, uniqDigits) << ")" << std::endl;
+}
+
+// Returns a pseudo random value in the half open range [low, high), drawn
+// from rand(). high must be greater than low.
+template <typename T>
+T
+EnumStoreTest::random(T low, T high)
+{
+    return low + (rand() % (high - low));
+}
+
+// Returns a string of random characters whose length is drawn with
+// random(minLen, maxLen) and whose characters are drawn with
+// random('a', 'z').
+std::string
+EnumStoreTest::getRandomString(uint32_t minLen, uint32_t maxLen)
+{
+    const uint32_t length = random(minLen, maxLen);
+    std::string result(length, '\0');
+    for (uint32_t pos = 0; pos < length; ++pos) {
+        result[pos] = random('a', 'z');
+    }
+    return result;
+}
+
+// Returns numStrings strings, each produced by
+// getRandomString(minLen, maxLen).
+EnumStoreTest::StringVector
+EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen)
+{
+    StringVector strings;
+    strings.reserve(numStrings);
+    while (strings.size() < numStrings) {
+        strings.push_back(getRandomString(minLen, maxLen));
+    }
+    return strings;
+}
+
+// Sorts strings in place, moves the distinct values to the front with
+// std::unique, and returns a copy of those distinct values in sorted order.
+// The size of strings itself is left unchanged.
+EnumStoreTest::StringVector
+EnumStoreTest::sortRandomStrings(StringVector & strings)
+{
+    std::sort(strings.begin(), strings.end());
+    StringVector::iterator uniqueEnd = std::unique(strings.begin(), strings.end());
+    return StringVector(strings.begin(), uniqueEnd);
+}
+
+// For every index held by every reader, checks that the store still reports
+// the enum value and the string recorded when the reader was created.
+// sesGen is not used.
+void
+EnumStoreTest::checkReaders(const StringEnumStore & ses,
+                            generation_t sesGen,
+                            const std::vector<Reader> & readers)
+{
+    (void) sesGen;
+    typedef std::vector<Reader>::const_iterator ReaderIter;
+    StringEnumStore::Type value = "";
+    for (ReaderIter reader = readers.begin(); reader != readers.end(); ++reader) {
+        for (uint32_t j = 0; j < reader->_indices.size(); ++j) {
+            EXPECT_EQUAL(reader->_expected[j]._enum, ses.getEnum(reader->_indices[j]));
+            EXPECT_TRUE(ses.getValue(reader->_indices[j], value));
+            EXPECT_TRUE(reader->_expected[j]._string == std::string(value));
+        }
+    }
+}
+
+
+int
+EnumStoreTest::Main() // runs the test methods one after the other
+{
+ TEST_INIT("enumstore_test");
+
+ testIndex();
+ testStringEntry();
+ testNumericEntry();
+ testFloatEnumStore();
+ testAddEnum();
+ testCompaction();
+ testReset();
+ testHoldListAndGeneration();
+ testMemoryUsage();
+ TEST_DO(requireThatAddressSpaceUsageIsReported());
+ if (_argc > 1) { // opt-in: presumably any extra command line argument enables the large test - confirm
+ testBufferLimit(); // large test, store sized to StringEnumStore::Index::offsetSize(); NOTE(review): originally described as an 8 GB buffer, but ADDRESS_LIMIT in requireThatAddressSpaceUsageIsReported suggests 32 GiB - confirm
+ }
+
+ TEST_DONE();
+}
+}
+
+
+TEST_APPHOOK(search::EnumStoreTest);
diff --git a/searchlib/src/tests/attribute/extendattributes/.gitignore b/searchlib/src/tests/attribute/extendattributes/.gitignore
new file mode 100644
index 00000000000..4018a7d4f5b
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+extendattribute_test
+searchlib_extendattribute_test_app
diff --git a/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt
new file mode 100644
index 00000000000..b0803f0a232
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from extendattribute.cpp, depending on searchlib.
+vespa_add_executable(searchlib_extendattribute_test_app
+ SOURCES
+ extendattribute.cpp
+ DEPENDS
+ searchlib
+)
+# The test is run through the extendattribute_test.sh wrapper script.
+vespa_add_test(NAME searchlib_extendattribute_test_app COMMAND sh extendattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/extendattributes/DESC b/searchlib/src/tests/attribute/extendattributes/DESC
new file mode 100644
index 00000000000..4f88189a1d7
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/DESC
@@ -0,0 +1 @@
+Unit tests for extendable attributes.
diff --git a/searchlib/src/tests/attribute/extendattributes/FILES b/searchlib/src/tests/attribute/extendattributes/FILES
new file mode 100644
index 00000000000..930039cae19
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/FILES
@@ -0,0 +1 @@
+extendattribute.cpp
diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
new file mode 100644
index 00000000000..0bb751d26ee
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp
@@ -0,0 +1,176 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("extendattribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+
+namespace search {
+
+// Test application for the extendable attribute classes from
+// extendableattributes.h. Main() runs each helper below against the
+// single-value, multi-value and weighted-set flavour of the matching type.
+class ExtendAttributeTest : public vespalib::TestApp
+{
+private:
+ // Adds two documents with integer values to 'attr' and checks the readback.
+ template <typename Attribute>
+ void testExtendInteger(Attribute & attr);
+ // Adds two documents with floating point values to 'attr' and checks the readback.
+ template <typename Attribute>
+ void testExtendFloat(Attribute & attr);
+ // Adds two documents with string values to 'attr' and checks the readback.
+ template <typename Attribute>
+ void testExtendString(Attribute & attr);
+
+public:
+ // Test entry point.
+ int Main();
+};
+
+// Exercises an integer extendable attribute: adds two documents, feeds
+// weighted values into them and checks what getInt() and the AttributeVector
+// get() interface report. The expectations depend on whether the attribute
+// is single-value, multi-value or a weighted set.
+template <typename Attribute>
+void ExtendAttributeTest::testExtendInteger(Attribute & attr)
+{
+    // Multi-value reads are done through the AttributeVector interface.
+    AttributeVector & base = attr;
+    uint32_t lid = 0;
+
+    // First document: receives the values 1 and 2.
+    EXPECT_EQUAL(attr.getNumDocs(), 0u);
+    attr.addDoc(lid);
+    EXPECT_EQUAL(lid, 0u);
+    EXPECT_EQUAL(attr.getNumDocs(), 1u);
+    attr.add(1, 10);
+    EXPECT_EQUAL(attr.getInt(0), 1);
+    attr.add(2, 20);
+    if (attr.hasMultiValue()) {
+        // Multi-value: getInt() is expected to keep reporting the first value.
+        EXPECT_EQUAL(attr.getInt(0), 1);
+        AttributeVector::WeightedInt both[2];
+        EXPECT_EQUAL(base.get(0, both, 2), 2u);
+        EXPECT_EQUAL(both[0].getValue(), 1);
+        EXPECT_EQUAL(both[1].getValue(), 2);
+        if (attr.hasWeightedSetType()) {
+            EXPECT_EQUAL(both[0].getWeight(), 10);
+            EXPECT_EQUAL(both[1].getWeight(), 20);
+        }
+    } else {
+        // Single-value: getInt() is expected to report the latest value.
+        EXPECT_EQUAL(attr.getInt(0), 2);
+    }
+
+    // Second document: receives the single value 3.
+    attr.addDoc(lid);
+    EXPECT_EQUAL(lid, 1u);
+    EXPECT_EQUAL(attr.getNumDocs(), 2u);
+    attr.add(3, 30);
+    EXPECT_EQUAL(attr.getInt(1), 3);
+    if (attr.hasMultiValue()) {
+        AttributeVector::WeightedInt only[1];
+        EXPECT_EQUAL(base.get(1, only, 1), 1u);
+        EXPECT_EQUAL(only[0].getValue(), 3);
+        if (attr.hasWeightedSetType()) {
+            EXPECT_EQUAL(only[0].getWeight(), 30);
+        }
+    }
+}
+
+// Exercises a floating point extendable attribute: adds two documents, feeds
+// weighted values into them and checks what getInt(), getFloat() and the
+// AttributeVector get() interface report. The expectations depend on whether
+// the attribute is single-value, multi-value or a weighted set.
+template <typename Attribute>
+void ExtendAttributeTest::testExtendFloat(Attribute & attr)
+{
+    // Multi-value reads are done through the AttributeVector interface.
+    AttributeVector & base = attr;
+    uint32_t lid = 0;
+
+    // First document: receives the values 1.7 and 2.3.
+    EXPECT_EQUAL(attr.getNumDocs(), 0u);
+    attr.addDoc(lid);
+    EXPECT_EQUAL(lid, 0u);
+    EXPECT_EQUAL(attr.getNumDocs(), 1u);
+    attr.add(1.7, 10);
+    EXPECT_EQUAL(attr.getInt(0), 1);
+    EXPECT_EQUAL(attr.getFloat(0), 1.7);
+    attr.add(2.3, 20);
+    if (attr.hasMultiValue()) {
+        // Multi-value: getFloat() is expected to keep reporting the first value.
+        EXPECT_EQUAL(attr.getFloat(0), 1.7);
+        AttributeVector::WeightedFloat both[2];
+        EXPECT_EQUAL(base.get(0, both, 2), 2u);
+        EXPECT_EQUAL(both[0].getValue(), 1.7);
+        EXPECT_EQUAL(both[1].getValue(), 2.3);
+        if (attr.hasWeightedSetType()) {
+            EXPECT_EQUAL(both[0].getWeight(), 10);
+            EXPECT_EQUAL(both[1].getWeight(), 20);
+        }
+    } else {
+        // Single-value: getFloat() is expected to report the latest value.
+        EXPECT_EQUAL(attr.getFloat(0), 2.3);
+    }
+
+    // Second document: receives the single value 3.6.
+    attr.addDoc(lid);
+    EXPECT_EQUAL(lid, 1u);
+    EXPECT_EQUAL(attr.getNumDocs(), 2u);
+    attr.add(3.6, 30);
+    EXPECT_EQUAL(attr.getFloat(1), 3.6);
+    if (attr.hasMultiValue()) {
+        AttributeVector::WeightedFloat only[1];
+        EXPECT_EQUAL(base.get(1, only, 1), 1u);
+        EXPECT_EQUAL(only[0].getValue(), 3.6);
+        if (attr.hasWeightedSetType()) {
+            EXPECT_EQUAL(only[0].getWeight(), 30);
+        }
+    }
+}
+
+// Exercises a string extendable attribute: adds two documents, feeds weighted
+// values into them and checks what getString() and the AttributeVector get()
+// interface report. The expectations depend on whether the attribute is
+// single-value, multi-value or a weighted set.
+template <typename Attribute>
+void ExtendAttributeTest::testExtendString(Attribute & attr)
+{
+    // Multi-value reads are done through the AttributeVector interface.
+    AttributeVector & base = attr;
+    uint32_t lid = 0;
+
+    // First document: receives the values "1.7" and "2.3".
+    EXPECT_EQUAL(attr.getNumDocs(), 0u);
+    attr.addDoc(lid);
+    EXPECT_EQUAL(lid, 0u);
+    EXPECT_EQUAL(attr.getNumDocs(), 1u);
+    attr.add("1.7", 10);
+    EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "1.7");
+    attr.add("2.3", 20);
+    if (attr.hasMultiValue()) {
+        // Multi-value: getString() is expected to keep reporting the first value.
+        EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "1.7");
+        AttributeVector::WeightedString both[2];
+        EXPECT_EQUAL(base.get(0, both, 2), 2u);
+        EXPECT_EQUAL(both[0].getValue(), "1.7");
+        EXPECT_EQUAL(both[1].getValue(), "2.3");
+        if (attr.hasWeightedSetType()) {
+            EXPECT_EQUAL(both[0].getWeight(), 10);
+            EXPECT_EQUAL(both[1].getWeight(), 20);
+        }
+    } else {
+        // Single-value: getString() is expected to report the latest value.
+        EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "2.3");
+    }
+
+    // Second document: receives the single value "3.6".
+    attr.addDoc(lid);
+    EXPECT_EQUAL(lid, 1u);
+    EXPECT_EQUAL(attr.getNumDocs(), 2u);
+    attr.add("3.6", 30);
+    EXPECT_EQUAL(std::string(attr.getString(1, NULL, 0)), "3.6");
+    if (attr.hasMultiValue()) {
+        AttributeVector::WeightedString only[1];
+        EXPECT_EQUAL(base.get(1, only, 1), 1u);
+        EXPECT_EQUAL(only[0].getValue(), "3.6");
+        if (attr.hasWeightedSetType()) {
+            EXPECT_EQUAL(only[0].getWeight(), 30);
+        }
+    }
+}
+
+// Test entry point: for each value type (integer, float, string) creates a
+// single-value, a multi-value and a weighted-set attribute, checks their
+// collection-type flags and runs the matching testExtend* helper on each.
+int
+ExtendAttributeTest::Main()
+{
+    TEST_INIT("extendattribute_test");
+
+    // Integer attributes.
+    SingleIntegerExtAttribute singleInt("si1");
+    MultiIntegerExtAttribute multiInt("mi1");
+    WeightedSetIntegerExtAttribute wsetInt("wsi1");
+    EXPECT_TRUE(!singleInt.hasMultiValue());
+    EXPECT_TRUE(multiInt.hasMultiValue());
+    EXPECT_TRUE(wsetInt.hasWeightedSetType());
+    testExtendInteger(singleInt);
+    testExtendInteger(multiInt);
+    testExtendInteger(wsetInt);
+
+    // Floating point attributes.
+    SingleFloatExtAttribute singleFloat("sd1");
+    MultiFloatExtAttribute multiFloat("md1");
+    WeightedSetFloatExtAttribute wsetFloat("wsd1");
+    EXPECT_TRUE(!singleFloat.hasMultiValue());
+    EXPECT_TRUE(multiFloat.hasMultiValue());
+    EXPECT_TRUE(wsetFloat.hasWeightedSetType());
+    testExtendFloat(singleFloat);
+    testExtendFloat(multiFloat);
+    testExtendFloat(wsetFloat);
+
+    // String attributes.
+    SingleStringExtAttribute singleString("ss1");
+    MultiStringExtAttribute multiString("ms1");
+    WeightedSetStringExtAttribute wsetString("wss1");
+    EXPECT_TRUE(!singleString.hasMultiValue());
+    EXPECT_TRUE(multiString.hasMultiValue());
+    EXPECT_TRUE(wsetString.hasWeightedSetType());
+    testExtendString(singleString);
+    testExtendString(multiString);
+    testExtendString(wsetString);
+
+    TEST_DONE();
+}
+
+}
+
+// Registers search::ExtendAttributeTest as the test application of this binary
+// (the macro comes from the testkit header; presumably it supplies main()).
+TEST_APPHOOK(search::ExtendAttributeTest);
diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh
new file mode 100755
index 00000000000..6f335b18229
--- /dev/null
+++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+$VALGRIND ./searchlib_extendattribute_test_app
+rm -rf *.dat
diff --git a/searchlib/src/tests/attribute/gidmapattribute/.gitignore b/searchlib/src/tests/attribute/gidmapattribute/.gitignore
new file mode 100644
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/searchlib/src/tests/attribute/gidmapattribute/.gitignore
diff --git a/searchlib/src/tests/attribute/multivaluemapping/.gitignore b/searchlib/src/tests/attribute/multivaluemapping/.gitignore
new file mode 100644
index 00000000000..743c738a0a2
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+multivaluemapping_test
+searchlib_multivaluemapping_test_app
diff --git a/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt
new file mode 100644
index 00000000000..36c66b09966
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Test executable built from multivaluemapping_test.cpp, depending on searchlib.
+vespa_add_executable(searchlib_multivaluemapping_test_app
+ SOURCES
+ multivaluemapping_test.cpp
+ DEPENDS
+ searchlib
+)
+# The test runs the executable directly (no wrapper script).
+vespa_add_test(NAME searchlib_multivaluemapping_test_app COMMAND searchlib_multivaluemapping_test_app)
diff --git a/searchlib/src/tests/attribute/multivaluemapping/DESC b/searchlib/src/tests/attribute/multivaluemapping/DESC
new file mode 100644
index 00000000000..44c27ec9926
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/DESC
@@ -0,0 +1 @@
+This is a test for the MultivalueMapping class.
diff --git a/searchlib/src/tests/attribute/multivaluemapping/FILES b/searchlib/src/tests/attribute/multivaluemapping/FILES
new file mode 100644
index 00000000000..bf22403a5fe
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/FILES
@@ -0,0 +1 @@
+multivaluemapping_test.cpp
diff --git a/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp
new file mode 100644
index 00000000000..e78e180856b
--- /dev/null
+++ b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp
@@ -0,0 +1,836 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("multivaluemapping_test");
+#include <vespa/vespalib/testkit/testapp.h>
+//#define DEBUG_MULTIVALUE_MAPPING
+//#define LOG_MULTIVALUE_MAPPING
+#include <vespa/searchlib/attribute/multivaluemapping.h>
+#include <algorithm>
+#include <limits>
+
+namespace search {
+
+namespace
+{
+
+// Placeholder committed doc id limit, set to the largest uint32_t value.
+// It is passed as the first constructor argument of the MvMapping instances
+// created by the tests in this file.
+uint32_t dummyCommittedDocIdLimit = std::numeric_limits<uint32_t>::max();
+
+}
+
+// Short names for the mapping type under test and the types it exposes.
+typedef MultiValueMappingT<uint32_t> MvMapping;
+typedef MvMapping::Index Index;
+typedef multivalue::Index64 Index64;
+typedef multivalue::Index32 Index32;
+typedef MvMapping::Histogram Histogram;
+
+// Test application for MultiValueMappingT<uint32_t> (MvMapping) and the
+// multivalue::Index32 / multivalue::Index64 index types.
+class MultiValueMappingTest : public vespalib::TestApp
+{
+private:
+ typedef std::vector<Index> IndexVector;
+ typedef std::vector<std::vector<uint32_t> > ExpectedVector;
+ typedef vespalib::GenerationHandler::generation_t generation_t;
+
+ // Snapshot held by a simulated reader: the indices it captured, the values
+ // it expects to find behind them, and the generation span of the read.
+ // checkReaders() verifies and removes a reader once _endGen has been reached.
+ class Reader {
+ public:
+ uint32_t _startGen;
+ uint32_t _endGen;
+ IndexVector _indices;
+ ExpectedVector _expected;
+ // Number of captured indices.
+ uint32_t numKeys() { return _indices.size(); }
+ Reader(uint32_t startGen, uint32_t endGen, const IndexVector & indices,
+ const ExpectedVector & expected) :
+ _startGen(startGen), _endGen(endGen), _indices(indices), _expected(expected) {}
+ };
+
+ typedef std::vector<Reader> ReaderVector;
+
+ void testIndex32();
+ void testIndex64();
+ void testSimpleSetAndGet();
+ void testChangingValueCount();
+
+ // Checks and removes all readers whose _endGen is <= mvmGen.
+ void
+ checkReaders(MvMapping &mvm,
+ generation_t mvmGen,
+ ReaderVector &readers);
+
+ void testHoldListAndGeneration();
+ void testManualCompaction();
+ void testVariousGets();
+ void testReplace();
+ void testMemoryUsage();
+ void testShrink();
+ void testHoldElem();
+ void requireThatAddressSpaceUsageIsReported();
+ void requireThatDeadIsNotAccountedInAddressSpaceUsage();
+
+public:
+ // Test entry point.
+ int Main();
+};
+
+// Checks the accessors of multivalue::Index32 for a default constructed
+// index, a small index and the index built from the largest field values
+// used here, plus the static limits of the type.
+void
+MultiValueMappingTest::testIndex32()
+{
+    {
+        // Default constructed: every field is zero.
+        Index32 zero;
+        EXPECT_EQUAL(zero.values(), 0u);
+        EXPECT_EQUAL(zero.alternative(), 0u);
+        EXPECT_EQUAL(zero.vectorIdx(), 0u);
+        EXPECT_EQUAL(zero.offset(), 0u);
+    }
+    {
+        // values = 3, alternative = 0, offset = 1000.
+        Index32 small(3, 0, 1000);
+        EXPECT_EQUAL(small.values(), 3u);
+        EXPECT_EQUAL(small.alternative(), 0u);
+        EXPECT_EQUAL(small.vectorIdx(), 6u);
+        EXPECT_EQUAL(small.offset(), 1000u);
+        EXPECT_EQUAL(small.idx(), 0x300003e8u);
+    }
+    {
+        // values = 15, alternative = 1, offset = 134217727: all bits set.
+        Index32 full(15, 1, 134217727);
+        EXPECT_EQUAL(full.values(), 15u);
+        EXPECT_EQUAL(full.alternative(), 1u);
+        EXPECT_EQUAL(full.vectorIdx(), 31u);
+        EXPECT_EQUAL(full.offset(), 134217727u);
+        EXPECT_EQUAL(full.idx(), 0xffffffffu);
+    }
+
+    // Static limits of the type.
+    EXPECT_EQUAL(Index32::maxValues(), 15u);
+    EXPECT_EQUAL(Index32::alternativeSize(), 2u);
+}
+
+// Checks the accessors of multivalue::Index64 for a default constructed
+// index and two explicitly constructed ones, plus the static limits of the
+// type.
+void
+MultiValueMappingTest::testIndex64()
+{
+    {
+        // Default constructed: every field is zero.
+        Index64 zero;
+        EXPECT_EQUAL(zero.values(), 0u);
+        EXPECT_EQUAL(zero.alternative(), 0u);
+        EXPECT_EQUAL(zero.vectorIdx(), 0u);
+        EXPECT_EQUAL(zero.offset(), 0u);
+    }
+    {
+        // values = 3, alternative = 0, offset = 1000.
+        Index64 small(3, 0, 1000);
+        EXPECT_EQUAL(small.values(), 3u);
+        EXPECT_EQUAL(small.alternative(), 0u);
+        EXPECT_EQUAL(small.vectorIdx(), 6u);
+        EXPECT_EQUAL(small.offset(), 1000u);
+        EXPECT_EQUAL(small.idx(), 0x3000003e8ull);
+    }
+    {
+        // values = 15, alternative = 1, offset = 134217727.
+        Index64 large(15, 1, 134217727);
+        EXPECT_EQUAL(large.values(), 15u);
+        EXPECT_EQUAL(large.alternative(), 1u);
+        EXPECT_EQUAL(large.vectorIdx(), 31u);
+        EXPECT_EQUAL(large.offset(), 134217727u);
+        EXPECT_EQUAL(large.idx(), 0xf87ffffffull);
+    }
+
+    // Static limits of the type.
+    EXPECT_EQUAL(Index64::maxValues(), 1023u);
+    EXPECT_EQUAL(Index64::alternativeSize(), 2u);
+}
+
+// Sets values for every key of a fresh mapping, reads them back through the
+// different get() overloads, then resets the mapping to 10 keys and adds
+// five more keys with addKey().
+void
+MultiValueMappingTest::testSimpleSetAndGet()
+{
+ uint32_t maxValueCount = Index::maxValues() * 2;
+ uint32_t numKeys = maxValueCount * 2;
+ MvMapping mvm(dummyCommittedDocIdLimit, numKeys);
+ EXPECT_EQUAL(mvm.getNumKeys(), numKeys);
+ Index idx;
+
+ // insert values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ // NOTE(review): key < numKeys == 2 * maxValueCount, so this quotient is
+ // only ever 0 or 1; confirm whether a wider range of value counts
+ // (e.g. key % maxValueCount) was intended.
+ uint32_t valueCount = key / maxValueCount;
+ std::vector<uint32_t> values(valueCount, key);
+ Histogram needed(Index::maxValues());
+ needed[valueCount] = 1;
+ // Compact first when there is no room for one more entry of this size.
+ if (!mvm.enoughCapacity(needed)) {
+ mvm.trimHoldLists(1);
+ mvm.performCompaction(needed);
+ }
+ mvm.set(key, values);
+ EXPECT_EQUAL(mvm.getValueCount(key), valueCount);
+ idx = mvm._indices[key];
+ // The index stores the value count, capped at Index::maxValues().
+ if (valueCount < Index::maxValues()) {
+ EXPECT_EQUAL(idx.values(), valueCount);
+ } else {
+ EXPECT_EQUAL(idx.values(), Index::maxValues());
+ }
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "------------------------------------------------------------");
+#endif
+ }
+ EXPECT_TRUE(!mvm.hasKey(numKeys));
+
+ // check for expected values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t valueCount = key / maxValueCount;
+ EXPECT_EQUAL(mvm.getValueCount(key), valueCount);
+ // get() into a vector
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_EQUAL(mvm.get(key, buffer), valueCount);
+ EXPECT_TRUE(buffer.size() == valueCount);
+ EXPECT_EQUAL(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)), valueCount);
+ uint32_t value;
+ // get() through a pointer handle; expected to stay NULL when the key has no values
+ const uint32_t * handle = NULL;
+ EXPECT_EQUAL(mvm.get(key, handle), valueCount);
+ EXPECT_TRUE(valueCount == 0 ? handle == NULL : handle != NULL);
+ // get() of one value at a time by position
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ EXPECT_TRUE(mvm.get(key, i, value));
+ EXPECT_EQUAL(value, key);
+ EXPECT_TRUE(handle[i] == key);
+ }
+ // one past the last position must fail
+ EXPECT_TRUE(!mvm.get(key, valueCount, value));
+ }
+
+ // reset
+ mvm.reset(10);
+ EXPECT_TRUE(mvm.getNumKeys() == 10);
+ EXPECT_TRUE(!mvm.hasKey(10));
+ EXPECT_TRUE(mvm._genHolder.getHeldBytes() == 0);
+ for (uint32_t key = 0; key < 10; ++key) {
+ EXPECT_TRUE(mvm.getValueCount(key) == 0);
+ std::vector<uint32_t> buffer;
+ EXPECT_TRUE(mvm.get(key, buffer) == 0);
+ EXPECT_TRUE(buffer.size() == 0);
+ }
+
+ // add more keys
+ for (uint32_t i = 0; i < 5; ++i) {
+ uint32_t key;
+ mvm.addKey(key);
+ EXPECT_TRUE(key == 10 + i);
+ EXPECT_TRUE(mvm.getNumKeys() == 11 + i);
+ }
+}
+
+// Repeatedly re-sets the same 10 keys with a value count that grows by one
+// per round (1 .. Index::maxValues() + 1) and checks, after each round, the
+// remaining capacity and the used/dead counters of the affected vectors.
+void
+MultiValueMappingTest::testChangingValueCount()
+{
+ uint32_t numKeys = 10;
+ uint32_t maxCount = Index::maxValues() + 1;
+ // Capacity: room for numKeys entries per small value count, and twice that
+ // in the slot at Index::maxValues().
+ Histogram initCapacity(Index::maxValues());
+ for (uint32_t i = 0; i < Index::maxValues(); ++i) {
+ initCapacity[i] = numKeys;
+ }
+ initCapacity[Index::maxValues()] = numKeys * 2;
+ MvMapping mvm(dummyCommittedDocIdLimit, numKeys, initCapacity);
+
+ // Increasing the value count for some keys
+ for (uint32_t valueCount = 1; valueCount <= maxCount; ++valueCount) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "########################### %u ##############################", valueCount);
+#endif
+ uint32_t lastValueCount = valueCount - 1;
+ // set values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ std::vector<uint32_t> buffer(valueCount, key);
+ mvm.set(key, buffer);
+ }
+
+ // The capacity for this value count is expected to be used up (small
+ // counts) or reduced by numKeys per round (counts >= Index::maxValues()).
+ Histogram remaining = mvm.getRemaining();
+ if (valueCount < Index::maxValues()) {
+ EXPECT_TRUE(remaining[valueCount] == 0);
+ } else {
+ EXPECT_TRUE(remaining[Index::maxValues()] == numKeys * (maxCount - valueCount));
+ }
+
+ if (valueCount < Index::maxValues()) {
+ // The vector for the current count holds all keys with nothing dead ...
+ MvMapping::SingleVectorPtr current = mvm.getSingleVector(valueCount, MvMapping::ACTIVE);
+ EXPECT_TRUE(current.first->used() == numKeys * (valueCount));
+ EXPECT_TRUE(current.first->dead() == 0);
+
+ // ... while everything in the vector for the previous count is now dead.
+ if (lastValueCount != 0) {
+ MvMapping::SingleVectorPtr last = mvm.getSingleVector(lastValueCount, MvMapping::ACTIVE);
+ EXPECT_TRUE(last.first->used() == numKeys * (lastValueCount));
+ EXPECT_TRUE(last.first->dead() == numKeys * (lastValueCount));
+ }
+ } else {
+ // Counts >= Index::maxValues() are checked through the vector-vector.
+ MvMapping::VectorVectorPtr current = mvm.getVectorVector(MvMapping::ACTIVE);
+ EXPECT_TRUE(current.first->used() == numKeys * (valueCount - Index::maxValues() + 1));
+ EXPECT_TRUE(current.first->dead() == numKeys * (valueCount - Index::maxValues()));
+ }
+
+ // check values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)) == valueCount);
+ }
+ }
+}
+
+// Checks and retires every reader whose end generation has been reached
+// (_endGen <= mvmGen). For each index a retired reader captured, the values
+// stored in the mapping's internal vectors must still equal the values the
+// reader expects. Readers that are not yet due are left in 'readers'.
+//
+// mvm     - mapping whose internal vectors are inspected
+// mvmGen  - current generation of the mapping
+// readers - active readers; finished ones are erased from this vector
+void
+MultiValueMappingTest::checkReaders(MvMapping &mvm,
+                                    generation_t mvmGen,
+                                    ReaderVector &readers)
+{
+    for (ReaderVector::iterator iter = readers.begin();
+         iter != readers.end(); ) {
+        if (iter->_endGen > mvmGen) {
+            // Reader is still active; look at it again in a later generation.
+            ++iter;
+            continue;
+        }
+#ifdef LOG_MULTIVALUE_MAPPING
+        LOG(info, "check and remove reader: start = %u, end = %u",
+            iter->_startGen, iter->_endGen);
+#endif
+        for (uint32_t key = 0; key < iter->numKeys(); ++key) {
+            Index idx = iter->_indices[key];
+            const std::vector<uint32_t> & expected = iter->_expected[key];
+            uint32_t valueCount = expected.size();
+            if (valueCount < Index::maxValues()) {
+                // Small value counts: compare against the matching slice of
+                // the single vector the index refers to.
+                EXPECT_TRUE(idx.values() == valueCount);
+                for (uint32_t i = idx.offset() * idx.values(), j = 0;
+                     i < (idx.offset() + 1) * idx.values() && j < expected.size();
+                     ++i, ++j)
+                {
+                    EXPECT_TRUE(mvm._singleVectors[idx.vectorIdx()][i] == expected[j]);
+                }
+            } else {
+                // Large value counts: compare against the vector-vector entry.
+                const bool sameSize =
+                    (mvm._vectorVectors[idx.alternative()][idx.offset()].size() == valueCount);
+                EXPECT_TRUE(sameSize);
+                // Only compare contents when the sizes agree: the
+                // three-iterator std::equal would otherwise read past the
+                // end of 'expected' when the stored vector is longer.
+                EXPECT_TRUE(sameSize &&
+                            std::equal(mvm._vectorVectors[idx.alternative()][idx.offset()].begin(),
+                                       mvm._vectorVectors[idx.alternative()][idx.offset()].end(),
+                                       expected.begin()));
+            }
+        }
+        iter = readers.erase(iter);
+    }
+}
+
+// Simulates readers that hold on to old indices across several generations
+// while the mapping is updated and compacted, and checks (via checkReaders)
+// that the values behind those old indices stay intact until the reader is
+// done.
+void
+MultiValueMappingTest::testHoldListAndGeneration()
+{
+ uint32_t numKeys = 10;
+ uint32_t maxCount = Index::maxValues() + 1;
+ uint32_t maxKeys = numKeys * 2;
+
+ Histogram initCapacity(Index::maxValues());
+ for (uint32_t i = 1; i < maxCount; ++i) {
+ initCapacity[i] = numKeys; // make enough capacity for 1/2 of the keys
+ }
+ MvMapping mvm(dummyCommittedDocIdLimit, maxKeys, initCapacity);
+ EXPECT_TRUE(mvm.enoughCapacity(initCapacity));
+
+ ReaderVector readers;
+ uint32_t safeGen = std::numeric_limits<uint32_t>::max();
+ // Number of generations the next reader stays active.
+ uint32_t readDuration = 2;
+ generation_t mvmGen = 0u;
+
+ for (uint32_t valueCount = 1; valueCount < maxCount; ++valueCount) {
+#ifdef LOG_MULTIVALUE_MAPPING
+ LOG(info, "#################### count(%u) - gen(%u) ####################",
+ valueCount, mvm.getGeneration());
+#endif
+
+ // check and remove readers
+ checkReaders(mvm, mvmGen, readers);
+
+ // update safe generation and removeOldGenerations
+ // (safe generation = smallest start generation among remaining readers)
+ safeGen = std::numeric_limits<uint32_t>::max();
+ for (ReaderVector::iterator iter = readers.begin(); iter != readers.end(); ++iter) {
+ if ((*iter)._startGen < safeGen) {
+ safeGen= (*iter)._startGen;
+ }
+ }
+ mvm.trimHoldLists(safeGen);
+
+ // set new values for 1/2 of the keys
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ std::vector<uint32_t> values(valueCount, valueCount * numKeys + key);
+ mvm.set(key, values);
+ }
+ // check new values
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * numKeys + key)) == valueCount);
+ }
+ mvm.transferHoldLists(mvmGen);
+ ++mvmGen;
+
+ // associate reader with current generation
+ IndexVector indices;
+ ExpectedVector expected;
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ indices.push_back(mvm._indices[key]);
+ expected.push_back(std::vector<uint32_t>(valueCount, valueCount * numKeys + key));
+ }
+ readers.push_back(Reader(mvmGen, mvmGen + readDuration,
+ indices, expected));
+ // cycle the duration of the next reader through 4, 2, 4, 2, ...
+ readDuration = (readDuration % 4) + 2;
+
+ // perform compaction
+ Histogram needed(Index::maxValues());
+ needed[valueCount] = maxKeys;
+ EXPECT_TRUE(!mvm.enoughCapacity(needed));
+ mvm.performCompaction(needed);
+
+ // set new value for all keys (the associated reader should see the old values)
+ for (uint32_t key = 0; key < maxKeys; ++key) {
+ std::vector<uint32_t> values(valueCount, valueCount * maxKeys + key);
+ mvm.set(key, values);
+ }
+ // check new values
+ for (uint32_t key = 0; key < maxKeys; ++key) {
+ EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
+ std::vector<uint32_t> buffer(valueCount);
+ EXPECT_TRUE(mvm.get(key, buffer) == valueCount);
+ EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * maxKeys + key)) == valueCount);
+ }
+
+ mvm.transferHoldLists(mvmGen);
+ ++mvmGen;
+ }
+ // Keep advancing the generation until every remaining reader has been
+ // checked and removed.
+ while (!readers.empty()) {
+ checkReaders(mvm, mvmGen, readers);
+ mvm.transferHoldLists(mvmGen);
+ ++mvmGen;
+ }
+}
+
+// Gives the mapping room for exactly one entry per value count, uses that
+// capacity up, and then verifies that further inserts need an explicit
+// performCompaction() first. Finally resets the mapping and checks that the
+// initial capacity is available again.
+void
+MultiValueMappingTest::testManualCompaction()
+{
+    const uint32_t maxCount = Index::maxValues() + 1;
+    const uint32_t totalKeys = maxCount * 2;
+
+    // Room for one entry of each value count in 1 .. Index::maxValues().
+    Histogram initCapacity(Index::maxValues());
+    for (uint32_t count = 1; count < maxCount; ++count) {
+        initCapacity[count] = 1;
+    }
+    MvMapping mvm(dummyCommittedDocIdLimit, totalKeys, initCapacity);
+    EXPECT_TRUE(mvm.enoughCapacity(initCapacity));
+
+    // First pass: key k gets k values, consuming all capacity.
+    for (uint32_t key = 1; key < maxCount; ++key) {
+        Histogram needed(Index::maxValues());
+        needed[key] = 1;
+        std::vector<uint32_t> values(key, key);
+        EXPECT_TRUE(mvm.enoughCapacity(needed));
+        mvm.set(key, values);
+        EXPECT_TRUE(!mvm.enoughCapacity(needed));
+    }
+
+    // Second pass: no capacity is left, so compaction is required before
+    // each set.
+    for (uint32_t key = maxCount + 1; key < totalKeys; ++key) {
+        const uint32_t valueCount = key % maxCount;
+        Histogram needed(Index::maxValues());
+        needed[valueCount] = 1;
+        std::vector<uint32_t> values(valueCount, key);
+        EXPECT_TRUE(!mvm.enoughCapacity(needed));
+        mvm.performCompaction(needed);
+        EXPECT_TRUE(mvm.enoughCapacity(needed));
+        mvm.set(key, values);
+    }
+
+    // Every key must hold (key % maxCount) copies of its own key value.
+    for (uint32_t key = 0; key < totalKeys; ++key) {
+        const uint32_t valueCount = key % maxCount;
+        EXPECT_TRUE(mvm.getValueCount(key) == valueCount);
+        std::vector<uint32_t> readBack(valueCount);
+        EXPECT_TRUE(mvm.get(key, readBack) == valueCount);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(readBack.begin(), readBack.end(), key)) == valueCount);
+    }
+
+    // Reset to maxCount keys with the initial capacity.
+    mvm.reset(maxCount, initCapacity);
+    EXPECT_TRUE(mvm.getNumKeys() == maxCount);
+    EXPECT_TRUE(mvm.enoughCapacity(initCapacity));
+
+    // After the reset all keys are empty and the capacity can be used again.
+    for (uint32_t key = 1; key < maxCount; ++key) {
+        Histogram needed(Index::maxValues());
+        needed[key] = 1;
+        std::vector<uint32_t> values(key, key);
+        EXPECT_EQUAL(mvm.getValueCount(key), 0u);
+        EXPECT_TRUE(mvm.enoughCapacity(needed));
+        mvm.set(key, values);
+        EXPECT_TRUE(!mvm.enoughCapacity(needed));
+    }
+}
+
+// Exercises MvMapping::get(key, buffer, limit) with limits below and above
+// the number of stored values. The expectations show that the return value is
+// the number of values stored for the key, while at most 'limit' values are
+// copied into the buffer.
+//
+// Key 0 has no values, key 1 has 5 values (50) and key 2 has 25 values (250).
+void
+MultiValueMappingTest::testVariousGets()
+{
+    MvMapping::Histogram initCapacity(Index::maxValues());
+    initCapacity[5] = 1;
+    initCapacity[Index::maxValues()] = 1;
+    MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity);
+
+    mvm.set(1, std::vector<uint32_t>(5, 50));
+    mvm.set(2, std::vector<uint32_t>(25, 250));
+    // Key 2 is meant to hold at least Index::maxValues() values.
+    EXPECT_TRUE(25 >= Index::maxValues());
+
+    {
+        // Empty key, zero limit.
+        std::vector<uint32_t> buffer(5);
+        EXPECT_TRUE(mvm.get(0, &buffer[0], 0) == 0);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0);
+    }
+    {
+        // Empty key, non-zero limit.
+        std::vector<uint32_t> buffer(5);
+        EXPECT_TRUE(mvm.get(0, &buffer[0], 5) == 0);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0);
+    }
+    {
+        // 5 stored values, limit 3: only 3 are copied.
+        std::vector<uint32_t> buffer(10);
+        EXPECT_TRUE(mvm.get(1, &buffer[0], 3) == 5);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 3);
+    }
+    {
+        // 5 stored values, limit 10: all 5 are copied.
+        std::vector<uint32_t> buffer(10);
+        EXPECT_TRUE(mvm.get(1, &buffer[0], 10) == 5);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5);
+    }
+    {
+        // 25 stored values, limit 23: only 23 are copied.
+        std::vector<uint32_t> buffer(30);
+        EXPECT_TRUE(mvm.get(2, &buffer[0], 23) == 25);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 23);
+    }
+    {
+        // 25 stored values, limit 30: all 25 are copied.
+        std::vector<uint32_t> buffer(30);
+        EXPECT_TRUE(mvm.get(2, &buffer[0], 30) == 25);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 25);
+    }
+}
+
+// Exercises MvMapping::replace(): replacing the values of an empty key is
+// expected to leave it empty, while replacing the values of a populated key
+// keeps its value count and changes the values read back.
+//
+// Key 0 has no values, key 1 has 5 values (50) and key 2 has 25 values (100).
+void
+MultiValueMappingTest::testReplace()
+{
+    MvMapping::Histogram initCapacity(Index::maxValues());
+    initCapacity[5] = 1;
+    initCapacity[Index::maxValues()] = 1;
+    MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity);
+
+    mvm.set(1, std::vector<uint32_t>(5, 50));
+    mvm.set(2, std::vector<uint32_t>(25, 100));
+    // Key 2 is meant to hold at least Index::maxValues() values.
+    EXPECT_TRUE(25 >= Index::maxValues());
+
+    {
+        // Empty key: replace is expected to have no effect.
+        EXPECT_TRUE(mvm.getValueCount(0) == 0);
+        std::vector<uint32_t> replace(5, 50);
+        mvm.replace(0, replace);
+        EXPECT_TRUE(mvm.getValueCount(0) == 0);
+    }
+    {
+        // Key with 5 values: 50 -> 55.
+        EXPECT_TRUE(mvm.getValueCount(1) == 5);
+        std::vector<uint32_t> buffer(5);
+        EXPECT_TRUE(mvm.get(1, buffer) == 5);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5);
+
+        std::vector<uint32_t> replace(5, 55);
+        mvm.replace(1, replace);
+        EXPECT_TRUE(mvm.getValueCount(1) == 5);
+        EXPECT_TRUE(mvm.get(1, buffer) == 5);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)55)) == 5);
+    }
+    {
+        // Key with 25 values: 100 -> 200.
+        EXPECT_TRUE(mvm.getValueCount(2) == 25);
+        std::vector<uint32_t> buffer(25);
+        EXPECT_TRUE(mvm.get(2, buffer) == 25);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)100)) == 25);
+
+        std::vector<uint32_t> replace(25, 200);
+        mvm.replace(2, replace);
+        EXPECT_TRUE(mvm.getValueCount(2) == 25);
+        EXPECT_TRUE(mvm.get(2, buffer) == 25);
+        EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)200)) == 25);
+    }
+}
+
+// Tracks the expected memory usage in 'exp' alongside the operations done on
+// the mapping and compares it with mvm.getMemoryUsage() at each stage:
+// before inserting, after a first insert, after overwriting every key
+// (producing dead and held bytes), after compaction and after the hold lists
+// have been trimmed.
+void
+MultiValueMappingTest::testMemoryUsage()
+{
+ uint32_t numKeys = Index::maxValues() + 4;
+ MemoryUsage exp;
+ // the index vector: one Index per key
+ exp.incAllocatedBytes(numKeys * sizeof(Index));
+ exp.incUsedBytes(numKeys * sizeof(Index));
+ uint32_t totalCnt = 0;
+
+ Histogram initCapacity(Index::maxValues());
+ for (uint32_t i = 0; i < Index::maxValues(); ++i) {
+ initCapacity[i] = 2;
+ // two entries of i values each
+ exp.incAllocatedBytes(i * 2 * sizeof(uint32_t));
+ }
+ initCapacity[Index::maxValues()] = 12;
+ exp.incAllocatedBytes(12 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+
+ MvMapping mvm(dummyCommittedDocIdLimit,
+ numKeys, initCapacity, GrowStrategy(numKeys));
+
+ // usage before inserting values
+ MemoryUsage usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0));
+
+ // insert values for all keys
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t cnt = key + 1;
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ EXPECT_EQUAL(mvm.getValueCount(key), cnt);
+ totalCnt += cnt;
+ exp.incUsedBytes(cnt * sizeof(uint32_t));
+ // counts >= Index::maxValues() allocate their own array in the vector vector
+ if (cnt >= Index::maxValues()) {
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+ }
+ }
+
+ // usage after inserting values
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0));
+
+ totalCnt = 0;
+ // insert new values for all keys making dead bytes
+ for (uint32_t key = 0; key < numKeys; ++key) {
+ uint32_t cnt = key + 2;
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ EXPECT_EQUAL(mvm.getValueCount(key), cnt);
+ totalCnt += cnt;
+ exp.incUsedBytes(cnt * sizeof(uint32_t));
+ // (cnt - 1) is the number of values the key held before this set
+ if ((cnt - 1) < Index::maxValues()) {
+ exp.incDeadBytes((cnt - 1) * sizeof(uint32_t)); // the previous values are marked dead
+ } else {
+ exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ }
+ if (cnt >= Index::maxValues()) {
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+ }
+ }
+
+ // usage after inserting new values making dead bytes
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+
+ // make sure all internal vectors are put on hold list
+ mvm.performCompaction(initCapacity);
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytes() - numKeys * sizeof(Index) + exp.allocatedBytesOnHold());
+ // after transferring and trimming the hold lists nothing is on hold
+ mvm.transferHoldLists(0);
+ mvm.trimHoldLists(1);
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt);
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold());
+ EXPECT_EQUAL(usage.deadBytes(), uint32_t(0));
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u);
+}
+
+
+// Adds ten keys to an empty mapping, shrinks it to four keys and checks that
+// both the number of keys and the key capacity end up at the shrink target.
+void
+MultiValueMappingTest::testShrink()
+{
+    uint32_t docIdLimit = dummyCommittedDocIdLimit;
+    MvMapping mvm(docIdLimit);
+
+    // Keys are expected to be handed out sequentially, starting at 0.
+    for (uint32_t expectedKey = 0; expectedKey < 10; ++expectedKey) {
+        uint32_t addedKey;
+        mvm.addKey(addedKey);
+        EXPECT_EQUAL(expectedKey, addedKey);
+    }
+    mvm.transferHoldLists(0);
+    mvm.trimHoldLists(1);
+
+    // Lower the doc id limit before asking the mapping to shrink.
+    uint32_t shrinkTarget = 4;
+    docIdLimit = shrinkTarget;
+    mvm.shrinkKeys(shrinkTarget);
+    mvm.transferHoldLists(1);
+    mvm.trimHoldLists(2);
+
+    EXPECT_EQUAL(shrinkTarget, mvm.getNumKeys());
+    EXPECT_EQUAL(shrinkTarget, mvm.getCapacityKeys());
+}
+
+
+// Sets more than Index::maxValues() values for a single key twice in a row
+// and checks the memory accounting: the first array is expected to be put on
+// hold by the second set, and to be released once the hold lists have been
+// transferred and trimmed.
+void
+MultiValueMappingTest::testHoldElem()
+{
+ uint32_t numKeys = 1;
+ MemoryUsage exp;
+ // the index vector: one Index per key
+ exp.incAllocatedBytes(numKeys * sizeof(Index));
+ exp.incUsedBytes(numKeys * sizeof(Index));
+
+ Histogram initCapacity(Index::maxValues());
+ initCapacity[Index::maxValues()] = 3;
+ exp.incAllocatedBytes(3 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector
+
+ MvMapping mvm(dummyCommittedDocIdLimit,
+ numKeys, initCapacity, GrowStrategy(numKeys));
+
+ // usage before inserting values
+ MemoryUsage usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), 0u);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+
+ uint32_t key = 0;
+ uint32_t cnt = Index::maxValues() + 3;
+ // first set: allocates an array of cnt values
+ {
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(cnt * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ }
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+ ++cnt;
+ // second set with one more value: the previous array ((cnt - 1) values)
+ // is expected to go on hold
+ {
+ std::vector<uint32_t> values(cnt, key);
+ mvm.set(key, values);
+ exp.incAllocatedBytes(cnt * sizeof(uint32_t));
+ exp.incUsedBytes(cnt * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) +
+ sizeof(vespalib::Array<uint32_t>));
+ }
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold());
+ // release the held array: its values are no longer allocated, and its
+ // array slot is expected to be counted as dead
+ mvm.transferHoldLists(0);
+ mvm.trimHoldLists(1);
+ exp.incDeadBytes(sizeof(vespalib::Array<uint32_t>));
+ exp.decAllocatedBytes((cnt - 1) * sizeof(uint32_t));
+ usage = mvm.getMemoryUsage();
+ EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt);
+ EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes());
+ EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes());
+ EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes());
+ EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u);
+}
+
+namespace {
+
+// Limit the address space tests expect to be reported.
+const size_t ADDRESS_LIMIT = 134217728; // Index32::offsetSize()
+
+/**
+ * Stores 'count' copies of the value 13 under 'key' in 'mvm'.
+ */
+void
+insertValues(MvMapping &mvm, uint32_t key, uint32_t count)
+{
+    std::vector<uint32_t> thirteens(count, 13);
+    mvm.set(key, thirteens);
+}
+
+/**
+ * Returns a histogram in which every index from 0 up to and including
+ * Index32::maxValues() is set to 'numValuesPerValueClass'.
+ */
+Histogram
+createHistogram(uint32_t numValuesPerValueClass)
+{
+    Histogram histogram(Index32::maxValues());
+    uint32_t valueClass = 0;
+    while (valueClass <= Index32::maxValues()) {
+        histogram[valueClass] = numValuesPerValueClass;
+        ++valueClass;
+    }
+    return histogram;
+}
+
+/**
+ * Owns the mapping used by the address space tests: 20 keys, a histogram
+ * built by createHistogram(4) and GrowStrategy(20).
+ */
+struct AddressSpaceFixture
+{
+    MvMapping mvm;
+
+    AddressSpaceFixture()
+        : mvm(dummyCommittedDocIdLimit, 20, createHistogram(4), GrowStrategy(20))
+    {
+    }
+};
+
+}
+/**
+ * Inserts value vectors of various lengths under distinct keys and checks
+ * mvm.getAddressSpaceUsage() after each group of inserts.
+ *
+ * The second AddressSpace argument is always ADDRESS_LIMIT. The first one is
+ * expected to go 0, 1, 2, 3, 3, 4 as the groups are inserted.
+ * NOTE(review): the first argument is presumably the number of used address
+ * space entries - confirm against the AddressSpace class.
+ */
+void
+MultiValueMappingTest::requireThatAddressSpaceUsageIsReported()
+{
+ AddressSpaceFixture f;
+ MvMapping &mvm = f.mvm;
+
+ EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 1, 1);
+ EXPECT_EQUAL(AddressSpace(1, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 2, 2);
+ insertValues(mvm, 3, 2);
+ EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 4, 13);
+ insertValues(mvm, 5, 13);
+ insertValues(mvm, 6, 13);
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 7, 14);
+ insertValues(mvm, 8, 14);
+ insertValues(mvm, 9, 14);
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); // unchanged by the three 14-value inserts
+ insertValues(mvm, 10, 15);
+ insertValues(mvm, 11, 16);
+ insertValues(mvm, 12, 17);
+ insertValues(mvm, 13, 18);
+ EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+}
+/**
+ * Checks that replacing the values of a key with a vector of a different
+ * length lowers the figure reported by mvm.getAddressSpaceUsage().
+ *
+ * Four keys are given 3 values each (expected figure 4); re-setting two of
+ * them with 4 and 5 values is expected to lower the figure to 3 and then 2.
+ * The same pattern is repeated with 15-value vectors that are re-set with
+ * 14 values.
+ */
+void
+MultiValueMappingTest::requireThatDeadIsNotAccountedInAddressSpaceUsage()
+{
+ AddressSpaceFixture f;
+ MvMapping &mvm = f.mvm;
+
+ EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 1, 3);
+ insertValues(mvm, 2, 3);
+ insertValues(mvm, 3, 3);
+ insertValues(mvm, 4, 3);
+ EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 1, 4); // key 1 already holds 3 values
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 2, 5); // key 2 already holds 3 values
+ EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 10, 15);
+ insertValues(mvm, 11, 15);
+ insertValues(mvm, 12, 15);
+ insertValues(mvm, 13, 15);
+ EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 10, 14); // key 10 already holds 15 values
+ EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+ insertValues(mvm, 11, 14); // key 11 already holds 15 values
+ EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage());
+}
+/**
+ * Test application entry point: initializes the test run as
+ * "multivaluemapping_test", calls every test method of this class in a fixed
+ * order and finishes with TEST_DONE(). The two address space tests are
+ * invoked through TEST_DO.
+ */
+int
+MultiValueMappingTest::Main()
+{
+ TEST_INIT("multivaluemapping_test");
+
+ testIndex32();
+ testIndex64();
+ testSimpleSetAndGet();
+ testChangingValueCount();
+ testHoldListAndGeneration();
+ testManualCompaction();
+ testVariousGets();
+ testReplace();
+ testMemoryUsage();
+ testShrink();
+ testHoldElem();
+ TEST_DO(requireThatAddressSpaceUsageIsReported());
+ TEST_DO(requireThatDeadIsNotAccountedInAddressSpaceUsage());
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::MultiValueMappingTest);
diff --git a/searchlib/src/tests/attribute/postinglist/.gitignore b/searchlib/src/tests/attribute/postinglist/.gitignore
new file mode 100644
index 00000000000..8cf10f7f9dc
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+postinglist_test
+searchlib_postinglist_test_app
diff --git a/searchlib/src/tests/attribute/postinglist/CMakeLists.txt b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt
new file mode 100644
index 00000000000..a22d1ae2fdc
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_postinglist_test_app
+ SOURCES
+ postinglist.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_postinglist_test_app COMMAND searchlib_postinglist_test_app)
diff --git a/searchlib/src/tests/attribute/postinglist/DESC b/searchlib/src/tests/attribute/postinglist/DESC
new file mode 100644
index 00000000000..1499e3070fb
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/DESC
@@ -0,0 +1 @@
+This is a test for the AttributePostingList class.
diff --git a/searchlib/src/tests/attribute/postinglist/FILES b/searchlib/src/tests/attribute/postinglist/FILES
new file mode 100644
index 00000000000..268f6c09f1e
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/FILES
@@ -0,0 +1 @@
+postinglist.cpp
diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
new file mode 100644
index 00000000000..ab95ce27a0e
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp
@@ -0,0 +1,707 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("postinglist_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/rand48.h>
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <set>
+
+#include <vespa/searchlib/btree/datastore.h>
+#include <vespa/searchlib/btree/btreenodeallocator.hpp>
+#include <vespa/searchlib/btree/btreenode.hpp>
+#include <vespa/searchlib/btree/btreenodestore.hpp>
+#include <vespa/searchlib/btree/btreeiterator.hpp>
+#include <vespa/searchlib/btree/btreeroot.hpp>
+#include <vespa/searchlib/btree/btreestore.hpp>
+
+namespace search {
+
+using vespalib::GenerationHandler;
+
+/*
+ * TODO: Make it pass MALLOC_OPTIONS=AJ on freebsd and valgrind on Linux.
+ */
+
+/**
+ * Test application that maintains a btree keyed on integer values stored in
+ * an IntKeyStore, where each tree entry refers to a posting list of doc ids
+ * kept in a BTreeStore. The same content is mirrored in a plain STL model
+ * (map of sets) that the btree structures are validated against.
+ */
+class AttributePostingListTest : public vespalib::TestApp
+{
+private:
+    /* Limited STL version for validation of full version */
+    typedef std::set<uint32_t> STLPostingList;
+    typedef std::map<int, STLPostingList> STLValueTree;
+
+    /**
+     * One generated test entry: a doc id, the value it is stored under and a
+     * random number used to put the entries in a random order.
+     */
+    class RandomValue
+    {
+    public:
+        uint32_t _docId;
+        int      _value;
+        uint32_t _order;
+
+        RandomValue(void)
+            : _docId(0),
+              _value(0),
+              _order(0u)
+        {
+        }
+
+        // 'value' is converted to int when stored, as _value is signed.
+        RandomValue(uint32_t docId, uint32_t value, uint32_t order)
+            : _docId(docId),
+              _value(value),
+              _order(order)
+        {
+        }
+
+        /** Lexicographic ordering on (_value, _docId, _order). */
+        bool
+        operator<(const RandomValue &rhs) const
+        {
+            return (_value < rhs._value ||
+                    (_value == rhs._value &&
+                     (_docId < rhs._docId ||
+                      (_docId == rhs._docId &&
+                       _order < rhs._order))));
+        }
+
+        /** Mirror image of operator<. */
+        bool
+        operator>(const RandomValue &rhs) const
+        {
+            return rhs < *this;
+        }
+
+        bool
+        operator==(const RandomValue &rhs) const
+        {
+            return (_value == rhs._value &&
+                    _docId == rhs._docId &&
+                    _order == rhs._order);
+        }
+    };
+
+    /** Strict weak ordering on (_order, _value, _docId). */
+    class CompareOrder
+    {
+    public:
+        bool
+        operator()(const RandomValue &a, const RandomValue &b) const
+        {
+            return (a._order < b._order ||
+                    (a._order == b._order &&
+                     (a._value < b._value ||
+                      (a._value == b._value &&
+                       a._docId < b._docId))));
+        }
+    };
+    std::vector<RandomValue> _randomValues;
+
+public:
+    typedef btree::DataStore<int> IntKeyStore;
+    typedef btree::BTreeKeyData<uint32_t, btree::BTreeNoLeafData>
+    AttributePosting;
+    typedef btree::BTreeStore<uint32_t,
+                              btree::BTreeNoLeafData,
+                              btree::NoAggregated,
+                              std::less<uint32_t>,
+                              btree::BTreeDefaultTraits>
+    PostingList;
+    typedef PostingList::NodeAllocatorType PostingListNodeAllocator;
+    typedef btree::EntryRef PostingIdx;
+    typedef btree::EntryRef StoreIndex;
+
+    /**
+     * Compares tree keys by the integer they refer to in the key store.
+     * An invalid StoreIndex stands for the explicit value given to the
+     * constructor (0 when none is given), which is how lookups by value
+     * are expressed.
+     */
+    class IntComp {
+    private:
+        const IntKeyStore & _store;
+        int                 _value;
+        int getValue(const StoreIndex & idx) const {
+            if (idx.valid()) {
+                return _store.getEntry(idx);
+            }
+            return _value;
+        }
+    public:
+        IntComp(const IntKeyStore & store) : _store(store), _value(0) {}
+        IntComp(const IntKeyStore & store, int value) : _store(store), _value(value) {}
+        bool operator() (const StoreIndex & lhs, const StoreIndex & rhs) const {
+            return getValue(lhs) < getValue(rhs);
+        }
+    };
+
+    typedef btree::BTreeRoot<StoreIndex, PostingIdx,
+                             btree::NoAggregated,
+                             const IntComp &> IntEnumTree;
+    typedef IntEnumTree::NodeAllocatorType IntEnumNodeAllocator;
+    typedef IntEnumTree Tree;
+    typedef IntEnumNodeAllocator TreeManager;
+    typedef IntKeyStore ValueHandle;
+    typedef std::vector<RandomValue> RandomValuesVector;
+private:
+    GenerationHandler     _handler;
+    // Allocated by allocTree() and deleted by freeTree().
+    IntKeyStore          *_intKeyStore;
+    IntEnumNodeAllocator *_intNodeAlloc;
+    IntEnumTree          *_intTree;
+    PostingList          *_intPostings;
+    STLValueTree         *_stlTree;
+
+    Rand48   _randomGenerator;
+    uint32_t _generation;
+
+    void
+    allocTree(void);
+
+    void
+    freeTree(bool verbose);
+
+    void
+    fillRandomValues(unsigned int count,
+                     unsigned int mvcount);
+
+    void
+    insertRandomValues(Tree &tree,
+                       TreeManager &treeMgr,
+                       ValueHandle &valueHandle,
+                       PostingList &postings,
+                       STLValueTree *stlTree,
+                       RandomValuesVector &values);
+
+    void
+    removeRandomValues(Tree &tree,
+                       TreeManager &treeMgr,
+                       ValueHandle &valueHandle,
+                       PostingList &postings,
+                       STLValueTree *stlTree,
+                       RandomValuesVector &values);
+
+    void
+    lookupRandomValues(Tree &tree,
+                       TreeManager &treeMgr,
+                       const ValueHandle &valueHandle,
+                       PostingList &postings,
+                       STLValueTree *stlTree,
+                       RandomValuesVector &values);
+
+    void
+    sortRandomValues(void);
+
+    void
+    doCompactEnumStore(Tree &tree,
+                       TreeManager &treeMgr,
+                       ValueHandle &valueHandle);
+
+    void
+    doCompactPostingList(Tree &tree,
+                         TreeManager &treeMgr,
+                         PostingList &postings,
+                         PostingListNodeAllocator &postingsAlloc);
+
+    void
+    bumpGeneration(Tree &tree,
+                   ValueHandle &valueHandle,
+                   PostingList &postings,
+                   PostingListNodeAllocator &postingsAlloc);
+
+    void
+    removeOldGenerations(Tree &tree,
+                         ValueHandle &valueHandle,
+                         PostingList &postings,
+                         PostingListNodeAllocator &postingsAlloc);
+
+    static const char *
+    frozenName(bool frozen)
+    {
+        return frozen ? "frozen" : "thawed";
+    }
+public:
+    AttributePostingListTest(void)
+        : vespalib::TestApp(),
+          _randomValues(),
+          _handler(),
+          _intKeyStore(nullptr),
+          _intNodeAlloc(nullptr),
+          _intTree(nullptr),
+          _intPostings(nullptr),
+          _stlTree(nullptr),
+          _randomGenerator(),
+          _generation(0u)   // was left uninitialized by the original constructor
+    {
+    }
+
+    int Main(void);
+};
+
+
+/**
+ * Allocates the key store, the node allocator, the enum tree, the posting
+ * list store and the STL reference tree with new and stores the raw pointers
+ * in the corresponding members. freeTree() deletes them again.
+ */
+void
+AttributePostingListTest::allocTree(void)
+{
+ _intKeyStore = new IntKeyStore;
+ _intNodeAlloc = new IntEnumNodeAllocator();
+ _intTree = new IntEnumTree();
+ _intPostings = new PostingList();
+ _stlTree = new STLValueTree;
+}
+
+/**
+ * Clears the enum tree, pushes the held data of the node allocator and the
+ * posting list store through transferHoldLists() / trimHoldLists() with one
+ * generation increment in between, and finally deletes every object created
+ * by allocTree(), resetting the members to NULL.
+ *
+ * The allocated and on-hold byte counts of the node allocator are logged
+ * before the clear, before the hold lists are processed and after they have
+ * been trimmed.
+ *
+ * @param verbose ignored.
+ */
+void
+AttributePostingListTest::freeTree(bool verbose)
+{
+ (void) verbose;
+ LOG(info,
+ "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)"
+ ", %zu leaves",
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()),
+ _intTree->size(*_intNodeAlloc));
+ _intTree->clear(*_intNodeAlloc);
+ LOG(info,
+ "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)",
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()));
+ _intNodeAlloc->freeze();
+ _intPostings->freeze();
+ _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration());
+ _intPostings->clearBuilder();
+ _intPostings->transferHoldLists(_handler.getCurrentGeneration());
+ _handler.incGeneration(); // both transfers above used the generation that was current before this increment
+ _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration());
+ _intPostings->trimHoldLists(_handler.getFirstUsedGeneration());
+ LOG(info,
+ "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)",
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()),
+ static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()));
+ delete _stlTree;
+ _stlTree = NULL;
+ delete _intTree;
+ _intTree = NULL;
+ delete _intNodeAlloc;
+ _intNodeAlloc = NULL;
+ delete _intKeyStore;
+ _intKeyStore = NULL;
+ delete _intPostings;
+ _intPostings = NULL;
+}
+
+
+/**
+ * Replaces the content of _randomValues with a deterministic pseudo random
+ * sequence (the generator is seeded with 42) built in three phases:
+ *  1. 'count' single entries,
+ *  2. for every mv in [1, 99]: 'mvcount' groups of mv entries,
+ *  3. for mv = 10, 30, 90, ... while mv < 4000: 2 * mv entries.
+ * The entries are then sorted with CompareOrder and the final size is
+ * verified.
+ *
+ * @param count   number of entries generated in phase 1.
+ * @param mvcount number of groups generated per group size in phase 2.
+ */
+void
+AttributePostingListTest::
+fillRandomValues(unsigned int count,
+                 unsigned int mvcount)
+{
+    const unsigned int mvmax = 100;
+    // Number of entries produced by phase 2: mvcount * (1 + 2 + ... + 99).
+    const unsigned int mvcount2 = mvcount * (mvmax * (mvmax - 1)) / 2;
+    // Number of entries produced by phase 3.
+    unsigned int mvcount3 = 0;
+    for (unsigned int mv = 10; mv < 4000; mv = mv * 3) {
+        mvcount3 += mv * 2;
+    }
+
+    LOG(info,
+        "Filling %u+%u random values", count, mvcount2);
+    _randomValues.clear();
+    // Reserve room for all three phases, not only for the first one, so the
+    // vector does not have to grow repeatedly while it is being filled.
+    _randomValues.reserve(count + mvcount2 + mvcount3);
+    _randomGenerator.srand48(42);
+
+    // The order in which docId / val / order are drawn is part of the
+    // generated sequence and must not be changed.
+    for (unsigned int i = 0; i < count; i++) {
+        uint32_t docId = _randomGenerator.lrand48();
+        uint32_t val = _randomGenerator.lrand48();
+        uint32_t order = _randomGenerator.lrand48();
+        _randomValues.push_back(RandomValue(docId, val, order));
+    }
+    for (unsigned int mv = 1; mv < mvmax; mv++) {
+        for (unsigned int i = 0; i < mvcount; i++) {
+            for (unsigned int j = 0; j < mv; j++) {
+                uint32_t docId = _randomGenerator.lrand48();
+                uint32_t val = _randomGenerator.lrand48();
+                uint32_t order = _randomGenerator.lrand48();
+                _randomValues.push_back(RandomValue(docId, val, order));
+            }
+        }
+    }
+    for (unsigned int mv = 10; mv < 4000; mv = mv * 3) {
+        for (unsigned int j = 0; j < mv; j++) {
+            // Note: in this phase the value is drawn before the doc id.
+            uint32_t val = _randomGenerator.lrand48();
+            uint32_t docId = _randomGenerator.lrand48();
+            uint32_t order = _randomGenerator.lrand48();
+            _randomValues.push_back(RandomValue(docId, val, order));
+            val = _randomGenerator.lrand48();
+            docId = _randomGenerator.lrand48();
+            order = _randomGenerator.lrand48();
+            _randomValues.push_back(RandomValue(docId, val, order));
+        }
+    }
+    std::sort(_randomValues.begin(),
+              _randomValues.end(),
+              CompareOrder());
+
+    EXPECT_TRUE(_randomValues.size() == count + mvcount2 + mvcount3);
+}
+
+
+/**
+ * Inserts every entry of 'values' into the btree structures.
+ *
+ * For each entry the value is looked up in 'tree'; if it is missing it is
+ * added to 'valueHandle' and inserted in the tree. The doc id is then added
+ * to the posting list referred to by the tree entry, and the (possibly
+ * changed) posting list reference is written back to the tree.
+ *
+ * When 'stlTree' is non-NULL the same insert is applied to the STL model and
+ * the posting list is compared element by element with the STL set.
+ *
+ * @param tree        enum tree mapping value references to posting lists.
+ * @param treeMgr     node allocator used by 'tree'.
+ * @param valueHandle store holding the integer values the tree keys refer to.
+ * @param postings    posting list store.
+ * @param stlTree     optional STL reference model; may be NULL.
+ * @param values      entries to insert.
+ */
+void
+AttributePostingListTest::
+insertRandomValues(Tree &tree,
+                   TreeManager &treeMgr,
+                   ValueHandle &valueHandle,
+                   PostingList &postings,
+                   STLValueTree *stlTree,
+                   RandomValuesVector &
+                   values)
+{
+    LOG(info, "insertRandomValues start");
+    RandomValuesVector::iterator ie = values.end();
+    for (RandomValuesVector::iterator i = values.begin(); i != ie; ++i) {
+        Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+        if (!itr.valid()) {
+#if 0
+            if (valueHandle.needResize())
+                doCompactEnumStore(tree, treeMgr, valueHandle);
+#endif
+            StoreIndex idx = valueHandle.addEntry(i->_value);
+            if (tree.insert(idx, PostingIdx(), treeMgr, IntComp(valueHandle))) {
+                itr = tree.find(idx, treeMgr, IntComp(valueHandle));
+            }
+        }
+        ASSERT_TRUE(itr.valid());
+        EXPECT_EQUAL(i->_value, valueHandle.getEntry(itr.getKey()));
+
+        /* TODO: Insert docid to postinglist */
+        PostingIdx newIdx = itr.getData();
+        AttributePosting newPosting(i->_docId,
+                                    btree::BTreeNoLeafData());
+        std::vector<AttributePosting> additions;
+        std::vector<uint32_t> removals;
+        additions.push_back(newPosting);
+        // 'removals' is empty here; data() is used because operator[] on an
+        // empty vector is undefined behavior.
+        postings.apply(newIdx, additions.data(), additions.data() + additions.size(),
+                       removals.data(), removals.data() + removals.size());
+        // Fence before the new reference is written to the tree entry.
+        std::atomic_thread_fence(std::memory_order_release);
+        itr.writeData(newIdx);
+
+        if (stlTree != NULL) {
+            STLValueTree::iterator it;
+            it = stlTree->find(i->_value);
+            if (it == stlTree->end()) {
+                std::pair<STLValueTree::iterator,bool> ir =
+                    stlTree->insert(std::make_pair(i->_value,
+                                                   STLPostingList()));
+                ASSERT_TRUE(ir.second && ir.first != stlTree->end() &&
+                            ir.first->first == i->_value);
+                it = ir.first;
+            }
+            ASSERT_TRUE(it != stlTree->end() && it->first == i->_value);
+            // The set cannot be empty after this insert, so only the
+            // non-empty comparison is needed below.
+            it->second.insert(i->_docId);
+
+            size_t postingsize;
+
+            ASSERT_TRUE(itr.valid());
+            postingsize = postings.size(newIdx);
+            ASSERT_TRUE(postingsize > 0 &&
+                        postingsize == it->second.size());
+
+            STLPostingList::iterator it3 = it->second.begin();
+            STLPostingList::iterator it3e = it->second.end();
+            PostingList::Iterator it0;
+            it0 = postings.begin(newIdx);
+
+            // Both sequences must contain the same doc ids in the same order.
+            while (it3 != it3e) {
+                ASSERT_TRUE(it0.valid());
+                ASSERT_TRUE(*it3 == it0.getKey());
+                ++it3;
+                ++it0;
+            }
+            ASSERT_TRUE(!it0.valid());
+        }
+    }
+    ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle)));
+    LOG(info, "insertRandomValues done");
+}
+
+
+/**
+ * Removes every entry of 'values' from the btree structures.
+ *
+ * For each entry the value is looked up in 'tree'. If found, the doc id is
+ * removed from the posting list the tree entry refers to; a changed posting
+ * list reference is written back to the tree, and the tree entry itself is
+ * removed when the reference is no longer valid.
+ *
+ * When 'stlTree' is non-NULL the same removal is applied to the STL model
+ * (where the entry is required to exist) and the remaining posting list is
+ * compared element by element with the STL set.
+ *
+ * @param tree        enum tree mapping value references to posting lists.
+ * @param treeMgr     node allocator used by 'tree'.
+ * @param valueHandle store holding the integer values the tree keys refer to.
+ * @param postings    posting list store.
+ * @param stlTree     optional STL reference model; may be NULL.
+ * @param values      entries to remove.
+ */
+void
+AttributePostingListTest::
+removeRandomValues(Tree &tree,
+                   TreeManager &treeMgr,
+                   ValueHandle &valueHandle,
+                   PostingList &postings,
+                   STLValueTree *stlTree,
+                   RandomValuesVector &values)
+{
+    LOG(info, "removeRandomValues start");
+    RandomValuesVector::iterator ie = values.end();
+    for (RandomValuesVector::iterator i = values.begin(); i != ie; ++i) {
+        Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+        PostingIdx newIdx;
+        /*
+         * TODO: Remove docid from postinglist, and only remove
+         * value from tree if postinglist is empty
+         */
+        if (itr.valid()) {
+            PostingIdx oldIdx = itr.getData();
+            newIdx = oldIdx;
+            std::vector<AttributePosting> additions;
+            std::vector<uint32_t> removals;
+            removals.push_back(i->_docId);
+            // 'additions' is empty here; data() is used because operator[]
+            // on an empty vector is undefined behavior.
+            postings.apply(newIdx, additions.data(), additions.data() + additions.size(),
+                           removals.data(), removals.data() + removals.size());
+            if (newIdx != oldIdx) {
+                // Fence before the new reference is written to the tree entry.
+                std::atomic_thread_fence(std::memory_order_release);
+                itr.writeData(newIdx);
+            }
+            if (!newIdx.valid()) {
+                if (tree.remove(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value))) {
+                    // Look the value up again so that 'itr' reflects the removal.
+                    itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+                }
+            }
+        }
+        if (stlTree != NULL) {
+            STLValueTree::iterator it;
+            it = stlTree->find(i->_value);
+            ASSERT_TRUE(it != stlTree->end() && it->first == i->_value);
+            STLPostingList::iterator it2;
+            it2 = it->second.find(i->_docId);
+            ASSERT_TRUE(it2 != it->second.end() &&
+                        *it2 == i->_docId);
+            it->second.erase(it2);
+
+            if (it->second.empty()) {
+                // Last doc id for this value: the tree entry must be gone too.
+                stlTree->erase(it);
+                ASSERT_TRUE(!itr.valid());
+            } else {
+                size_t postingsize;
+
+                ASSERT_TRUE(itr.valid());
+                postingsize = postings.size(newIdx);
+                ASSERT_TRUE(postingsize > 0 &&
+                            postingsize == it->second.size());
+
+                STLPostingList::iterator it3 = it->second.begin();
+                STLPostingList::iterator it3e = it->second.end();
+                PostingList::Iterator it0;
+                it0 = postings.begin(newIdx);
+
+                // Both sequences must contain the same doc ids in the same order.
+                while (it3 != it3e) {
+                    ASSERT_TRUE(it0.valid());
+                    ASSERT_TRUE(*it3 == it0.getKey());
+                    ++it3;
+                    ++it0;
+                }
+                ASSERT_TRUE(!it0.valid());
+            }
+        }
+    }
+    ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle)));
+    LOG(info, "removeRandomValues done");
+}
+
+
+/**
+ * Looks up every entry of 'values' in 'tree' and requires that the value is
+ * present. When 'stlTree' is non-NULL the posting list referred to by the
+ * tree entry is additionally compared, element by element, with the doc id
+ * set the STL model holds for the same value.
+ */
+void
+AttributePostingListTest::
+lookupRandomValues(Tree &tree,
+                   TreeManager &treeMgr,
+                   const ValueHandle &valueHandle,
+                   PostingList &postings,
+                   STLValueTree *stlTree,
+                   RandomValuesVector &values)
+{
+    LOG(info, "lookupRandomValues start");
+    for (RandomValuesVector::iterator i = values.begin(), ie = values.end(); i != ie; ++i) {
+        Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value));
+        ASSERT_TRUE(itr.valid() &&
+                    valueHandle.getEntry(itr.getKey()) == i->_value);
+        if (stlTree == NULL) {
+            continue;   // no reference model to compare with
+        }
+
+        STLValueTree::iterator it = stlTree->find(i->_value);
+        ASSERT_TRUE(it != stlTree->end() && it->first == i->_value);
+
+        if (it->second.empty()) {
+            stlTree->erase(it);
+            ASSERT_TRUE(!itr.valid());
+            continue;
+        }
+
+        ASSERT_TRUE(itr.valid());
+        size_t postingsize = postings.size(itr.getData());
+        ASSERT_TRUE(postingsize > 0 &&
+                    postingsize == it->second.size());
+
+        // Walk the STL set and the posting list side by side.
+        PostingList::Iterator it0 = postings.begin(itr.getData());
+        STLPostingList::iterator it3e = it->second.end();
+        for (STLPostingList::iterator it3 = it->second.begin(); it3 != it3e; ++it3, ++it0) {
+            ASSERT_TRUE(it0.valid());
+            ASSERT_TRUE(*it3 == it0.getKey());
+        }
+        ASSERT_TRUE(!it0.valid());
+    }
+    LOG(info, "lookupRandomValues done");
+}
+
+
+/**
+ * Rewrites every key of 'tree' so that it refers to a fresh copy of its
+ * value in 'valueHandle'.
+ *
+ * The buffers that are active on entry are remembered, the active buffer is
+ * switched, each tree key is re-added to the store and written back to the
+ * tree, and finally the remembered buffers are put on hold and run through
+ * transferHoldLists() / trimHoldLists() with a generation increment between.
+ */
+void
+AttributePostingListTest::doCompactEnumStore(Tree &tree,
+                                             TreeManager &treeMgr,
+                                             ValueHandle &valueHandle)
+{
+    typedef GenerationHandler::generation_t generation_t;
+
+    LOG(info,
+        "doCompactEnumStore start");
+
+    Tree::Iterator i = tree.begin(treeMgr);
+
+    // Collect the ids of all buffers that are currently active.
+    std::vector<uint32_t> toHold;
+    const uint32_t numBuffers = valueHandle.getNumBuffers();
+    uint32_t bufferId = 0;
+    while (bufferId < numBuffers) {
+        if (valueHandle.getBufferState(bufferId)._state == btree::BufferState::ACTIVE) {
+            toHold.push_back(bufferId);
+            // Freelists already disabled due to variable sized data
+        }
+        ++bufferId;
+    }
+    valueHandle.switchActiveBuffer(0, 0u);
+
+    // Copy each value and repoint the tree key to the copy.
+    while (i.valid()) {
+        StoreIndex nv = valueHandle.addEntry(valueHandle.getEntry(i.getKey()));
+
+        std::atomic_thread_fence(std::memory_order_release);
+        i.writeKey(nv);
+        ++i;
+    }
+
+    for (uint32_t heldBufferId : toHold) {
+        valueHandle.holdBuffer(heldBufferId);
+    }
+    generation_t generation = _handler.getCurrentGeneration();
+    valueHandle.transferHoldLists(generation);
+    _handler.incGeneration();
+    valueHandle.trimHoldLists(_handler.getFirstUsedGeneration());
+
+    LOG(info,
+        "doCompactEnumStore done");
+}
+
+/**
+ * Placeholder for posting list compaction. The compaction call itself is
+ * disabled with "#if 0", so the function currently only logs a start and a
+ * done message; all four parameters are unused.
+ */
+void
+AttributePostingListTest::
+doCompactPostingList(Tree &tree,
+ TreeManager &treeMgr,
+ PostingList &postings,
+ PostingListNodeAllocator &postingsAlloc)
+{
+ LOG(info,
+ "doCompactPostingList start");
+
+#if 0
+ Tree::Iterator i(tree.begin(treeMgr));
+
+ postings.performCompaction(i, capacityNeeded);
+#else
+ (void) tree;
+ (void) treeMgr;
+ (void) postings;
+ (void) postingsAlloc;
+#endif
+
+ LOG(info,
+ "doCompactPostingList done");
+}
+
+
+/**
+ * Freezes 'postingsAlloc', transfers the hold lists of 'postingsAlloc' and
+ * 'postings' using the current generation and then increments the
+ * generation. 'tree' and 'valueHandle' are unused.
+ */
+void
+AttributePostingListTest::
+bumpGeneration(Tree &tree,
+               ValueHandle &valueHandle,
+               PostingList &postings,
+               PostingListNodeAllocator &postingsAlloc)
+{
+    (void) tree;
+    (void) valueHandle;
+
+    postingsAlloc.freeze();
+    // Read once: the generation is not changed until incGeneration() below.
+    const GenerationHandler::generation_t current = _handler.getCurrentGeneration();
+    postingsAlloc.transferHoldLists(current);
+    postings.transferHoldLists(current);
+    _handler.incGeneration();
+}
+
+/**
+ * Trims the hold lists of 'postingsAlloc' and 'postings' using the first
+ * generation still in use according to the generation handler. 'tree' and
+ * 'valueHandle' are unused.
+ */
+void
+AttributePostingListTest::
+removeOldGenerations(Tree &tree,
+                     ValueHandle &valueHandle,
+                     PostingList &postings,
+                     PostingListNodeAllocator &postingsAlloc)
+{
+    (void) tree;
+    (void) valueHandle;
+
+    const GenerationHandler::generation_t firstUsed = _handler.getFirstUsedGeneration();
+    postingsAlloc.trimHoldLists(firstUsed);
+    postings.trimHoldLists(firstUsed);
+}
+/**
+ * Test application entry point. Generates the random entries, allocates the
+ * tree structures and then runs: insert, lookup, enum store compaction,
+ * remove, insert again, and finally frees everything.
+ */
+int
+AttributePostingListTest::Main()
+{
+ TEST_INIT("postinglist_test");
+
+ fillRandomValues(1000, 10);
+
+ allocTree();
+ insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ _intNodeAlloc->freeze(); // the node allocator is frozen and its hold lists transferred before the key store is compacted
+ _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration());
+ doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore);
+ removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings,
+ _stlTree, _randomValues);
+ freeTree(true);
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::AttributePostingListTest);
diff --git a/searchlib/src/tests/attribute/postinglistattribute/.gitignore b/searchlib/src/tests/attribute/postinglistattribute/.gitignore
new file mode 100644
index 00000000000..9614cdd7626
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+postinglistattribute_test
+searchlib_postinglistattribute_test_app
diff --git a/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt
new file mode 100644
index 00000000000..77d137c7b6e
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_postinglistattribute_test_app
+ SOURCES
+ postinglistattribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_postinglistattribute_test_app COMMAND sh postinglistattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/postinglistattribute/DESC b/searchlib/src/tests/attribute/postinglistattribute/DESC
new file mode 100644
index 00000000000..04c97a729a0
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/DESC
@@ -0,0 +1 @@
+Unit tests for subclasses of PostingListAttribute.
diff --git a/searchlib/src/tests/attribute/postinglistattribute/FILES b/searchlib/src/tests/attribute/postinglistattribute/FILES
new file mode 100644
index 00000000000..56029570a21
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/FILES
@@ -0,0 +1 @@
+postinglistattribute.cpp
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
new file mode 100644
index 00000000000..5e248dc8758
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp
@@ -0,0 +1,1021 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("postinglistattribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/postinglistattribute.h>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.h>
+#include <vespa/searchlib/attribute/multinumericpostattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.h>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/attributevector.hpp>
+#include <vespa/vespalib/util/compress.h>
+
+using std::shared_ptr;
+
+// Definition of FastOS_UNIX_File::Sync() supplied by this test program: it
+// does no work at all and always reports success.
+bool
+FastOS_UNIX_File::Sync()
+{
+    return true;
+}
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::BasicType;
+using attribute::Config;
+using queryeval::PostingInfo;
+using queryeval::MinMaxPostingInfo;
+using search::fef::TermFieldMatchData;
+using search::queryeval::SearchIterator;
+
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr;
+
+// Writes the document ids produced by 'it' to 'ss' as a comma separated list.
+// The iterator is initialised for its full range and the scan starts with a
+// seek to document 1; every hit is followed by a seek to the next id.
+void
+toStr(std::stringstream &ss, SearchIterator &it)
+{
+    it.initFullRange();
+    const char *separator = "";
+    for (it.seek(1u); !it.isAtEnd(); it.seek(it.getDocId() + 1)) {
+        ss << separator << it.getDocId();
+        separator = ",";    // every hit after the first one is preceded by a comma
+    }
+}
+
+
+// Renders the hits of 'it' with toStr() and expects the text to equal 'exp'.
+// Returns true when the expectation holds, false otherwise.
+bool
+assertIterator(const std::string &exp, SearchIterator &it)
+{
+    std::stringstream ss;
+    toStr(ss, it);
+    if (EXPECT_EQUAL(exp, ss.str())) {
+        return true;
+    }
+    return false;
+}
+
+
+// Test application for attribute vectors that maintain posting lists.
+// Main() runs the parameterless test* members in sequence; the templated
+// overloads carry the per-attribute-type logic they dispatch to.
+class PostingListAttributeTest : public vespalib::TestApp
+{
+private:
+ typedef IntegerAttribute::largeint_t largeint_t;
+ typedef AttributeVector::SP AttributePtr;
+ typedef std::set<AttributeVector::DocId> DocSet;
+
+ // int32 posting list attributes: single value, array and weighted set.
+ typedef SingleValueNumericPostingAttribute<
+ EnumAttribute<IntegerAttributeTemplate<int32_t> > >
+ Int32PostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<IntegerAttributeTemplate<int32_t> >,
+ multivalue::MVMTemplateArg<
+ multivalue::Value<EnumStoreBase::Index>,
+ multivalue::Index32> > Int32ArrayPostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<IntegerAttributeTemplate<int32_t> >,
+ multivalue::MVMTemplateArg<
+ multivalue::WeightedValue<EnumStoreBase::Index>,
+ multivalue::Index32> > Int32WsetPostingListAttribute;
+
+ // float posting list attributes: single value, array and weighted set.
+ typedef SingleValueNumericPostingAttribute<
+ EnumAttribute<FloatingPointAttributeTemplate<float> > >
+ FloatPostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<FloatingPointAttributeTemplate<float> >,
+ multivalue::MVMTemplateArg<
+ multivalue::Value<EnumStoreBase::Index>,
+ multivalue::Index32> > FloatArrayPostingListAttribute;
+ typedef MultiValueNumericPostingAttribute<
+ EnumAttribute<FloatingPointAttributeTemplate<float> >,
+ multivalue::MVMTemplateArg<
+ multivalue::WeightedValue<EnumStoreBase::Index>,
+ multivalue::Index32> > FloatWsetPostingListAttribute;
+
+ // string posting list attributes: single value, array and weighted set.
+ typedef SingleValueStringPostingAttribute StringPostingListAttribute;
+ typedef ArrayStringPostingAttribute StringArrayPostingListAttribute;
+ typedef WeightedSetStringPostingAttribute StringWsetPostingListAttribute;
+
+ // Fills 'v' with the fixed data set that testMinMax verifies. Specialized
+ // for IntegerAttribute and StringAttribute.
+ template <typename VectorType>
+ void
+ populate(VectorType &v);
+
+ // Returns the attribute held by 'v' as a VectorType reference; the
+ // dynamic_cast result is checked with assert().
+ template <typename VectorType>
+ VectorType &
+ as(AttributePtr &v);
+
+ // Shorthand for as<IntegerAttribute>(v).
+ IntegerAttribute &
+ asInt(AttributePtr &v);
+
+ // Shorthand for as<StringAttribute>(v).
+ StringAttribute &
+ asString(AttributePtr &v);
+
+ // Writes a serialized single-term query item (a prefix term when 'prefix'
+ // is set) for 'term' in 'index' into 'buffer'.
+ void
+ buildTermQuery(std::vector<char> & buffer,
+ const vespalib::string & index,
+ const vespalib::string & term, bool prefix);
+
+ // Creates a search context on 'vec' for 'term', using the name of 'vec' as
+ // the index name of the query.
+ template <typename V, typename T>
+ SearchContextPtr
+ getSearch(const V & vec, const T & term, bool prefix);
+
+ // Search context for the first populate() value: the range "[-42;-42]" for
+ // integer attributes, the term "foo" for string attributes.
+ template <typename V>
+ SearchContextPtr
+ getSearch(const V & vec);
+
+ // Search context for the second populate() value: the range "[-43;-43]"
+ // for integer attributes, the term "bar" for string attributes.
+ template <typename V>
+ SearchContextPtr
+ getSearch2(const V & vec);
+
+ // Runs the "foo" search on 'sa' and expects the comma separated hit list
+ // to equal 'exp'.
+ bool
+ assertSearch(const std::string &exp, StringAttribute &sa);
+
+ // Adds 'numDocs' documents to 'ptr', asserting that ids are handed out
+ // consecutively starting at 0.
+ void addDocs(const AttributePtr & ptr, uint32_t numDocs);
+
+ // Verifies that the posting list of values[i] holds exactly the document
+ // ids from range.getBegin(i) up to, not including, range.getEnd(i).
+ template <typename VectorType, typename BufferType, typename Range>
+ void checkPostingList(const VectorType & vec, const std::vector<BufferType> & values, const Range & range);
+
+ // Posting list test for one attribute type: ptr1 is updated, then saved
+ // under the base file name of ptr2 and loaded through ptr2.
+ template <typename VectorType, typename BufferType>
+ void testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2,
+ uint32_t numDocs, const std::vector<BufferType> & values);
+ void testPostingList();
+
+ // Verifies that the posting list of 'value' holds exactly the documents in
+ // 'expected'.
+ template <typename AttributeType, typename ValueType>
+ void checkPostingList(AttributeType & vec, ValueType value, DocSet expected);
+ // Verifies that 'value' has no entry in the posting dictionary.
+ template <typename AttributeType, typename ValueType>
+ void checkNonExistantPostingList(AttributeType & vec, ValueType value);
+ // Verifies posting lists after arithmetic (add/sub/mul/div) updates.
+ template <typename AttributeType, typename ValueType>
+ void testArithmeticValueUpdate(const AttributePtr & ptr);
+ void testArithmeticValueUpdate();
+
+ // Stores 'value' in five documents of ptr1, saves, loads through ptr2 and
+ // verifies the loaded content.
+ template <typename VectorType, typename ValueType>
+ void testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value);
+ void testReload();
+
+ // Checks the min/max weight posting info and the first hit of both
+ // populate() searches. 'trimmed' selects the expected state: 0 for the
+ // full data set, 1 and 2 for the states after the successive clearDoc()
+ // rounds in the two-pointer overload.
+ template <typename VectorType>
+ void
+ testMinMax(AttributePtr &ptr1, uint32_t trimmed);
+
+ // Populates ptr1, reloads the content through ptr2 and runs the
+ // single-pointer overload for each 'trimmed' state.
+ template <typename VectorType>
+ void
+ testMinMax(AttributePtr &ptr1, AttributePtr &ptr2);
+
+ void
+ testMinMax(void);
+
+ // Verifies that the "foo" search on a single value string attribute also
+ // hits differently cased spellings such as "FOo" and "FOO".
+ void
+ testStringFold(void);
+public:
+ int Main();
+};
+
+// Fills an integer attribute with the data set used by testMinMax:
+//  - documents 0 and 9 are only cleared,
+//  - documents 7, 20 and 25 get -43 (single value) or the three appends
+//    (-42, 27), (-43, 14 or 12), (-42, -3) (multi value); only document 25
+//    uses 12 as third argument,
+//  - every other document gets -42, appended with third argument 3 for multi
+//    value attributes.
+// Each populated document is committed separately, followed by one final
+// commit.
+template <>
+void
+PostingListAttributeTest::populate<IntegerAttribute>(IntegerAttribute &v)
+{
+    const size_t docCount = v.getNumDocs();
+    for (size_t i = 0; i < docCount; ++i) {
+        v.clearDoc(i);
+        if (i == 0 || i == 9) {
+            continue;   // stays cleared, no commit of its own
+        }
+        const bool special = (i == 7 || i == 20 || i == 25);
+        if (!special) {
+            if (v.hasMultiValue()) {
+                v.append(i, -42, 3);
+            } else {
+                v.update(i, -42);
+            }
+        } else if (v.hasMultiValue()) {
+            v.append(i, -42, 27);
+            v.append(i, -43, (i == 25) ? 12 : 14);
+            v.append(i, -42, -3);
+        } else {
+            EXPECT_TRUE( v.update(i, -43) );
+        }
+        v.commit();
+    }
+    v.commit();
+}
+
+// Fills a string attribute with the data set used by testMinMax:
+//  - documents 0 and 9 are only cleared,
+//  - documents 7, 20 and 25 get "bar" (single value) or the three appends
+//    ("foo", 27), ("bar", 14 or 12), ("foo", -3) (multi value); only document
+//    25 uses 12 as third argument,
+//  - every other document gets "foo", appended with third argument 3 for
+//    multi value attributes.
+// Each populated document is committed separately.
+template <>
+void
+PostingListAttributeTest::populate<StringAttribute>(StringAttribute &v)
+{
+    const size_t docCount = v.getNumDocs();
+    for (size_t i = 0; i < docCount; ++i) {
+        v.clearDoc(i);
+        if (i == 0 || i == 9) {
+            continue;   // stays cleared, no commit of its own
+        }
+        const bool special = (i == 7 || i == 20 || i == 25);
+        if (!special) {
+            if (v.hasMultiValue()) {
+                v.append(i, "foo", 3);
+            } else {
+                v.update(i, "foo");
+            }
+        } else if (v.hasMultiValue()) {
+            v.append(i, "foo", 27);
+            v.append(i, "bar", (i == 25) ? 12 : 14);
+            v.append(i, "foo", -3);
+        } else {
+            EXPECT_TRUE( v.update(i, "bar") );
+        }
+        v.commit();
+    }
+}
+
+
+// Gives access to the attribute held by 'v' through the requested type.
+// A failing dynamic_cast is caught by the assert below.
+template <typename VectorType>
+VectorType &
+PostingListAttributeTest::as(AttributePtr &v)
+{
+    VectorType *res = NULL;
+    res = dynamic_cast<VectorType *>(v.get());
+    assert(res != NULL);
+    return *res;
+}
+
+
+// Convenience wrapper: the attribute held by 'v' seen as an IntegerAttribute.
+IntegerAttribute &
+PostingListAttributeTest::asInt(AttributePtr &v)
+{
+    IntegerAttribute &typed = as<IntegerAttribute>(v);
+    return typed;
+}
+
+
+// Convenience wrapper: the attribute held by 'v' seen as a StringAttribute.
+StringAttribute &
+PostingListAttributeTest::asString(AttributePtr &v)
+{
+    StringAttribute &typed = as<StringAttribute>(v);
+    return typed;
+}
+
+
+// Serializes one query item into 'buffer': the item type byte (prefix term
+// or plain term), then the compressed length and the bytes of 'index', then
+// the compressed length and the bytes of 'term'. On return the buffer is
+// sized to exactly the bytes written.
+void
+PostingListAttributeTest::buildTermQuery(std::vector<char> &buffer,
+                                         const vespalib::string &index,
+                                         const vespalib::string &term,
+                                         bool prefix)
+{
+    const uint32_t indexLen = index.size();
+    const uint32_t termLen = term.size();
+
+    // Reserve room for the type byte, four bytes per length field and both
+    // strings; the buffer is shrunk to the used size at the end.
+    const uint32_t capacity = 1 + 2 * 4 + indexLen + termLen;
+    buffer.resize(capacity);
+
+    uint32_t used = 0;
+    buffer[used] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM;
+    used += 1;
+
+    used += vespalib::compress::Integer::compressPositive(indexLen, &buffer[used]);
+    memcpy(&buffer[used], index.c_str(), indexLen);
+    used += indexLen;
+
+    used += vespalib::compress::Integer::compressPositive(termLen, &buffer[used]);
+    memcpy(&buffer[used], term.c_str(), termLen);
+    used += termLen;
+
+    buffer.resize(used);
+}
+
+
+// Creates a search context on 'vec' for 'term'. The term is rendered to text
+// through an asciistream, packed into a serialized term query that uses the
+// attribute name as index name, and handed to AttributeVector::getSearch()
+// with default parameters.
+template <typename V, typename T>
+SearchContextPtr
+PostingListAttributeTest::getSearch(const V &vec, const T &term, bool prefix)
+{
+    vespalib::asciistream termText;
+    termText << term;
+
+    std::vector<char> packet;
+    buildTermQuery(packet, vec.getName(), termText.str(), prefix);
+
+    // The call goes through the AttributeVector interface explicitly.
+    const AttributeVector &attr = static_cast<const AttributeVector &>(vec);
+    const vespalib::stringref packedQuery(&packet[0], packet.size());
+    return attr.getSearch(packedQuery, AttributeVector::SearchContext::Params());
+}
+
+
+// Integer flavour of the first populate() search: the closed range
+// "[-42;-42]", not a prefix search.
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch<IntegerAttribute>(const IntegerAttribute &v)
+{
+    const bool prefix = false;
+    return getSearch<IntegerAttribute>(v, "[-42;-42]", prefix);
+}
+
+
+// String flavour of the first populate() search: the term "foo", not a
+// prefix search.
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch<StringAttribute>(const StringAttribute &v)
+{
+    const bool prefix = false;
+    return getSearch<StringAttribute, const vespalib::string &>(v, "foo", prefix);
+}
+
+
+// Integer flavour of the second populate() search: the closed range
+// "[-43;-43]", not a prefix search.
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch2<IntegerAttribute>(const IntegerAttribute &v)
+{
+    const bool prefix = false;
+    return getSearch<IntegerAttribute>(v, "[-43;-43]", prefix);
+}
+
+
+// String flavour of the second populate() search: the term "bar", not a
+// prefix search.
+template <>
+SearchContextPtr
+PostingListAttributeTest::getSearch2<StringAttribute>(const StringAttribute &v)
+{
+    const bool prefix = false;
+    return getSearch<StringAttribute, const vespalib::string &>(v, "bar", prefix);
+}
+
+
+// Runs the "foo" search on 'sa', fetches its postings, creates an iterator
+// and expects the comma separated list of hits to equal 'exp'.
+// Returns true when the expectation holds.
+bool
+PostingListAttributeTest::assertSearch(const std::string &exp,
+                                       StringAttribute &sa)
+{
+    TermFieldMatchData md;
+    SearchContextPtr sc = getSearch<StringAttribute>(sa);
+    sc->fetchPostings(true);
+    SearchBasePtr sb = sc->createIterator(&md, true);
+    if (EXPECT_TRUE(assertIterator(exp, *sb))) {
+        return true;
+    }
+    return false;
+}
+
+
+// Adds 'numDocs' documents to the attribute. Each new document must receive
+// the next consecutive id, starting at 0, and the document count must grow by
+// one per call; any deviation fails an assertion.
+void
+PostingListAttributeTest::addDocs(const AttributePtr & ptr, uint32_t numDocs)
+{
+    uint32_t i = 0;
+    while (i < numDocs) {
+        uint32_t doc;
+        ASSERT_TRUE(ptr->addDoc(doc));
+        ASSERT_TRUE(doc == i);
+        ASSERT_TRUE(ptr->getNumDocs() == i + 1);
+        ++i;
+    }
+    ASSERT_TRUE(ptr->getNumDocs() == numDocs);
+}
+
+// Document id range for value index i when values are laid out in ascending
+// order: begins at i * part and ends (exclusive) one part later.
+class RangeAlpha {
+public:
+    RangeAlpha(uint32_t part) : _part(part) { }
+    uint32_t getBegin(uint32_t i) const { return _part * i; }
+    uint32_t getEnd(uint32_t i) const { return getBegin(i) + _part; }
+private:
+    uint32_t _part;
+};
+
+// Document id range for value index i when values are laid out in descending
+// order: index 0 owns the last part, index numValues - 1 owns the first one.
+class RangeBeta {
+public:
+    RangeBeta(uint32_t part, uint32_t numValues) : _part(part), _numValues(numValues) { }
+    uint32_t getBegin(uint32_t i) const {
+        const uint32_t slot = _numValues - 1 - i;
+        return slot * _part;
+    }
+    uint32_t getEnd(uint32_t i) const { return getBegin(i) + _part; }
+private:
+    uint32_t _part;
+    uint32_t _numValues;
+};
+
+// For every values[i]: looks the value up in the posting dictionary of 'vec'
+// (it must be present) and expects its posting list to contain exactly the
+// consecutive document ids from range.getBegin(i) up to, but not including,
+// range.getEnd(i).
+template <typename VectorType, typename BufferType, typename RangeGenerator>
+void
+PostingListAttributeTest::checkPostingList(const VectorType & vec, const std::vector<BufferType> & values,
+                                           const RangeGenerator & range)
+{
+    typedef typename VectorType::DictionaryIterator DictionaryIterator;
+    typedef typename VectorType::PostingList::Iterator PostingIterator;
+
+    const typename VectorType::EnumStore & enumStore = vec.getEnumStore();
+    const typename VectorType::Dictionary & dict = enumStore.getPostingDictionary();
+    const typename VectorType::PostingList & postingList = vec.getPostingList();
+
+    size_t i = 0;
+    while (i < values.size()) {
+        uint32_t doc = range.getBegin(i);
+        const uint32_t docEnd = range.getEnd(i);
+
+        DictionaryIterator itr = dict.find(typename VectorType::EnumIndex(),
+                                           typename VectorType::ComparatorType(enumStore, values[i]));
+        ASSERT_TRUE(itr.valid());
+
+        PostingIterator postings;
+        postings = postingList.begin(itr.getData());
+
+        while (postings.valid()) {
+            EXPECT_EQUAL(doc++, postings.getKey());
+            ++postings;
+        }
+        // all documents of the range must have been seen
+        EXPECT_EQUAL(doc, docEnd);
+        ++i;
+    }
+}
+
+// Posting list test for one concrete attribute type.
+//
+// The 'numDocs' documents added to ptr1 are split into values.size() equally
+// sized, contiguous parts. First part i is given values[i] (checked with
+// RangeAlpha), then the assignment is reversed so that part i is given
+// values[values.size() - 1 - i] (checked with RangeBeta). After each round
+// the attribute is saved under the base file name of ptr2 and loaded through
+// ptr2, and the loaded posting lists are checked as well.
+//
+// 'values' must not be empty and its size must divide 'numDocs'.
+template <typename VectorType, typename BufferType>
+void
+PostingListAttributeTest::testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2,
+                                          uint32_t numDocs, const std::vector<BufferType> & values)
+{
+    LOG(info, "testPostingList: vector '%s'", ptr1->getName().c_str());
+
+    // Without these, 'numDocs / values.size()' divides by zero for an empty
+    // value list and 'doc / part' indexes past the end of 'values' when the
+    // division leaves a remainder.
+    ASSERT_TRUE(!values.empty());
+    ASSERT_TRUE(numDocs % values.size() == 0);
+
+    VectorType & vec1 = static_cast<VectorType &>(*ptr1.get());
+    VectorType & vec2 = static_cast<VectorType &>(*ptr2.get());
+    addDocs(ptr1, numDocs);
+
+    uint32_t part = numDocs / values.size();
+
+    // insert values
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t idx = doc / part;
+        EXPECT_TRUE(vec1.update(doc, values[idx]));
+    }
+    vec1.commit();
+
+#if 0
+    std::cout << "***** printBuffer 0 ***** " << std::endl;
+    vec1.getEnumStore().printBuffer(std::cout, 0);
+    std::cout << "***** printBuffer 1 ***** " << std::endl;
+    vec1.getEnumStore().printBuffer(std::cout, 1);
+    std::cout << "***** printCurrentContent ***** " << std::endl;
+    vec1.getEnumStore().printCurrentContent(std::cout);
+    std::cout << "***** printPostingListContent *****" << std::endl;
+    vec1.printPostingListContent(std::cout);
+#endif
+
+    // check posting list for correct content
+    checkPostingList(vec1, values, RangeAlpha(part));
+
+    // save and load vector; a failed save or load would otherwise leave vec2
+    // without the content checked below
+    ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName()));
+    ASSERT_TRUE(ptr2->load());
+#if 0
+    std::cout << "***** vec2.printPostingListContent *****" << std::endl;
+    vec2.printPostingListContent(std::cout);
+#endif
+    checkPostingList(vec2, values, RangeAlpha(part));
+
+    // insert values in another order
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t idx = values.size() - 1 - (doc / part);
+        EXPECT_TRUE(vec1.update(doc, values[idx]));
+    }
+    vec1.commit();
+
+    // check posting list again for correct content
+    checkPostingList(vec1, values, RangeBeta(part, values.size()));
+
+    // save and load vector
+    ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName()));
+    ASSERT_TRUE(ptr2->load());
+    checkPostingList(vec2, values, RangeBeta(part, values.size()));
+}
+
+// Runs the templated posting list test with 1000 documents and 50 distinct
+// values for int32, float and string attributes, each as single value, array
+// and weighted set, all with fast-search enabled.
+void
+PostingListAttributeTest::testPostingList()
+{
+    const uint32_t numDocs = 1000;
+    const uint32_t numValues = 50;
+
+    { // IntegerAttribute
+        std::vector<largeint_t> values(numValues);
+        for (uint32_t i = 0; i < numValues; ++i) {
+            values[i] = i;
+        }
+        {
+            Config cfg(BasicType::INT32, CollectionType::SINGLE);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("sint32_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("sint32_2", cfg);
+            testPostingList<Int32PostingListAttribute>(first, second, numDocs, values);
+        }
+        {
+            Config cfg(BasicType::INT32, CollectionType::ARRAY);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("aint32_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("aint32_2", cfg);
+            testPostingList<Int32ArrayPostingListAttribute>(first, second, numDocs, values);
+        }
+        {
+            Config cfg(BasicType::INT32, CollectionType::WSET);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("wsint32_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("wsint32_2", cfg);
+            testPostingList<Int32WsetPostingListAttribute>(first, second, numDocs, values);
+        }
+    }
+
+    { // FloatingPointAttribute
+        std::vector<double> values(numValues);
+        for (uint32_t i = 0; i < numValues; ++i) {
+            values[i] = i;
+        }
+        {
+            Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("sfloat_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("sfloat_2", cfg);
+            testPostingList<FloatPostingListAttribute>(first, second, numDocs, values);
+        }
+        {
+            Config cfg(BasicType::FLOAT, CollectionType::ARRAY);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("afloat_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("afloat_2", cfg);
+            testPostingList<FloatArrayPostingListAttribute>(first, second, numDocs, values);
+        }
+        {
+            Config cfg(BasicType::FLOAT, CollectionType::WSET);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("wsfloat_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("wsfloat_2", cfg);
+            testPostingList<FloatWsetPostingListAttribute>(first, second, numDocs, values);
+        }
+    }
+
+    { // StringAttribute
+        // 'charValues' holds c_str() pointers into the strings owned by
+        // 'values'. Reserving the final size up front keeps 'values' from
+        // reallocating while those pointers are collected.
+        std::vector<vespalib::string> values;
+        std::vector<const char *> charValues;
+        values.reserve(numValues);
+        charValues.reserve(numValues);
+
+        // the first value is the empty string, the rest are "string<i>"
+        values.push_back("");
+        charValues.push_back(values.back().c_str());
+        uint32_t i = 1;
+        while (i < numValues) {
+            vespalib::asciistream ss;
+            ss << "string" << i;
+            values.push_back(ss.str());
+            charValues.push_back(values.back().c_str());
+            ++i;
+        }
+        {
+            Config cfg(BasicType::STRING, CollectionType::SINGLE);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("sstr_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("sstr_2", cfg);
+            testPostingList<StringPostingListAttribute>(first, second, numDocs, charValues);
+        }
+        {
+            Config cfg(BasicType::STRING, CollectionType::ARRAY);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("astr_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("astr_2", cfg);
+            testPostingList<StringArrayPostingListAttribute>(first, second, numDocs, charValues);
+        }
+        {
+            Config cfg(BasicType::STRING, CollectionType::WSET);
+            cfg.setFastSearch(true);
+            AttributePtr first = AttributeFactory::createAttribute("wsstr_1", cfg);
+            AttributePtr second = AttributeFactory::createAttribute("wsstr_2", cfg);
+            testPostingList<StringWsetPostingListAttribute>(first, second, numDocs, charValues);
+        }
+    }
+}
+
+// Looks 'value' up in the posting dictionary of 'vec' (it must be present)
+// and expects its posting list to contain exactly the document ids in
+// 'expected', in ascending order.
+//
+// A posting list that is longer than 'expected' is reported as a failed
+// expectation; the comparison stops there instead of reading past the end of
+// the expected set.
+template <typename AttributeType, typename ValueType>
+void
+PostingListAttributeTest::checkPostingList(AttributeType & vec, ValueType value, DocSet expected)
+{
+    const typename AttributeType::EnumStore & enumStore = vec.getEnumStore();
+    const typename AttributeType::Dictionary & dict =
+        enumStore.getPostingDictionary();
+    const typename AttributeType::PostingList & postingList = vec.getPostingList();
+    typename AttributeType::DictionaryIterator itr =
+        dict.find(typename AttributeType::EnumIndex(),
+                  typename AttributeType::ComparatorType(enumStore, value));
+    ASSERT_TRUE(itr.valid());
+
+    typename AttributeType::PostingList::Iterator postings;
+    postings = postingList.begin(itr.getData());
+
+    DocSet::iterator docBegin = expected.begin();
+    DocSet::iterator docEnd = expected.end();
+    for (; postings.valid(); ++postings) {
+        // Dereferencing docBegin once it has reached docEnd is undefined
+        // behaviour, so surplus postings end the loop with a failure.
+        if (!EXPECT_TRUE(docBegin != docEnd)) {
+            break;
+        }
+        EXPECT_EQUAL(*docBegin++, postings.getKey());
+    }
+    // every expected document must have been matched
+    EXPECT_TRUE(docBegin == docEnd);
+}
+
+// Expects that 'value' has no entry in the posting dictionary of 'vec'.
+template <typename AttributeType, typename ValueType>
+void
+PostingListAttributeTest::checkNonExistantPostingList(AttributeType & vec, ValueType value)
+{
+    typedef typename AttributeType::DictionaryIterator DictionaryIterator;
+
+    const typename AttributeType::Dictionary & dict =
+        vec.getEnumStore().getPostingDictionary();
+    const typename AttributeType::ComparatorType cmp(vec.getEnumStore(), value);
+    DictionaryIterator itr = dict.find(typename AttributeType::EnumIndex(), cmp);
+    EXPECT_TRUE(!itr.valid());
+}
+
+// Verifies that posting lists follow arithmetic updates on a single value
+// numeric attribute with four documents.
+//
+// Round one: all documents hold 100, then documents 0..3 get +10, -10, *10
+// and /10 respectively in one commit. Round two: all documents are set to
+// 2000 and each operation is applied twice within a single commit. After
+// round two none of the earlier or intermediate values may be left in the
+// posting dictionary.
+template <typename AttributeType, typename ValueType>
+void
+PostingListAttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr)
+{
+    LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str());
+
+    typedef document::ArithmeticValueUpdate Arith;
+    AttributeType & vec = static_cast<AttributeType &>(*ptr.get());
+    const uint32_t numDocs = 4;
+
+    addDocs(ptr, numDocs);
+
+    checkNonExistantPostingList<AttributeType, ValueType>(vec, 0);
+
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        ASSERT_TRUE(vec.update(doc, 100));
+    }
+    ptr->commit();
+
+    checkNonExistantPostingList<AttributeType, ValueType>(vec, 0);
+    {
+        DocSet everyDoc;
+        for (uint32_t doc = 0; doc < numDocs; ++doc) {
+            everyDoc.insert(doc);
+        }
+        checkPostingList<AttributeType, ValueType>(vec, 100, everyDoc);
+    }
+
+    EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+    EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10)));
+    EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10)));
+    EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10)));
+    ptr->commit();
+
+    {
+        // value expected for document 0, 1, 2 and 3 after one operation each
+        const ValueType afterOneOp[] = { 110, 90, 1000, 10 };
+        for (uint32_t doc = 0; doc < numDocs; ++doc) {
+            DocSet onlyThisDoc;
+            onlyThisDoc.insert(doc);
+            checkPostingList<AttributeType, ValueType>(vec, afterOneOp[doc], onlyThisDoc);
+        }
+    }
+
+    // several inside a single commit
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        ASSERT_TRUE(vec.update(doc, 2000));
+    }
+    EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+    EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10)));
+    EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10)));
+    EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10)));
+    EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10)));
+    EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10)));
+    EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10)));
+    EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10)));
+    ptr->commit();
+
+    // dump the posting list content to stdout
+    vespalib::asciistream ss;
+    vec.printPostingListContent(ss);
+    std::cout << ss.str();
+
+    {
+        // value expected for document 0, 1, 2 and 3 after two operations each
+        const ValueType afterTwoOps[] = { 2020, 1980, 200000, 20 };
+        for (uint32_t doc = 0; doc < numDocs; ++doc) {
+            DocSet onlyThisDoc;
+            onlyThisDoc.insert(doc);
+            checkPostingList<AttributeType, ValueType>(vec, afterTwoOps[doc], onlyThisDoc);
+        }
+    }
+    {
+        // values of round one, the base value of round two and the
+        // intermediate results of round two
+        const ValueType gone[] = { 100, 110, 90, 1000, 10, 2000, 2010, 1990, 20000, 200 };
+        for (size_t n = 0; n < sizeof(gone) / sizeof(gone[0]); ++n) {
+            checkNonExistantPostingList<AttributeType, ValueType>(vec, gone[n]);
+        }
+    }
+}
+
+// Runs the arithmetic update test for the single value int32 and float
+// attributes, both with fast-search enabled.
+void
+PostingListAttributeTest::testArithmeticValueUpdate()
+{
+    { // IntegerAttribute
+        Config intCfg(BasicType::INT32, CollectionType::SINGLE);
+        intCfg.setFastSearch(true);
+        AttributePtr intAttr = AttributeFactory::createAttribute("sint32", intCfg);
+        testArithmeticValueUpdate<Int32PostingListAttribute, largeint_t>(intAttr);
+    }
+
+    { // FloatingPointAttribute
+        Config floatCfg(BasicType::FLOAT, CollectionType::SINGLE);
+        floatCfg.setFastSearch(true);
+        AttributePtr floatAttr = AttributeFactory::createAttribute("sfloat", floatCfg);
+        testArithmeticValueUpdate<FloatPostingListAttribute, double>(floatAttr);
+    }
+}
+
+
+// Stores 'value' in five documents of ptr1, saves the attribute under the
+// base file name of ptr2, loads it through ptr2 and expects every loaded
+// document to hold exactly one value equal to 'value'.
+template <typename VectorType, typename ValueType>
+void
+PostingListAttributeTest::testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value)
+{
+    LOG(info, "testReload: vector '%s'", ptr1->getName().c_str());
+
+    const uint32_t docCount = 5;
+    VectorType & vec1 = static_cast<VectorType &>(*ptr1.get());
+
+    addDocs(ptr1, docCount);
+    for (uint32_t doc = 0; doc < docCount; ++doc) {
+        EXPECT_TRUE(vec1.update(doc, value));
+    }
+    ptr1->commit();
+
+    ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName()));
+    ASSERT_TRUE(ptr2->load());
+
+    EXPECT_TRUE(ptr2->getNumDocs() == 5);
+    ValueType buffer[1];
+    uint32_t doc = 0;
+    while (doc < docCount) {
+        EXPECT_TRUE(ptr2->get(doc, buffer, 1) == 1);
+        EXPECT_EQUAL(buffer[0], value);
+        ++doc;
+    }
+}
+
+// Runs the reload test for single value int32, float and string attributes
+// with fast-search enabled. Each type is tested with an ordinary value and
+// with 0 or the empty string, using a fresh pair of attributes per value.
+void
+PostingListAttributeTest::testReload()
+{
+    { // IntegerAttribute
+        Config cfg(BasicType::INT32, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        const largeint_t values[] = { 100, 0 };
+        for (size_t n = 0; n < sizeof(values) / sizeof(values[0]); ++n) {
+            AttributePtr source = AttributeFactory::createAttribute("sint32_1", cfg);
+            AttributePtr target = AttributeFactory::createAttribute("sint32_2", cfg);
+            testReload<Int32PostingListAttribute, largeint_t>(source, target, values[n]);
+        }
+    }
+
+    { // FloatingPointAttribute
+        Config cfg(BasicType::FLOAT, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        const double values[] = { 100, 0 };
+        for (size_t n = 0; n < sizeof(values) / sizeof(values[0]); ++n) {
+            AttributePtr source = AttributeFactory::createAttribute("sfloat_1", cfg);
+            AttributePtr target = AttributeFactory::createAttribute("sfloat_2", cfg);
+            testReload<FloatPostingListAttribute, double>(source, target, values[n]);
+        }
+    }
+
+    { // StringAttribute
+        Config cfg(BasicType::STRING, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        const vespalib::string values[] = { "unique", "" };
+        for (size_t n = 0; n < sizeof(values) / sizeof(values[0]); ++n) {
+            AttributePtr source = AttributeFactory::createAttribute("sstr_1", cfg);
+            AttributePtr target = AttributeFactory::createAttribute("sstr_2", cfg);
+            testReload<StringPostingListAttribute, vespalib::string>(source, target, values[n]);
+        }
+    }
+}
+
+// Checks the posting info and the first hit of both populate() searches.
+//
+// 'trimmed' names the expected state of the attribute:
+//   0 - full populate() data set,
+//   1 - documents 20 and 25 cleared,
+//   2 - document 7 cleared as well.
+//
+// First search (getSearch): min/max weights are 1/1 for single value
+// attributes; for multi value attributes max is 3 and min is -3, or 3 when
+// trimmed is 2. The first hit is always document 1.
+// Second search (getSearch2): no posting info and no hits when trimmed is 2;
+// otherwise weights are 1/1 for single value attributes, max 14 and min 12
+// (trimmed 0) or 14 for multi value attributes, and the first hit is
+// document 7.
+template <typename VectorType>
+void
+PostingListAttributeTest::testMinMax(AttributePtr &ptr1, uint32_t trimmed)
+{
+    const bool multiValue = ptr1->hasMultiValue();
+
+    TermFieldMatchData md;
+    SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(ptr1));
+    sc->fetchPostings(true);
+    SearchBasePtr sb = sc->createIterator(&md, true);
+    sb->initFullRange();
+
+    const PostingInfo *pi = sb->getPostingInfo();
+    ASSERT_TRUE(pi != NULL);
+    const MinMaxPostingInfo *mmpi =
+        dynamic_cast<const MinMaxPostingInfo *>(pi);
+    ASSERT_TRUE(mmpi != NULL);
+
+    if (!multiValue) {
+        EXPECT_EQUAL(1, mmpi->getMinWeight());
+        EXPECT_EQUAL(1, mmpi->getMaxWeight());
+    } else {
+        if (trimmed != 2u) {
+            EXPECT_EQUAL(-3, mmpi->getMinWeight());
+        } else {
+            EXPECT_EQUAL(3, mmpi->getMinWeight());
+        }
+        EXPECT_EQUAL(3, mmpi->getMaxWeight());
+    }
+
+    sb->seek(1u);
+    EXPECT_EQUAL(1u, sb->getDocId());
+
+    // second search, reusing the same match data
+    sc = getSearch2<VectorType>(as<VectorType>(ptr1));
+    sc->fetchPostings(true);
+    sb = sc->createIterator(&md, true);
+    sb->initFullRange();
+
+    pi = sb->getPostingInfo();
+    if (trimmed != 2) {
+        ASSERT_TRUE(pi != NULL);
+        mmpi = dynamic_cast<const MinMaxPostingInfo *>(pi);
+        ASSERT_TRUE(mmpi != NULL);
+
+        if (!multiValue) {
+            EXPECT_EQUAL(1, mmpi->getMinWeight());
+            EXPECT_EQUAL(1, mmpi->getMaxWeight());
+        } else {
+            if (trimmed != 0) {
+                EXPECT_EQUAL(14, mmpi->getMinWeight());
+            } else {
+                EXPECT_EQUAL(12, mmpi->getMinWeight());
+            }
+            EXPECT_EQUAL(14, mmpi->getMaxWeight());
+        }
+    } else {
+        ASSERT_TRUE(pi == NULL);
+    }
+
+    sb->seek(1u);
+    if (trimmed != 2u) {
+        EXPECT_EQUAL(7u, sb->getDocId());
+    } else {
+        EXPECT_TRUE(sb->isAtEnd());
+    }
+}
+
+// Min/max test driver for one attribute: adds 100 documents to ptr1, fills
+// them with populate() and verifies the posting info
+//   - on ptr1 itself,
+//   - on ptr2 after ptr1 was saved under ptr2's base file name and loaded,
+//   - on ptr2 after documents 20 and 25 were cleared (trimmed == 1),
+//   - on ptr2 after document 7 was cleared as well (trimmed == 2).
+// Every verification step runs inside TEST_DO, as the sibling calls do.
+template <typename VectorType>
+void
+PostingListAttributeTest::testMinMax(AttributePtr &ptr1, AttributePtr &ptr2)
+{
+    uint32_t numDocs = 100;
+    addDocs(ptr1, numDocs);
+    populate(as<VectorType>(ptr1));
+
+    TEST_DO(testMinMax<VectorType>(ptr1, 0u));
+    ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName()));
+    ASSERT_TRUE(ptr2->load());
+    // the reloaded attribute must show the same, untrimmed, state
+    TEST_DO(testMinMax<VectorType>(ptr2, 0u));
+
+    ptr2->clearDoc(20);
+    ptr2->clearDoc(25);
+    ptr2->commit();
+    TEST_DO(testMinMax<VectorType>(ptr2, 1u));
+
+    ptr2->clearDoc(7);
+    ptr2->commit();
+    TEST_DO(testMinMax<VectorType>(ptr2, 2u));
+
+}
+
+// Runs the min/max test for int32 and string attributes, each as single
+// value and as weighted set, all with fast-search enabled.
+void
+PostingListAttributeTest::testMinMax(void)
+{
+    { // int32, single value
+        Config cfg(BasicType::INT32, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        AttributePtr source = AttributeFactory::createAttribute("sint32_1", cfg);
+        AttributePtr reloaded = AttributeFactory::createAttribute("sint32_2", cfg);
+        testMinMax<IntegerAttribute>(source, reloaded);
+    }
+    { // int32, weighted set
+        Config cfg(BasicType::INT32, CollectionType::WSET);
+        cfg.setFastSearch(true);
+        AttributePtr source = AttributeFactory::createAttribute("wsint32_1", cfg);
+        AttributePtr reloaded = AttributeFactory::createAttribute("wsint32_2", cfg);
+        testMinMax<IntegerAttribute>(source, reloaded);
+    }
+    { // string, single value
+        Config cfg(BasicType::STRING, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        AttributePtr source = AttributeFactory::createAttribute("sstr_1", cfg);
+        AttributePtr reloaded = AttributeFactory::createAttribute("sstr_2", cfg);
+        testMinMax<StringAttribute>(source, reloaded);
+    }
+    { // string, weighted set
+        Config cfg(BasicType::STRING, CollectionType::WSET);
+        cfg.setFastSearch(true);
+        AttributePtr source = AttributeFactory::createAttribute("wsstr_1", cfg);
+        AttributePtr reloaded = AttributeFactory::createAttribute("wsstr_2", cfg);
+        testMinMax<StringAttribute>(source, reloaded);
+    }
+}
+
+
+// Checks that the "foo" search on a single value, fast-search string
+// attribute hits the spellings "FOo", "foo" and "FOO", and that the hit list
+// shrinks as those documents are overwritten with the empty string.
+void
+PostingListAttributeTest::testStringFold(void)
+{
+    Config cfg(BasicType::STRING, CollectionType::SINGLE);
+    cfg.setFastSearch(true);
+    AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg);
+    addDocs(ptr1, 6);
+
+    StringAttribute &sa = asString(ptr1);
+
+    // initial content, committed one update at a time
+    const struct {
+        uint32_t    doc;
+        const char *text;
+    } initial[] = {
+        { 1, "a" },
+        { 3, "FOo" },
+        { 4, "foo" },
+        { 5, "z" }
+    };
+    for (size_t n = 0; n < sizeof(initial) / sizeof(initial[0]); ++n) {
+        sa.update(initial[n].doc, initial[n].text);
+        sa.commit();
+    }
+    EXPECT_TRUE(assertSearch("3,4", sa));
+
+    // one more spelling joins the posting list
+    sa.update(2, "FOO");
+    sa.commit();
+    EXPECT_TRUE(assertSearch("2,3,4", sa));
+
+    // overwrite the matching documents one by one
+    sa.update(4, "");
+    sa.commit();
+    EXPECT_TRUE(assertSearch("2,3", sa));
+
+    sa.update(2, "");
+    sa.commit();
+    EXPECT_TRUE(assertSearch("3", sa));
+
+    sa.update(3, "");
+    sa.commit();
+    EXPECT_TRUE(assertSearch("", sa));
+}
+
+
+// Entry point of the test application: runs the five test groups in order
+// between the TEST_INIT and TEST_DONE macros of the test framework.
+int
+PostingListAttributeTest::Main()
+{
+ TEST_INIT("postinglistattribute_test");
+
+ testPostingList();
+ testArithmeticValueUpdate();
+ testReload();
+ testMinMax();
+ testStringFold();
+
+ // NOTE(review): no return statement follows, so TEST_DONE() presumably
+ // supplies the return value of Main() - confirm against the testkit.
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::PostingListAttributeTest);
diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh
new file mode 100755
index 00000000000..e6f9c214cb9
--- /dev/null
+++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+$VALGRIND ./searchlib_postinglistattribute_test_app
+rm -rf *.dat
+rm -rf *.idx
+rm -rf *.weight
diff --git a/searchlib/src/tests/attribute/runnable.h b/searchlib/src/tests/attribute/runnable.h
new file mode 100644
index 00000000000..418230a2fc5
--- /dev/null
+++ b/searchlib/src/tests/attribute/runnable.h
@@ -0,0 +1,43 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/util/sync.h>
+
+namespace search {
+
+// Base class for test threads. Subclasses implement doRun(); Run() calls it
+// and afterwards marks the runnable as stopped, which is what join() waits
+// for. All flag accesses in this class happen while holding the monitor
+// '_cond'.
+class Runnable : public FastOS_Runnable
+{
+protected:
+ uint32_t _id;              // identifier given at construction; not used by this class
+ vespalib::Monitor _cond;   // monitor guarding _done and _stopped
+ bool _done;                // set by stop(); never read in this class
+ bool _stopped;             // set once doRun() has returned
+
+public:
+ Runnable(uint32_t id) :
+ _id(id), _cond(), _done(false), _stopped(false)
+ { }
+ // Thread entry point: runs doRun(), then sets _stopped and wakes up all
+ // threads waiting in join().
+ void Run(FastOS_ThreadInterface *, void *) {
+ doRun();
+
+ vespalib::MonitorGuard guard(_cond);
+ _stopped = true;
+ guard.broadcast();
+ }
+ // The work performed by the thread.
+ virtual void doRun() = 0;
+ // Sets the _done flag. No signal or broadcast is sent here, so
+ // presumably doRun() implementations poll the flag - confirm in the
+ // subclasses.
+ void stop() {
+ vespalib::MonitorGuard guard(_cond);
+ _done = true;
+ }
+ // Blocks until Run() has set _stopped.
+ void join() {
+ vespalib::MonitorGuard guard(_cond);
+ while (!_stopped) {
+ guard.wait();
+ }
+ }
+};
+
+} // search
+
diff --git a/searchlib/src/tests/attribute/searchable/.gitignore b/searchlib/src/tests/attribute/searchable/.gitignore
new file mode 100644
index 00000000000..663692907f6
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/.gitignore
@@ -0,0 +1,4 @@
+/my_logctl_file
+searchlib_attribute_blueprint_test_app
+searchlib_attribute_searchable_adapter_test_app
+searchlib_attribute_weighted_set_blueprint_test_app
diff --git a/searchlib/src/tests/attribute/searchable/CMakeLists.txt b/searchlib/src/tests/attribute/searchable/CMakeLists.txt
new file mode 100644
index 00000000000..ed76520af29
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# Searchable adapter test: the executable is run through its shell wrapper script.
+vespa_add_executable(searchlib_attribute_searchable_adapter_test_app
+ SOURCES
+ attribute_searchable_adapter_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_searchable_adapter_test_app COMMAND sh attribute_searchable_adapter_test.sh)
+# Weighted set blueprint test: the executable is run directly.
+vespa_add_executable(searchlib_attribute_weighted_set_blueprint_test_app
+ SOURCES
+ attribute_weighted_set_blueprint_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_weighted_set_blueprint_test_app COMMAND searchlib_attribute_weighted_set_blueprint_test_app)
+# Attribute blueprint test: the executable is run directly.
+vespa_add_executable(searchlib_attribute_blueprint_test_app
+ SOURCES
+ attributeblueprint_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_attribute_blueprint_test_app COMMAND searchlib_attribute_blueprint_test_app)
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
new file mode 100644
index 00000000000..1d69f516b52
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp
@@ -0,0 +1,689 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/attribute/attribute_blueprint_factory.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.hpp>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/predicate_query_term.h>
+#include <vespa/searchlib/query/tree/rectangle.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
+#include <memory>
+
+using search::AttributeEnumGuard;
+using search::AttributeFactory;
+using search::AttributeGuard;
+using search::AttributeVector;
+using search::IAttributeManager;
+using search::IntegerAttribute;
+using search::SingleStringExtAttribute;
+using search::attribute::IAttributeContext;
+using search::fef::MatchData;
+using search::fef::MatchDataLayout;
+using search::fef::TermFieldHandle;
+using search::fef::TermFieldMatchData;
+using search::query::Location;
+using search::query::Node;
+using search::query::Point;
+using search::query::PredicateQueryTerm;
+using search::query::Rectangle;
+using search::query::SimpleDotProduct;
+using search::query::SimpleLocationTerm;
+using search::query::SimplePredicateQuery;
+using search::query::SimplePrefixTerm;
+using search::query::SimpleRangeTerm;
+using search::query::SimpleSuffixTerm;
+using search::query::SimpleSubstringTerm;
+using search::query::SimpleStringTerm;
+using search::query::SimpleWandTerm;
+using search::query::SimpleWeightedSetTerm;
+using search::query::Weight;
+using search::queryeval::Blueprint;
+using search::queryeval::FieldSpec;
+using search::queryeval::FakeRequestContext;
+using search::queryeval::MinMaxPostingInfo;
+using search::queryeval::ParallelWeakAndSearch;
+using search::queryeval::PostingInfo;
+using search::queryeval::SearchIterator;
+using std::vector;
+using vespalib::string;
+using namespace search::attribute;
+using namespace search;
+
+namespace {
+
+const string field = "field";
+const string other = "other";
+const int32_t weight = 1;
+const uint32_t num_docs = 1000;
+
+// Minimal IAttributeManager stub serving at most two attributes: one looked up
+// by the name in 'field' and an optional one looked up by the name in 'other'.
+class MyAttributeManager : public IAttributeManager {
+    AttributeVector::SP _attribute_vector;
+    AttributeVector::SP _other;
+
+public:
+    // Wraps a raw attribute pointer in the shared pointer member.
+    explicit MyAttributeManager(AttributeVector *attr)
+        : _attribute_vector(attr),
+          _other()
+    {}
+
+    // Stores an attribute that is already held by a shared pointer.
+    explicit MyAttributeManager(AttributeVector::SP attr)
+        : _attribute_vector(attr),
+          _other()
+    {}
+
+    // Sets the attribute returned for lookups of 'other'.
+    void set_other(AttributeVector::SP attr) {
+        _other = attr;
+    }
+
+    // Returns a guard for the named attribute, or an empty pointer for unknown names.
+    virtual AttributeGuard::UP getAttribute(const string &name) const {
+        if (name == field) {
+            return AttributeGuard::UP(new AttributeGuard(_attribute_vector));
+        }
+        if (name == other) {
+            return AttributeGuard::UP(new AttributeGuard(_other));
+        }
+        return AttributeGuard::UP(nullptr);
+    }
+
+    // Same lookup as getAttribute(), but hands out AttributeEnumGuard instances.
+    virtual AttributeGuard::UP
+    getAttributeStableEnum(const string &name) const {
+        if (name == field) {
+            return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector));
+        }
+        if (name == other) {
+            return AttributeGuard::UP(new AttributeEnumGuard(_other));
+        }
+        return AttributeGuard::UP(nullptr);
+    }
+
+    // Not needed by these tests.
+    virtual void getAttributeList(vector<AttributeGuard> &) const {
+        assert(!"Not implemented");
+    }
+
+    // Not needed by these tests; returns an empty context if asserts are disabled.
+    virtual IAttributeContext::UP createContext() const {
+        assert(!"Not implemented");
+        return IAttributeContext::UP();
+    }
+};
+
+// Everything a test wants to inspect after running one query:
+// the blueprint estimate, optional posting info, wand parameters,
+// the hits that were found and the textual dump of the iterator tree.
+struct Result {
+    // One matching document with the values unpacked for it.
+    struct Hit {
+        uint32_t docid;
+        double raw_score;
+        int32_t match_weight;
+
+        Hit(uint32_t id, double raw, int32_t match_weight_in)
+            : docid(id),
+              raw_score(raw),
+              match_weight(match_weight_in)
+        {}
+    };
+
+    size_t est_hits;
+    bool est_empty;
+    bool has_minmax;
+    int32_t min_weight;
+    int32_t max_weight;
+    size_t wand_hits;
+    int64_t wand_initial_threshold;
+    double wand_boost_factor;
+    std::vector<Hit> hits;
+    vespalib::string iterator_dump;
+
+    // Starts from the blueprint estimate; all remaining fields are zeroed/empty.
+    Result(size_t est_hits_in, bool est_empty_in)
+        : est_hits(est_hits_in),
+          est_empty(est_empty_in),
+          has_minmax(false),
+          min_weight(0),
+          max_weight(0),
+          wand_hits(0),
+          wand_initial_threshold(0),
+          wand_boost_factor(0.0),
+          hits(),
+          iterator_dump()
+    {}
+
+    // Records the weight bounds and flags that they are available.
+    void set_minmax(int32_t min, int32_t max) {
+        min_weight = min;
+        max_weight = max;
+        has_minmax = true;
+    }
+};
+
+// Copies the min/max weights into result when postingInfo is a MinMaxPostingInfo;
+// leaves result untouched otherwise (including when postingInfo is null).
+void extract_posting_info(Result &result, const PostingInfo *postingInfo) {
+    // dynamic_cast of a null pointer yields null, so one check covers both cases.
+    const MinMaxPostingInfo *bounds = dynamic_cast<const MinMaxPostingInfo *>(postingInfo);
+    if (bounds == nullptr) {
+        return;
+    }
+    result.set_minmax(bounds->getMinWeight(), bounds->getMaxWeight());
+}
+
+// Copies the match parameters of a parallel wand iterator into result.
+// Does nothing when wand is null, i.e. when the iterator is of another type.
+void extract_wand_params(Result &result, ParallelWeakAndSearch *wand) {
+    if (wand == nullptr) {
+        return;
+    }
+    result.wand_hits = wand->getMatchParams().scores.getScoresToTrack();
+    result.wand_initial_threshold = wand->getMatchParams().scoreThreshold;
+    result.wand_boost_factor = wand->getMatchParams().thresholdBoostFactor;
+}
+
+// Runs the query 'node' against the attribute named by 'field' and collects
+// the outcome: blueprint estimate, posting info, wand parameters, the iterator
+// dump and one Hit for every docid in [1, num_docs) the iterator matches.
+// 'strict' is passed on to both fetchPostings() and createSearch().
+Result do_search(IAttributeManager &attribute_manager, const Node &node, bool strict) {
+ uint32_t fieldId = 0;
+ AttributeContext ac(attribute_manager);
+ FakeRequestContext requestContext(&ac);
+ AttributeBlueprintFactory source;
+ MatchDataLayout mdl;
+ TermFieldHandle handle = mdl.allocTermField(fieldId);
+ MatchData::UP match_data = mdl.createMatchData();
+ Blueprint::UP bp = source.createBlueprint(requestContext, FieldSpec(field, fieldId, handle), node);
+ ASSERT_TRUE(bp.get() != nullptr);
+ // The estimate is read before fetchPostings() is called.
+ Result result(bp->getState().estimate().estHits, bp->getState().estimate().empty);
+ bp->fetchPostings(strict);
+ SearchIterator::UP iterator = bp->createSearch(*match_data, strict);
+ ASSERT_TRUE(iterator.get() != nullptr);
+ iterator->initFullRange();
+ extract_posting_info(result, iterator->getPostingInfo());
+ // The cast yields null for non-wand iterators, which extract_wand_params() ignores.
+ extract_wand_params(result, dynamic_cast<ParallelWeakAndSearch*>(iterator.get()));
+ result.iterator_dump = iterator->asString();
+ // Docid 0 is never probed; the scan starts at 1.
+ for (uint32_t docid = 1; docid < num_docs; ++docid) {
+ if (iterator->seek(docid)) {
+ iterator->unpack(docid);
+ result.hits.emplace_back(docid,
+ match_data->resolveTermField(handle)->getRawScore(),
+ match_data->resolveTermField(handle)->getWeight());
+ }
+ }
+ return result;
+}
+
+// Runs 'node' and checks the estimate: below num_docs / 10 when fast_search is
+// set, otherwise non-empty and equal to num_docs.
+// Returns true only if the single hit found is the last document (num_docs - 1).
+bool search(const Node &node, IAttributeManager &attribute_manager,
+            bool fast_search = false, bool strict = true)
+{
+    const Result outcome = do_search(attribute_manager, node, strict);
+    if (!fast_search) {
+        EXPECT_TRUE(!outcome.est_empty);
+        EXPECT_EQUAL(num_docs, outcome.est_hits);
+    } else {
+        EXPECT_LESS(outcome.est_hits, num_docs / 10);
+    }
+    if (outcome.hits.size() != 1) {
+        return false;
+    }
+    return outcome.hits[0].docid == (num_docs - 1);
+}
+
+// Convenience overload: wraps 'term' in a SimpleStringTerm and forwards to the
+// Node overload. The term text is registered with TEST_STATE for this call.
+bool search(const string &term, IAttributeManager &attribute_manager,
+ bool fast_search = false, bool strict = true)
+{
+ TEST_STATE(term.c_str());
+ SimpleStringTerm node(term, "field", 0, Weight(0));
+ return search(node, attribute_manager, fast_search, strict);
+}
+
+// Maps a value type to the attribute vector class used to store it and to the
+// way a value is added. The primary template uses SingleStringExtAttribute.
+template <typename T> struct AttributeVectorTypeFinder {
+ //typedef search::SingleValueStringAttribute Type;
+ typedef SingleStringExtAttribute Type;
+ static void add(Type & a, const T & v) { a.add(v, weight); }
+};
+// int64_t values use a single value numeric attribute; the value is written to
+// the last document (getNumDocs() - 1) and committed.
+template <> struct AttributeVectorTypeFinder<int64_t> {
+ typedef search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type;
+ static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); }
+};
+
+// Appends n documents to attr and verifies that the last assigned docid is
+// n - 1, i.e. the attribute is expected to be empty on entry.
+// For predicate attributes the min-feature entry of each new document is set to 0.
+// A request for zero documents is a no-op.
+void add_docs(AttributeVector *attr, size_t n) {
+    if (n == 0) {
+        return; // no docid is assigned, so there is nothing to verify
+    }
+    AttributeVector::DocId docid = 0;
+    for (size_t i = 0; i < n; ++i) {
+        attr->addDoc(docid);
+        if (attr->inherits(PredicateAttribute::classId)) {
+            const_cast<uint8_t *>(static_cast<PredicateAttribute *>(attr)->getMinFeatureVector().first)[docid] = 0;
+        }
+    }
+    ASSERT_EQUAL(n - 1, docid);
+}
+
+// Builds a manager around a fresh attribute holding num_docs documents, then
+// adds 'value' the way AttributeVectorTypeFinder<T> prescribes for its type.
+template <typename T>
+MyAttributeManager makeAttributeManager(T value) {
+    typedef AttributeVectorTypeFinder<T> Finder;
+    typedef typename Finder::Type Vector;
+    Vector *vec = new Vector(field);
+    add_docs(vec, num_docs);
+    Finder::add(*vec, value);
+    return MyAttributeManager(vec);
+}
+
+// Builds a manager around a single value INT64 attribute with fast-search
+// enabled, holding num_docs documents with 'value' set on the last one.
+MyAttributeManager makeFastSearchLongAttributeManager(int64_t value) {
+    Config cfg(BasicType::INT64, CollectionType::SINGLE);
+    cfg.setFastSearch(true);
+    AttributeVector::SP holder = AttributeFactory::createAttribute(field, cfg);
+    IntegerAttribute *ints = static_cast<IntegerAttribute *>(holder.get());
+    add_docs(ints, num_docs);
+    ints->update(num_docs - 1, value);
+    ints->commit();
+    return MyAttributeManager(holder);
+}
+
+// A plain string term must find the value added by makeAttributeManager().
+TEST("requireThatIteratorsCanBeCreated") {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ EXPECT_TRUE(search("foo", attribute_manager));
+}
+
+// The last document holds 42; range terms containing 42 must hit it,
+// range terms excluding 42 must not.
+TEST("requireThatRangeTermsWorkToo") {
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42));
+
+ EXPECT_TRUE(search("[23;46]", attribute_manager));
+ EXPECT_TRUE(!search("[10;23]", attribute_manager));
+ EXPECT_TRUE(!search(">43", attribute_manager));
+ EXPECT_TRUE(search("[10;]", attribute_manager));
+}
+
+// The prefix "fo" must match the stored value "foo".
+TEST("requireThatPrefixTermsWork") {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ SimplePrefixTerm node("fo", "field", 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+// Location terms against a regular (non fast-search) attribute: the stored
+// position must be hit by the first and last query and missed by the two in between.
+TEST("requireThatLocationTermsWork") {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+// Same queries as the regular location test, but against a fast-search
+// attribute; passing fast_search = true makes search() expect a low estimate.
+TEST("requireThatOptimizedLocationTermsWork") {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager, true));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager, true));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager, true));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager, true));
+}
+
+// A rectangle whose bounds are swapped on one axis must produce no hits and
+// must be answered by an EmptySearch iterator; the unwrapped rectangle must
+// hit the stored point through a LocationPreFilterIterator.
+TEST("require that optimized location search works with wrapped bounding box (no hits)") {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc));
+ SimpleLocationTerm term1(Location(Rectangle(5, 5, 15, 15)), field, 0, Weight(0)); // unwrapped
+ SimpleLocationTerm term2(Location(Rectangle(15, 5, 5, 15)), field, 0, Weight(0)); // wrapped x
+ SimpleLocationTerm term3(Location(Rectangle(5, 15, 15, 5)), field, 0, Weight(0)); // wrapped y
+ Result result1 = do_search(attribute_manager, term1, true);
+ Result result2 = do_search(attribute_manager, term2, true);
+ Result result3 = do_search(attribute_manager, term3, true);
+ EXPECT_EQUAL(1u, result1.hits.size());
+ EXPECT_EQUAL(0u, result2.hits.size());
+ EXPECT_EQUAL(0u, result3.hits.size());
+ EXPECT_TRUE(result1.iterator_dump.find("LocationPreFilterIterator") != vespalib::string::npos);
+ EXPECT_TRUE(result2.iterator_dump.find("EmptySearch") != vespalib::string::npos);
+ EXPECT_TRUE(result3.iterator_dump.find("EmptySearch") != vespalib::string::npos);
+}
+
+// Replaces the content of document 'docid' with the tokens "foo", "bar" and
+// "baz" carrying the given weights; a token whose weight is not positive is
+// left out. The change is committed before returning.
+void set_weights(StringAttribute *attr, uint32_t docid,
+                 int32_t foo_weight, int32_t bar_weight, int32_t baz_weight)
+{
+    attr->clearDoc(docid);
+    if (foo_weight > 0) {
+        attr->append(docid, "foo", foo_weight);
+    }
+    if (bar_weight > 0) {
+        attr->append(docid, "bar", bar_weight);
+    }
+    if (baz_weight > 0) {
+        attr->append(docid, "baz", baz_weight);
+    }
+    attr->commit();
+}
+
+// Builds a manager around a weighted set string attribute with num_docs
+// documents. Five documents get content (token weights as foo/bar/baz):
+//   doc 10:    - / 200 /   -
+//   doc 20:  100 / 200 / 300
+//   doc 30:    - /   - / 300
+//   doc 40:  100 /   - /   -
+//   doc 50: 1000 /   - / 300
+MyAttributeManager make_weighted_string_attribute_manager(bool fast_search) {
+    Config cfg(BasicType::STRING, CollectionType::WSET);
+    cfg.setFastSearch(fast_search);
+    AttributeVector::SP holder = AttributeFactory::createAttribute(field, cfg);
+    StringAttribute *strings = static_cast<StringAttribute *>(holder.get());
+    add_docs(strings, num_docs);
+    set_weights(strings, 10, 0, 200, 0);
+    set_weights(strings, 20, 100, 200, 300);
+    set_weights(strings, 30, 0, 0, 300);
+    set_weights(strings, 40, 100, 0, 0);
+    set_weights(strings, 50, 1000, 0, 300);
+    return MyAttributeManager(holder);
+}
+
+// Dot product over foo/bar/baz/fox, run for all four combinations of
+// fast_search and strict. Every query term has weight 1, so the expected raw
+// score of a document is the sum of its stored weights for the matching tokens.
+TEST("require that attribute dot product works") {
+ for (int i = 0; i <= 0x3; ++i) {
+ // bit 0 selects fast_search, bit 1 selects strict
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleDotProduct node(field, 0, Weight(1));
+ node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1))));
+ Result result = do_search(attribute_manager, node, strict);
+ ASSERT_EQUAL(5u, result.hits.size());
+ if (fast_search) {
+ // presumably the posting counts of foo (3), bar (2) and baz (3)
+ EXPECT_EQUAL(8u, result.est_hits);
+ } else {
+ // 'fox' is detected to produce no hits since it has no enum value
+ EXPECT_EQUAL(num_docs * 3, result.est_hits);
+ }
+ EXPECT_FALSE(result.est_empty);
+ EXPECT_EQUAL(10u, result.hits[0].docid);
+ EXPECT_EQUAL(200.0, result.hits[0].raw_score);
+ EXPECT_EQUAL(20u, result.hits[1].docid);
+ EXPECT_EQUAL(600.0, result.hits[1].raw_score);
+ EXPECT_EQUAL(30u, result.hits[2].docid);
+ EXPECT_EQUAL(300.0, result.hits[2].raw_score);
+ EXPECT_EQUAL(40u, result.hits[3].docid);
+ EXPECT_EQUAL(100.0, result.hits[3].raw_score);
+ EXPECT_EQUAL(50u, result.hits[4].docid);
+ EXPECT_EQUAL(1300.0, result.hits[4].raw_score);
+ }
+}
+
+// A dot product whose terms are all absent from the attribute must report an
+// empty estimate and produce no hits, for every fast_search/strict combination.
+TEST("require that attribute dot product can produce no hits") {
+ for (int i = 0; i <= 0x3; ++i) {
+ // bit 0 selects fast_search, bit 1 selects strict
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleDotProduct node(field, 0, Weight(1));
+ node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1))));
+ Result result = do_search(attribute_manager, node, strict);
+ ASSERT_EQUAL(0u, result.hits.size());
+ EXPECT_EQUAL(0u, result.est_hits);
+ EXPECT_TRUE(result.est_empty);
+ }
+}
+
+// Single term lookups for every fast_search/strict combination. With
+// fast_search the iterator dump must mention DocumentWeightSearchIterator and
+// min/max weights (100 and 1000 for "foo") must be exposed; without it neither
+// is expected and the estimate equals num_docs.
+TEST("require that direct attribute iterators work") {
+ for (int i = 0; i <= 0x3; ++i) {
+ // bit 0 selects fast_search, bit 1 selects strict
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleStringTerm empty_node("notfoo", "", 0, Weight(1));
+ Result empty_result = do_search(attribute_manager, empty_node, strict);
+ EXPECT_EQUAL(0u, empty_result.hits.size());
+ SimpleStringTerm node("foo", "", 0, Weight(1));
+ Result result = do_search(attribute_manager, node, strict);
+ if (fast_search) {
+ EXPECT_EQUAL(3u, result.est_hits);
+ EXPECT_TRUE(result.has_minmax);
+ EXPECT_EQUAL(100, result.min_weight);
+ EXPECT_EQUAL(1000, result.max_weight);
+ EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator")
+ != vespalib::string::npos);
+ } else {
+ EXPECT_EQUAL(num_docs, result.est_hits);
+ EXPECT_FALSE(result.has_minmax);
+ EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator")
+ == vespalib::string::npos);
+ }
+ // "foo" is stored in documents 20, 40 and 50
+ ASSERT_EQUAL(3u, result.hits.size());
+ EXPECT_FALSE(result.est_empty);
+ EXPECT_EQUAL(20u, result.hits[0].docid);
+ EXPECT_EQUAL(40u, result.hits[1].docid);
+ EXPECT_EQUAL(50u, result.hits[2].docid);
+ }
+}
+
+// Renders a boolean as the text "true" or "false".
+const char *as_str(bool flag) {
+    if (flag) {
+        return "true";
+    }
+    return "false";
+}
+
+// Parallel wand over foo/bar/baz/fox for every fast_search/strict combination.
+// The wand term is created with the values 10, 500 and 1.5, which are expected
+// back as scores-to-track, score threshold and threshold boost factor. Only
+// documents 20 (score 600) and 50 (score 1300) are expected as hits.
+TEST("require that attribute parallel wand works") {
+ for (int i = 0; i <= 0x3; ++i) {
+ // bit 0 selects fast_search, bit 1 selects strict
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5);
+ node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1))));
+ node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1))));
+ Result result = do_search(attribute_manager, node, strict);
+ EXPECT_FALSE(result.est_empty);
+ if (fast_search) {
+ EXPECT_EQUAL(8u, result.est_hits);
+ } else {
+ // 'fox' is detected to produce no hits since it has no enum value
+ EXPECT_EQUAL(num_docs * 3, result.est_hits);
+ }
+ if (EXPECT_EQUAL(2u, result.hits.size())) {
+ // The wand parameters are only checked when the dump shows no
+ // MonitoringDumpIterator (do_search() extracts them from the top iterator).
+ if (result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) {
+ EXPECT_EQUAL(10u, result.wand_hits);
+ EXPECT_EQUAL(500, result.wand_initial_threshold);
+ EXPECT_EQUAL(1.5, result.wand_boost_factor);
+ }
+ EXPECT_EQUAL(20u, result.hits[0].docid);
+ EXPECT_EQUAL(600.0, result.hits[0].raw_score);
+ EXPECT_EQUAL(50u, result.hits[1].docid);
+ EXPECT_EQUAL(1300.0, result.hits[1].raw_score);
+ } else {
+ // Report which combination failed before aborting.
+ fprintf(stderr, " (fast_search: %s, strict: %s)\n",
+ as_str(fast_search), as_str(strict));
+ assert(false);
+ }
+ }
+}
+
+// Weighted set term with query weights foo=10, bar=20, baz=30, fox=40 for
+// every fast_search/strict combination. The expected match weight of each hit
+// is the largest query weight among the tokens stored in that document.
+TEST("require that attribute weighted set term works") {
+ for (int i = 0; i <= 0x3; ++i) {
+ // bit 0 selects fast_search, bit 1 selects strict
+ bool fast_search = ((i & 0x1) != 0);
+ bool strict = ((i & 0x2) != 0);
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search);
+ SimpleWeightedSetTerm node(field, 0, Weight(1));
+ node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10))));
+ node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20))));
+ node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(30))));
+ node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40))));
+ Result result = do_search(attribute_manager, node, strict);
+ EXPECT_FALSE(result.est_empty);
+ ASSERT_EQUAL(5u, result.hits.size());
+ // With fast_search (and no MonitoringDumpIterator in the dump) the iterator
+ // dump is printed and must mention AttributeIteratorPack.
+ if (fast_search && result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) {
+ fprintf(stderr, "DUMP: %s\n", result.iterator_dump.c_str());
+ EXPECT_TRUE(result.iterator_dump.find("AttributeIteratorPack") != vespalib::string::npos);
+ }
+ EXPECT_EQUAL(10u, result.hits[0].docid);
+ EXPECT_EQUAL(20, result.hits[0].match_weight);
+ EXPECT_EQUAL(20u, result.hits[1].docid);
+ EXPECT_EQUAL(30, result.hits[1].match_weight);
+ EXPECT_EQUAL(30u, result.hits[2].docid);
+ EXPECT_EQUAL(30, result.hits[2].match_weight);
+ EXPECT_EQUAL(40u, result.hits[3].docid);
+ EXPECT_EQUAL(10, result.hits[3].match_weight);
+ EXPECT_EQUAL(50u, result.hits[4].docid);
+ EXPECT_EQUAL(30, result.hits[4].match_weight);
+ }
+}
+
+// A predicate query against a string attribute must give an empty estimate and no hits.
+TEST("require that predicate query in non-predicate field yields empty.") {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo");
+
+ PredicateQueryTerm::UP term(new PredicateQueryTerm);
+ SimplePredicateQuery node(std::move(term), field, 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ EXPECT_TRUE(result.est_empty);
+ EXPECT_EQUAL(0u, result.hits.size());
+}
+
+// A predicate query against a predicate attribute in which document 2 is
+// indexed as an empty document must produce exactly one hit.
+TEST("require that predicate query in predicate field yields results.") {
+ PredicateAttribute *attr =
+ new PredicateAttribute(
+ field, Config(BasicType::PREDICATE,
+ CollectionType::SINGLE));
+ add_docs(attr, num_docs);
+ attr->getIndex().indexEmptyDocument(2); // matches anything
+ attr->getIndex().commit();
+ // The interval range entry of document 2 is written directly through a const_cast.
+ const_cast<PredicateAttribute::IntervalRange *>(attr->getIntervalRangeVector())[2] = 1u;
+ MyAttributeManager attribute_manager(attr);
+
+ PredicateQueryTerm::UP term(new PredicateQueryTerm);
+ SimplePredicateQuery node(std::move(term), field, 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ EXPECT_FALSE(result.est_empty);
+ EXPECT_EQUAL(1u, result.hits.size());
+}
+
+// Substring "a" occurs in "bar" and "baz" but not in "foo", so the documents
+// holding bar or baz (10, 20, 30 and 50) are expected.
+TEST("require that substring terms work") {
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true);
+ SimpleSubstringTerm node("a", "", 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ ASSERT_EQUAL(4u, result.hits.size());
+ EXPECT_EQUAL(10u, result.hits[0].docid);
+ EXPECT_EQUAL(20u, result.hits[1].docid);
+ EXPECT_EQUAL(30u, result.hits[2].docid);
+ EXPECT_EQUAL(50u, result.hits[3].docid);
+}
+
+// Suffix "oo" only matches "foo", which is stored in documents 20, 40 and 50.
+TEST("require that suffix terms work") {
+ MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true);
+ SimpleSuffixTerm node("oo", "", 0, Weight(1));
+ Result result = do_search(attribute_manager, node, true);
+ ASSERT_EQUAL(3u, result.hits.size());
+ EXPECT_EQUAL(20u, result.hits[0].docid);
+ EXPECT_EQUAL(40u, result.hits[1].docid);
+ EXPECT_EQUAL(50u, result.hits[2].docid);
+}
+
+// Stores 'value' for document 'docid' in an integer, floating point or string
+// attribute and commits. For string attributes the value is encoded as two
+// characters ('a' + value / 27, 'a' + value % 27) and must be below 27*26 + 26.
+// Any other attribute type fails the test.
+void set_attr_value(AttributeVector &attr, uint32_t docid, size_t value) {
+    if (IntegerAttribute *as_int = dynamic_cast<IntegerAttribute *>(&attr)) {
+        as_int->update(docid, value);
+        as_int->commit();
+        return;
+    }
+    if (FloatingPointAttribute *as_float = dynamic_cast<FloatingPointAttribute *>(&attr)) {
+        as_float->update(docid, value);
+        as_float->commit();
+        return;
+    }
+    if (StringAttribute *as_string = dynamic_cast<StringAttribute *>(&attr)) {
+        ASSERT_LESS(value, size_t(27*26 + 26));
+        vespalib::string encoded;
+        encoded.push_back('a' + value / 27);
+        encoded.push_back('a' + value % 27);
+        as_string->update(docid, encoded);
+        as_string->commit();
+        return;
+    }
+    ASSERT_TRUE(false);
+}
+
+// Builds a manager with two single value attributes of num_docs documents each:
+// the attribute named by 'field' holds docid / 5 and the one named by 'other'
+// holds docid / 10, for every docid from 1 up to num_docs - 1.
+// Type and fast-search setting are chosen per attribute by the caller.
+MyAttributeManager make_diversity_setup(BasicType::Type field_type,
+                                        bool field_fast_search,
+                                        BasicType::Type other_type,
+                                        bool other_fast_search)
+{
+    Config field_cfg(field_type, CollectionType::SINGLE);
+    field_cfg.setFastSearch(field_fast_search);
+    AttributeVector::SP field_attr = AttributeFactory::createAttribute(field, field_cfg);
+
+    Config other_cfg(other_type, CollectionType::SINGLE);
+    other_cfg.setFastSearch(other_fast_search);
+    AttributeVector::SP other_attr = AttributeFactory::createAttribute(other, other_cfg);
+
+    add_docs(&*field_attr, num_docs);
+    add_docs(&*other_attr, num_docs);
+    for (size_t docid = 1; docid < num_docs; ++docid) {
+        set_attr_value(*field_attr, docid, docid / 5);
+        set_attr_value(*other_attr, docid, docid / 10);
+    }
+
+    MyAttributeManager manager(field_attr);
+    manager.set_other(other_attr);
+    return manager;
+}
+
+// Runs 'term' as a range term and returns how many hits it produced.
+size_t diversity_hits(IAttributeManager &manager, const vespalib::string &term, bool strict) {
+    SimpleRangeTerm range_node(term, "", 0, Weight(1));
+    const Result outcome = do_search(manager, range_node, strict);
+    return outcome.hits.size();
+}
+
+// Runs 'term' as a range term and returns (first docid, last docid) of the
+// hits, or (0, 0) when there are none. Each hit after the first is also
+// expected to have a larger docid than the one before it.
+std::pair<size_t,size_t> diversity_docid_range(IAttributeManager &manager, const vespalib::string &term, bool strict) {
+    SimpleRangeTerm range_node(term, "", 0, Weight(1));
+    Result outcome = do_search(manager, range_node, strict);
+    std::pair<size_t, size_t> span(0, 0);
+    for (const Result::Hit &hit: outcome.hits) {
+        if (span.first != 0) {
+            EXPECT_GREATER(size_t(hit.docid), span.second);
+        } else {
+            span.first = hit.docid;
+        }
+        span.second = hit.docid;
+    }
+    return span;
+}
+
+// Diversity range searches with 'field' as INT32 or DOUBLE (always fast-search)
+// and 'other' as the diversity attribute in six types, with and without
+// fast-search, in strict and non-strict mode. 'other' holds docid / 10, so it
+// has 100 distinct values over the 999 populated documents.
+TEST("require that diversity range searches work for various types") {
+ for (auto field_type: std::vector<BasicType::Type>({BasicType::INT32, BasicType::DOUBLE})) {
+ for (auto other_type: std::vector<BasicType::Type>({BasicType::INT16, BasicType::INT32, BasicType::INT64,
+ BasicType::FLOAT, BasicType::DOUBLE, BasicType::STRING}))
+ {
+ for (bool other_fast_search: std::vector<bool>({true, false})) {
+ MyAttributeManager manager = make_diversity_setup(field_type, true, other_type, other_fast_search);
+ for (bool strict: std::vector<bool>({true, false})) {
+ // Tag any failure below with the combination being tested.
+ TEST_STATE(vespalib::make_string("field_type: %s, other_type: %s, other_fast_search: %s, strict: %s",
+ BasicType(field_type).asString(), BasicType(other_type).asString(),
+ other_fast_search ? "true" : "false", strict ? "true" : "false").c_str());
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10]", strict));
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10]", strict));
+ EXPECT_EQUAL(100u, diversity_hits(manager, "[;;1000;other;1]", strict));
+ EXPECT_EQUAL(100u, diversity_hits(manager, "[;;-1000;other;1]", strict));
+ EXPECT_EQUAL(300u, diversity_hits(manager, "[;;1000;other;3]", strict));
+ EXPECT_EQUAL(300u, diversity_hits(manager, "[;;-1000;other;3]", strict));
+ EXPECT_EQUAL(10u, diversity_hits(manager, "[;;10;other;3]", strict));
+ EXPECT_EQUAL(10u, diversity_hits(manager, "[;;-10;other;3]", strict));
+ EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3]", strict).first);
+ EXPECT_EQUAL(30u, diversity_docid_range(manager, "[;;10;other;3]", strict).second);
+ EXPECT_EQUAL(965u, diversity_docid_range(manager, "[;;-10;other;3]", strict).first);
+ EXPECT_EQUAL(997u, diversity_docid_range(manager, "[;;-10;other;3]", strict).second);
+ }
+ }
+ }
+ }
+}
+
+// A range restricted to the single field value 2 must still give two hits,
+// in strict and non-strict mode and for both signs of the hit limit.
+TEST("require that diversity also works for a single unique value") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", true));
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", true));
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", false));
+ EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", false));
+}
+
+// Naming an unknown diversity attribute ("bogus") or none at all must give no hits.
+TEST("require that diversity range searches gives empty results for non-existing diversity attributes") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;bogus;10]", true));
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;bogus;10]", true));
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;;10]", true));
+ EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;;10]", true));
+}
+
+// Range terms ending in ";loose": all 999 documents are still found with a
+// large limit, and the ten-hit query is expected to span docids 1 to 16.
+TEST("require that loose diversity gives enough diversity and hits while doing less work") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10;4;loose]", true));
+ EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).first);
+ EXPECT_EQUAL(16u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).second);
+}
+
+// Range terms ending in ";strict": all 999 documents are still found with a
+// large limit, and the ten-hit query is expected to span docids 1 to 23.
+TEST("require that strict diversity gives enough diversity and hits while doing less work, even though more than loose, but more correct than loose") {
+ MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
+ EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10;4;strict]", true));
+ EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).first);
+ EXPECT_EQUAL(23u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).second);
+}
+
+} // namespace
+
+// Test program entry point: runs every TEST defined in this file.
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh
new file mode 100755
index 00000000000..9fcee4b1ebb
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Runs the adapter test twice: once with default logging, and once with a fresh
+# log control file and every log level enabled.
+# Stop at the first failing command so that a failure in the first run is not
+# hidden by the exit status of the second run.
+set -e
+$VALGRIND ./searchlib_attribute_searchable_adapter_test_app
+rm -f ./my_logctl_file
+VESPA_LOG_CONTROL_FILE=./my_logctl_file VESPA_LOG_LEVEL=all $VALGRIND ./searchlib_attribute_searchable_adapter_test_app
diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
new file mode 100644
index 00000000000..bd781a37a5b
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp
@@ -0,0 +1,231 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/attribute/attribute_blueprint_factory.h>
+#include <vespa/searchlib/attribute/attribute_weighted_set_blueprint.h>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/fef/fef.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/queryeval/field_spec.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/blueprint.h>
+#include <vespa/searchlib/queryeval/fake_result.h>
+#include <vespa/searchlib/queryeval/weighted_set_term_search.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <memory>
+#include <string>
+#include <map>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+
+using namespace search;
+using namespace search::query;
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace search::attribute;
+
+namespace {
+
+class FakeAttributeManager : public IAttributeManager
+{
+private:
+    typedef std::map<std::string, AttributeVector::SP> Map;
+    Map _map; // registered attribute vectors, keyed by attribute name
+
+    // Finds the vector registered under 'name'; an empty pointer means unknown.
+    AttributeVector::SP lookup(const std::string &name) const {
+        auto found = _map.find(name);
+        return (found == _map.end()) ? AttributeVector::SP() : found->second;
+    }
+
+public:
+    FakeAttributeManager() : _map() {}
+
+    // Registers 'attr' under its own name, replacing any previous entry.
+    void addAttribute(AttributeVector::SP attr) {
+        _map[attr->getName()] = attr;
+    }
+
+    virtual AttributeGuard::UP getAttribute(const vespalib::string &name) const {
+        AttributeGuard::UP guard(new AttributeGuard(lookup(name)));
+        return guard;
+    }
+
+    virtual AttributeGuard::UP getAttributeStableEnum(const vespalib::string &name) const {
+        AttributeGuard::UP guard(new AttributeEnumGuard(lookup(name)));
+        return guard;
+    }
+
+    virtual void getAttributeList(std::vector<AttributeGuard> &list) const {
+        for (const auto &entry : _map) {
+            list.push_back(entry.second);
+        }
+    }
+
+    virtual IAttributeContext::UP createContext() const {
+        return IAttributeContext::UP(new AttributeContext(*this));
+    }
+};
+
+void
+setupAttributeManager(FakeAttributeManager &manager)
+{
+    AttributeVector::DocId docId;
+    {
+        // Single-value int64 attribute "integer": document N (1..9) holds the value N.
+        AttributeVector::SP vector = AttributeFactory::createAttribute("integer", Config(BasicType("int64")));
+        IntegerAttribute *integers = static_cast<IntegerAttribute*>(vector.get());
+        integers->addDoc(docId);
+        assert(0u == docId);
+        for (size_t i = 1; i < 10; ++i) {
+            integers->addDoc(docId);
+            assert(i == docId);
+            integers->update(docId, i);
+            integers->commit();
+        }
+        manager.addAttribute(vector);
+    }
+    {
+        // Single-value string attribute "string": document N (1..9) holds the digit N as text.
+        AttributeVector::SP vector = AttributeFactory::createAttribute("string", Config(BasicType("string")));
+        StringAttribute *strings = static_cast<StringAttribute*>(vector.get());
+        strings->addDoc(docId);
+        assert(0u == docId);
+        for (size_t i = 1; i < 10; ++i) {
+            strings->addDoc(docId);
+            assert(i == docId);
+            strings->update(i, std::string(1, '1' + i - 1).c_str());
+            strings->commit();
+        }
+        manager.addAttribute(vector);
+    }
+    {
+        // Array int64 attribute "multi": document N (1..9) holds the values N and N + 10.
+        AttributeVector::SP vector = AttributeFactory::createAttribute("multi", Config(BasicType("int64"), search::attribute::CollectionType("array")));
+        IntegerAttribute *integers = static_cast<IntegerAttribute*>(vector.get());
+        integers->addDoc(docId);
+        assert(0u == docId);
+        for (size_t i = 1; i < 10; ++i) {
+            integers->addDoc(docId);
+            assert(i == docId);
+            integers->append(docId, i, 0);
+            integers->append(docId, i + 10, 1);
+            integers->commit();
+        }
+        manager.addAttribute(vector);
+    }
+}
+
+struct WS {
+    static const uint32_t fieldId = 42;
+    IAttributeManager & attribute_manager;
+    MatchDataLayout layout;
+    TermFieldHandle handle;
+    std::vector<std::pair<std::string, uint32_t> > tokens; // (token, weight) pairs in insertion order
+
+    WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() {
+        MatchData::UP tmp = layout.createMatchData(); // only used to verify the handle-to-field mapping
+        ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId);
+    }
+
+    WS &add(const std::string &token, uint32_t weight) {
+        tokens.emplace_back(token, weight);
+        return *this;
+    }
+
+    Node::UP createNode() const {
+        SimpleWeightedSetTerm *root = new SimpleWeightedSetTerm("view", 0, Weight(0));
+        for (const auto &entry : tokens) {
+            root->append(Node::UP(new SimpleStringTerm(entry.first, "view", 0, Weight(entry.second))));
+        }
+        return Node::UP(root);
+    }
+
+    bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const {
+        // True when the blueprint for 'field' creates a WeightedSetTermSearch iterator.
+        AttributeContext context(attribute_manager);
+        FakeRequestContext requestContext(&context);
+        MatchData::UP matchData = layout.createMatchData();
+        Node::UP query = createNode();
+        FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+        queryeval::Blueprint::UP blueprint = searchable.createBlueprint(requestContext, fields, *query);
+        blueprint->fetchPostings(strict);
+        return (dynamic_cast<WeightedSetTermSearch*>(blueprint->createSearch(*matchData, strict).get()) != nullptr);
+    }
+
+    FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { // probes docs 1..9 and records each unpacked position
+        AttributeContext context(attribute_manager);
+        FakeRequestContext requestContext(&context);
+        MatchData::UP matchData = layout.createMatchData();
+        Node::UP query = createNode();
+        FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle));
+        queryeval::Blueprint::UP blueprint = searchable.createBlueprint(requestContext, fields, *query);
+        blueprint->fetchPostings(strict);
+        SearchIterator::UP iterator = blueprint->createSearch(*matchData, strict);
+        FakeResult result;
+        iterator->initFullRange();
+        for (uint32_t docId = 1; docId < 10; ++docId) {
+            if (!iterator->seek(docId)) {
+                continue;
+            }
+            iterator->unpack(docId);
+            result.doc(docId);
+            TermFieldMatchData &data = *matchData->resolveTermField(handle);
+            for (FieldPositionsIterator itr = data.getIterator(); itr.valid(); itr.next()) {
+                result.elem(itr.getElementId());
+                result.weight(itr.getElementWeight());
+                result.pos(itr.getPosition());
+            }
+        }
+        return result;
+    }
+};
+
+} // namespace <unnamed>
+
+class Test : public vespalib::TestApp // test application for the weighted set blueprint
+{
+public:
+ int Main(); // defined right after the class
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("attribute_weighted_set_test");
+ {
+ FakeAttributeManager manager;
+ setupAttributeManager(manager); // registers the "integer", "string" and "multi" attributes
+ AttributeBlueprintFactory adapter;
+
+ FakeResult expect = FakeResult() // docs 3, 5 and 7 are expected, each carrying the weight of its token
+ .doc(3).elem(0).weight(30).pos(0)
+ .doc(5).elem(0).weight(50).pos(0)
+ .doc(7).elem(0).weight(70).pos(0);
+ WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); // tokens are added in a different order than the expected docs
+
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); // "integer" and "string": generic search expected only when strict
+ EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false));
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true));
+ EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false));
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); // "multi": generic search expected in both modes
+ EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false));
+
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); // every field/strictness combination must give the same result
+ EXPECT_EQUAL(expect, ws.search(adapter, "integer", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "string", false));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", true));
+ EXPECT_EQUAL(expect, ws.search(adapter, "multi", false));
+ }
+ TEST_DONE();
+}
+
+TEST_APPHOOK(Test); // hooks the Test application into the test kit's entry point
diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp
new file mode 100644
index 00000000000..ed851d872e1
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp
@@ -0,0 +1,240 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("attributeblueprint_test");
+
+#include <vespa/searchcommon/attribute/iattributecontext.h>
+#include <vespa/searchlib/attribute/attribute_blueprint_factory.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributecontext.h>
+#include <vespa/searchlib/attribute/attributevector.h>
+#include <vespa/searchlib/attribute/extendableattributes.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.hpp>
+#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp>
+#include <vespa/searchlib/attribute/iattributemanager.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/query/tree/location.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/weight.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/fake_requestcontext.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <memory>
+#include <string>
+
+using search::AttributeEnumGuard;
+using search::AttributeGuard;
+using search::AttributeVector;
+using search::IAttributeManager;
+using search::SingleStringExtAttribute;
+using search::attribute::IAttributeContext;
+using search::fef::MatchData;
+using search::fef::TermFieldMatchData;
+using search::query::Location;
+using search::query::Node;
+using search::query::Point;
+using search::query::SimpleLocationTerm;
+using search::query::SimplePrefixTerm;
+using search::query::SimpleStringTerm;
+using search::query::Weight;
+using search::queryeval::Blueprint;
+using search::queryeval::FieldSpec;
+using search::queryeval::SearchIterator;
+using search::queryeval::FakeRequestContext;
+using std::string;
+using std::vector;
+using namespace search::attribute;
+using namespace search;
+
+namespace {
+
+class Test : public vespalib::TestApp {
+ void requireThatIteratorsCanBeCreated(); // the five test cases below are run from Main()
+ void requireThatRangeTermsWorkToo();
+ void requireThatPrefixTermsWork();
+ void requireThatLocationTermsWork();
+ void requireThatFastSearchLocationTermsWork();
+
+ bool search(const string &term, IAttributeManager &attribute_manager); // wraps 'term' in a SimpleStringTerm and calls the Node overload
+ bool search(const Node &term, IAttributeManager &attribute_manager); // builds a blueprint for the node and returns whether doc 2 matches
+
+public:
+ int Main();
+};
+
+int
+Test::Main()
+{
+ TEST_INIT("attributeblueprint_test");
+
+ TEST_DO(requireThatIteratorsCanBeCreated()); // each test case is run once, in this order
+ TEST_DO(requireThatRangeTermsWorkToo());
+ TEST_DO(requireThatPrefixTermsWork());
+ TEST_DO(requireThatLocationTermsWork());
+ TEST_DO(requireThatFastSearchLocationTermsWork());
+
+ TEST_DONE();
+}
+
+const string field = "field"; // attribute name and field name used by every test in this file
+const int32_t weight = 1; // weight passed to add() by the generic AttributeVectorTypeFinder
+
+class MyAttributeManager : public IAttributeManager {
+    AttributeVector::SP _attribute_vector; // the single vector handed out for every attribute name
+    AttributeVector::DocId _docid;         // not read by this class; zeroed so copying the manager never reads an indeterminate value
+
+public:
+    MyAttributeManager(AttributeVector *attr)
+        : _attribute_vector(attr), _docid(0) {} // 'attr' is wrapped in AttributeVector::SP
+
+    virtual AttributeGuard::UP getAttribute(const string &) const { // the name argument is ignored
+        return AttributeGuard::UP(new AttributeGuard(_attribute_vector));
+    }
+
+    virtual AttributeGuard::UP
+    getAttributeStableEnum(const string &) const { // the name argument is ignored
+        return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector));
+    }
+
+    virtual void getAttributeList(vector<AttributeGuard> &) const {
+        assert(!"Not implemented"); // must not be called by these tests
+    }
+    virtual IAttributeContext::UP createContext() const {
+        assert(!"Not implemented"); // must not be called by these tests
+        return IAttributeContext::UP();
+    }
+};
+
+bool Test::search(const string &term, IAttributeManager &attribute_manager) {
+    // Turns the raw term text into a string term on "field" and delegates to the Node overload.
+    TEST_STATE(term.c_str());
+    SimpleStringTerm node(term, "field", 0, Weight(0));
+    return search(node, attribute_manager);
+}
+
+bool Test::search(const Node &node, IAttributeManager &attribute_manager) {
+    AttributeContext context(attribute_manager);
+    FakeRequestContext requestContext(&context);
+    MatchData::UP matchData(MatchData::makeTestInstance(0, 1, 1));
+    AttributeBlueprintFactory factory;
+    Blueprint::UP result = factory.createBlueprint(requestContext, FieldSpec(field, 0, 0), node);
+    ASSERT_TRUE(result.get());
+    EXPECT_TRUE(!result->getState().estimate().empty);
+    EXPECT_EQUAL(3u, result->getState().estimate().estHits); // the estimate must equal 3 for every term
+    result->fetchPostings(true);
+    SearchIterator::UP iterator = result->createSearch(*matchData, true);
+    ASSERT_TRUE((bool)iterator);
+    iterator->initFullRange();
+    EXPECT_TRUE(!iterator->seek(1)); // doc 1 must never match
+    return iterator->seek(2);        // the caller decides whether doc 2 should match
+}
+
+template <typename T> struct AttributeVectorTypeFinder { // generic case: single string ext attribute
+    using Type = SingleStringExtAttribute;
+    static void add(Type & a, const T & v) { a.add(v, weight); }
+};
+template <> struct AttributeVectorTypeFinder<int64_t> { // int64_t case: single-value numeric attribute
+    using Type = search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> >;
+    static void add(Type & a, int64_t v) { const auto lastDoc = a.getNumDocs()-1; a.set(lastDoc, v); a.commit(); }
+};
+
+struct FastSearchLongAttribute { // same role as AttributeVectorTypeFinder, for the posting-list backed int64 attribute
+    using Type = search::SingleValueNumericPostingAttribute< search::EnumAttribute<search::IntegerAttributeTemplate<int64_t> > >;
+    static void add(Type & a, int64_t v) { const auto lastDoc = a.getNumDocs()-1; a.update(lastDoc, v); a.commit(); }
+};
+
+template <typename AT, typename T>
+MyAttributeManager fill(typename AT::Type * attr, T value) {
+    // Adds documents 0..2, stores 'value' through AT::add() and wraps the vector in a manager.
+    AttributeVector::DocId docid;
+    for (int added = 0; added < 3; ++added) {
+        attr->addDoc(docid);
+    }
+    assert(2u == docid);
+    AT::add(*attr, value);
+    return MyAttributeManager(attr);
+}
+
+template <typename T>
+MyAttributeManager makeAttributeManager(T value) {
+    // AttributeVectorTypeFinder<T> selects the attribute class; the new
+    // vector is named after 'field' and handed to fill().
+    using Finder = AttributeVectorTypeFinder<T>;
+    return fill<Finder, T>(new typename Finder::Type(field), value);
+}
+
+MyAttributeManager makeFastSearchLongAttribute(int64_t value) {
+    // Single-value int64 attribute with fast-search enabled, filled through fill().
+    Config cfg(BasicType::fromType(int64_t()), CollectionType::SINGLE);
+    cfg.setFastSearch(true);
+    FastSearchLongAttribute::Type *attr = new FastSearchLongAttribute::Type(field, cfg);
+    return fill<FastSearchLongAttribute, int64_t>(attr, value);
+}
+
+void Test::requireThatIteratorsCanBeCreated() {
+ MyAttributeManager attribute_manager = makeAttributeManager("foo"); // string literal argument, so the generic (string) finder is used
+
+ EXPECT_TRUE(search("foo", attribute_manager)); // the exact stored term must hit doc 2
+}
+
+void Test::requireThatRangeTermsWorkToo() {
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); // the int64 attribute stores 42
+
+ EXPECT_TRUE(search("[23;46]", attribute_manager)); // 42 lies inside the range
+ EXPECT_TRUE(!search("[10;23]", attribute_manager)); // 42 lies above the range
+ EXPECT_TRUE(!search(">43", attribute_manager)); // 42 is not greater than 43
+ EXPECT_TRUE(search("[10;]", attribute_manager)); // no upper bound given
+}
+
+void Test::requireThatPrefixTermsWork()
+{
+ MyAttributeManager attribute_manager = makeAttributeManager("foo"); // string attribute storing "foo"
+
+ SimplePrefixTerm node("fo", "field", 0, Weight(0)); // "fo" is a prefix of the stored "foo"
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+void Test::requireThatLocationTermsWork() {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0), // same point as the stored position: expected to match
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), // (100, 100) with the same second argument 3: expected not to match
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), // (13, 13) with second argument 4: expected not to match
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), // (10, 13) with second argument 3: expected to match
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+}
+
+void Test::requireThatFastSearchLocationTermsWork() {
+ // 0xcc is z-curve for (10, 10).
+ MyAttributeManager attribute_manager = makeFastSearchLongAttribute(int64_t(0xcc));
+
+ SimpleLocationTerm node(Location(Point(10, 10), 3, 0),
+ field, 0, Weight(0));
+#if 0 // NOTE(review): every check below is compiled out, so this test only builds the attribute and the node - confirm why it is disabled
+ EXPECT_TRUE(search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(100, 100), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(13, 13), 4, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(!search(node, attribute_manager));
+ node = SimpleLocationTerm(Location(Point(10, 13), 3, 0),
+ field, 0, Weight(0));
+ EXPECT_TRUE(search(node, attribute_manager));
+#endif
+}
+
+} // namespace
+
+TEST_APPHOOK(Test); // hooks the Test application into the test kit's entry point
diff --git a/searchlib/src/tests/attribute/searchcontext/.gitignore b/searchlib/src/tests/attribute/searchcontext/.gitignore
new file mode 100644
index 00000000000..61dc5e8fc8e
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+searchcontext_test
+searchlib_searchcontext_test_app
diff --git a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt
new file mode 100644
index 00000000000..24652373a00
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_searchcontext_test_app
+ SOURCES
+ searchcontext.cpp
+ DEPENDS
+ searchlib
+ searchlib_test
+)
+vespa_add_test(NAME searchlib_searchcontext_test_app COMMAND sh searchcontext_test.sh)
diff --git a/searchlib/src/tests/attribute/searchcontext/DESC b/searchlib/src/tests/attribute/searchcontext/DESC
new file mode 100644
index 00000000000..8ce9805dbb0
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/DESC
@@ -0,0 +1 @@
+Unit test for AttributeVector::SearchContext using all attribute vector implementations.
diff --git a/searchlib/src/tests/attribute/searchcontext/FILES b/searchlib/src/tests/attribute/searchcontext/FILES
new file mode 100644
index 00000000000..cebd66e863f
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/FILES
@@ -0,0 +1 @@
+searchcontext.cpp
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp
new file mode 100644
index 00000000000..6c69e79a93b
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp
@@ -0,0 +1,1900 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributeiterators.h>
+#include <vespa/searchlib/attribute/flagattribute.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/fef/matchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/fef/termfieldmatchdataposition.h>
+#include <vespa/searchlib/queryeval/searchiterator.h>
+#include <vespa/searchlib/queryeval/emptysearch.h>
+#include <vespa/searchlib/queryeval/hitcollector.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/util/compress.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/test/initrange.h>
+#include <iterator>
+#include <set>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+LOG_SETUP("searchcontext_test");
+
+namespace search {
+
+namespace
+{
+
+bool
+isUnsignedSmallIntAttribute(const AttributeVector &a)
+{   // Reports whether the vector's basic type is UINT1, UINT2 or UINT4.
+    switch (a.getBasicType()) {
+    case attribute::BasicType::UINT1:
+    case attribute::BasicType::UINT2:
+    case attribute::BasicType::UINT4:
+        return true;
+    default:
+        break;
+    }
+    return false;
+}
+
+}
+
+typedef AttributeVector::SP AttributePtr;
+typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr;
+typedef AttributeVector::SearchContext SearchContext;
+using attribute::Config;
+using attribute::BasicType;
+using attribute::CollectionType;
+typedef AttributeVector::largeint_t largeint_t;
+typedef queryeval::SearchIterator::UP SearchBasePtr;
+typedef std::unique_ptr<ResultSet> ResultSetPtr;
+
+using queryeval::HitCollector;
+using queryeval::SearchIterator;
+using fef::MatchData;
+using fef::TermFieldMatchData;
+using fef::TermFieldMatchDataArray;
+using fef::TermFieldMatchDataPosition;
+
+// Ordered set of document ids with a chainable insert helper.
+class DocSet : public std::set<uint32_t>
+{
+public:
+    DocSet() : std::set<uint32_t>() {}
+    // Builds the set from the pointer range [b, e).
+    DocSet(const uint32_t *b, const uint32_t *e) : std::set<uint32_t>(b, e) {}
+    // Inserts v (std::set drops duplicates) and returns *this so calls can be chained.
+    DocSet & put(const uint32_t &v) { this->insert(v); return *this; }
+};
+
+template <typename V, typename T>
+class PostingList
+{
+private:
+    V * _vec;      // attribute the list refers to; only the address is stored
+    T _value;      // value whose matching documents are tracked
+    DocSet _hits;  // documents recorded as hits for _value
+
+public:
+    PostingList(V & vec, T value) : _vec(&vec), _value(value), _hits() {}
+    V & getAttribute() { return *_vec; }
+    const V & getAttribute() const { return *_vec; }
+    const T & getValue() const { return _value; }
+    DocSet & getHits() { return _hits; }
+    const DocSet & getHits() const { return _hits; }
+    uint32_t getHitCount() const { return static_cast<uint32_t>(_hits.size()); }
+};
+
+// Document id range; fillPostingList() iterates it as the half-open span [start, end).
+class DocRange {
+public:
+    uint32_t start;
+    uint32_t end;
+    DocRange(uint32_t start_, uint32_t end_) : start(start_), end(end_) {}
+};
+
+class SearchContextTest : public vespalib::TestApp
+{
+private:
+ typedef std::map<vespalib::string, Config> ConfigMap;
+ // Map of all config objects
+ ConfigMap _integerCfg;
+ ConfigMap _floatCfg;
+ ConfigMap _stringCfg;
+
+
+ // helper functions
+ void
+ addReservedDoc(AttributeVector &ptr);
+
+ void addDocs(AttributeVector & ptr, uint32_t numDocs); // adds the reserved doc plus numDocs documents
+ template <typename T>
+ void fillVector(std::vector<T> & values, size_t numValues);
+ template <typename V, typename T>
+ void fillAttribute(V & vec, const std::vector<T> & values);
+ template <typename V, typename T>
+ void resetAttribute(V & vec, const T & value);
+ template <typename V, typename T>
+ void fillPostingList(PostingList<V, T> & pl, const DocRange & range); // writes pl's value into the docs of 'range'
+ template <typename V, typename T>
+ void fillPostingList(PostingList<V, T> & pl); // rebuilds pl's hits by scanning the attribute
+ void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term,
+ QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD);
+ template <typename V, typename T>
+ SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD);
+ ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); // collects the hits of an iterator, seeking from doc 1
+ template <typename V, typename T>
+ ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD);
+ template <typename V>
+ void performSearch(const V & vec, const vespalib::string & term,
+ const DocSet & expected, QueryTermSimple::SearchTerm termType);
+ void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector);
+
+ template<typename T, typename A>
+ void testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs);
+ void testInitRange();
+ // test search functionality
+ template <typename V, typename T>
+ void testFind(const PostingList<V, T> & first);
+
+ template <typename V, typename T>
+ void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values);
+ template<typename T, typename A>
+ void testSearch(const ConfigMap & cfgs);
+ template <typename V, typename T>
+ void testMultiValueSearchHelper(V & vec, const std::vector<T> & values);
+ template <typename V, typename T>
+ void testMultiValueSearch(V & first, V & second, const std::vector<T> & values);
+ void testSearch();
+
+ class IteratorTester { // predicate over the concrete type of a search iterator
+ public:
+ virtual bool matches(const SearchIterator & base) const = 0;
+ virtual ~IteratorTester() { }
+ };
+ class AttributeIteratorTester : public IteratorTester // accepts AttributeIterator
+ {
+ public:
+ virtual bool matches(const SearchIterator & base) const {
+ return dynamic_cast<const AttributeIterator *>(&base) != NULL;
+ }
+ };
+ class FlagAttributeIteratorTester : public IteratorTester // accepts FlagAttributeIterator, BitVectorIterator or EmptySearch
+ {
+ public:
+ virtual bool matches(const SearchIterator & base) const {
+ return (dynamic_cast<const FlagAttributeIterator *>(&base) != NULL) ||
+ (dynamic_cast<const BitVectorIterator *>(&base) != NULL) ||
+ (dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL);
+ }
+ };
+ class AttributePostingListIteratorTester : public IteratorTester // accepts AttributePostingListIterator or EmptySearch
+ {
+ public:
+ virtual bool matches(const SearchIterator & base) const {
+ return dynamic_cast<const AttributePostingListIterator *>(&base) != NULL ||
+ dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL;
+
+ }
+ };
+
+
+ // test search iterator functionality
+ void testStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester);
+ void testNonStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester);
+ void fillForSearchIteratorTest(IntegerAttribute * ia);
+ void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia);
+ void testSearchIterator();
+
+
+ // test search iterator unpacking
+ void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra);
+ void testSearchIteratorUnpacking(const AttributePtr & ptr,
+ SearchContext & sc,
+ bool extra,
+ bool strict);
+ void testSearchIteratorUnpacking();
+
+
+ // test range search
+ template <typename VectorType>
+ void performRangeSearch(const VectorType & vec, const vespalib::string & term,
+ const DocSet & expected);
+ template <typename VectorType, typename ValueType>
+ void testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values);
+ void testRangeSearch();
+ void testRangeSearchLimited();
+
+
+ // test case insensitive search
+ void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected);
+ void testCaseInsensitiveSearch(const AttributePtr & ptr);
+ void testCaseInsensitiveSearch();
+ void testRegexSearch(const AttributePtr & ptr);
+ void testRegexSearch();
+
+
+ // test prefix search
+ void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, QueryTermSimple::SearchTerm termType);
+ void testPrefixSearch(const AttributePtr & ptr);
+ void testPrefixSearch();
+
+ // test that search is working after clear doc
+ template <typename VectorType, typename ValueType>
+ void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg,
+ ValueType startValue, const vespalib::string & term);
+ void requireThatSearchIsWorkingAfterClearDoc();
+
+ // test that search is working after load and clear doc
+ template <typename VectorType, typename ValueType>
+ void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg,
+ ValueType startValue, ValueType defaultValue,
+ const vespalib::string & term);
+ void requireThatSearchIsWorkingAfterLoadAndClearDoc();
+
+ template <typename VectorType, typename ValueType>
+ void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value1,
+ ValueType value2);
+ void requireThatSearchIsWorkingAfterUpdates();
+
+ void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded();
+
+ template <typename VectorType, typename ValueType>
+ void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value);
+ void requireThatInvalidSearchTermGivesZeroHits();
+
+ void requireThatFlagAttributeHandlesTheByteRange();
+
+ void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name,
+ const Config &cfg,
+ int64_t maxValue);
+ void requireThatOutOfBoundsSearchTermGivesZeroHits();
+
+ // init maps with config objects
+ void initIntegerConfig();
+ void initFloatConfig();
+ void initStringConfig();
+
+public:
+ SearchContextTest();
+ int Main();
+};
+
+
+void
+SearchContextTest::addReservedDoc(AttributeVector &ptr)
+{
+ ptr.addReservedDoc(); // plain forwarder to the attribute vector
+}
+
+
+void
+SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs)
+{
+ uint32_t docId; // written by addDoc() before it is read
+ addReservedDoc(ptr); // the reserved doc takes one slot, hence the numDocs + 1 check below
+ for (uint32_t i = 1; i <= numDocs; ++i) {
+ ptr.addDoc(docId);
+ EXPECT_EQUAL(docId, i); // added documents must get the ids 1..numDocs in order
+ }
+ ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1);
+}
+
+template <typename T>
+void
+SearchContextTest::fillVector(std::vector<T> & values, size_t numValues)
+{
+    values.clear(); // the result is exactly 1, 2, ..., numValues converted to T
+    values.reserve(numValues);
+    for (size_t next = 0; next < numValues; ) {
+        values.push_back(static_cast<T>(++next));
+    }
+}
+
+template <>
+void
+SearchContextTest::fillVector(std::vector<vespalib::string> & values, size_t numValues)
+{
+ values.clear();
+ values.reserve(numValues);
+ for (size_t i = 0; i < numValues; ++i) { // unlike the generic version, numbering starts at 0
+ vespalib::asciistream ss;
+ ss << "string" << (i < 10 ? "0" : "") << i; // "string00".."string09", then "string10", ...
+ values.push_back(ss.str());
+ }
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::fillAttribute(V & vec, const std::vector<T> & values)
+{
+ for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { // doc 0 is left untouched
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ vec.clearDoc(doc);
+ uint32_t valueCount = doc % (values.size() + 1); // each doc gets the first (doc mod (size + 1)) values, so the count cycles 0..size
+ for (uint32_t i = 0; i < valueCount; ++i) {
+ // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl;
+ EXPECT_TRUE(vec.append(doc, values[i], 1));
+ }
+ }
+ vec.commit(true);
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::resetAttribute(V & vec, const T & value)
+{
+ for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { // every doc except doc 0 is updated to 'value'
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, value));
+ }
+ vec.commit(true); // one commit after all updates
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::fillPostingList(PostingList<V, T> & pl, const DocRange & range)
+{
+ pl.getHits().clear(); // the hit set is rebuilt from scratch
+ for (uint32_t doc = range.start; doc < range.end; ++doc) { // range.end itself is excluded
+ ASSERT_TRUE(doc < pl.getAttribute().getNumDocs());
+ EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue()));
+ pl.getHits().insert(doc); // recorded whether or not the update reported success
+ }
+ pl.getAttribute().commit(true);
+}
+
+template <typename V, typename T>
+void
+SearchContextTest::fillPostingList(PostingList<V, T> & pl)
+{
+    // Rebuilds pl's hit set: every document from id 1 on that holds pl's value becomes a hit.
+    AttributeVector & vec = dynamic_cast<AttributeVector &>(pl.getAttribute());
+    pl.getHits().clear();
+    uint32_t sz = vec.getMaxValueCount();
+    // The scratch buffer is owned by std::vector, so it is released on every exit path.
+    std::vector<T> buf(sz);
+    for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) {
+        uint32_t valueCount = vec.get(doc, buf.data(), sz);
+        EXPECT_TRUE(valueCount <= sz);
+        for (uint32_t i = 0; i < valueCount && i < sz; ++i) { // never read past the buffer, even if the check above failed
+            if (buf[i] == pl.getValue()) {
+                pl.getHits().insert(doc);
+                break; // one matching value is enough to record the document
+            }
+        }
+    }
+}
+
+void
+SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, QueryTermSimple::SearchTerm termType)
+{
+    // Layout: item type byte, compressed index length + index, compressed term length + term.
+    const uint32_t indexLen = index.size();
+    const uint32_t termLen = term.size();
+    // Reserve 4 bytes per length field up front; the buffer is trimmed to the bytes written at the end.
+    buffer.resize(1 + 2 * 4 + indexLen + termLen);
+    switch (termType) {
+    case QueryTermSimple::PREFIXTERM: buffer[0] = ParseItem::ITEM_PREFIXTERM; break;
+    case QueryTermSimple::REGEXP: buffer[0] = ParseItem::ITEM_REGEXP; break;
+    default:
+        buffer[0] = ParseItem::ITEM_TERM; break;
+    }
+    uint32_t p = 1;
+    p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]);
+    memcpy(&buffer[p], index.c_str(), indexLen);
+    p += indexLen;
+    p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]);
+    memcpy(&buffer[p], term.c_str(), termLen);
+    p += termLen;
+    buffer.resize(p);
+}
+
+template <typename V, typename T>
+SearchContextPtr
+SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType)
+{
+    // Render the term as text and serialize it into a query packet addressed to vec's name.
+    vespalib::asciistream ss;
+    ss << term;
+    std::vector<char> query;
+    buildTermQuery(query, vec.getName(), ss.str(), termType);
+    const AttributeVector & attribute = dynamic_cast<const AttributeVector &>(vec);
+    vespalib::stringref packet(&query[0], query.size());
+    return attribute.getSearch(packet, AttributeVector::SearchContext::Params());
+}
+
+/**
+ * Drains a search iterator into a result set.
+ *
+ * @param sb      iterator to run; it is initialized for the full range here
+ * @param numDocs sizing passed to the HitCollector
+ * @return result set with one hit (rank 0.0) per document the iterator stopped at
+ */
+ResultSetPtr
+SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs)
+{
+    HitCollector collector(numDocs, numDocs, 0);
+    sb.initFullRange();
+    // The top-level iterator is assumed to be strict and positioned at the start,
+    // so each seek lands on the next hit or at the end.
+    sb.seek(1u);
+    while (!sb.isAtEnd()) {
+        collector.addHit(sb.getDocId(), 0.0);
+        sb.seek(sb.getDocId() + 1);
+    }
+    return collector.getResultSet();
+}
+
+/**
+ * Searches 'vec' for 'term' with a strict iterator and returns all hits.
+ *
+ * @param vec      attribute to search
+ * @param term     term to search for
+ * @param termType kind of term query to build
+ * @return result set collected from the iterator
+ */
+template <typename V, typename T>
+ResultSetPtr
+SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType)
+{
+    TermFieldMatchData matchData;
+    SearchContextPtr context = getSearch(vec, term, termType);
+    context->fetchPostings(true);
+    SearchBasePtr iterator = context->createIterator(&matchData, true);
+    return performSearch(*iterator, vec.getNumDocs());
+}
+
+/**
+ * Searches 'vec' for 'term' using a strict iterator and checks that the hits
+ * are exactly the documents in 'expected'.
+ *
+ * @param vec      attribute to search
+ * @param term     term text to search for
+ * @param expected documents that must be returned
+ * @param termType kind of term query to build
+ */
+template <typename V>
+void
+SearchContextTest::performSearch(const V & vec, const vespalib::string & term,
+                                 const DocSet & expected, QueryTermSimple::SearchTerm termType)
+{
+#if 0
+    // Debug trace; reports termType (the former 'prefix' flag no longer exists).
+    std::cout << "performSearch[" << term << "]: {";
+    std::copy(expected.begin(), expected.end(), std::ostream_iterator<uint32_t>(std::cout, ", "));
+    std::cout << "}, termType(" << static_cast<int>(termType) << ")" << std::endl;
+#endif
+    { // strict search iterator
+        ResultSetPtr rs = performSearch(vec, term, termType);
+        checkResultSet(*rs, expected, false);
+    }
+}
+
+/**
+ * Checks that a result set contains exactly the expected documents.
+ *
+ * The hit count is always compared. When 'expected' is non-empty the content
+ * is compared too: against the bit overflow vector when bitVector is true,
+ * otherwise position by position against the ranked-hit array.
+ *
+ * @param rs        result set to inspect
+ * @param expected  documents that must be present
+ * @param bitVector true to check rs.getBitOverflow(), false to check rs.getArray()
+ */
+void
+SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector)
+{
+    EXPECT_EQUAL(rs.getNumHits(), expected.size());
+    if (bitVector) {
+        const BitVector * vec = rs.getBitOverflow();
+        if (expected.size() != 0) {
+            ASSERT_TRUE(vec != NULL);
+            for (const auto & expect : expected) {
+                EXPECT_TRUE(vec->testBit(expect));
+            }
+        }
+    } else {
+        const RankedHit * array = rs.getArray();
+        if (expected.size() != 0) {
+            ASSERT_TRUE(array != NULL);
+            if (rs.getNumHits() != expected.size()) {
+                // The count mismatch is already reported above; comparing element by
+                // element could index past the hits that are actually present.
+                return;
+            }
+            uint32_t i = 0;
+            for (DocSet::const_iterator iter = expected.begin();
+                 iter != expected.end(); ++iter, ++i)
+            {
+                EXPECT_TRUE(array[i]._docId == *iter);
+            }
+        }
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Test search functionality
+//-----------------------------------------------------------------------------
+/**
+ * Searches the posting list's attribute for its value with a strict iterator
+ * and checks that the hits equal the posting list's recorded hit set.
+ *
+ * @param pl posting list holding the attribute, the value and the expected hits
+ */
+template <typename V, typename T>
+void
+SearchContextTest::testFind(const PostingList<V, T> & pl)
+{
+    SearchContextPtr context = getSearch(pl.getAttribute(), pl.getValue());
+    context->fetchPostings(true);
+    TermFieldMatchData matchData;
+    SearchBasePtr iterator = context->createIterator(&matchData, true);
+    ResultSetPtr hits = performSearch(*iterator, pl.getAttribute().getNumDocs());
+    checkResultSet(*hits, pl.getHits(), false);
+}
+
+/**
+ * Adds numDocs documents to 'attribute', gives every value in 'values' its own
+ * DocRange of hitCount document ids, and checks that a search for each value
+ * returns the hits recorded for it.
+ *
+ * @param attribute attribute under test
+ * @param numDocs   number of documents to add
+ * @param values    unique values to distribute over the documents
+ */
+template <typename V, typename T>
+void
+SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values)
+{
+    LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values",
+        attribute.getName().c_str(), numDocs, static_cast<unsigned long>(values.size()));
+
+    addDocs(attribute, numDocs);
+
+    // All documents except one must divide evenly between the values.
+    ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0);
+    const uint32_t hitCount = attribute.getNumDocs() / values.size();
+
+    std::vector<PostingList<V, T> > lists;
+    uint32_t rangeBegin = 1;
+    for (const T & value : values) {
+        const uint32_t rangeEnd = rangeBegin + hitCount;
+        lists.push_back(PostingList<V, T>(attribute, value));
+        fillPostingList(lists.back(), DocRange(rangeBegin, rangeEnd));
+        rangeBegin = rangeEnd;
+    }
+
+    for (size_t i = 0; i < lists.size(); ++i) {
+        testFind(lists[i]);
+    }
+}
+
+/**
+ * For each value, derives the expected hits from the current content of 'vec'
+ * and checks that searching for the value returns exactly those hits.
+ *
+ * @param vec    attribute to scan and search
+ * @param values values to look up
+ */
+template <typename V, typename T>
+void
+SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & values)
+{
+    std::vector<PostingList<V, T> > lists;
+
+    // Build one posting list per value from what the attribute currently holds.
+    for (size_t i = 0; i < values.size(); ++i) {
+        lists.push_back(PostingList<V, T>(vec, values[i]));
+        fillPostingList(lists.back());
+    }
+
+    // Every value must be found in exactly the documents recorded above.
+    for (size_t i = 0; i < lists.size(); ++i) {
+        testFind(lists[i]);
+    }
+}
+
+/**
+ * Exercises search on a multi-value attribute, both in memory and after a
+ * save/load round trip into 'second'.
+ *
+ * Runs twice: once with 'first' filled from all values, and once with 'first'
+ * refilled from all but the last two values plus a single append of the last
+ * value to document 1.
+ *
+ * @param first  attribute that is filled and saved
+ * @param second attribute that supplies the document count and is loaded from the saved file
+ * @param values values to fill with and search for (more than two required)
+ */
+template <typename V, typename T>
+void
+SearchContextTest::testMultiValueSearch(V & first, V & second, const std::vector<T> & values)
+{
+    addDocs(first, second.getNumDocs());
+    LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values",
+        first.getName().c_str(), first.getNumDocs(), static_cast<unsigned long>(values.size()));
+
+    // Round 1: all values in use.
+    fillAttribute(first, values);
+    testMultiValueSearchHelper(first, values);
+
+    ASSERT_TRUE(first.saveAs(second.getBaseFileName()));
+    ASSERT_TRUE(second.load());
+    testMultiValueSearchHelper(second, values);
+
+    // Round 2: values[sz - 2] is not used -> 0 hits, values[sz - 1] is used once -> 1 hit.
+    size_t sz = values.size();
+    ASSERT_TRUE(sz > 2);
+    std::vector<T> subset(values.begin(), values.end() - 2);
+    fillAttribute(first, subset);
+
+    ASSERT_TRUE(1u < first.getNumDocs());
+    EXPECT_TRUE(first.append(1u, values[sz - 1], 1));
+    first.commit(true);
+    testMultiValueSearchHelper(first, values);
+
+    ASSERT_TRUE(first.saveAs(second.getBaseFileName()));
+    ASSERT_TRUE(second.load());
+    testMultiValueSearchHelper(second, values);
+}
+
+/**
+ * Runs the value search test for every configuration in 'cfgs', and the
+ * multi-value search test too for configurations with multi-value attributes.
+ *
+ * @tparam T value type stored in the attribute
+ * @tparam A attribute class the created attributes are cast to
+ * @param cfgs attribute name -> config pairs to test
+ */
+template<typename T, typename A>
+void SearchContextTest::testSearch(const ConfigMap & cfgs) {
+    const uint32_t numDocs = 100;
+    const uint32_t numUniques = 20;
+    std::vector<T> values;
+    fillVector(values, numUniques);
+
+    for (const auto & cfg : cfgs) {
+        AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second);
+        A * secondTyped = dynamic_cast<A *>(second.get());
+        testSearch(*secondTyped, numDocs, values);
+        if (!second->hasMultiValue()) {
+            continue;
+        }
+        AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second);
+        A * firstTyped = dynamic_cast<A *>(first.get());
+        testMultiValueSearch(*firstTyped, *secondTyped, values);
+    }
+}
+
+using search::test::InitRangeVerifier;
+
+template<typename T, typename A>
+void SearchContextTest::testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs) { // For each config: store 'key' in the verifier's expected docs, then let InitRangeVerifier check an iterator searching for 'keyAsString'.
+ InitRangeVerifier ir;
+ for (const auto & cfg : cfgs) {
+ AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-initrange", cfg.second);
+ addDocs(*attribute, ir.getDocIdLimit()); // size the attribute from the verifier's doc id limit
+ for (uint32_t doc : ir.getExpectedDocIds()) {
+ EXPECT_TRUE(nullptr != dynamic_cast<A *>(attribute.get())); // the config must produce an attribute of type A
+ EXPECT_TRUE(dynamic_cast<A *>(attribute.get())->update(doc, key));
+ }
+ attribute->commit(true);
+ SearchContextPtr sc = getSearch(*attribute, keyAsString);
+ ASSERT_TRUE(sc->valid()); // abort if the term could not be parsed for this attribute
+ sc->fetchPostings(true);
+ TermFieldMatchData dummy;
+ SearchBasePtr sb = sc->createIterator(&dummy, true); // true = strict, matching the fetchPostings() call above
+ ir.verify(*sb);
+ }
+}
+
+void SearchContextTest::testInitRange() { // Runs the init-range check over the integer, float and string attribute configurations.
+ testInitRange<AttributeVector::largeint_t, IntegerAttribute>(42, "42", _integerCfg);
+ testInitRange<double, FloatingPointAttribute>(42.42, "42.42", _floatCfg);
+ testInitRange<vespalib::string, StringAttribute>("any-key", "any-key", _stringCfg);
+}
+
+void
+SearchContextTest::testSearch()
+{ // Entry point: checks term validity per numeric attribute type, then runs the value search tests for all configs.
+ const uint32_t numDocs = 100;
+ const uint32_t numUniques = 20;
+
+ { // IntegerAttribute
+ for (const auto & cfg : _integerCfg) {
+ AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second);
+ SearchContextPtr sc = getSearch(*attribute, "100");
+ ASSERT_TRUE(sc->valid()); // a plain integer term is accepted
+ sc = getSearch(*attribute, "1A0");
+ EXPECT_FALSE( sc->valid() ); // a non-numeric term yields an invalid context
+ }
+
+
+ { // CollectionType::ARRAY Flags.
+ std::vector<AttributeVector::largeint_t> values;
+ fillVector(values, numUniques);
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg);
+ testSearch(*(dynamic_cast<IntegerAttribute *>(second.get())), numDocs, values);
+ AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg);
+ testMultiValueSearch(*(dynamic_cast<IntegerAttribute *>(first.get())),
+ *(dynamic_cast<IntegerAttribute *>(second.get())), values);
+ }
+ }
+
+ { // FloatingPointAttribute
+ for (const auto & cfg : _floatCfg) {
+ AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second);
+ SearchContextPtr sc = getSearch(*attribute, "100");
+ ASSERT_TRUE(sc->valid()); // an integer-looking term is accepted by float attributes
+ sc = getSearch(*attribute, "7.3");
+ ASSERT_TRUE( sc->valid() ); // so is a decimal term
+ sc = getSearch(*attribute, "1A0");
+ EXPECT_FALSE( sc->valid() ); // a non-numeric term yields an invalid context
+ }
+ }
+
+ testSearch<AttributeVector::largeint_t, IntegerAttribute>(_integerCfg); // value search over every configured attribute, per value type
+ testSearch<double, FloatingPointAttribute>(_floatCfg);
+ testSearch<vespalib::string, StringAttribute>(_stringCfg);
+}
+
+//-----------------------------------------------------------------------------
+// Test search iterator functionality
+//-----------------------------------------------------------------------------
+void
+SearchContextTest::testStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester)
+{ // Checks strict iterators: 'threeHits' must hit docs 1, 3 and 5; 'noHits' must hit nothing; 'typeTester' checks the iterator type.
+ TermFieldMatchData dummy;
+ { // search for value with 3 hits
+ threeHits.fetchPostings(true);
+ SearchBasePtr sb = threeHits.createIterator(&dummy, true);
+ sb->initFullRange();
+ EXPECT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->getDocId() == sb->beginId() ||
+ sb->getDocId() == 1u); // before any seek the iterator sits at beginId() or already at the first hit
+ EXPECT_TRUE(sb->seek(1));
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(!sb->seek(2)); // a miss: the strict iterator moves on to the next hit
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(sb->seek(3));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(!sb->seek(4));
+ EXPECT_EQUAL(sb->getDocId(), 5u);
+ EXPECT_TRUE(sb->seek(5));
+ EXPECT_EQUAL(sb->getDocId(), 5u);
+ EXPECT_TRUE(!sb->seek(6)); // no hits after doc 5
+ EXPECT_TRUE(sb->isAtEnd());
+ }
+
+ { // search for value with no hits
+ noHits.fetchPostings(true);
+ SearchBasePtr sb = noHits.createIterator(&dummy, true);
+ sb->initFullRange();
+ ASSERT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->getDocId() == sb->beginId() ||
+ sb->isAtEnd());
+ EXPECT_TRUE(!sb->seek(1)); // the first seek must already run to the end
+ EXPECT_TRUE(sb->isAtEnd());
+ }
+}
+
+void
+SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits,
+ SearchContext & noHits,
+ const IteratorTester & typeTester)
+{ // Checks non-strict iterators: 'threeHits' must hit docs 1, 3 and 5; 'noHits' must hit nothing; 'typeTester' checks the iterator type.
+ TermFieldMatchData dummy;
+ { // search for value with three hits
+ threeHits.fetchPostings(false);
+ SearchBasePtr sb = threeHits.createIterator(&dummy, false);
+ sb->initFullRange();
+ EXPECT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->seek(1));
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(!sb->seek(2)); // a miss: the non-strict iterator stays on the last hit
+ EXPECT_EQUAL(sb->getDocId(), 1u);
+ EXPECT_TRUE(sb->seek(3));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(!sb->seek(4));
+ EXPECT_EQUAL(sb->getDocId(), 3u);
+ EXPECT_TRUE(sb->seek(5));
+ EXPECT_EQUAL(sb->getDocId(), 5u);
+ EXPECT_TRUE(!sb->seek(6));
+ EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd()); // staying on doc 5 and moving to the end are both accepted
+ }
+ { // search for value with no hits
+ noHits.fetchPostings(false);
+ SearchBasePtr sb = noHits.createIterator(&dummy, false);
+ sb->initFullRange();
+
+ EXPECT_TRUE(typeTester.matches(*sb));
+ EXPECT_TRUE(sb->getDocId() == sb->beginId() ||
+ sb->isAtEnd());
+ EXPECT_TRUE(!sb->seek(1));
+ EXPECT_NOT_EQUAL(sb->getDocId(), 1u); // a failed seek must not report the sought doc as current
+ EXPECT_TRUE(!sb->seek(6));
+ EXPECT_NOT_EQUAL(sb->getDocId(), 6u);
+ }
+}
+
+/**
+ * Fills 'ia' with five documents after the reserved one: odd documents
+ * (1, 3, 5) get value 10 and even documents (2, 4) get value 20.
+ *
+ * @param ia attribute to populate and commit
+ */
+void
+SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia)
+{
+    addReservedDoc(*ia);
+    ia->addDocs(5);
+    for (uint32_t doc = 1; doc <= 5; ++doc) {
+        ia->update(doc, (doc % 2 == 1) ? 10 : 20);
+    }
+    ia->commit(true);
+}
+
+/**
+ * Fills 'ia' with five documents after the reserved one: odd documents
+ * (1, 3, 5) get value 1 and even documents (2, 4) get value 2.
+ *
+ * @param ia attribute to populate and commit
+ */
+void
+SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia)
+{
+    addReservedDoc(*ia);
+    ia->addDocs(5);
+    for (uint32_t doc = 1; doc <= 5; ++doc) {
+        ia->update(doc, (doc % 2 == 1) ? 1 : 2);
+    }
+    ia->commit(true);
+}
+
+void
+SearchContextTest::testSearchIterator()
+{ // Runs the strict (and, where present below, non-strict) iterator checks on several attribute configurations.
+ { // single-value int32 without fast-search
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg);
+ fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 10); // value 10 is stored in docs 1, 3 and 5
+ SearchContextPtr noHits = getSearch(*ptr.get(), 30); // value 30 is never stored
+ AttributeIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ threeHits = getSearch(*ptr.get(), 10); // fresh contexts for the non-strict run
+ noHits = getSearch(*ptr.get(), 30);
+ testNonStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ { // single-value uint2
+ Config cfg(BasicType::UINT2, CollectionType::SINGLE);
+ AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg);
+ fillForSemiNibbleSearchIteratorTest(dynamic_cast<IntegerAttribute *>
+ (ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 1); // value 1 is stored in docs 1, 3 and 5
+ SearchContextPtr noHits = getSearch(*ptr.get(), 3); // value 3 is never stored
+ AttributeIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ threeHits = getSearch(*ptr.get(), 1);
+ noHits = getSearch(*ptr.get(), 3);
+ testNonStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ { // single-value int32 with fast-search: only the strict iterator is checked
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg);
+ fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 10);
+ SearchContextPtr noHits = getSearch(*ptr.get(), 30);
+ AttributePostingListIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ { // single-value string with fast-search: only the strict iterator is checked
+ Config cfg(BasicType::STRING, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("sfs-string", cfg);
+ StringAttribute * sa = dynamic_cast<StringAttribute *>(ptr.get());
+ addReservedDoc(*ptr);
+ ptr->addDocs(5);
+ sa->update(1, "three"); // "three" goes into docs 1, 3 and 5
+ sa->update(2, "two");
+ sa->update(3, "three");
+ sa->update(4, "two");
+ sa->update(5, "three");
+ ptr->commit(true);
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), "three");
+ SearchContextPtr noHits = getSearch(*ptr.get(), "none");
+ AttributePostingListIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+ { // int8 array with fast-search ("flags")
+ Config cfg(BasicType::INT8, CollectionType::ARRAY);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg);
+ fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get()));
+
+ SearchContextPtr threeHits = getSearch(*ptr.get(), 10);
+ SearchContextPtr noHits = getSearch(*ptr.get(), 30);
+ FlagAttributeIteratorTester tester;
+ testStrictSearchIterator(*threeHits, *noHits, tester);
+ threeHits = getSearch(*ptr.get(), 10);
+ noHits = getSearch(*ptr.get(), 30);
+ testNonStrictSearchIterator(*threeHits, *noHits, tester);
+ }
+}
+
+
+
+//-----------------------------------------------------------------------------
+// Test search iterator unpacking
+//-----------------------------------------------------------------------------
+/**
+ * Populates 'ia' for the unpacking test.
+ *
+ * Documents 1..3 all receive value 10:
+ *  - single value: one update per document,
+ *  - array: document N gets N appends with weight 1,
+ *  - weighted set: one append each with weights -50, 0 and 50.
+ * With 'extra' set, 20 more documents (4..23) are added, each holding a
+ * single value 10 (an update for single value, otherwise an append with weight 1).
+ *
+ * @param ia    attribute to populate and commit
+ * @param extra whether to add the 20 additional documents
+ */
+void
+SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia,
+                                                      bool extra)
+{
+    addReservedDoc(*ia);
+    ia->addDocs(3);
+    const bool isSingle = (ia->getCollectionType() == CollectionType::SINGLE);
+    if (isSingle) {
+        for (uint32_t doc = 1; doc <= 3; ++doc) {
+            ia->update(doc, 10);
+        }
+    } else if (ia->getCollectionType() == CollectionType::ARRAY) {
+        for (uint32_t doc = 1; doc <= 3; ++doc) {
+            for (uint32_t n = 0; n < doc; ++n) {
+                ia->append(doc, 10, 1);
+            }
+        }
+    } else { // WEIGHTED SET
+        const int32_t wsetWeights[] = { -50, 0, 50 };
+        for (uint32_t doc = 1; doc <= 3; ++doc) {
+            ia->append(doc, 10, wsetWeights[doc - 1]);
+        }
+    }
+    ia->commit(true);
+
+    if (extra) {
+        ia->addDocs(20);
+        for (uint32_t d = 4; d < 24; ++d) {
+            if (isSingle) {
+                ia->update(d, 10);
+            } else {
+                ia->append(d, 10, 1);
+            }
+        }
+        ia->commit(true);
+    }
+}
+
+/**
+ * Unpacks documents 1..3 (and 4 when 'extra' is set) through an iterator from
+ * 'sc' and checks the doc id and weight written to the match data.
+ *
+ * Expected weights for documents 1..3:
+ *  - single value, and INT8 arrays: 1, 1, 1
+ *  - other arrays:                  1, 2, 3
+ *  - anything else:                 -50, 0, 50
+ *
+ * @param attr   attribute the search context belongs to
+ * @param sc     search context to create the iterator from
+ * @param extra  whether document 4 is checked as well (expected weight 1)
+ * @param strict passed to fetchPostings() and createIterator()
+ */
+void
+SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr,
+                                               SearchContext & sc,
+                                               bool extra,
+                                               bool strict)
+{
+    LOG(info,
+        "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str());
+
+    // Pre-load the match data with values the unpack calls are expected to overwrite.
+    TermFieldMatchData md;
+    md.reset(100);
+    TermFieldMatchDataPosition pos;
+    pos.setElementWeight(100);
+    md.appendPosition(pos);
+
+    sc.fetchPostings(strict);
+    SearchBasePtr sb = sc.createIterator(&md, strict);
+    sb->initFullRange();
+
+    const bool singleValued = (attr->getCollectionType() == CollectionType::SINGLE);
+    const bool arrayValued = !singleValued && (attr->getCollectionType() == CollectionType::ARRAY);
+    std::vector<int32_t> weights;
+    if (singleValued || (arrayValued && attr->getBasicType() == BasicType::INT8)) {
+        weights = {1, 1, 1};
+    } else if (arrayValued) {
+        weights = {1, 2, 3};
+    } else {
+        weights = {-50, 0, 50};
+    }
+
+    // unpack and check weights
+    sb->unpack(1);
+    EXPECT_EQUAL(sb->getDocId(), 1u);
+    EXPECT_EQUAL(md.getDocId(), 1u);
+    EXPECT_EQUAL(md.getWeight(), weights[0]);
+
+    sb->unpack(2);
+    EXPECT_EQUAL(sb->getDocId(), 2u);
+    EXPECT_EQUAL(md.getDocId(), 2u);
+    EXPECT_EQUAL(md.getWeight(), weights[1]);
+
+    sb->unpack(3);
+    EXPECT_EQUAL(sb->getDocId(), 3u);
+    EXPECT_EQUAL(md.getDocId(), 3u);
+    EXPECT_EQUAL(md.getWeight(), weights[2]);
+
+    if (!extra) {
+        return;
+    }
+    sb->unpack(4);
+    EXPECT_EQUAL(sb->getDocId(), 4u);
+    EXPECT_EQUAL(md.getDocId(), 4u);
+    EXPECT_EQUAL(md.getWeight(), 1);
+}
+
+/**
+ * Runs the unpacking test (strict and non-strict) over a fixed list of
+ * integer attribute configurations. Fast-search configurations are also
+ * tested on a second attribute filled with the extra documents.
+ */
+void
+SearchContextTest::testSearchIteratorUnpacking()
+{
+    std::vector<std::pair<vespalib::string, Config> > config;
+
+    // Appends one named configuration, enabling fast-search only when asked to.
+    auto addConfig = [&config](const char * name, Config cfg, bool fastSearch) {
+        if (fastSearch) {
+            cfg.setFastSearch(true);
+        }
+        config.emplace_back(name, cfg);
+    };
+    addConfig("s-int32",   Config(BasicType::INT32, CollectionType::SINGLE), false);
+    addConfig("s-uint4",   Config(BasicType::UINT4, CollectionType::SINGLE), false);
+    addConfig("a-int32",   Config(BasicType::INT32, CollectionType::ARRAY),  false);
+    addConfig("w-int32",   Config(BasicType::INT32, CollectionType::WSET),   false);
+    addConfig("sfs-int32", Config(BasicType::INT32, CollectionType::SINGLE), true);
+    addConfig("afs-int32", Config(BasicType::INT32, CollectionType::ARRAY),  true);
+    addConfig("wfs-int32", Config(BasicType::INT32, CollectionType::WSET),   true);
+    addConfig("flags",     Config(BasicType::INT8,  CollectionType::ARRAY),  true);
+
+    for (const auto & entry : config) {
+        // Basic data set: strict first, then non-strict on a fresh context.
+        AttributePtr attr = AttributeFactory::createAttribute(entry.first, entry.second);
+        fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(attr.get()), false);
+        SearchContextPtr context = getSearch(*attr.get(), 10);
+        testSearchIteratorUnpacking(attr, *context, false, true);
+        context = getSearch(*attr.get(), 10);
+        testSearchIteratorUnpacking(attr, *context, false, false);
+
+        if (!entry.second.fastSearch()) {
+            continue;
+        }
+        // Fast-search only: repeat with the extra documents added.
+        AttributePtr extraAttr = AttributeFactory::createAttribute(entry.first + "-extra", entry.second);
+        fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(extraAttr.get()), true);
+        SearchContextPtr extraContext = getSearch(*extraAttr.get(), 10);
+        testSearchIteratorUnpacking(extraAttr, *extraContext, true, true);
+        extraContext = getSearch(*extraAttr.get(), 10);
+        testSearchIteratorUnpacking(extraAttr, *extraContext, true, false);
+    }
+}
+
+
+
+//-----------------------------------------------------------------------------
+// Test range search
+//-----------------------------------------------------------------------------
+
+/**
+ * Runs a range query as a WORD term against 'vec' and checks the hits.
+ *
+ * @param vec      attribute to search
+ * @param term     range expression, e.g. "<5" or "[1;9]"
+ * @param expected documents that must be returned
+ */
+template <typename VectorType>
+void
+SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term,
+                                      const DocSet & expected)
+{
+    const QueryTermSimple::SearchTerm rangeTermType = QueryTermSimple::WORD;
+    performSearch(vec, term, expected, rangeTermType);
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values)
+{ // Fills the attribute from 'values' and checks "<a", ">a", "[a;b]" and one all-covering range against expected sets built alongside.
+ LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str());
+
+ VectorType & vec = dynamic_cast<VectorType &>(*ptr.get());
+
+ addDocs(vec, numDocs);
+
+ std::map<ValueType, DocSet> postingList; // value -> documents holding it
+
+ uint32_t docCnt = 0;
+ for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) { // only every second value is stored
+ //std::cout << "postingList[" << values[i] << "]: {";
+ for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) { // values[i] goes into up to (i + 1) consecutive documents
+ EXPECT_TRUE(vec.update(docCnt + 1u, values[i]));
+ postingList[values[i]].insert(docCnt + 1u);
+ //std::cout << docCnt << ", ";
+ }
+ //std::cout << "}" << std::endl;
+ }
+ ptr->commit(true);
+ uint32_t smallHits = 0;
+ ValueType zeroValue = 0;
+ bool smallUInt = isUnsignedSmallIntAttribute(vec);
+ if (smallUInt) { // documents that were never updated are expected under value 0 (presumably their default value; confirm)
+ for (uint32_t i = docCnt ; i < numDocs; ++i) {
+ postingList[zeroValue].insert(i + 1u);
+ ++smallHits;
+ }
+ }
+
+ // test less than ("<a")
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ vespalib::asciistream ss;
+ ss << "<" << values[i];
+ DocSet expected;
+ if (smallUInt) {
+ expected.insert(postingList[zeroValue].begin(),
+ postingList[zeroValue].end());
+ }
+ for (uint32_t j = 0; j < i; ++j) { // union of the postings of all values listed before values[i]
+ expected.insert(postingList[values[j]].begin(), postingList[values[j]].end());
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+
+ // test greater than (">a")
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ vespalib::asciistream ss;
+ ss << ">" << values[i];
+ DocSet expected;
+ for (uint32_t j = i + 1; j < values.size(); ++j) { // union of the postings of all values listed after values[i]
+ expected.insert(postingList[values[j]].begin(), postingList[values[j]].end());
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+
+ // test range ("[a;b]")
+ for (uint32_t i = 0; i < values.size(); ++i) {
+ for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i
+ vespalib::asciistream ss;
+ ss << "[" << values[i] << ";" << values[j] << "]";
+ DocSet expected;
+ for (uint32_t k = i; k < j + 1; ++k) { // runs zero times when j < i, so an illegal range expects no hits
+ expected.insert(postingList[values[k]].begin(), postingList[values[k]].end());
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+ }
+
+ { // test large range
+ vespalib::asciistream ss;
+ ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]";
+ DocSet expected;
+ for (uint32_t doc = 0; doc < numDocs; ++doc) { // every document 1..numDocs is expected
+ expected.insert(doc + 1);
+ }
+ performRangeSearch(vec, ss.str(), expected);
+ }
+}
+
+void
+SearchContextTest::testRangeSearchLimited()
+{ // Checks open/closed range bounds and the optional third range field on a fast-search int32 attribute.
+ largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 };
+ std::vector<largeint_t> values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0]));
+ Config cfg(BasicType::INT32, CollectionType::SINGLE);
+ cfg.setFastSearch(true);
+ AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg);
+ IntegerAttribute & vec = dynamic_cast<IntegerAttribute &>(*ptr);
+ addDocs(vec, values.size());
+ for (size_t i(1); i < values.size(); i++) { // document i gets values[i]; values[0] is never stored
+ EXPECT_TRUE(vec.update(i, values[i]));
+ }
+ ptr->commit(true);
+
+ DocSet expected;
+ for (size_t i(1); i < 12; i++) { // docs 1..11 hold the values 1..9
+ expected.put(i);
+ }
+ performRangeSearch(vec, "[1;9]", expected);
+ performRangeSearch(vec, "[1;9;100]", expected); // a third field of magnitude 100 does not reduce the result
+ performRangeSearch(vec, "[1;9;-100]", expected);
+ expected.clear();
+ expected.put(3);
+ performRangeSearch(vec, "<1;3>", expected); // both bounds exclusive: only value 2 (doc 3)
+ expected.put(4);
+ performRangeSearch(vec, "<1;3]", expected); // upper bound inclusive: adds value 3 (doc 4)
+ expected.clear();
+ expected.put(1).put(2).put(3);
+ performRangeSearch(vec, "[1;3>", expected); // lower bound inclusive: values 1 and 2
+ expected.put(4);
+ performRangeSearch(vec, "[1;3]", expected);
+ expected.clear();
+ expected.put(1).put(2);
+ performRangeSearch(vec, "[1;9;1]", expected); // judging by the expectations, a positive third field limits hits from the low end; both docs with value 1 are returned
+ performRangeSearch(vec, "[1;9;2]", expected);
+ expected.put(3);
+ performRangeSearch(vec, "[1;9;3]", expected);
+ expected.clear();
+ expected.put(10).put(11);
+ performRangeSearch(vec, "[1;9;-1]", expected); // judging by the expectations, a negative third field limits hits from the high end; both docs with value 9 are returned
+ performRangeSearch(vec, "[1;9;-2]", expected);
+ expected.put(9);
+ performRangeSearch(vec, "[1;9;-3]", expected);
+ performRangeSearch(vec, "[1;9;-3]", expected); // NOTE(review): identical to the previous call; possibly a copy/paste leftover
+
+ expected.clear();
+ for (size_t i(1); i < 13; i++) { // all of docs 1..12
+ expected.put(i);
+ }
+ performRangeSearch(vec, "[;;100]", expected); // empty bounds with a large third field return every document
+ performRangeSearch(vec, "[;;-100]", expected);
+
+ expected.clear();
+ expected.put(1).put(2);
+ performRangeSearch(vec, "[;;1]", expected); // lowest stored value (1) only
+ expected.clear();
+ expected.put(12);
+ performRangeSearch(vec, "[;;-1]", expected); // highest stored value (10) only
+}
+
+/**
+ * Runs the range search test on all integer and float configurations, plus
+ * an INT8 array with fast-search ("flags") and a single-value UINT4 attribute.
+ *
+ * Values are 1..20, except for UINT4 which uses 1..9.
+ */
+void
+SearchContextTest::testRangeSearch()
+{
+    const uint32_t numDocs = 100;
+    const uint32_t numValues = 20;
+    const uint32_t numNibbleValues = 9;
+
+    { // IntegerAttribute
+        const largeint_t start = 1;
+        std::vector<largeint_t> values;
+        std::vector<largeint_t> nibbleValues;
+        for (uint32_t i = 0; i < numValues; ++i) {
+            values.push_back(start + i);
+            if (i < numNibbleValues) {
+                // The nibble list is the leading part of the full list.
+                nibbleValues.push_back(start + i);
+            }
+        }
+
+        for (const auto & entry : _integerCfg) {
+            AttributePtr attr = AttributeFactory::createAttribute(entry.first, entry.second);
+            testRangeSearch<IntegerAttribute, largeint_t>(attr, numDocs, values);
+        }
+        { // CollectionType::ARRAY Flags.
+            Config flagsCfg(BasicType::INT8, CollectionType::ARRAY);
+            flagsCfg.setFastSearch(true);
+            AttributePtr attr = AttributeFactory::createAttribute("flags", flagsCfg);
+            testRangeSearch<IntegerAttribute, largeint_t>(attr, numDocs, values);
+        }
+        { // single-value UINT4
+            Config nibbleCfg(BasicType::UINT4, CollectionType::SINGLE);
+            AttributePtr attr = AttributeFactory::createAttribute("s-uint4", nibbleCfg);
+            testRangeSearch<IntegerAttribute, largeint_t>(attr, numDocs, nibbleValues);
+        }
+    }
+
+    { // FloatingPointAttribute
+        const double start = 1;
+        std::vector<double> values;
+        for (uint32_t i = 0; i < numValues; ++i) {
+            values.push_back(start + i);
+        }
+
+        for (const auto & entry : _floatCfg) {
+            AttributePtr attr = AttributeFactory::createAttribute(entry.first, entry.second);
+            testRangeSearch<FloatingPointAttribute, double>(attr, numDocs, values);
+        }
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Test case insensitive search
+//-----------------------------------------------------------------------------
+
+/**
+ * Searches 'vec' for 'term' as a WORD term and checks the hits.
+ *
+ * @param vec      string attribute to search
+ * @param term     term to search for
+ * @param expected documents that must be returned
+ */
+void
+SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term,
+                                                const DocSet & expected)
+{
+    const QueryTermSimple::SearchTerm wordTerm = QueryTermSimple::WORD;
+    performSearch(vec, term, expected, wordTerm);
+}
+
+void
+SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr)
+{ // Stores 5 words in 5 casings each (25 docs) and checks which casings of the search term give hits.
+ LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str());
+
+ StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 5 * 5;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * terms[][5] = { // terms[casing][word]
+ {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower
+ {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper
+ {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper
+ {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase
+ {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase
+ };
+
+ uint32_t doc = 1;
+ for (uint32_t j = 0; j < 5; ++j) { // word j occupies docs j*5+1 .. j*5+5, one doc per casing
+ for (uint32_t i = 0; i < 5; ++i) {
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc++, terms[i][j]));
+ }
+ }
+
+ ptr->commit(true);
+
+ const char * buffer[1];
+ doc = 1;
+ for (uint32_t j = 0; j < 5; ++j) { // read back: every doc must return its string with the casing preserved
+ for (uint32_t i = 0; i < 5; ++i) {
+ EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1));
+ EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j]));
+ }
+ }
+
+ DocSet empty;
+ for (uint32_t j = 0; j < 5; ++j) {
+ DocSet expected;
+ for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) { // all five casings of word j
+ expected.insert(doc);
+ }
+ // for non-posting attributes only lower case search terms should give hits
+ performCaseInsensitiveSearch(vec, terms[0][j], expected);
+
+ if (ptr->getConfig().fastSearch()) {
+ for (uint32_t i = 1; i < 5; ++i) { // with fast-search every casing of the term finds all five docs
+ performCaseInsensitiveSearch(vec, terms[i][j], expected);
+ }
+ } else {
+ for (uint32_t i = 1; i < 4; ++i) { // row 4 is not checked here; its first two entries are all lower case and equal row 0
+ performCaseInsensitiveSearch(vec, terms[i][j], empty);
+ }
+ }
+ }
+ performCaseInsensitiveSearch(vec, "none", empty); // a word that was never stored finds nothing in any casing
+ performCaseInsensitiveSearch(vec, "NONE", empty);
+ performCaseInsensitiveSearch(vec, "None", empty);
+}
+
+void
+SearchContextTest::testRegexSearch(const AttributePtr & ptr)
+{ // Stores six strings and checks that REGEXP terms match substrings while the same terms as WORD match nothing.
+ LOG(info, "testRegexSearch: vector '%s'", ptr->getName().c_str());
+
+ StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 6;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"};
+ std::vector<const char *> terms = { "abc", "bc2de" };
+
+ for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { // document N holds strings[N - 1]
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+ }
+
+ ptr->commit(true);
+
+ std::vector<DocSet> expected; // expected[i] belongs to terms[i]
+ DocSet empty;
+ {
+ uint32_t docs[] = {1, 2, 3, 4, 5, 6};
+ expected.push_back(DocSet(docs, docs + 6)); // "abc"
+ }
+ {
+ uint32_t docs[] = {2, 3}; // includes "abc2Def", so the match is expected to ignore case
+ expected.push_back(DocSet(docs, docs + 2)); // "bc2de"
+ }
+
+ for (uint32_t i = 0; i < terms.size(); ++i) {
+ performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP);
+ performSearch(vec, terms[i], empty, QueryTermSimple::WORD); // no stored string equals the term as a whole word
+ }
+}
+
+
+/**
+ * Runs the case-insensitive search test on a fresh attribute for every
+ * string configuration.
+ */
+void
+SearchContextTest::testCaseInsensitiveSearch()
+{
+    for (const auto & entry : _stringCfg) {
+        AttributePtr attr = AttributeFactory::createAttribute(entry.first, entry.second);
+        testCaseInsensitiveSearch(attr);
+    }
+}
+
+/**
+ * Runs the regex search test on a fresh attribute for every string
+ * configuration.
+ */
+void
+SearchContextTest::testRegexSearch()
+{
+    for (const auto & entry : _stringCfg) {
+        AttributePtr attr = AttributeFactory::createAttribute(entry.first, entry.second);
+        testRegexSearch(attr);
+    }
+}
+
+
+//-----------------------------------------------------------------------------
+// Test prefix search
+//-----------------------------------------------------------------------------
+
+/**
+ * Searches 'vec' for 'term' using the given term type and checks the hits.
+ *
+ * @param vec      string attribute to search
+ * @param term     term to search for
+ * @param expected documents that must be returned
+ * @param termType kind of term query to build (e.g. PREFIXTERM or WORD)
+ */
+void
+SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term,
+                                       const DocSet & expected, QueryTermSimple::SearchTerm termType)
+{
+    const StringAttribute & attribute = vec;
+    performSearch(attribute, term, expected, termType);
+}
+
+void
+SearchContextTest::testPrefixSearch(const AttributePtr & ptr)
+{ // Stores six strings and checks prefix terms in three casings, as PREFIXTERM and as WORD.
+ LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str());
+
+ StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+ uint32_t numDocs = 6;
+ addDocs(*ptr.get(), numDocs);
+
+ const char * strings [] = {"prefixsearch", "PREFIXSEARCH", "PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"};
+ const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"},
+ {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}}; // terms[prefix][casing]; casing 0 is all lower case
+
+ for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { // document N holds strings[N - 1]
+ ASSERT_TRUE(doc < vec.getNumDocs());
+ EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+ }
+
+ ptr->commit(true);
+
+ std::vector<DocSet> expected; // expected[i] belongs to terms[i]
+ DocSet empty;
+ {
+ uint32_t docs[] = {1, 2, 3, 4, 5, 6};
+ expected.push_back(DocSet(docs, docs + 6)); // "pre"
+ }
+ {
+ uint32_t docs[] = {1, 2, 3};
+ expected.push_back(DocSet(docs, docs + 3)); // "pref"
+ }
+ {
+ uint32_t docs[] = {4, 5, 6};
+ expected.push_back(DocSet(docs, docs + 3)); // "prec"
+ }
+ expected.push_back(DocSet()); // "prex"
+
+ for (uint32_t i = 0; i < 4; ++i) {
+ for (uint32_t j = 0; j < 3; ++j) {
+ if (j == 0 || ptr->getConfig().fastSearch()) { // lower-case terms always match; other casings only with fast-search
+ performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM);
+ performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); // no stored string equals a bare prefix
+ } else {
+ performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM);
+ performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD);
+ }
+ }
+ }
+}
+
+
+/**
+ * Runs the prefix search test on a fresh attribute for every string
+ * configuration.
+ */
+void
+SearchContextTest::testPrefixSearch()
+{
+    for (const auto & entry : _stringCfg) {
+        AttributePtr attr = AttributeFactory::createAttribute(entry.first, entry.second);
+        testPrefixSearch(attr);
+    }
+}
+
+/**
+ * Checks that clearing documents removes them from search results.
+ *
+ * Creates an attribute with four documents after the reserved one, resets it
+ * with 'startValue', and expects 'term' to hit documents 1..4. After clearing
+ * documents 1 and 3 and committing, the same term must hit only 2 and 4.
+ *
+ * @param name       attribute name
+ * @param cfg        attribute configuration
+ * @param startValue value passed to resetAttribute()
+ * @param term       query term expected to match startValue
+ */
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name,
+                                                           const Config & cfg,
+                                                           ValueType startValue,
+                                                           const vespalib::string & term)
+{
+    AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+    LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'",
+        a->getName().c_str(), term.c_str());
+    addReservedDoc(*a);
+    a->addDocs(4);
+    VectorType & v = dynamic_cast<VectorType &>(*a);
+    resetAttribute(v, startValue);
+    {
+        ResultSetPtr rs = performSearch(v, term);
+        EXPECT_EQUAL(4u, rs->getNumHits());
+        // Fatal check: the array is indexed up to [3] below.
+        ASSERT_TRUE(4u == rs->getNumHits());
+        const RankedHit * array = rs->getArray();
+        EXPECT_EQUAL(1u, array[0]._docId);
+        EXPECT_EQUAL(2u, array[1]._docId);
+        EXPECT_EQUAL(3u, array[2]._docId);
+        EXPECT_EQUAL(4u, array[3]._docId);
+    }
+    a->clearDoc(1);
+    a->clearDoc(3);
+    a->commit(true);
+    {
+        ResultSetPtr rs = performSearch(v, term);
+        EXPECT_EQUAL(2u, rs->getNumHits());
+        // Fatal check, as in the first block: the array is indexed up to [1] below.
+        ASSERT_TRUE(2u == rs->getNumHits());
+        const RankedHit * array = rs->getArray();
+        EXPECT_EQUAL(2u, array[0]._docId);
+        EXPECT_EQUAL(4u, array[1]._docId);
+    }
+}
+
+// Runs the clearDoc search check for every integer, float and string attribute config.
+void
+SearchContextTest::requireThatSearchIsWorkingAfterClearDoc()
+{
+    // integer attributes: one exact term and one range term
+    for (auto it = _integerCfg.begin(); it != _integerCfg.end(); ++it) {
+        requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(it->first, it->second, 10, "10");
+        requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(it->first, it->second, 10, "<11");
+    }
+
+    // floating point attributes: one exact term and one range term
+    for (auto it = _floatCfg.begin(); it != _floatCfg.end(); ++it) {
+        requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(it->first, it->second, 10.5, "10.5");
+        requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(it->first, it->second, 10.5, "<10.6");
+    }
+
+    // string attributes: exact term only
+    for (auto it = _stringCfg.begin(); it != _stringCfg.end(); ++it) {
+        requireThatSearchIsWorkingAfterClearDoc<StringAttribute>(it->first, it->second, "start", "start");
+    }
+}
+
+// Verifies search after save + load + clearDoc: attribute 'a' gets 15 documents
+// with startValue, is saved under the base file name of a second attribute 'b',
+// 'b' is loaded, document 6 is cleared in 'b', and a search in 'b' must then
+// return the remaining 14 documents (1..5 and 7..15) in order.
+//
+// name, cfg     - name and config of the attribute under test ('b' uses name + "-save")
+// startValue    - value handed to resetAttribute() for 'a'
+// defaultValue  - value expected for the cleared document in a single-value attribute
+// term          - search term expected to match startValue
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name,
+                                                                  const Config & cfg,
+                                                                  ValueType startValue,
+                                                                  ValueType defaultValue,
+                                                                  const vespalib::string & term)
+{
+    AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+    LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'",
+        a->getName().c_str(), term.c_str());
+    addReservedDoc(*a);
+    a->addDocs(15);
+    VectorType & va = dynamic_cast<VectorType &>(*a);
+    resetAttribute(va, startValue); // triggers vector vector in posting list (count 15)
+    AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg);
+    EXPECT_TRUE(a->saveAs(b->getBaseFileName()));
+    EXPECT_TRUE(b->load());
+    b->clearDoc(6); // goes from vector vector to single vector with count 14
+    b->commit(true);
+    {
+        ResultSetPtr rs = performSearch(dynamic_cast<VectorType &>(*b), term);
+        EXPECT_EQUAL(14u, rs->getNumHits());
+        ASSERT_TRUE(14u == rs->getNumHits()); // abort instead of indexing past the hit array
+        const RankedHit * array = rs->getArray();
+        for (uint32_t i = 0; i < 14; ++i) {
+            if (i < 5) {
+                EXPECT_EQUAL(i + 1, array[i]._docId); // docs 1..5 precede the cleared doc
+            } else {
+                EXPECT_EQUAL(i + 2, array[i]._docId); // docs 7..15: shifted by the cleared doc 6
+            }
+        }
+    }
+    ValueType buf;
+    if (cfg.collectionType().isMultiValue()) {
+        // a cleared multi-value document holds no values
+        EXPECT_EQUAL(0u, b->get(6, &buf, 1));
+    } else {
+        // a cleared single-value document holds the default value
+        EXPECT_EQUAL(1u, b->get(6, &buf, 1));
+        EXPECT_EQUAL(defaultValue, buf);
+    }
+}
+
+// Runs the load-and-clearDoc search check for the fast-search int32 and string
+// attributes, single value first and then array.
+void
+SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc()
+{
+    {
+        const int64_t startValue = 10;
+        const int64_t clearedValue = search::attribute::getUndefined<int32_t>();
+        const char * const names[] = { "s-fs-int32", "a-fs-int32" };
+        for (const char * attrName : names) {
+            requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>(attrName, _integerCfg[attrName],
+                                                                             startValue, clearedValue, "10");
+        }
+    }
+    {
+        const vespalib::string startValue = "foo";
+        const vespalib::string clearedValue = "";
+        const char * const names[] = { "s-fs-str", "a-fs-str" };
+        for (const char * attrName : names) {
+            // the start value doubles as the search term
+            requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>(attrName, _stringCfg[attrName],
+                                                                            startValue, clearedValue, startValue);
+        }
+    }
+}
+
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name,
+ const Config & cfg,
+ ValueType value1,
+ ValueType value2)
+{ // Checks that a value overwritten before commit is not searchable: doc 2 must only match value2.
+ AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+ VectorType & va = dynamic_cast<VectorType &>(*a);
+ LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str());
+ addReservedDoc(*a);
+ a->addDocs(2);
+ va.update(1, value1); // doc 1 keeps value1
+ va.commit(true);
+ va.update(2, value1); // doc 2 is set to value1 ...
+ va.update(2, value2); // ... and overwritten with value2 within the same commit
+ va.commit(true);
+ {
+ ResultSetPtr rs = performSearch(va, value1);
+ EXPECT_EQUAL(1u, rs->getNumHits()); // only doc 1 matches; doc 2 should not have this value
+ }
+ {
+ ResultSetPtr rs = performSearch(va, value2);
+ EXPECT_EQUAL(1u, rs->getNumHits()); // only doc 2 holds value2
+ }
+}
+
+// Runs the update search check for every integer and string attribute config.
+void
+SearchContextTest::requireThatSearchIsWorkingAfterUpdates()
+{
+    for (auto it = _integerCfg.begin(); it != _integerCfg.end(); ++it) {
+        requireThatSearchIsWorkingAfterUpdates<IntegerAttribute>(it->first, it->second, 10, 20);
+    }
+
+    for (auto it = _stringCfg.begin(); it != _stringCfg.end(); ++it) {
+        requireThatSearchIsWorkingAfterUpdates<StringAttribute>(it->first, it->second, "foo", "bar");
+    }
+}
+
+// Verifies that a flag attribute (INT8 array with fast-search) returns correct
+// search results while documents are added one at a time with a commit after each.
+// The first part uses GrowStrategy(1, 0, 1) and four documents; the second part
+// uses GrowStrategy(4, 0, 4), adds 200 documents alternating between the values
+// 50 and 60, and re-checks the results after every single document.
+void
+SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()
+{
+    LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()");
+    Config cfg(BasicType::INT8, CollectionType::ARRAY);
+    cfg.setFastSearch(true);
+    {
+        cfg.setGrowStrategy(GrowStrategy(1, 0, 1));
+        AttributePtr a = AttributeFactory::createAttribute("flags", cfg);
+        FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a);
+        addReservedDoc(fa);
+        fa.addDocs(1);
+        fa.append(1, 10, 1);
+        fa.append(1, 24, 1);
+        fa.commit(true);
+        fa.addDocs(1);
+        fa.append(2, 20, 1);
+        fa.append(2, 24, 1);
+        fa.commit(true);
+        fa.addDocs(1);
+        fa.append(3, 30, 1);
+        fa.append(3, 26, 1);
+        fa.commit(true);
+        fa.addDocs(1);
+        fa.append(4, 40, 1);
+        fa.append(4, 24, 1);
+        fa.commit(true);
+        {
+            // docs 1 and 2 hold a value below 24 (10 and 20)
+            ResultSetPtr rs = performSearch(fa, "<24");
+            EXPECT_EQUAL(2u, rs->getNumHits());
+            ASSERT_TRUE(2u == rs->getNumHits()); // abort instead of indexing past the hit array
+            EXPECT_EQUAL(1u, rs->getArray()[0]._docId);
+            EXPECT_EQUAL(2u, rs->getArray()[1]._docId);
+        }
+        {
+            // docs 1, 2 and 4 hold the value 24
+            ResultSetPtr rs = performSearch(fa, "24");
+            EXPECT_EQUAL(3u, rs->getNumHits());
+            ASSERT_TRUE(3u == rs->getNumHits()); // abort instead of indexing past the hit array
+            EXPECT_EQUAL(1u, rs->getArray()[0]._docId);
+            EXPECT_EQUAL(2u, rs->getArray()[1]._docId);
+            EXPECT_EQUAL(4u, rs->getArray()[2]._docId);
+        }
+    }
+    {
+        cfg.setGrowStrategy(GrowStrategy(4, 0, 4));
+        AttributePtr a = AttributeFactory::createAttribute("flags", cfg);
+        FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a);
+        std::vector<uint32_t> exp50; // doc ids given the value 50 so far
+        std::vector<uint32_t> exp60; // doc ids given the value 60 so far
+        addReservedDoc(fa);
+        for (uint32_t i = 0; i < 200; ++i) {
+            uint32_t docId;
+            EXPECT_TRUE(fa.addDoc(docId));
+            if (i % 2 == 0) {
+                fa.append(docId, 50, 1);
+                exp50.push_back(docId);
+            } else {
+                fa.append(docId, 60, 1);
+                exp60.push_back(docId);
+            }
+            fa.commit(true);
+            {
+                ResultSetPtr rs1 = performSearch(fa, "50");
+                ResultSetPtr rs2 = performSearch(fa, "<51");
+                EXPECT_EQUAL(exp50.size(), rs1->getNumHits());
+                EXPECT_EQUAL(exp50.size(), rs2->getNumHits());
+                // the loops below index both hit arrays up to exp50.size()
+                ASSERT_TRUE(exp50.size() == rs1->getNumHits());
+                ASSERT_TRUE(exp50.size() == rs2->getNumHits());
+                for (size_t j = 0; j < exp50.size(); ++j) {
+                    EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId);
+                    EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId);
+                }
+            }
+            {
+                ResultSetPtr rs = performSearch(fa, "60");
+                EXPECT_EQUAL(exp60.size(), rs->getNumHits());
+                ASSERT_TRUE(exp60.size() == rs->getNumHits()); // guards the indexing below
+                for (size_t j = 0; j < exp60.size(); ++j) {
+                    EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId);
+                }
+            }
+        }
+    }
+}
+
+// Stores 'value' in document 1 of a freshly created attribute and checks that
+// searching for the term "foo" yields no hits.
+template <typename VectorType, typename ValueType>
+void
+SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name,
+                                                             const Config & cfg,
+                                                             ValueType value)
+{
+    AttributePtr attr = AttributeFactory::createAttribute(name, cfg);
+    VectorType & typedAttr = dynamic_cast<VectorType &>(*attr);
+    LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", attr->getName().c_str());
+
+    // one reserved document plus the single document under test
+    addReservedDoc(*attr);
+    attr->addDocs(1);
+    typedAttr.update(1, value);
+    typedAttr.commit(true);
+
+    ResultSetPtr result = performSearch(typedAttr, "foo");
+    EXPECT_EQUAL(0u, result->getNumHits());
+}
+
+// Runs the invalid-term check for every integer and float attribute config.
+void
+SearchContextTest::requireThatInvalidSearchTermGivesZeroHits()
+{
+    for (auto it = _integerCfg.begin(); it != _integerCfg.end(); ++it) {
+        requireThatInvalidSearchTermGivesZeroHits<IntegerAttribute>(it->first, it->second, 10);
+    }
+    for (auto it = _floatCfg.begin(); it != _floatCfg.end(); ++it) {
+        requireThatInvalidSearchTermGivesZeroHits<FloatingPointAttribute>(it->first, it->second, 10);
+    }
+}
+
+// Fills a flag attribute (INT8 array with fast-search) with values spanning
+// -128..127 and checks exact, open-ended and bracketed range terms, including
+// ranges whose limits (-129, 128) lie outside the int8 range.
+void
+SearchContextTest::requireThatFlagAttributeHandlesTheByteRange()
+{
+    LOG(info, "requireThatFlagAttributeHandlesTheByteRange()");
+    Config flagCfg(BasicType::INT8, CollectionType::ARRAY);
+    flagCfg.setFastSearch(true);
+
+    AttributePtr attr = AttributeFactory::createAttribute("flags", flagCfg);
+    FlagAttribute & flags = dynamic_cast<FlagAttribute &>(*attr);
+    addReservedDoc(flags);
+    flags.addDocs(5);
+    // doc 1: {-128}, doc 2: {-64, -8}, doc 3: {0, 8}, doc 4: {64, 24}, doc 5: {127}
+    flags.append(1, -128, 1);
+    flags.append(2, -64, 1);
+    flags.append(2, -8, 1);
+    flags.append(3, 0, 1);
+    flags.append(3, 8, 1);
+    flags.append(4, 64, 1);
+    flags.append(4, 24, 1);
+    flags.append(5, 127, 1);
+    flags.commit(true);
+
+    const auto word = QueryTermSimple::WORD;
+    // exact matches at both ends of the byte range
+    performSearch(flags, "-128", DocSet().put(1), word);
+    performSearch(flags, "127", DocSet().put(5), word);
+    // open-ended ranges
+    performSearch(flags, ">-128", DocSet().put(2).put(3).put(4).put(5), word);
+    performSearch(flags, "<127", DocSet().put(1).put(2).put(3).put(4), word);
+    // bracketed ranges inside the byte range
+    performSearch(flags, "[-128;-8]", DocSet().put(1).put(2), word);
+    performSearch(flags, "[-8;8]", DocSet().put(2).put(3), word);
+    performSearch(flags, "[8;127]", DocSet().put(3).put(4).put(5), word);
+    // bracketed ranges with one limit outside the byte range
+    performSearch(flags, "[-129;-8]", DocSet().put(1).put(2), word);
+    performSearch(flags, "[8;128]", DocSet().put(3).put(4).put(5), word);
+}
+
+// Stores maxValue in document 1 of a freshly created integer attribute and
+// checks that searching for maxValue + 1 yields no hits.
+//
+// name, cfg - name and config used to create the attribute under test
+// maxValue  - value stored in the document; the search term is maxValue + 1
+void
+SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name,
+                                                                 const Config &cfg,
+                                                                 int64_t maxValue)
+{
+    AttributePtr a = AttributeFactory::createAttribute(name, cfg);
+    IntegerAttribute &ia = dynamic_cast<IntegerAttribute &>(*a);
+    addReservedDoc(*a);
+    a->addDocs(1);
+    ia.update(1, maxValue);
+    ia.commit(true);
+    // The argument is a signed 64-bit integer, so format it with PRId64 (not PRIu64).
+    vespalib::string term = vespalib::make_string("%" PRId64 "", maxValue + 1);
+    LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str());
+    ResultSetPtr rs = performSearch(ia, term);
+    EXPECT_EQUAL(0u, rs->getNumHits());
+}
+
+// Runs the out-of-bounds check for every integer attribute config (using the
+// int32 maximum) and for a flag attribute (using the int8 maximum).
+void
+SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits()
+{
+    const int32_t int32Max = std::numeric_limits<int32_t>::max();
+    for (auto it = _integerCfg.begin(); it != _integerCfg.end(); ++it) {
+        requireThatOutOfBoundsSearchTermGivesZeroHits(it->first, it->second, int32Max);
+    }
+    {
+        // flag attribute: INT8 array with fast-search
+        Config flagCfg(BasicType::INT8, CollectionType::ARRAY);
+        flagCfg.setFastSearch(true);
+        const int8_t int8Max = std::numeric_limits<int8_t>::max();
+        requireThatOutOfBoundsSearchTermGivesZeroHits("flags", flagCfg, int8Max);
+    }
+}
+
+
+// Registers the int32 attribute configs: single, array and weighted set, each
+// without and with fast-search. The map stores copies, so each local config is
+// registered once as-is and once more after fast-search has been enabled.
+void
+SearchContextTest::initIntegerConfig()
+{
+    Config single(BasicType::INT32, CollectionType::SINGLE);
+    _integerCfg["s-int32"] = single;
+    single.setFastSearch(true);
+    _integerCfg["s-fs-int32"] = single;
+
+    Config array(BasicType::INT32, CollectionType::ARRAY);
+    _integerCfg["a-int32"] = array;
+    array.setFastSearch(true);
+    _integerCfg["a-fs-int32"] = array;
+
+    Config wset(BasicType::INT32, CollectionType::WSET);
+    _integerCfg["w-int32"] = wset;
+    wset.setFastSearch(true);
+    _integerCfg["w-fs-int32"] = wset;
+}
+
+// Registers the float attribute configs: single, array and weighted set, each
+// without and with fast-search. The map stores copies, so each local config is
+// registered once as-is and once more after fast-search has been enabled.
+void
+SearchContextTest::initFloatConfig()
+{
+    Config single(BasicType::FLOAT, CollectionType::SINGLE);
+    _floatCfg["s-float"] = single;
+    single.setFastSearch(true);
+    _floatCfg["s-fs-float"] = single;
+
+    Config array(BasicType::FLOAT, CollectionType::ARRAY);
+    _floatCfg["a-float"] = array;
+    array.setFastSearch(true);
+    _floatCfg["a-fs-float"] = array;
+
+    Config wset(BasicType::FLOAT, CollectionType::WSET);
+    _floatCfg["w-float"] = wset;
+    wset.setFastSearch(true);
+    _floatCfg["w-fs-float"] = wset;
+}
+
+// Registers the string attribute configs: first the three collection types
+// without fast-search, then the same three with fast-search enabled.
+void
+SearchContextTest::initStringConfig()
+{
+    Config single(BasicType::STRING, CollectionType::SINGLE);
+    Config array(BasicType::STRING, CollectionType::ARRAY);
+    Config wset(BasicType::STRING, CollectionType::WSET);
+
+    // plain variants
+    _stringCfg["s-str"] = single;
+    _stringCfg["a-str"] = array;
+    _stringCfg["w-str"] = wset;
+
+    // fast-search variants; the map already holds copies of the plain configs
+    single.setFastSearch(true);
+    _stringCfg["s-fs-str"] = single;
+    array.setFastSearch(true);
+    _stringCfg["a-fs-str"] = array;
+    wset.setFastSearch(true);
+    _stringCfg["w-fs-str"] = wset;
+}
+
+SearchContextTest::SearchContextTest() :
+ _integerCfg(),
+ _floatCfg(),
+ _stringCfg()
+{ // Starts with empty config maps and fills them with the attribute configs the tests iterate over.
+ initIntegerConfig(); // populates _integerCfg
+ initFloatConfig(); // populates _floatCfg
+ initStringConfig(); // populates _stringCfg
+}
+
+int
+SearchContextTest::Main()
+{ // Test application entry point: runs every search context test case in sequence.
+ TEST_INIT("searchcontext_test");
+ EXPECT_TRUE(true);
+
+ testSearch(); // the test* cases are called directly
+ testInitRange();
+ testRangeSearch();
+ testRangeSearchLimited();
+ testCaseInsensitiveSearch();
+ testRegexSearch();
+ testPrefixSearch();
+ testSearchIterator();
+ testSearchIteratorUnpacking();
+ TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); // the requireThat* cases are wrapped in TEST_DO
+ TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc());
+ TEST_DO(requireThatSearchIsWorkingAfterUpdates());
+ TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded());
+ TEST_DO(requireThatInvalidSearchTermGivesZeroHits());
+ TEST_DO(requireThatFlagAttributeHandlesTheByteRange());
+ TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits());
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::SearchContextTest);
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh
new file mode 100755
index 00000000000..3aae4bfe4d5
--- /dev/null
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+$VALGRIND ./searchlib_searchcontext_test_app
+status=$? # keep the test's exit code; the cleanup below would otherwise mask a failure
+rm -rf *.dat *.idx *.weight # remove files left behind by the test run
+exit $status
diff --git a/searchlib/src/tests/attribute/sourceselector/.gitignore b/searchlib/src/tests/attribute/sourceselector/.gitignore
new file mode 100644
index 00000000000..265c856fd01
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+sourceselector_test
+searchlib_sourceselector_test_app
diff --git a/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt
new file mode 100644
index 00000000000..24b7a75dd07
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_sourceselector_test_app
+ SOURCES
+ sourceselector_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_sourceselector_test_app COMMAND searchlib_sourceselector_test_app)
diff --git a/searchlib/src/tests/attribute/sourceselector/DESC b/searchlib/src/tests/attribute/sourceselector/DESC
new file mode 100644
index 00000000000..7568f5de080
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/DESC
@@ -0,0 +1 @@
+This is a test of the sourceselector interface.
diff --git a/searchlib/src/tests/attribute/sourceselector/FILES b/searchlib/src/tests/attribute/sourceselector/FILES
new file mode 100644
index 00000000000..0d2803e762d
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/FILES
@@ -0,0 +1 @@
+sourceselector.cpp
diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
new file mode 100644
index 00000000000..a3595f8724d
--- /dev/null
+++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp
@@ -0,0 +1,216 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for sourceselector.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("sourceselector_test");
+
+#include <vespa/searchlib/attribute/fixedsourceselector.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using std::unique_ptr;
+using std::string;
+using namespace search;
+using namespace search::queryeval;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+
+namespace {
+template <typename T, size_t N> size_t arraysize(const T (&)[N]) { return N; } // element count of a built-in array, taken from its type
+
+const uint32_t maxDocId = 4096; // doc id of the last entry in docs[]
+struct DocSource { uint32_t docId; uint8_t source; }; // one (doc id, source) assignment
+const DocSource docs[] = { {0,1}, {1, 0}, {2, 2}, {4, 3}, {8, 9}, {16, 178}, // listed in ascending doc id order; testSourceSelector walks them in that order
+ {32, 1}, {64, 2}, {128, 3}, {256,4}, {512, 2},
+ {1024, 1}, {2048,5}, {maxDocId,1} };
+const string index_dir = "test_data"; // directory emptied, created and removed again by the save/load test
+const string base_file_name = "test_data/sourcelist"; // base file name passed to the selectors
+const string base_file_name2 = "test_data/sourcelist2"; // second base file name, used for the cloned / pre-save selector
+const uint32_t default_source = 7; // default source passed to every selector constructor
+const uint32_t base_id = 42; // value passed to setBaseId() in the clone and save/load tests
+
+class Test : public vespalib::TestApp // test application for the source selector
+{
+public:
+ int Main(); // runs all test cases below
+private:
+ void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector); // checks listed and unlisted doc ids against the selector
+ void testFixed(const DocSource *docSource, size_t sz); // testSourceSelector on a FixedSourceSelector
+ template <typename SelectorType>
+ void requireThatSelectorCanCloneAndSubtract(); // templated on the selector type ...
+ void requireThatSelectorCanCloneAndSubtract(); // ... and instantiated for FixedSourceSelector (same pattern for the pairs below)
+ template <typename SelectorType>
+ void requireThatSelectorCanSaveAndLoad();
+ void requireThatSelectorCanSaveAndLoad();
+ template <typename SelectorType>
+ void requireThatCompleteSourceRangeIsHandled();
+ void requireThatCompleteSourceRangeIsHandled();
+ template <typename SelectorType>
+ void requireThatSourcesAreCountedCorrectly();
+ void requireThatSourcesAreCountedCorrectly();
+};
+
+int
+Test::Main()
+{ // Test application entry point: runs every source selector test case in sequence.
+ TEST_INIT("sourceselector_test");
+
+ if (_argc > 0) {
+ DummyFileHeaderContext::setCreator(_argv[0]); // use the program name as creator in the dummy file header context
+ }
+ testFixed(docs, arraysize(docs));
+ TEST_DO(requireThatSelectorCanCloneAndSubtract());
+ TEST_DO(requireThatSelectorCanSaveAndLoad());
+ TEST_DO(requireThatCompleteSourceRangeIsHandled());
+ TEST_DO(requireThatSourcesAreCountedCorrectly());
+
+ TEST_DONE();
+}
+
+// Applies every (doc id, source) pair from the docs table to the selector.
+void setSources(ISourceSelector &selector) {
+    for (const DocSource & entry : docs) {
+        selector.setSource(entry.docId, entry.source);
+    }
+}
+
+void Test::testFixed(const DocSource *docSource, size_t sz)
+{ // Checks a FixedSourceSelector before and after the docs table has been applied to it.
+ FixedSourceSelector selector(default_source, base_file_name, 10); // third argument presumably is the initial doc id limit (see the check below) - TODO confirm
+ EXPECT_EQUAL(default_source, selector.getDefaultSource());
+ EXPECT_EQUAL(10u, selector.getDocIdLimit());
+// EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1));
+ setSources(selector);
+ testSourceSelector(docSource, sz, selector.getDefaultSource(), selector);
+ EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit()); // the limit is expected to end up one past the highest doc id set
+}
+
+// Checks the selector against a table of (doc id, source) pairs.
+//
+// docSource, sz  - table of expected assignments, ordered by ascending doc id
+// defaultSource  - source expected for every doc id not present in the table
+// selector       - selector under test
+void Test::testSourceSelector(const DocSource *docSource, size_t sz,
+                              uint8_t defaultSource, ISourceSelector &selector)
+{
+    // Pass 1: every listed doc id maps to its listed source.
+    {
+        ISourceSelector::Iterator::UP iter(selector.createIterator());
+        for (const DocSource * entry = docSource; entry != docSource + sz; ++entry) {
+            EXPECT_EQUAL(entry->source, iter->getSource(entry->docId));
+        }
+    }
+    // Pass 2: walk all doc ids up to the last listed one; listed ids give their
+    // source, all ids in between give the default source.
+    {
+        ISourceSelector::Iterator::UP iter(selector.createIterator());
+        const size_t lastDocId = docSource[sz - 1].docId;
+        size_t next = 0; // index of the next table entry to be reached
+        for (size_t docId = 0; docId <= lastDocId; ++docId) {
+            if (docId == docSource[next].docId) {
+                EXPECT_EQUAL(docSource[next].source, iter->getSource(docId));
+                ++next;
+            } else {
+                EXPECT_EQUAL(defaultSource, iter->getSource(docId));
+            }
+        }
+    }
+}
+
+// Clones a populated selector with cloneAndSubtract() and checks the clone:
+// default source lowered by the difference, base id raised by it, the doc id
+// limit preserved, and every source lowered by the difference with 0 as floor.
+template <typename SelectorType>
+void
+Test::requireThatSelectorCanCloneAndSubtract()
+{
+    SelectorType original(default_source, base_file_name);
+    setSources(original);
+    original.setBaseId(base_id);
+
+    const uint32_t diff = 3;
+    typename SelectorType::UP
+        clone(original.cloneAndSubtract(base_file_name2, diff));
+    EXPECT_EQUAL(default_source - diff, clone->getDefaultSource());
+    EXPECT_EQUAL(base_id + diff, clone->getBaseId());
+    EXPECT_EQUAL(maxDocId+1, clone->getDocIdLimit());
+
+    ISourceSelector::Iterator::UP iter(clone->createIterator());
+    for (const DocSource & entry : docs) {
+        if (entry.source <= diff) {
+            // sources at or below the difference end up as 0
+            EXPECT_EQUAL(0, iter->getSource(entry.docId));
+        } else {
+            EXPECT_EQUAL(entry.source - diff, iter->getSource(entry.docId));
+        }
+    }
+}
+
+void
+Test::requireThatSelectorCanCloneAndSubtract()
+{ // Runs the clone-and-subtract check for FixedSourceSelector.
+ requireThatSelectorCanCloneAndSubtract<FixedSourceSelector>();
+}
+
+template <typename SelectorType>
+void
+Test::requireThatSelectorCanSaveAndLoad()
+{ // Saves a populated selector under base_file_name, loads it back and checks sources, base id and doc id limit.
+ SelectorType selector(default_source, base_file_name2);
+ setSources(selector);
+ selector.setBaseId(base_id);
+ selector.setSource(maxDocId + 1, default_source); // one extra doc beyond the table; the loaded limit is checked as maxDocId + 2 below
+
+ FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); // start from a clean, existing directory
+ FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str());
+
+ SourceSelector::SaveInfo::UP save_info =
+ selector.extractSaveInfo(base_file_name);
+ save_info->save(TuneFileAttributes(), DummyFileHeaderContext());
+ typename SelectorType::UP
+ selector2(SelectorType::load(base_file_name));
+ testSourceSelector(docs, arraysize(docs), default_source, *selector2);
+ EXPECT_EQUAL(base_id, selector2->getBaseId());
+ EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit());
+
+ FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); // remove the files written above
+}
+
+void
+Test::requireThatSelectorCanSaveAndLoad()
+{ // Runs the save-and-load check for FixedSourceSelector.
+ requireThatSelectorCanSaveAndLoad<FixedSourceSelector>();
+}
+
+// Assigns source i to doc id i for every i below ISourceSelector::SOURCE_LIMIT
+// and checks that each one is read back unchanged.
+template <typename SelectorType>
+void
+Test::requireThatCompleteSourceRangeIsHandled()
+{
+    SelectorType selector(default_source, base_file_name);
+    for (uint32_t source = 0; source < ISourceSelector::SOURCE_LIMIT; ++source) {
+        selector.setSource(source, source); // doc id and source are the same number
+    }
+    ISourceSelector::Iterator::UP iter = selector.createIterator();
+    for (uint32_t source = 0; source < ISourceSelector::SOURCE_LIMIT; ++source) {
+        EXPECT_EQUAL(static_cast<queryeval::Source>(source), iter->getSource(source));
+    }
+}
+
+void
+Test::requireThatCompleteSourceRangeIsHandled()
+{ // Runs the complete-source-range check for FixedSourceSelector.
+ requireThatCompleteSourceRangeIsHandled<FixedSourceSelector>();
+}
+
+// Spreads 256 documents evenly over the sources 0..15 and checks the histogram
+// from getDistribution(): 16 documents for each of those sources, 0 for the rest.
+template <typename SelectorType>
+void
+Test::requireThatSourcesAreCountedCorrectly()
+{
+    SelectorType selector(default_source, base_file_name);
+    for (uint32_t docId = 0; docId < 256; ++docId) {
+        selector.setSource(docId, docId%16);
+    }
+    SourceSelector::Histogram histogram = selector.getDistribution();
+    for (uint32_t source = 0; source < 256; ++source) {
+        if (source < 16) {
+            EXPECT_EQUAL(16u, histogram[source]);
+        } else {
+            EXPECT_EQUAL(0u, histogram[source]);
+        }
+    }
+}
+
+void
+Test::requireThatSourcesAreCountedCorrectly()
+{ // Runs the source counting check for FixedSourceSelector.
+ requireThatSourcesAreCountedCorrectly<FixedSourceSelector>();
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/attribute/stringattribute/.gitignore b/searchlib/src/tests/attribute/stringattribute/.gitignore
new file mode 100644
index 00000000000..0e8a04bc19d
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+stringattribute_test
+searchlib_stringattribute_test_app
diff --git a/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt
new file mode 100644
index 00000000000..032ce9cac4e
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_stringattribute_test_app
+ SOURCES
+ stringattribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_stringattribute_test_app COMMAND sh stringattribute_test.sh)
diff --git a/searchlib/src/tests/attribute/stringattribute/DESC b/searchlib/src/tests/attribute/stringattribute/DESC
new file mode 100644
index 00000000000..5d94ab94325
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/DESC
@@ -0,0 +1 @@
+Unit tests for SingleValueStringAttribute and MultiValueStringAttribute.
diff --git a/searchlib/src/tests/attribute/stringattribute/FILES b/searchlib/src/tests/attribute/stringattribute/FILES
new file mode 100644
index 00000000000..e68ef57177d
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/FILES
@@ -0,0 +1 @@
+stringattribute.cpp
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
new file mode 100644
index 00000000000..154340ba408
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -0,0 +1,453 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("stringattribute_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/enumstore.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.h>
+
+#include <vespa/searchlib/attribute/enumstore.hpp>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/singlestringpostattribute.hpp>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/multistringpostattribute.hpp>
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::IAttributeVector;
+
+class StringAttributeTest : public vespalib::TestApp // test application for the string attribute classes
+{
+private:
+ typedef ArrayStringAttribute ArrayStr; // short aliases for the attribute types under test
+ typedef WeightedSetStringAttribute WeightedSetStr;
+ typedef ArrayStringPostingAttribute ArrayStrPosting;
+ typedef WeightedSetStringPostingAttribute WeightedSetStrPosting;
+ typedef attribute::Config Config;
+ typedef attribute::BasicType BasicType;
+
+ template <typename Attribute>
+ void addDocs(Attribute & vec, uint32_t numDocs); // adds numDocs documents and checks ids and counts
+ template <typename Attribute>
+ void checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount,
+ uint32_t numValues, const vespalib::string & value); // checks value count and occurrences of value in doc
+ void testMultiValue(); // runs the templated overload for the four multi-value attribute types
+ template <typename Attribute>
+ void testMultiValue(Attribute & attr, uint32_t numDocs);
+ void testMultiValueMultipleClearDocBetweenCommit();
+ void testMultiValueRemove();
+ void testSingleValue();
+ void testDefaultValueOnAddDoc(AttributeVector & v);
+ template <typename Attribute>
+ void testSingleValue(Attribute & svsa, Config &cfg);
+
+public:
+ int Main(); // test application entry point
+};
+
+// Adds numDocs documents to the attribute, checking after each addDoc() that the
+// returned doc id equals the number of docs added before it, that the doc count
+// grows by one and that the new document has no values.
+template <typename Attribute>
+void
+StringAttributeTest::addDocs(Attribute & vec, uint32_t numDocs)
+{
+    for (uint32_t added = 0; added < numDocs; ++added) {
+        typename Attribute::DocId docId;
+        EXPECT_TRUE(vec.addDoc(docId));
+        EXPECT_TRUE(docId == added);
+        EXPECT_TRUE(vec.getNumDocs() == added + 1);
+        EXPECT_TRUE(vec.getValueCount(docId) == 0);
+    }
+    EXPECT_TRUE(vec.getNumDocs() == numDocs);
+}
+
+// Checks the values stored for one document.
+//
+// vec         - attribute to inspect
+// doc         - document id
+// valueCount  - expected number of values in the document
+// numValues   - expected number of those values that equal 'value'
+// value       - the value to count
+template <typename Attribute>
+void
+StringAttributeTest::checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount,
+                                uint32_t numValues, const vespalib::string & value)
+{
+    std::vector<vespalib::string> buffer(valueCount);
+    EXPECT_TRUE(static_cast<uint32_t>(vec.getValueCount(doc)) == valueCount);
+    // data() is valid on an empty vector (valueCount == 0), unlike &buffer[0]
+    EXPECT_TRUE(vec.get(doc, buffer.data(), buffer.size()) == valueCount);
+    // std::count returns a signed difference type; compare as uint32_t like numValues
+    EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), value)) == numValues);
+}
+
+
+// Runs the templated multi-value test for the four multi-value string attribute
+// types, using one more document than the array attribute's maxValues(), after
+// asserting that each type reports that same maxValues().
+void
+StringAttributeTest::testMultiValue()
+{
+    const uint32_t numDocs = ArrayStr::MultiValueMapping::maxValues() + 1;
+
+    { // Array String Attribute
+        ASSERT_TRUE(ArrayStr::MultiValueMapping::maxValues() == numDocs - 1);
+        ArrayStr arrayAttr("a-string");
+        testMultiValue(arrayAttr, numDocs);
+    }
+    { // Weighted Set String Attribute
+        ASSERT_TRUE(WeightedSetStr::MultiValueMapping::maxValues() == numDocs - 1);
+        WeightedSetStr wsetAttr("ws-string",
+                                Config(BasicType::STRING, CollectionType::WSET));
+        testMultiValue(wsetAttr, numDocs);
+    }
+    { // Array String Posting Attribute
+        ASSERT_TRUE(ArrayStrPosting::MultiValueMapping::maxValues() == numDocs - 1);
+        Config postingCfg(BasicType::STRING, CollectionType::ARRAY);
+        postingCfg.setFastSearch(true);
+        ArrayStrPosting arrayPostingAttr("a-fs-string", postingCfg);
+        testMultiValue(arrayPostingAttr, numDocs);
+    }
+    { // Weighted Set String Posting Attribute
+        ASSERT_TRUE(WeightedSetStrPosting::MultiValueMapping::maxValues() == numDocs - 1);
+        Config postingCfg(BasicType::STRING, CollectionType::WSET);
+        postingCfg.setFastSearch(true);
+        WeightedSetStrPosting wsetPostingAttr("ws-fs-string", postingCfg);
+        testMultiValue(wsetPostingAttr, numDocs);
+    }
+
+}
+
+
+// Exercises a multi-value string attribute in two rounds.
+// Round 1: document d gets the first d "enumNN" strings; values, enum handles
+// and enum store ref counts are checked.
+// Round 2: every document is cleared and refilled with "uniqueNN" strings
+// (document d gets numDocs - 1 - d of them); values, enum handles and ref counts
+// are checked again, and the "enumNN" strings must be gone from the enum store.
+//
+// attr     - empty attribute under test
+// numDocs  - number of documents to add; numDocs - 1 distinct strings are generated per round
+template <typename Attribute>
+void
+StringAttributeTest::testMultiValue(Attribute & attr, uint32_t numDocs)
+{
+    EXPECT_TRUE(attr.getNumDocs() == 0);
+
+    // generate two sets of unique strings
+    // "%02u" pads single digits with a zero, like the former "enum0%u" / "enum%u"
+    // pair; snprintf bounds the write to the buffer.
+    std::vector<vespalib::string> uniqueStrings;
+    uniqueStrings.reserve(numDocs - 1);
+    for (uint32_t i = 0; i < numDocs - 1; ++i) {
+        char unique[16];
+        snprintf(unique, sizeof(unique), "enum%02u", i);
+        uniqueStrings.push_back(vespalib::string(unique));
+    }
+    std::vector<vespalib::string> newUniques;
+    newUniques.reserve(numDocs - 1);
+    for (uint32_t i = 0; i < numDocs - 1; ++i) {
+        char unique[16];
+        snprintf(unique, sizeof(unique), "unique%02u", i);
+        newUniques.push_back(vespalib::string(unique));
+    }
+
+    // add docs
+    addDocs(attr, numDocs);
+
+    // insert values
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t valueCount = doc;
+        for (uint32_t j = 0; j < valueCount; ++j) {
+            EXPECT_TRUE(attr.append(doc, uniqueStrings[j], 1));
+        }
+        attr.commit();
+    }
+
+    //attr.getEnumStore().printCurrentContent();
+
+    // check values and enums
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t valueCount = attr.getValueCount(doc);
+        EXPECT_TRUE(valueCount == doc);
+
+        // test get first
+        if (valueCount == 0) {
+            EXPECT_TRUE(attr.get(doc) == NULL);
+            EXPECT_TRUE(attr.getEnum(doc) == std::numeric_limits<uint32_t>::max());
+        } else {
+            EXPECT_TRUE(strcmp(attr.get(doc), uniqueStrings[0].c_str()) == 0);
+            uint32_t e;
+            EXPECT_TRUE(attr.findEnum(uniqueStrings[0].c_str(), e));
+            EXPECT_TRUE(attr.getEnum(doc) == e);
+        }
+
+        // test get all
+        // data() is used instead of &v[0]: the vectors are empty for documents without values
+        std::vector<vespalib::string> values(valueCount);
+        EXPECT_TRUE(attr.get(doc, values.data(), valueCount) == valueCount);
+
+        std::vector<uint32_t> enums(valueCount);
+        EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, enums.data(), valueCount) == valueCount);
+
+        for (uint32_t j = 0; j < valueCount; ++j) {
+            //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str());
+            EXPECT_TRUE(values[j] == uniqueStrings[j]);
+            uint32_t e = 100;
+            EXPECT_TRUE(attr.findEnum(values[j].c_str(), e));
+            EXPECT_TRUE(enums[j] == e);
+        }
+    }
+
+    // check for correct refcounts
+    // string i was appended to the documents i+1 .. numDocs-1
+    for (uint32_t i = 0; i < uniqueStrings.size(); ++i) {
+        typename Attribute::EnumStore::Index idx;
+        EXPECT_TRUE(attr.getEnumStore().findIndex(uniqueStrings[i].c_str(), idx));
+        uint32_t expectedUsers = numDocs - 1 - i;
+        EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx));
+    }
+
+    typename Attribute::Histogram remaining = attr.getMultiValueMapping().getRemaining();
+    for (typename Attribute::Histogram::const_iterator it(remaining.begin()), mt(remaining.end()); it != mt; ++it) {
+        EXPECT_TRUE(it->second == 0);
+    }
+
+    // clear and insert new unique strings
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t oldValueCount = doc;
+        uint32_t valueCount = numDocs - 1 - doc;
+        //LOG(info, "clear and insert: doc = %u, valueCount = %u", doc, valueCount);
+        EXPECT_TRUE(attr.clearDoc(doc) == oldValueCount);
+        for (uint32_t j = 0; j < valueCount; ++j) {
+            EXPECT_TRUE(attr.append(doc, newUniques[j], 1));
+        }
+        attr.commit();
+
+        //attr.getEnumStore().printCurrentContent();
+    }
+
+    // check values and enums
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t valueCount = attr.getValueCount(doc);
+        uint32_t expectedValueCount = numDocs - 1 - doc;
+        EXPECT_TRUE(valueCount == expectedValueCount);
+
+        // test get all
+        std::vector<vespalib::string> values(valueCount);
+        EXPECT_TRUE(attr.get(doc, values.data(), valueCount) == valueCount);
+
+        std::vector<uint32_t> enums(valueCount);
+        EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, enums.data(), valueCount) == valueCount);
+
+        for (uint32_t j = 0; j < valueCount; ++j) {
+            //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str());
+            EXPECT_TRUE(values[j] == newUniques[j]);
+            uint32_t e = 100;
+            EXPECT_TRUE(attr.findEnum(values[j].c_str(), e));
+            EXPECT_TRUE(enums[j] == e);
+        }
+    }
+
+    // check that enumXX strings are removed
+    for (uint32_t i = 0; i < uniqueStrings.size(); ++i) {
+        uint32_t e;
+        EXPECT_TRUE(!attr.findEnum(uniqueStrings[i].c_str(), e));
+    }
+
+    // check for correct refcounts
+    for (uint32_t i = 0; i < newUniques.size(); ++i) {
+        typename Attribute::EnumStore::Index idx;
+        EXPECT_TRUE(attr.getEnumStore().findIndex(newUniques[i].c_str(), idx));
+        uint32_t expectedUsers = numDocs - 1 - i;
+        EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx));
+    }
+}
+
+void
+StringAttributeTest::testMultiValueMultipleClearDocBetweenCommit()
+{
+    // Clears each document twice before a single commit and expects only the values appended
+    // after the last clear to remain. This is also tested for all array attributes in attribute unit test
+    ArrayStr mvsa("a-string");
+    uint32_t numDocs = 50;
+    addDocs(mvsa, numDocs);
+
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        uint32_t valueCount = doc; // document N receives N values
+        EXPECT_TRUE(mvsa.clearDoc(doc) == 0);
+        for (uint32_t j = 0; j < valueCount; ++j) {
+            EXPECT_TRUE(mvsa.append(doc, "first", 1));
+        }
+        EXPECT_TRUE(mvsa.clearDoc(doc) == 0); // expected to return 0 even after the uncommitted "first" appends
+        for (uint32_t j = 0; j < valueCount; ++j) {
+            EXPECT_TRUE(mvsa.append(doc, "second", 1));
+        }
+        mvsa.commit();
+
+        // check for correct values
+        checkCount(mvsa, doc, valueCount, valueCount, "second"); // presumably (attr, doc, total values, occurrences, value) - confirm against checkCount
+    }
+}
+
+
+void
+StringAttributeTest::testMultiValueRemove()
+{
+    // Fills every document with 1 x "one", 3 x "three" and 5 x "five", then removes values one
+    // at a time and checks the counts. This is also tested for all array attributes in attribute unit test
+    ArrayStr mvsa("a-string");
+    uint32_t numDocs = 50;
+    addDocs(mvsa, numDocs);
+
+    for (uint32_t doc = 0; doc < numDocs; ++doc) {
+        EXPECT_TRUE(mvsa.append(doc, "one", 1));
+        for (uint32_t i = 0; i < 3; ++i) {
+            EXPECT_TRUE(mvsa.append(doc, "three", 1));
+        }
+        for (uint32_t i = 0; i < 5; ++i) {
+            EXPECT_TRUE(mvsa.append(doc, "five", 1));
+        }
+
+        mvsa.commit();
+        checkCount(mvsa, doc, 9, 1, "one"); // presumably (attr, doc, total values, occurrences, value) - confirm against checkCount
+        checkCount(mvsa, doc, 9, 3, "three");
+        checkCount(mvsa, doc, 9, 5, "five");
+
+        EXPECT_TRUE(mvsa.remove(doc, "zero", 1)); // removing a value that is not present is expected to succeed...
+        mvsa.commit();
+        checkCount(mvsa, doc, 9, 1, "one"); // ...and to leave the document unchanged
+        checkCount(mvsa, doc, 9, 3, "three");
+        checkCount(mvsa, doc, 9, 5, "five");
+
+        EXPECT_TRUE(mvsa.remove(doc, "one", 1));
+        mvsa.commit();
+        checkCount(mvsa, doc, 8, 0, "one");
+        checkCount(mvsa, doc, 8, 3, "three");
+        checkCount(mvsa, doc, 8, 5, "five");
+
+        EXPECT_TRUE(mvsa.remove(doc, "five", 1)); // a single remove is expected to drop all five occurrences
+        mvsa.commit();
+        checkCount(mvsa, doc, 3, 0, "one");
+        checkCount(mvsa, doc, 3, 3, "three");
+        checkCount(mvsa, doc, 3, 0, "five");
+    }
+}
+
+void
+StringAttributeTest::testSingleValue()
+{
+    {   // single-value string attribute with the default configuration
+        Config cfg(BasicType::STRING, CollectionType::SINGLE);
+        SingleValueStringAttribute populated("svsa", cfg);
+        const IAttributeVector * ia = &populated;
+        EXPECT_TRUE(dynamic_cast<const SingleValueEnumAttributeBase *>(ia) != nullptr);
+        testSingleValue(populated, cfg);
+        // a second, untouched instance is used for the add-doc default value check
+        SingleValueStringAttribute untouched("svsa", cfg);
+        testDefaultValueOnAddDoc(untouched);
+    }
+    {   // same checks with fast-search enabled, using the posting attribute class
+        Config cfg(BasicType::STRING, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        SingleValueStringPostingAttribute populated("svspb", cfg);
+        testSingleValue(populated, cfg);
+        // a second, untouched instance is used for the add-doc default value check
+        SingleValueStringPostingAttribute untouched("svspb", cfg);
+        testDefaultValueOnAddDoc(untouched);
+    }
+}
+
+void StringAttributeTest::testDefaultValueOnAddDoc(AttributeVector & v)
+{   // Checks that documents added to an empty attribute get a valid enum and an empty string value.
+    EXPECT_EQUAL(0u, v.getNumDocs());
+    v.addReservedDoc();
+    EXPECT_EQUAL(1u, v.getNumDocs());
+    EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(0)).valid() );
+    uint32_t doc(7); // arbitrary start value; addDoc() is expected to overwrite it with the new doc id
+    EXPECT_TRUE( v.addDoc(doc) );
+    EXPECT_EQUAL(1u, doc);
+    EXPECT_EQUAL(2u, v.getNumDocs());
+    EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(doc)).valid() );
+    EXPECT_EQUAL(0u, strlen(v.getString(doc, nullptr, 0))); // no caller buffer is passed (nullptr, size 0)
+}
+
+template <typename Attribute>
+void
+StringAttributeTest::testSingleValue(Attribute & svsa, Config &cfg)
+{
+    // Drives a single-value string attribute through add/update/commit rounds, checking values and
+    // enum handles after each commit, then saves it and loads the result into a second attribute.
+    StringAttribute & v = svsa;
+    const char * t = "not defined";
+    uint32_t doc = 2000; // start value only; addDoc() assigns the real doc id
+    uint32_t e1 = 2000;
+    uint32_t e2 = 2000;
+    uint32_t numDocs = 1000;
+    char tmp[32]; // scratch buffer for generated values; every write is bounded by sizeof(tmp)
+
+    // add docs
+    for (uint32_t i = 0; i < numDocs; ++i) {
+        EXPECT_TRUE( v.addDoc(doc) );
+        EXPECT_TRUE( doc == i );
+        EXPECT_TRUE( v.getNumDocs() == i + 1 );
+        EXPECT_TRUE( v.getValueCount(doc) == 1 );
+        EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(doc)).valid() );
+    }
+
+    std::map<vespalib::string, uint32_t> enums; // value -> enum handle first seen for it
+    // 10 unique strings
+    for (uint32_t i = 0; i < numDocs; ++i) {
+        snprintf(tmp, sizeof(tmp), "enum%u", i % 10);
+        EXPECT_TRUE( v.update(i, tmp) );
+        EXPECT_TRUE( v.getValueCount(i) == 1 );
+        EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(i)).valid() ); // still invalid before commit
+        if ((i % 10) == 9) {
+            v.commit(); // commit in batches of 10 documents, then verify that batch
+            for (uint32_t j = i - 9; j <= i; ++j) {
+                snprintf(tmp, sizeof(tmp), "enum%u", j % 10);
+                EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 );
+                e1 = v.getEnum(j);
+                EXPECT_TRUE( v.findEnum(t, e2) );
+                EXPECT_TRUE( e1 == e2 );
+                if (enums.count(vespalib::string(t)) == 0) {
+                    enums[vespalib::string(t)] = e1;
+                } else { // the same value must keep the same enum handle across documents
+                    EXPECT_TRUE( e1 == enums[vespalib::string(t)]);
+                    EXPECT_TRUE( e2 == enums[vespalib::string(t)]);
+                }
+            }
+        }
+    }
+
+    // 1000 unique strings
+    for (uint32_t i = 0; i < numDocs; ++i) {
+        snprintf(tmp, sizeof(tmp), "unique%u", i);
+        EXPECT_TRUE( v.update(i, tmp) );
+        snprintf(tmp, sizeof(tmp), "enum%u", i % 10);
+        EXPECT_TRUE( strcmp(v.get(i), tmp) == 0 ); // the old value is still returned until commit
+        if ((i % 10) == 9) {
+            //LOG(info, "commit: i = %u", i);
+            v.commit();
+            for (uint32_t j = i - 9; j <= i; ++j) {
+                snprintf(tmp, sizeof(tmp), "unique%u", j);
+                EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 );
+                e1 = v.getEnum(j);
+                EXPECT_TRUE( v.findEnum(t, e2) );
+                EXPECT_TRUE( e1 == e2 );
+            }
+            //svsa.printBuffers();
+        }
+    }
+    //svsa.printBuffers();
+
+    // check that enumX strings are removed
+    for (uint32_t i = 0; i < 10; ++i) {
+        snprintf(tmp, sizeof(tmp), "enum%u", i);
+        EXPECT_TRUE( !v.findEnum(tmp, e1) );
+    }
+
+    // save under the base file name of a second attribute and load it back; both steps must succeed
+    Attribute load("load", cfg);
+    EXPECT_TRUE( svsa.saveAs(load.getBaseFileName()) );
+    EXPECT_TRUE( load.load() );
+}
+
+
+
+int
+StringAttributeTest::Main()
+{
+ TEST_INIT("stringattribute_test");
+ // Runs the four test groups below in a fixed order between the test kit's TEST_INIT and TEST_DONE.
+ testMultiValue();
+
+ testMultiValueMultipleClearDocBetweenCommit();
+
+ testMultiValueRemove();
+
+ testSingleValue();
+
+ TEST_DONE();
+}
+
+}
+
+TEST_APPHOOK(search::StringAttributeTest); // presumably expands to the program entry point that runs StringAttributeTest::Main - confirm in the test kit
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh
new file mode 100755
index 00000000000..d7ac263c1c9
--- /dev/null
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Run the test binary (prefixed by $VALGRIND when that is set), then remove any
+# *.dat files in the working directory. Exit with the test's status, not rm's.
+$VALGRIND ./searchlib_stringattribute_test_app
+status=$?
+rm -rf *.dat
+exit $status
diff --git a/searchlib/src/tests/attribute/tensorattribute/.gitignore b/searchlib/src/tests/attribute/tensorattribute/.gitignore
new file mode 100644
index 00000000000..08519fe7ae8
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/.gitignore
@@ -0,0 +1 @@
+searchlib_tensorattribute_test_app
diff --git a/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt
new file mode 100644
index 00000000000..ec16b4363eb
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_tensorattribute_test_app # test binary built from the single source file below
+ SOURCES
+ tensorattribute_test.cpp
+ DEPENDS
+ searchlib
+)
+vespa_add_test(NAME searchlib_tensorattribute_test_app COMMAND sh tensorattribute_test.sh) # the test is run through the wrapper script rather than the binary directly
diff --git a/searchlib/src/tests/attribute/tensorattribute/DESC b/searchlib/src/tests/attribute/tensorattribute/DESC
new file mode 100644
index 00000000000..1cd9aa7cf14
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/DESC
@@ -0,0 +1 @@
+Unit tests for TensorAttribute.
diff --git a/searchlib/src/tests/attribute/tensorattribute/FILES b/searchlib/src/tests/attribute/tensorattribute/FILES
new file mode 100644
index 00000000000..1c8480ffde7
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/FILES
@@ -0,0 +1 @@
+tensorattribute_test.cpp
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
new file mode 100644
index 00000000000..137f93bcffe
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp
@@ -0,0 +1,217 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("tensorattribute_test");
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/attribute/tensorattribute.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/vespalib/tensor/tensor_factory.h>
+#include <vespa/vespalib/tensor/default_tensor.h>
+#include <vespa/vespalib/tensor/simple/simple_tensor_builder.h>
+
+using search::attribute::TensorAttribute;
+using search::AttributeGuard;
+using search::AttributeVector;
+using vespalib::tensor::Tensor;
+using vespalib::tensor::TensorCells;
+using vespalib::tensor::TensorDimensions;
+using vespalib::tensor::TensorFactory;
+using vespalib::tensor::TensorType;
+using vespalib::tensor::SimpleTensorBuilder;
+
+namespace vespalib {
+namespace tensor {
+// Equality for Tensor, delegating to Tensor::equals(). Presumably declared here so EXPECT_EQUAL on tensors can find it - confirm.
+static bool operator==(const Tensor &lhs, const Tensor &rhs)
+{
+ return lhs.equals(rhs);
+}
+
+}
+}
+
+
+struct Fixture
+{
+    using BasicType = search::attribute::BasicType;
+    using CollectionType = search::attribute::CollectionType;
+    using Config = search::attribute::Config;
+    // Test fixture owning one single-value tensor attribute named "test", with helpers to fill, inspect, save and reload it.
+    Config _cfg;
+    vespalib::string _name;
+    std::shared_ptr<TensorAttribute> _tensorAttr; // typed handle, used for the tensor specific calls
+    std::shared_ptr<AttributeVector> _attr;       // same object as _tensorAttr, held through the base class
+    vespalib::tensor::DefaultTensor::builder _builder;
+    // Creates the attribute for the tensor type given by typeSpec and adds the reserved document.
+    Fixture(const vespalib::string &typeSpec)
+        : _cfg(BasicType::TENSOR, CollectionType::SINGLE),
+          _name("test"),
+          _tensorAttr(),
+          _attr()
+    {
+        _cfg.setTensorType(TensorType::fromSpec(typeSpec));
+        _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg);
+        _attr = _tensorAttr;
+        _attr->addReservedDoc();
+    }
+    // Build a tensor from cells, optionally with an explicit dimension list, using the fixture's builder.
+    Tensor::UP createTensor(const TensorCells &cells) {
+        return TensorFactory::create(cells, _builder);
+    }
+    Tensor::UP createTensor(const TensorCells &cells,
+                            const TensorDimensions &dimensions) {
+        return TensorFactory::create(cells, dimensions, _builder);
+    }
+    // Adds (and commits) documents until docId is a valid document id.
+    void ensureSpace(uint32_t docId) {
+        while (_attr->getNumDocs() <= docId) {
+            uint32_t newDocId = 0u; // receives the id assigned by addDoc(); not used further
+            _attr->addDoc(newDocId);
+            _attr->commit();
+        }
+    }
+    // Clears the value of docId (adding documents first if needed) and commits.
+    void clearTensor(uint32_t docId) {
+        ensureSpace(docId);
+        _tensorAttr->clearDoc(docId);
+        _attr->commit();
+    }
+    // Stores tensor for docId (adding documents first if needed) and commits.
+    void setTensor(uint32_t docId, const Tensor &tensor) {
+        ensureSpace(docId);
+        _tensorAttr->setTensor(docId, tensor);
+        _attr->commit();
+    }
+    // Commits and returns the attribute status; commit(true) presumably forces the statistics to be refreshed - confirm.
+    search::attribute::Status getStatus() {
+        _attr->commit(true);
+        return _attr->getStatus();
+    }
+    // Expects that no tensor is stored for docId.
+    void
+    assertGetNoTensor(uint32_t docId) {
+        AttributeGuard guard(_attr);
+        Tensor::UP actTensor = _tensorAttr->getTensor(docId);
+        EXPECT_FALSE(actTensor);
+    }
+    // Expects that the tensor stored for docId equals expTensor.
+    void
+    assertGetTensor(const Tensor &expTensor, uint32_t docId)
+    {
+        AttributeGuard guard(_attr);
+        Tensor::UP actTensor = _tensorAttr->getTensor(docId);
+        EXPECT_TRUE(static_cast<bool>(actTensor));
+        if (actTensor) { EXPECT_EQUAL(expTensor, *actTensor); } // never dereference a missing tensor; the check above has already reported it
+    }
+    // Convenience overload: builds the expected tensor from cells and dimensions first.
+    void
+    assertGetTensor(const TensorCells &expCells,
+                    const TensorDimensions &expDimensions,
+                    uint32_t docId)
+    {
+        Tensor::UP expTensor = createTensor(expCells, expDimensions);
+        assertGetTensor(*expTensor, docId);
+    }
+    // Saves the attribute and expects the save to succeed.
+    void save() {
+        bool saveok = _attr->save();
+        EXPECT_TRUE(saveok);
+    }
+    // Replaces the attribute with a fresh instance (same name and config) and expects loading it to succeed.
+    void load() {
+        _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg);
+        _attr = _tensorAttr;
+        bool loadok = _attr->load();
+        EXPECT_TRUE(loadok);
+    }
+};
+
+
+TEST_F("Test empty tensor attribute", Fixture("tensor()"))
+{
+ EXPECT_EQUAL(1u, f._attr->getNumDocs()); // only the reserved document added by the Fixture constructor
+ EXPECT_EQUAL(1u, f._attr->getCommittedDocIdLimit());
+}
+
+
+TEST_F("Test setting tensor value", Fixture("tensor(x{}, y{})"))
+{
+ f.ensureSpace(4); // documents 0..4 exist after this call
+ EXPECT_EQUAL(5u, f._attr->getNumDocs());
+ EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit());
+ TEST_DO(f.assertGetNoTensor(4)); // a newly added document has no tensor
+ f.setTensor(4, *f.createTensor({}, {})); // an empty tensor created without dimensions...
+ TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4)); // ...is expected to read back as an empty tensor with dimensions x and y
+ f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"}));
+ TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3));
+ TEST_DO(f.assertGetNoTensor(2)); // document 2 was never given a tensor
+ TEST_DO(f.clearTensor(3));
+ TEST_DO(f.assertGetNoTensor(3)); // clearing removes the stored tensor
+}
+
+
+TEST_F("Test saving / loading tensor attribute", Fixture("tensor(x{}, y{})"))
+{
+ f.ensureSpace(4);
+ f.setTensor(4, *f.createTensor({}, {}));
+ f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"}));
+ TEST_DO(f.save());
+ TEST_DO(f.load()); // load() replaces the attribute with a fresh instance, so the checks below see loaded data only
+ EXPECT_EQUAL(5u, f._attr->getNumDocs());
+ EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit());
+ TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3));
+ TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4));
+ TEST_DO(f.assertGetNoTensor(2)); // a document without a tensor stays without one after the round trip
+}
+
+
+TEST_F("Test compaction of tensor attribute", Fixture("tensor(x{}, y{})"))
+{
+    f.ensureSpace(4);
+    Tensor::UP emptytensor = f.createTensor({}, {});
+    Tensor::UP emptyxytensor = f.createTensor({}, {"x", "y"});
+    Tensor::UP simpletensor = f.createTensor({ {{}, 3} }, { "x", "y"});
+    Tensor::UP filltensor = f.createTensor({ {{}, 5} }, { "x", "y"});
+    f.setTensor(4, *emptytensor);
+    f.setTensor(3, *simpletensor);
+    f.setTensor(2, *filltensor);
+    f.clearTensor(2);
+    f.setTensor(2, *filltensor);
+    search::attribute::Status before = f.getStatus();
+    search::attribute::Status after = before;
+    uint64_t iter = 0;
+    uint64_t iterLimit = 100000;
+    while (iter < iterLimit) { // rewrite doc 2 until the reported used memory drops
+        f.clearTensor(2);
+        f.setTensor(2, *filltensor);
+        after = f.getStatus();
+        if (after.getUsed() < before.getUsed()) {
+            break; // keep 'before' and 'iter' as they were when the drop was seen
+        }
+        before = after;
+        ++iter;
+    }
+    EXPECT_GREATER(iterLimit, iter);
+    LOG(info, "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64,
+        iter, before.getUsed(), after.getUsed());
+    TEST_DO(f.assertGetNoTensor(1));
+    TEST_DO(f.assertGetTensor(*filltensor, 2));
+    TEST_DO(f.assertGetTensor(*simpletensor, 3));
+    TEST_DO(f.assertGetTensor(*emptyxytensor, 4));
+}
+
+TEST_F("Test tensortype file header tag", Fixture("tensor(x[10])"))
+{
+ f.ensureSpace(4);
+ TEST_DO(f.save());
+ // Read the header of the saved file back and check that it carries the tensor type spec.
+ vespalib::FileHeader header;
+ FastOS_File file;
+ EXPECT_TRUE(file.OpenReadOnly("test.dat")); // "test" is the Fixture's attribute name; presumably save() writes <name>.dat - confirm
+ (void) header.readFile(file); // return value deliberately discarded
+ file.Close();
+ EXPECT_TRUE(header.hasTag("tensortype"));
+ EXPECT_EQUAL("tensor(x[10])", header.getTag("tensortype").asString());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); } // presumably the program entry point, running every TEST_F case above - confirm in the test kit
diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh
new file mode 100644
index 00000000000..2e940d5d99a
--- /dev/null
+++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Run the test binary (prefixed by $VALGRIND when that is set), then remove any
+# *.dat files in the working directory. Exit with the test's status, not rm's.
+$VALGRIND ./searchlib_tensorattribute_test_app
+status=$?
+rm -rf *.dat
+exit $status