diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests |
Publish
Diffstat (limited to 'searchlib/src/tests')
792 files changed, 74118 insertions, 0 deletions
diff --git a/searchlib/src/tests/.gitignore b/searchlib/src/tests/.gitignore new file mode 100644 index 00000000000..a3e9c375723 --- /dev/null +++ b/searchlib/src/tests/.gitignore @@ -0,0 +1,3 @@ +.depend +Makefile +*_test diff --git a/searchlib/src/tests/aggregator/.gitignore b/searchlib/src/tests/aggregator/.gitignore new file mode 100644 index 00000000000..fed1175d7cd --- /dev/null +++ b/searchlib/src/tests/aggregator/.gitignore @@ -0,0 +1,7 @@ +*.dat +.depend +Makefile +aggregator_test +perdocexpr_test +searchlib_attr_test_app +searchlib_perdocexpr_test_app diff --git a/searchlib/src/tests/aggregator/CMakeLists.txt b/searchlib/src/tests/aggregator/CMakeLists.txt new file mode 100644 index 00000000000..1cc750a8fac --- /dev/null +++ b/searchlib/src/tests/aggregator/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_perdocexpr_test_app + SOURCES + perdocexpr.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_perdocexpr_test_app COMMAND searchlib_perdocexpr_test_app) +vespa_add_executable(searchlib_attr_test_app + SOURCES + attr_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attr_test_app COMMAND searchlib_attr_test_app) diff --git a/searchlib/src/tests/aggregator/DESC b/searchlib/src/tests/aggregator/DESC new file mode 100644 index 00000000000..74bbb4a99fe --- /dev/null +++ b/searchlib/src/tests/aggregator/DESC @@ -0,0 +1 @@ +This is a test of the aggregator manager interface. 
diff --git a/searchlib/src/tests/aggregator/FILES b/searchlib/src/tests/aggregator/FILES new file mode 100644 index 00000000000..2d49a798a26 --- /dev/null +++ b/searchlib/src/tests/aggregator/FILES @@ -0,0 +1 @@ +aggregator.cpp diff --git a/searchlib/src/tests/aggregator/attr_test.cpp b/searchlib/src/tests/aggregator/attr_test.cpp new file mode 100644 index 00000000000..5184f61b573 --- /dev/null +++ b/searchlib/src/tests/aggregator/attr_test.cpp @@ -0,0 +1,285 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/vespalib/objects/objectdumper.h> + +using namespace search; +using namespace search::expression; +using namespace vespalib; + + +struct AttributeFixture { + + AttributeGuard guard; + + const double doc0attr[11] = { + 0.1428571428571428, + 0.2539682539682539, + 0.3448773448773448, + 0.4218004218004217, + 0.4884670884670883, + 0.5472906178788530, + 0.5999221968262214, + 0.6475412444452690, + 0.6910195053148342, + 0.7310195053148342, + 0.7680565423518712 + }; + const double doc1attr[11] = { + 0.1408450704225352, + 0.2507351803126450, + 0.3408252704027350, + 0.4171611482653304, + 0.4833863138282443, + 0.5418658459919869, + 0.5942218669343952, + 0.6416152318633051, + 0.6849052751533483, + 0.7247459126035475, + 0.7616462816072375 + }; + + AttributeFixture() : guard() + { + MultiFloatExtAttribute *attr = new MultiFloatExtAttribute("sortedArrayAttr"); + DocId d = 0; + + attr->addDoc(d); + for (double val : doc0attr) { + attr->add(val); + } + attr->addDoc(d); + for (double val : doc1attr) { + attr->add(val); + } + AttributeVector::SP sp(attr); + guard = AttributeGuard(sp); + } +}; + +struct IntAttrFixture { + AttributeGuard guard; + + const int64_t doc0attr[11] = { + 
1, + 333, + 88888888L, + -17 + }; + const double doc1attr[11] = { + 2, + -42, + 4444, + 999999999L + }; + + IntAttrFixture() : guard() + { + MultiIntegerExtAttribute *attr = new MultiIntegerExtAttribute("sortedArrayAttr"); + DocId d = 0; + attr->addDoc(d); + for (int64_t val : doc0attr) { + attr->add(val); + } + attr->addDoc(d); + for (int64_t val : doc1attr) { + attr->add(val); + } + AttributeVector::SP sp(attr); + guard = AttributeGuard(sp); + } +}; + +struct StringAttrFixture { + AttributeGuard guard; + StringAttrFixture() : guard() + { + MultiStringExtAttribute *attr = new MultiStringExtAttribute("sortedArrayAttr"); + DocId d = 0; + attr->addDoc(d); + attr->add("1"); + attr->add("333"); + attr->add("88888888"); + attr->addDoc(d); + attr->add("2"); + attr->add("4444"); + attr->add("999999999"); + AttributeVector::SP sp(attr); + guard = AttributeGuard(sp); + } +}; + + +TEST_F("testArrayAt", AttributeFixture()) { + for (int i = 0; i < 11; i++) { + ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(i))); + ExpressionNode::CP ln(new ArrayAtLookup(*f1.guard, cn)); + + ExpressionTree et(ln); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), f1.doc0attr[i]); + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), f1.doc1attr[i]); + } +} + +TEST_F("testArrayAtInt", IntAttrFixture()) { + for (int i = 0; i < 3; i++) { + ExpressionNode::CP othercn(new ConstantNode(new Int64ResultNode(4567))); + ArrayAtLookup *x = new ArrayAtLookup(*f1.guard, othercn); + ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(i))); + ArrayAtLookup *y = new ArrayAtLookup(*f1.guard, cn); + *x = *y; + delete y; + ExpressionNode::CP ln(x); + + ExpressionTree et(ln); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + 
EXPECT_TRUE(et.getResult().getClass().inherits(IntegerResultNode::classId)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getInteger(), f1.doc0attr[i]); + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getInteger(), f1.doc1attr[i]); + } +} + + +TEST_F("testArrayAtString", StringAttrFixture()) { + ExpressionNode::CP cn(new ConstantNode(new Int64ResultNode(1))); + ExpressionNode::CP ln(new ArrayAtLookup(*f1.guard, cn)); + + ExpressionTree et(ln); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + EXPECT_TRUE(et.getResult().getClass().inherits(StringResultNode::classId)); + + char mem[64]; + ResultNode::BufferRef buf(&mem, sizeof(mem)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getString(buf).c_str(), std::string("333")); + + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getString(buf).c_str(), std::string("4444")); +} + +struct ArrayAtExpressionFixture : + public AttributeFixture +{ + ExpressionNode::CP cn; + ExpressionNode::CP ln; + ExpressionTree et; + + ArrayAtExpressionFixture(int i) : + AttributeFixture(), + cn(new ConstantNode(new Int64ResultNode(i))), + ln(new ArrayAtLookup(*guard, cn)), + et(ln) + { + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + } +}; + + +TEST_F("testArrayAtBelowRange", ArrayAtExpressionFixture(-1)) { + EXPECT_TRUE(f1.et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(f1.et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc0attr[0]); + EXPECT_TRUE(f1.et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc1attr[0]); +} + +TEST_F("testArrayAtAboveRange", ArrayAtExpressionFixture(17)) { + EXPECT_TRUE(f1.et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(f1.et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc0attr[10]); + 
EXPECT_TRUE(f1.et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(f1.et.getResult().getFloat(), f1.doc1attr[10]); +} + +TEST_F("testInterpolatedLookup", AttributeFixture()) { + + ExpressionNode::CP c1(new ConstantNode(new FloatResultNode(f1.doc0attr[2]))); + ExpressionNode::CP l1(new InterpolatedLookup(*f1.guard, c1)); + + ExpressionTree et(l1); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + + EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId)); + + EXPECT_TRUE(et.execute(0, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.0); + + EXPECT_TRUE(et.execute(1, HitRank(0.0))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.053082175617388); +} + +TEST_F("testWithRelevance", AttributeFixture()) { + + ExpressionNode::CP r1(new RelevanceNode()); + ExpressionNode::CP l1(new InterpolatedLookup(*f1.guard, r1)); + + ExpressionTree et(l1); + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + + EXPECT_TRUE(et.getResult().getClass().inherits(FloatResultNode::classId)); + + // docid 0 + double expect0[] = { 0.0, 0.0, 0.0, + + 0.514285714285715012, + 1.506349206349207659, + 2.716594516594518005, + + 4.19605949605949835, + 6.001633866649353166, + 8.224512367129145574, + + 10.0, 10.0, 10.0 }; + + for (int i = 0; i < 12; i++) { + double r = i-1; + r *= 0.1; + TEST_STATE(vespalib::make_string("i=%d", i).c_str()); + EXPECT_TRUE(et.execute(0, HitRank(r))); + EXPECT_EQUAL(expect0[i], et.getResult().getFloat()); + } + + EXPECT_TRUE(et.execute(0, HitRank(f1.doc0attr[2]))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.0); + + // docid 1 + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[0] - 0.001))); + EXPECT_EQUAL(et.getResult().getFloat(), 0.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[0]))); + EXPECT_EQUAL(et.getResult().getFloat(), 0.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[2]))); + EXPECT_EQUAL(et.getResult().getFloat(), 2.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[4]))); + 
EXPECT_EQUAL(et.getResult().getFloat(), 4.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[10]))); + EXPECT_EQUAL(et.getResult().getFloat(), 10.0); + + EXPECT_TRUE(et.execute(1, HitRank(f1.doc1attr[10] + 0.01))); + EXPECT_EQUAL(et.getResult().getFloat(), 10.0); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/aggregator/perdocexpr.cpp b/searchlib/src/tests/aggregator/perdocexpr.cpp new file mode 100644 index 00000000000..8f073187cce --- /dev/null +++ b/searchlib/src/tests/aggregator/perdocexpr.cpp @@ -0,0 +1,1693 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/aggregation/expressioncountaggregationresult.h> +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/vespalib/objects/objectdumper.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <stdexcept> +#include <vespa/document/base/testdocman.h> +#include <vespa/vespalib/util/md5.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h> + +using namespace search; +using namespace search::expression; +using namespace search::aggregation; +using namespace vespalib; + +struct AggrGetter { + virtual ~AggrGetter() { } + virtual const ResultNode &operator()(const AggregationResult &r) const = 0; +}; + +AttributeGuard createInt64Attribute(); +AttributeGuard createInt32Attribute(); +AttributeGuard createInt16Attribute(); +AttributeGuard createInt8Attribute(); +template<typename T> +void testCmp(const T & small, const T & medium, const T & large); + +void testMin(const ResultNode & a, const ResultNode & b) { + ASSERT_TRUE(a.cmp(b) < 0); + MinFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + 
ASSERT_TRUE(func.getResult().cmp(a) == 0); + + MinFunctionNode funcR; + funcR.appendArg(ConstantNode(b)).appendArg(ConstantNode(a)).prepare(false) + .execute(); + ASSERT_TRUE(funcR.getResult().cmp(a) == 0); +} + +TEST("testMin") { + testMin(Int64ResultNode(67), Int64ResultNode(68)); + testMin(FloatResultNode(67), FloatResultNode(68)); + testMin(StringResultNode("67"), StringResultNode("68")); + testMin(RawResultNode("67", 2), RawResultNode("68", 2)); + testMin(RawResultNode("-67", 2), RawResultNode("68", 2)); +} + +void testMax(const ResultNode & a, const ResultNode & b) { + ASSERT_TRUE(a.cmp(b) < 0); + MaxFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + ASSERT_TRUE(func.getResult().cmp(b) == 0); + + MaxFunctionNode funcR; + funcR.appendArg(ConstantNode(b)).appendArg(ConstantNode(a)).prepare(false) + .execute(); + ASSERT_TRUE(funcR.getResult().cmp(b) == 0); +} + +TEST("testMax") { + testMax(Int64ResultNode(67), Int64ResultNode(68)); + testMax(FloatResultNode(67), FloatResultNode(68)); + testMax(StringResultNode("67"), StringResultNode("68")); + testMax(RawResultNode("67", 2), RawResultNode("68", 2)); + testMax(RawResultNode("-67", 2), RawResultNode("68", 2)); +} + +ExpressionCountAggregationResult getExpressionCountWithNormalSketch() { + nbostream stream; + stream << (uint32_t)ExpressionCountAggregationResult::classId + << (char)0 << (uint32_t)0 + << (uint32_t)NormalSketch<>::classId + << NormalSketch<>::BUCKET_COUNT << NormalSketch<>::BUCKET_COUNT; + for (size_t i = 0; i < NormalSketch<>::BUCKET_COUNT; ++i) { + stream << static_cast<char>(0); + } + NBOSerializer serializer(stream); + ExpressionCountAggregationResult result; + serializer >> result; + EXPECT_EQUAL(0u, stream.size()); + EXPECT_EQUAL(NormalSketch<>(), result.getSketch()); + return result; +} + +void testExpressionCount(const ResultNode &a, uint32_t bucket, uint8_t val) { + ExpressionCountAggregationResult func = + 
getExpressionCountWithNormalSketch(); + func.setExpression(ConstantNode(a)); + func.aggregate(DocId(42), HitRank(21)); + + const auto &sketch = func.getSketch(); + auto normal = dynamic_cast<const NormalSketch<>&>(sketch); + for (uint32_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + TEST_STATE(make_string("Bucket %u. Expected bucket %u=%u", + i, bucket, val).c_str()); + EXPECT_EQUAL(i == bucket? val : 0, (int) normal.bucket[i]); + } +} + +TEST("require that expression count can operate on different results") { + testExpressionCount(Int64ResultNode(67), 98, 2); + testExpressionCount(FloatResultNode(67), 545, 1); + testExpressionCount(StringResultNode("67"), 243, 1); + testExpressionCount(RawResultNode("67", 2), 243, 1); + testExpressionCount(RawResultNode("-67", 2), 434, 1); +} + +TEST("require that expression counts can be merged") { + ExpressionCountAggregationResult func1 = + getExpressionCountWithNormalSketch(); + func1.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + ExpressionCountAggregationResult func2 = + getExpressionCountWithNormalSketch(); + func2.setExpression(ConstantNode(FloatResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + + EXPECT_EQUAL(2, func1.getRank().getInteger()); + func1.merge(func2); + EXPECT_EQUAL(3, func1.getRank().getInteger()); + const auto &sketch = func1.getSketch(); + auto normal = dynamic_cast<const NormalSketch<>&>(sketch); + EXPECT_EQUAL(2, normal.bucket[98]); // from func1 + EXPECT_EQUAL(1, normal.bucket[545]); // from func2 +} + +TEST("require that expression counts can be serialized") { + ExpressionCountAggregationResult func; + func.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + func.setExpression(ConstantNode(Int64ResultNode(68))) + .aggregate(DocId(42), HitRank(21)); + + nbostream os; + NBOSerializer nos(os); + nos << func; + Identifiable::UP obj = Identifiable::create(nos); + auto *func2 = dynamic_cast<ExpressionCountAggregationResult 
*>(obj.get()); + ASSERT_TRUE(func2); + EXPECT_EQUAL(func.getSketch(), func2->getSketch()); +} + +TEST("require that expression count estimates rank") { + ExpressionCountAggregationResult func = + getExpressionCountWithNormalSketch(); + EXPECT_EQUAL(0, func.getRank().getInteger()); + func.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + EXPECT_EQUAL(2, func.getRank().getInteger()); + func.setExpression(ConstantNode(FloatResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + EXPECT_EQUAL(3, func.getRank().getInteger()); + func.setExpression(ConstantNode(FloatResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + EXPECT_EQUAL(3, func.getRank().getInteger()); +} + +void testAdd(const ResultNode &a, const ResultNode &b, const ResultNode &c) { + AddFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + EXPECT_EQUAL(func.getResult().asString(), c.asString()); + EXPECT_EQUAL(func.getResult().cmp(c), 0); + EXPECT_EQUAL(c.cmp(func.getResult()), 0); +} + +TEST("testAdd") { + testAdd(Int64ResultNode(67), Int64ResultNode(68), Int64ResultNode(67+68)); + testAdd(FloatResultNode(67), FloatResultNode(68), FloatResultNode(67+68)); + testAdd(StringResultNode("67"), StringResultNode("68"), + StringResultNode("lo")); + testAdd(RawResultNode("67", 2), RawResultNode("68", 2), + RawResultNode("lo", 2)); +} + +void testDivide(const ResultNode &a, const ResultNode &b, + const ResultNode &c) { + DivideFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + EXPECT_EQUAL(func.getResult().asString(), c.asString()); + EXPECT_EQUAL(func.getResult().getFloat(), c.getFloat()); + EXPECT_EQUAL(func.getResult().cmp(c), 0); + EXPECT_EQUAL(c.cmp(func.getResult()), 0); +} + +TEST("testDivide") { + testDivide(Int64ResultNode(6), FloatResultNode(12.0), + FloatResultNode(0.5)); + testDivide(Int64ResultNode(6), Int64ResultNode(1), Int64ResultNode(6)); + 
testDivide(Int64ResultNode(6), Int64ResultNode(0), Int64ResultNode(0)); +} + +void testModulo(const ResultNode &a, const ResultNode &b, + const ResultNode &c) { + ModuloFunctionNode func; + func.appendArg(ConstantNode(a)).appendArg(ConstantNode(b)).prepare(false) + .execute(); + EXPECT_EQUAL(func.getResult().asString(), c.asString()); + EXPECT_EQUAL(func.getResult().getFloat(), c.getFloat()); + EXPECT_EQUAL(func.getResult().cmp(c), 0); + EXPECT_EQUAL(c.cmp(func.getResult()), 0); +} + +TEST("testModulo") { + testModulo(Int64ResultNode(0), Int64ResultNode(6), Int64ResultNode(0)); + testModulo(Int64ResultNode(1), Int64ResultNode(6), Int64ResultNode(1)); + testModulo(Int64ResultNode(2), Int64ResultNode(6), Int64ResultNode(2)); + testModulo(Int64ResultNode(3), Int64ResultNode(6), Int64ResultNode(3)); + testModulo(Int64ResultNode(4), Int64ResultNode(6), Int64ResultNode(4)); + testModulo(Int64ResultNode(5), Int64ResultNode(6), Int64ResultNode(5)); + testModulo(Int64ResultNode(6), Int64ResultNode(6), Int64ResultNode(0)); + + testModulo(Int64ResultNode(6), Int64ResultNode(1), Int64ResultNode(0)); + testModulo(Int64ResultNode(6), Int64ResultNode(0), Int64ResultNode(0)); + + testModulo(FloatResultNode(2), Int64ResultNode(6), FloatResultNode(2)); + testModulo(Int64ResultNode(3), FloatResultNode(6), FloatResultNode(3)); +} + +void testNegate(const ResultNode & a, const ResultNode & b) { + NegateFunctionNode func; + func.appendArg(ConstantNode(a)).prepare(false).execute(); + EXPECT_EQUAL(func.getResult().asString(), b.asString()); + EXPECT_EQUAL(func.getResult().cmp(b), 0); + EXPECT_EQUAL(b.cmp(func.getResult()), 0); +} + +TEST("testNegate") { + testNegate(Int64ResultNode(67), Int64ResultNode(-67)); + testNegate(FloatResultNode(67.0), FloatResultNode(-67.0)); + + char strnorm[4] = { 102, 111, 111, 0 }; + char strneg[4] = { -102, -111, -111, 0 }; + testNegate(StringResultNode(strnorm), StringResultNode(strneg)); + testNegate(RawResultNode(strnorm, 3), RawResultNode(strneg, 3)); 
+} + +template <typename T> +void testBuckets(const T * b) { + EXPECT_TRUE(b[0].cmp(b[1]) < 0); + EXPECT_TRUE(b[1].cmp(b[2]) < 0); + EXPECT_TRUE(b[2].cmp(b[3]) < 0); + EXPECT_TRUE(b[3].cmp(b[4]) < 0); + EXPECT_TRUE(b[4].cmp(b[5]) < 0); + + EXPECT_TRUE(b[1].cmp(b[0]) > 0); + EXPECT_TRUE(b[2].cmp(b[1]) > 0); + EXPECT_TRUE(b[3].cmp(b[2]) > 0); + EXPECT_TRUE(b[4].cmp(b[3]) > 0); + EXPECT_TRUE(b[5].cmp(b[4]) > 0); + + EXPECT_TRUE(b[1].cmp(b[1]) == 0); + EXPECT_TRUE(b[2].cmp(b[2]) == 0); + EXPECT_TRUE(b[3].cmp(b[3]) == 0); + EXPECT_TRUE(b[4].cmp(b[4]) == 0); + EXPECT_TRUE(b[5].cmp(b[5]) == 0); + + EXPECT_TRUE(b[0].contains(b[1]) < 0); + EXPECT_TRUE(b[1].contains(b[2]) < 0); + EXPECT_TRUE(b[2].contains(b[3]) == 0); + EXPECT_TRUE(b[3].contains(b[4]) < 0); + EXPECT_TRUE(b[4].contains(b[5]) < 0); + + EXPECT_TRUE(b[1].contains(b[0]) > 0); + EXPECT_TRUE(b[2].contains(b[1]) > 0); + EXPECT_TRUE(b[3].contains(b[2]) == 0); + EXPECT_TRUE(b[4].contains(b[3]) > 0); + EXPECT_TRUE(b[5].contains(b[4]) > 0); + + EXPECT_TRUE(b[1].contains(b[1]) == 0); + EXPECT_TRUE(b[2].contains(b[2]) == 0); + EXPECT_TRUE(b[3].contains(b[3]) == 0); + EXPECT_TRUE(b[4].contains(b[4]) == 0); + EXPECT_TRUE(b[5].contains(b[5]) == 0); +} + +TEST("testBuckets") { + IntegerBucketResultNodeVector iv; + IntegerBucketResultNodeVector::Vector & ib = iv.getVector(); + EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL); + ib.resize(1); + ib[0] = IntegerBucketResultNode(7, 9); + EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(7)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(8)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(9)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(10)) == NULL); + + ib.resize(6); + ib[0] = IntegerBucketResultNode(7, 9); + ib[1] = IntegerBucketResultNode(13, 17); + ib[2] = IntegerBucketResultNode(15, 30); + ib[3] = IntegerBucketResultNode(19, 27); + ib[4] = IntegerBucketResultNode(20, 33); + ib[5] = IntegerBucketResultNode(50, 50); + 
testBuckets(&ib[0]); + iv.sort(); + testBuckets(&ib[0]); + EXPECT_TRUE(ib[0].contains(6) > 0); + EXPECT_TRUE(ib[0].contains(7) == 0); + EXPECT_TRUE(ib[0].contains(8) == 0); + EXPECT_TRUE(ib[0].contains(9) < 0); + EXPECT_TRUE(ib[0].contains(10) < 0); + EXPECT_TRUE(iv.find(Int64ResultNode(6)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(7)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(8)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(9)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(10)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(14)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(27)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(32)) != NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(33)) == NULL); + EXPECT_TRUE(iv.find(Int64ResultNode(50)) == NULL); + + FloatBucketResultNodeVector fv; + FloatBucketResultNodeVector::Vector & fb = fv.getVector(); + fb.resize(6); + fb[0] = FloatBucketResultNode(7, 9); + fb[1] = FloatBucketResultNode(13, 17); + fb[2] = FloatBucketResultNode(15, 30); + fb[3] = FloatBucketResultNode(19, 27); + fb[4] = FloatBucketResultNode(20, 33); + fb[5] = FloatBucketResultNode(50, 50); + testBuckets(&fb[0]); + fv.sort(); + testBuckets(&fb[0]); + EXPECT_TRUE(fb[0].contains(6) > 0); + EXPECT_TRUE(fb[0].contains(7) == 0); + EXPECT_TRUE(fb[0].contains(8) == 0); + EXPECT_TRUE(fb[0].contains(9) < 0); + EXPECT_TRUE(fb[0].contains(10) < 0); + EXPECT_TRUE(fv.find(FloatResultNode(6)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(7)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(8)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(9)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(10)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(14)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(27)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(32)) != NULL); + EXPECT_TRUE(fv.find(FloatResultNode(33)) == NULL); + EXPECT_TRUE(fv.find(FloatResultNode(50)) == NULL); + + StringBucketResultNodeVector sv; + StringBucketResultNodeVector::Vector & sb = 
sv.getVector(); + sb.resize(6); + sb[0] = StringBucketResultNode("07", "09"); + sb[1] = StringBucketResultNode("13", "17"); + sb[2] = StringBucketResultNode("15", "30"); + sb[3] = StringBucketResultNode("19", "27"); + sb[4] = StringBucketResultNode("20", "33"); + sb[5] = StringBucketResultNode("50", "50"); + testBuckets(&sb[0]); + sv.sort(); + testBuckets(&sb[0]); + EXPECT_TRUE(sb[0].contains("06") > 0); + EXPECT_TRUE(sb[0].contains("07") == 0); + EXPECT_TRUE(sb[0].contains("08") == 0); + EXPECT_TRUE(sb[0].contains("09") < 0); + EXPECT_TRUE(sb[0].contains("10") < 0); + EXPECT_TRUE(sv.find(StringResultNode("06")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("07")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("08")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("09")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("10")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("14")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("27")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("32")) != NULL); + EXPECT_TRUE(sv.find(StringResultNode("33")) == NULL); + EXPECT_TRUE(sv.find(StringResultNode("50")) == NULL); +} + +template<typename T> +void testCmp(const T & small, const T & medium, const T & large) { + EXPECT_TRUE(small.cmp(medium) < 0); + EXPECT_TRUE(small.cmp(large) < 0); + EXPECT_TRUE(medium.cmp(large) < 0); + EXPECT_TRUE(medium.cmp(small) > 0); + EXPECT_TRUE(large.cmp(small) > 0); + EXPECT_TRUE(large.cmp(medium) > 0); +} + +TEST("testResultNodes") { + Int64ResultNode i(89); + char mem[64]; + ResultNode::BufferRef buf(&mem, sizeof(mem)); + EXPECT_EQUAL(i.getInteger(), 89); + EXPECT_EQUAL(i.getFloat(), 89.0); + EXPECT_EQUAL(i.getString(buf).c_str(), std::string("89")); + FloatResultNode f(2165.798); + EXPECT_EQUAL(f.getInteger(), 2166); + EXPECT_EQUAL(f.getFloat(), 2165.798); + EXPECT_EQUAL(f.getString(buf).c_str(), std::string("2165.8")); + StringResultNode s("17.89hjkljly"); + EXPECT_EQUAL(s.getInteger(), 17); + EXPECT_EQUAL(s.getFloat(), 17.89); + 
EXPECT_EQUAL(s.getString(buf).c_str(), std::string("17.89hjkljly")); + RawResultNode r("hjgasfdg", 9); + EXPECT_EQUAL(r.getString(buf).c_str(), std::string("hjgasfdg")); + int64_t j(789); + double d(786324.78); + nbostream os; + os << j << d; + RawResultNode r1(os.c_str(), sizeof(j)); + EXPECT_EQUAL(r1.getInteger(), 789); + RawResultNode r2(os.c_str() + sizeof(j), sizeof(d)); + EXPECT_EQUAL(r2.getFloat(), 786324.78); + + StringResultNode s1, s2("a"), s3("a"), s4("b"), s5("bb"); + EXPECT_EQUAL(s1.cmp(s1), 0); + EXPECT_EQUAL(s2.cmp(s3), 0); + EXPECT_EQUAL(s4.cmp(s4), 0); + EXPECT_EQUAL(s5.cmp(s5), 0); + testCmp(s1, s2, s4); + testCmp(s1, s2, s5); + testCmp(s2, s4, s5); + + { + Int64ResultNode i1(-1), i2(0), i3(1), i4(0x80000000lu); + EXPECT_EQUAL(i1.cmp(i1), 0); + EXPECT_EQUAL(i2.cmp(i2), 0); + EXPECT_EQUAL(i3.cmp(i3), 0); + testCmp(i1, i2, i3); + testCmp(i1, i2, i4); + } + + { + FloatResultNode i1(-1), i2(0), i3(1), notanumber(nan("")), + minusInf(-INFINITY), plussInf(INFINITY); + EXPECT_EQUAL(i1.cmp(i1), 0); + EXPECT_EQUAL(i2.cmp(i2), 0); + EXPECT_EQUAL(i3.cmp(i3), 0); + EXPECT_EQUAL(minusInf.cmp(minusInf), 0); + EXPECT_EQUAL(plussInf.cmp(plussInf), 0); + EXPECT_EQUAL(notanumber.cmp(notanumber), 0); + testCmp(i1, i2, i3); + testCmp(minusInf, i1, plussInf); + testCmp(minusInf, i2, plussInf); + testCmp(minusInf, i3, plussInf); + testCmp(notanumber, i2, i3); + testCmp(notanumber, i2, plussInf); + testCmp(notanumber, minusInf, plussInf); + } + { + FloatBucketResultNode + i1(-1, 3), i2(188000, 188500), i3(1630000, 1630500), + notanumber(-nan(""), nan("")), inf(-INFINITY, INFINITY); + EXPECT_EQUAL(i1.cmp(i1), 0); + EXPECT_EQUAL(i2.cmp(i2), 0); + EXPECT_EQUAL(notanumber.cmp(notanumber), 0); + EXPECT_EQUAL(inf.cmp(inf), 0); + + testCmp(i1, i2, i3); + testCmp(inf, i1, i2); + testCmp(notanumber, i2, i3); + testCmp(notanumber, i1, i2); + testCmp(notanumber, inf, i1); + } +} + +void testStreaming(const Identifiable &v) { + nbostream os; + NBOSerializer nos(os); + nos << v; + 
Identifiable::UP s = Identifiable::create(nos); + ASSERT_TRUE(s.get() != NULL); + ASSERT_TRUE(v.cmp(*s) == 0); + nbostream os2, os3; + NBOSerializer nos2(os2), nos3(os3); + nos2 << v; + nos3 << *s; + + EXPECT_EQUAL(os2.size(), os3.size()); + ASSERT_TRUE(os2.size() == os3.size()); + EXPECT_EQUAL(0, memcmp(os2.c_str(), os3.c_str(), os3.size())); +} + +TEST("testTimeStamp") { + TimeStampFunctionNode t1; + testStreaming(t1); +} + +namespace { + +std::string +getVespaChecksumV2( + const std::string& ymumid, + int fid, + const std::string& flags_str) +{ + if (fid == 6 || fid == 0 || fid == 5) { + return 0; + } + + std::list<char> flags_list; + flags_list.clear(); + for (unsigned int i = 0; i< flags_str.length();i++) + if (isalpha(flags_str[i])) + flags_list.push_back(flags_str[i]); + flags_list.sort(); + + std::string new_flags_str =""; + std::list<char>::iterator it; + for (it = flags_list.begin();it!=flags_list.end();it++) + new_flags_str += *it; + + uint32_t networkFid = htonl(fid); + + int length = ymumid.length()+ + sizeof(networkFid)+ + new_flags_str.length(); + + unsigned char buffer[length]; + memset(buffer, 0x00, length); + memcpy(buffer, ymumid.c_str(), ymumid.length()); + memcpy(buffer + ymumid.length(), + (const char*)&networkFid, sizeof(networkFid)); + memcpy(buffer+ymumid.length()+sizeof(networkFid), new_flags_str.c_str(), + new_flags_str.length()); + + return std::string((char*)buffer, length); +} +} // namespace + +TEST("testMailChecksumExpression") { + document::TestDocMan testDocMan; + + int folder = 32; + std::string flags = "RWA"; + std::string ymumid = "barmuda"; + + document::Document::UP doc = + testDocMan.createDocument("foo", "userdoc:footype:1234:" + ymumid); + document::WeightedSetFieldValue + ws(doc->getField("byteweightedset").getDataType()); + + for (uint32_t i = 0; i < flags.size(); i++) { + ws.add(document::ByteFieldValue(flags[i])); + } + doc->setValue("headerval", document::IntFieldValue(folder)); + doc->setValue("byteweightedset", ws); 
+ + CatFunctionNode e; + + // YMUMID + GetDocIdNamespaceSpecificFunctionNode* ns = + new GetDocIdNamespaceSpecificFunctionNode( + ResultNode::UP(new StringResultNode)); + e.appendArg(ExpressionNode::CP(ns)); + + // Folder + e.appendArg(DocumentFieldNode("headerval")); + + // Flags + e.appendArg(SortFunctionNode(DocumentFieldNode("byteweightedset"))); + + MD5BitFunctionNode node(e, 32); + + CatFunctionNode &cfn = + static_cast<CatFunctionNode&>(*node.expressionNodeVector()[0]); + MultiArgFunctionNode::ExpressionNodeVector &xe = + cfn.expressionNodeVector(); + + for (uint32_t i = 0; i < xe.size(); i++) { + DocumentAccessorNode* rf = + dynamic_cast<DocumentAccessorNode *>(xe[i].get()); + if (rf) { + rf->setDocType(doc->getType()); + rf->prepare(true); + rf->setDoc(*doc); + } else { + MultiArgFunctionNode * mf = + dynamic_cast<MultiArgFunctionNode *>(xe[i].get()); + MultiArgFunctionNode::ExpressionNodeVector& se = + mf->expressionNodeVector(); + for (uint32_t j = 0; j < se.size(); j++) { + DocumentAccessorNode* tf = + dynamic_cast<DocumentAccessorNode *>(se[j].get()); + tf->setDocType(doc->getType()); + tf->prepare(true); + tf->setDoc(*doc); + } + } + } + // SortFunctionNode & sfn = static_cast<SortFunctionNode&>(*xe[1]); + // sfn.prepare(false); + cfn.prepare(false); + + cfn.execute(); + ConstBufferRef ref = + static_cast<const RawResultNode &>(cfn.getResult()).get(); + + std::string cmp = getVespaChecksumV2(ymumid, folder, flags); + + EXPECT_EQUAL(ref.size(), 14u); + EXPECT_EQUAL(cmp.size(), ref.size()); + + for (uint32_t i = 0; i < ref.size(); i++) { + std::cerr << i << ": " << (int)ref.c_str()[i] << "/" << (int)cmp[i] + << "\n"; + } + + EXPECT_TRUE(memcmp(cmp.c_str(), ref.c_str(), cmp.size()) == 0); + + node.prepare(true); + node.execute(); + + ConstBufferRef ref2 = + static_cast<const RawResultNode &>(node.getResult()).get(); + + for (uint32_t i = 0; i < ref2.size(); i++) { + std::cerr << i << ": " << (int)ref2.c_str()[i] << "\n"; + } +} + 
+TEST("testDebugFunction") { + { + AddFunctionNode add; + add.appendArg(ConstantNode(Int64ResultNode(3))); + add.appendArg(ConstantNode(Int64ResultNode(4))); + DebugWaitFunctionNode n(add, 1.3, false); + n.prepare(false); + + FastOS_Time time; + time.SetNow(); + n.execute(); + EXPECT_TRUE(time.MilliSecsToNow() > 1000.0); + EXPECT_EQUAL(static_cast<const Int64ResultNode &>(n.getResult()).get(), + 7); + } + { + AddFunctionNode add; + add.appendArg(ConstantNode(Int64ResultNode(3))); + add.appendArg(ConstantNode(Int64ResultNode(4))); + DebugWaitFunctionNode n(add, 1.3, true); + n.prepare(false); + + FastOS_Time time; + time.SetNow(); + n.execute(); + EXPECT_TRUE(time.MilliSecsToNow() > 1000.0); + EXPECT_EQUAL(static_cast<const Int64ResultNode &>(n.getResult()).get(), + 7); + } +} + +TEST("testDivExpressions") { + { + StrLenFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(static_cast<const Int64ResultNode &>(e.getResult()).get(), + 6); + } + { + NormalizeSubjectFunctionNode + e(ConstantNode(StringResultNode("Re: Your mail"))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const StringResultNode &>(e.getResult()).get(), + "Your mail"); + } + { + NormalizeSubjectFunctionNode + e(ConstantNode(StringResultNode("Your mail"))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const StringResultNode &>(e.getResult()).get(), + "Your mail"); + } + { + StrCatFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.appendArg(ConstantNode(StringResultNode("ARG 2"))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const StringResultNode &>(e.getResult()).get(), + "238686ARG 2"); + } + + { + ToStringFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(strcmp(static_cast<const StringResultNode &>( + e.getResult()).get().c_str(), "238686"), 0); + } + + { + ToRawFunctionNode e(ConstantNode(Int64ResultNode(238686))); + 
e.prepare(false); + e.execute(); + EXPECT_EQUAL(strcmp(static_cast<const RawResultNode &>( + e.getResult()).get().c_str(), "238686"), 0); + } + + { + CatFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 8u); + } + { + CatFunctionNode e(ConstantNode(Int32ResultNode(23886))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 4u); + } + { + const uint8_t buf[4] = { 0, 0, 0, 7 }; + MD5BitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 16*8); + e.prepare(false); + e.execute(); + ASSERT_TRUE(e.getResult().getClass().inherits(RawResultNode::classId)); + const RawResultNode & + r(static_cast<const RawResultNode &>(e.getResult())); + EXPECT_EQUAL(r.get().size(), 16u); + } + { + const uint8_t buf[4] = { 0, 0, 0, 7 }; + MD5BitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 2*8); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 2u); + } + { + const uint8_t buf[4] = { 0, 0, 0, 7 }; + XorBitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 1*8); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 1u); + EXPECT_EQUAL(static_cast<const RawResultNode &>( + e.getResult()).get().c_str()[0], + 0x7); + } + { + const uint8_t buf[4] = { 6, 0, 7, 7 }; + XorBitFunctionNode + e(ConstantNode(RawResultNode(buf, sizeof(buf))), 2*8); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 2u); + EXPECT_EQUAL((int)static_cast<const RawResultNode &>( + e.getResult()).get().c_str()[0], + 0x1); + EXPECT_EQUAL((int)static_cast<const RawResultNode &>( + e.getResult()).get().c_str()[1], + 0x7); + } + { + const uint8_t wantedBuf[14] = + { 98, 97, 114, 109, 117, 100, 97, 
0, 0, 0, 32, 65, 82, 87 }; + const uint8_t md5facit[16] = + { 0x22, 0x5, 0x22, 0x1c, 0x49, 0xff, 0x90, 0x25, 0xad, 0xbf, + 0x4e, 0x51, 0xdb, 0xca, 0x2a, 0xc5 }; + const uint8_t thomasBuf[22] = + { 0, 0, 0, 7, 98, 97, 114, 109, 117, 100, 97, 0, 0, 0, 32, 0, + 0, 0, 3, 65, 82, 87 }; + const uint8_t currentBuf[26] = + { 0, 0, 0, 22, 0, 0, 0, 7, 98, 97, 114, 109, 117, 100, 97, 0, + 0, 0, 32, 0 , 0, 0, 3, 65, 82, 87 }; + + MD5BitFunctionNode + e(ConstantNode(RawResultNode(wantedBuf, sizeof(wantedBuf))), 16*8); + e.prepare(false); + e.execute(); + ASSERT_TRUE(e.getResult().getClass().inherits(RawResultNode::classId)); + const RawResultNode & + r(static_cast<const RawResultNode &>(e.getResult())); + EXPECT_EQUAL(r.get().size(), 16u); + uint8_t md5[16]; + fastc_md5sum(currentBuf, sizeof(currentBuf), md5); + EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) != 0); + fastc_md5sum(wantedBuf, sizeof(wantedBuf), md5); + EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) == 0); + fastc_md5sum(thomasBuf, sizeof(thomasBuf), md5); + EXPECT_TRUE(memcmp(r.get().data(), md5, sizeof(md5)) != 0); + + MD5BitFunctionNode + finalCheck( + CatFunctionNode(ConstantNode(StringResultNode("barmuda"))) + .appendArg(ConstantNode(Int32ResultNode(32))) + .appendArg(SortFunctionNode( + ConstantNode(Int8ResultNodeVector() + .push_back(Int8ResultNode(87)) + .push_back(Int8ResultNode(65)) + .push_back(Int8ResultNode(82)) + ) + ) + ), 32); + finalCheck.prepare(false); + finalCheck.execute(); + const RawResultNode & + rr(static_cast<const RawResultNode &>(finalCheck.getResult())); + EXPECT_EQUAL(rr.get().size(), 4u); + fastc_md5sum(wantedBuf, sizeof(wantedBuf), md5); + EXPECT_TRUE(memcmp(md5facit, md5, sizeof(md5)) == 0); + EXPECT_TRUE(memcmp(rr.get().data(), md5, rr.get().size()) == 0); + } + { + CatFunctionNode e(ConstantNode(Int16ResultNode(23886))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 2u); + } + { + CatFunctionNode 
+ e(ConstantNode(Int8ResultNodeVector().push_back(Int8ResultNode(86)) + .push_back(Int8ResultNode(14)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 1*2u); + } + { + CatFunctionNode + e(ConstantNode(Int32ResultNodeVector() + .push_back(Int32ResultNode(238686)) + .push_back(Int32ResultNode(2133214)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL( + static_cast<const RawResultNode &>(e.getResult()).get().size(), + 4*2u); + } + { + NumElemFunctionNode e(ConstantNode(Int64ResultNode(238686))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(e.getResult().getInteger(), 1); + } + { + NumElemFunctionNode + e(ConstantNode(Int32ResultNodeVector() + .push_back(Int32ResultNode(238686)) + .push_back(Int32ResultNode(2133214)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(e.getResult().getInteger(), 2); + } + { + NumElemFunctionNode + e(ConstantNode(Int32ResultNodeVector() + .push_back(Int32ResultNode(238686)) + .push_back(Int32ResultNode(2133214)))); + e.prepare(false); + e.execute(); + EXPECT_EQUAL(e.getResult().getInteger(), 2); + } +} + +bool test1MultivalueExpression(const MultiArgFunctionNode &exprConst, + const ExpressionNode::CP &mv, + const ResultNode & expected) { + MultiArgFunctionNode & expr(const_cast<MultiArgFunctionNode &>(exprConst)); + expr.appendArg(mv); + expr.prepare(false); + bool ok = EXPECT_TRUE(expr.execute()) && + EXPECT_EQUAL(0, expr.getResult().cmp(expected)); + if (!ok) { + std::cerr << "Expected:" << expected.asString() << std::endl + << "Got: " << expr.getResult().asString() << std::endl; + } + return ok; +} + +bool test1MultivalueExpressionException(const MultiArgFunctionNode & exprConst, + const ExpressionNode::CP & mv, + const char * expected) { + try { + test1MultivalueExpression(exprConst, mv, NullResultNode()); + return EXPECT_TRUE(false); + } catch (std::runtime_error & e) { + return EXPECT_TRUE(std::string(e.what()).find(expected) + != 
std::string::npos); + } +} + +TEST("testMultivalueExpression") { + IntegerResultNodeVector iv; + iv.push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)).push_back(Int64ResultNode(117)); + ExpressionNode::CP mv(new ConstantNode(iv)); + + EXPECT_TRUE(test1MultivalueExpression(AddFunctionNode(), mv, + Int64ResultNode(7 + 17 + 117))); + EXPECT_TRUE(test1MultivalueExpression(MultiplyFunctionNode(), mv, + Int64ResultNode(7 * 17 * 117))); + EXPECT_TRUE(test1MultivalueExpressionException(DivideFunctionNode(), mv, + "DivideFunctionNode")); + EXPECT_TRUE(test1MultivalueExpressionException(ModuloFunctionNode(), mv, + "ModuloFunctionNode")); + EXPECT_TRUE(test1MultivalueExpression(MinFunctionNode(), mv, + Int64ResultNode(7))); + EXPECT_TRUE(test1MultivalueExpression(MaxFunctionNode(), mv, + Int64ResultNode(117))); + + EXPECT_TRUE( + test1MultivalueExpression( + FixedWidthBucketFunctionNode() + .setWidth(Int64ResultNode(1)), mv, + IntegerBucketResultNodeVector() + .push_back(IntegerBucketResultNode(7,8)) + .push_back(IntegerBucketResultNode(17,18)) + .push_back(IntegerBucketResultNode(117,118)))); + + EXPECT_TRUE( + test1MultivalueExpression( + RangeBucketPreDefFunctionNode() + .setBucketList( + IntegerBucketResultNodeVector() + .push_back(IntegerBucketResultNode(0,10)) + .push_back(IntegerBucketResultNode(20,30)) + .push_back(IntegerBucketResultNode(100,120))), + mv, + IntegerBucketResultNodeVector() + .push_back(IntegerBucketResultNode(0,10)) + .push_back(IntegerBucketResultNode(0,0)) + .push_back(IntegerBucketResultNode(100,120)))); + + EXPECT_TRUE( + test1MultivalueExpression( + TimeStampFunctionNode() + .setTimePart(TimeStampFunctionNode::Second), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(117%60)))); + + EXPECT_TRUE( + test1MultivalueExpression(NegateFunctionNode(), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(-7)) + .push_back(Int64ResultNode(-17)) + 
.push_back(Int64ResultNode(-117)))); + EXPECT_TRUE(test1MultivalueExpression(SortFunctionNode(), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(117)))); + EXPECT_TRUE(test1MultivalueExpression(ReverseFunctionNode(), mv, + IntegerResultNodeVector() + .push_back(Int64ResultNode(117)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(7)))); + EXPECT_TRUE(test1MultivalueExpression(SortFunctionNode(), + ReverseFunctionNode(mv), + IntegerResultNodeVector() + .push_back(Int64ResultNode(7)) + .push_back(Int64ResultNode(17)) + .push_back(Int64ResultNode(117)))); + EXPECT_TRUE(test1MultivalueExpression(AndFunctionNode(), mv, + Int64ResultNode(7 & 17 & 117))); + EXPECT_TRUE(test1MultivalueExpression(OrFunctionNode(), mv, + Int64ResultNode(7 | 17 | 117))); + EXPECT_TRUE(test1MultivalueExpression(XorFunctionNode(), mv, + Int64ResultNode(7 ^ 17 ^ 117))); +} + +TEST("testArithmeticNodes") { + AttributeGuard attr1 = createInt64Attribute(); + ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(1))); + ExpressionNode::CP i2(new ConstantNode(new Int64ResultNode(2))); + ExpressionNode::CP f1(new ConstantNode(new FloatResultNode(1.1))); + ExpressionNode::CP f2(new ConstantNode(new FloatResultNode(9.9))); + ExpressionNode::CP s1(new ConstantNode(new StringResultNode("1"))); + ExpressionNode::CP s2(new ConstantNode(new StringResultNode("2"))); + ExpressionNode::CP r1(new ConstantNode(new RawResultNode("1", 1))); + ExpressionNode::CP r2(new ConstantNode(new RawResultNode("2", 1))); + ExpressionNode::CP a1(new AttributeNode(*attr1)); + ExpressionNode::CP a2(new AttributeNode(*attr1)); + AddFunctionNode add1; + add1.appendArg(i1); + add1.appendArg(i2); + ExpressionTree et(add1); + + ExpressionTree::Configure treeConf; + et.select(treeConf, treeConf); + + EXPECT_TRUE( + et.getResult().getClass().inherits(IntegerResultNode::classId)); + EXPECT_TRUE(et.ExpressionNode::execute()); + 
EXPECT_EQUAL(et.getResult().getInteger(), 3); + EXPECT_TRUE(et.ExpressionNode::execute()); + EXPECT_EQUAL(et.getResult().getInteger(), 3); + AddFunctionNode add2; + add2.appendArg(i1); + add2.appendArg(f2); + add2.prepare(false); + EXPECT_TRUE( + add2.getResult().getClass().inherits(FloatResultNode::classId)); + AddFunctionNode add3; + add3.appendArg(i1); + add3.appendArg(s2); + add3.prepare(false); + EXPECT_TRUE( + add3.getResult().getClass().inherits(IntegerResultNode::classId)); + AddFunctionNode add4; + add4.appendArg(i1); + add4.appendArg(r2); + add4.prepare(false); + EXPECT_TRUE( + add4.getResult().getClass().inherits(IntegerResultNode::classId)); + AddFunctionNode add5; + add5.appendArg(i1); + add5.appendArg(a1); + add5.prepare(false); + EXPECT_TRUE( + add5.getResult().getClass().inherits(IntegerResultNode::classId)); + AddFunctionNode add6; + add6.appendArg(f1); + add6.appendArg(a1); + add6.prepare(false); + EXPECT_TRUE( + add6.getResult().getClass().inherits(FloatResultNode::classId)); +} + +void testArith(MultiArgFunctionNode &op, const ExpressionNode::CP &arg1, + const ExpressionNode::CP & arg2, int64_t intResult, + double floatResult) { + op.appendArg(arg1); + op.appendArg(arg2); + op.prepare(false); + op.execute(); + EXPECT_EQUAL(intResult, op.getResult().getInteger()); + ASSERT_TRUE(intResult == op.getResult().getInteger()); + EXPECT_EQUAL(floatResult, op.getResult().getFloat()); +} + +void testArith2(MultiArgFunctionNode &op, int64_t intResult, + double floatResult) { + op.prepare(false); + op.execute(); + EXPECT_EQUAL(intResult, op.getResult().getInteger()); + ASSERT_TRUE(intResult == op.getResult().getInteger()); + EXPECT_EQUAL(floatResult, op.getResult().getFloat()); +} + +void testAdd(const ExpressionNode::CP &arg1, + const ExpressionNode::CP &arg2, + int64_t intResult, double floatResult){ + AddFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testMultiply(const ExpressionNode::CP & arg1, + const 
ExpressionNode::CP & arg2, + int64_t intResult, double floatResult) { + MultiplyFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testDivide(const ExpressionNode::CP & arg1, + const ExpressionNode::CP & arg2, + int64_t intResult, double floatResult) { + DivideFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testModulo(const ExpressionNode::CP & arg1, + const ExpressionNode::CP & arg2, + int64_t intResult, double floatResult) { + ModuloFunctionNode add; + testArith(add, arg1, arg2, intResult, floatResult); +} + +void testArithmeticArguments(NumericFunctionNode &function, + std::vector<double> & arg1, + std::vector<double> & arg2, + const std::vector<double> & result, + double flattenResult) { + ExpressionNode::CP scalarInt1(new ConstantNode(new Int64ResultNode( + static_cast<int64_t>(arg1[0])))); + ExpressionNode::CP scalarInt2(new ConstantNode(new Int64ResultNode( + static_cast<int64_t>(arg2[0])))); + ExpressionNode::CP scalarFloat1(new ConstantNode(new FloatResultNode( + arg1[0]))); + ExpressionNode::CP scalarFloat2(new ConstantNode(new FloatResultNode( + arg2[0]))); + + IntegerResultNodeVector iv1; + for (size_t i(0), m(arg1.size()); i<m; i++) { + iv1.push_back(Int64ResultNode(static_cast<int64_t>(arg1[i]))); + } + IntegerResultNodeVector iv2; + for (size_t i(0), m(arg2.size()); i<m; i++) { + iv2.push_back(Int64ResultNode(static_cast<int64_t>(arg2[i]))); + } + FloatResultNodeVector fv1; + for (size_t i(0), m(arg1.size()); i<m; i++) { + fv1.push_back(FloatResultNode(arg1[i])); + } + FloatResultNodeVector fv2; + for (size_t i(0), m(arg2.size()); i<m; i++) { + fv2.push_back(FloatResultNode(arg2[i])); + } + IntegerResultNodeVector ir; + for (size_t i(0), m(result.size()); i<m; i++) { + ir.push_back(Int64ResultNode((int64_t)result[i])); + } + FloatResultNodeVector fr; + for (size_t i(0), m(result.size()); i<m; i++) { + fr.push_back(FloatResultNode(result[i])); + } + ExpressionNode::CP vectorInt1(new 
ConstantNode(iv1)); + ExpressionNode::CP vectorInt2(new ConstantNode(iv2)); + ExpressionNode::CP vectorFloat1(new ConstantNode(fv1)); + ExpressionNode::CP vectorFloat2(new ConstantNode(fv2)); + function.appendArg(scalarInt1).appendArg(scalarInt2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(Int64ResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getInteger(), + static_cast<int64_t>(result[0])); + + function.reset(); + + function.appendArg(scalarInt1).appendArg(scalarFloat2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), result[0]); + + function.reset(); + + function.appendArg(scalarFloat1).appendArg(scalarInt2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), result[0]); + + function.reset(); + + function.appendArg(scalarFloat1).appendArg(scalarFloat2); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), result[0]); + + function.reset(); + + function.appendArg(vectorInt1); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(Int64ResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getInteger(), + static_cast<int64_t>(flattenResult)); + + function.reset(); + + function.appendArg(vectorFloat1); + function.prepare(false); + EXPECT_TRUE( + function.getResult().getClass().equal(FloatResultNode::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_EQUAL(function.getResult().getFloat(), flattenResult); + + function.reset(); + + function.appendArg(vectorInt1).appendArg(vectorInt2); + 
function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(IntegerResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(IntegerResultNodeVector::classId)); + EXPECT_EQUAL(static_cast<const IntegerResultNodeVector &>( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(ir)); + + function.reset(); + + function.appendArg(vectorFloat1).appendArg(vectorFloat2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_EQUAL(static_cast<const FloatResultNodeVector &>( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(fr)); + + function.reset(); + + function.appendArg(vectorInt1).appendArg(vectorFloat2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_EQUAL(static_cast<const FloatResultNodeVector &>( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(fr)); + + function.reset(); + + function.appendArg(vectorFloat1).appendArg(vectorInt2); + function.prepare(false); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_TRUE(function.execute()); + EXPECT_TRUE(function.getResult().getClass() + .equal(FloatResultNodeVector::classId)); + EXPECT_EQUAL(static_cast<const FloatResultNodeVector &>( + function.getResult()).size(), 7u); + EXPECT_EQUAL(0, function.getResult().cmp(fr)); +} + +TEST("testArithmeticOperations") { + ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(1793253241))); + ExpressionNode::CP i2(new ConstantNode(new Int64ResultNode(1676521321))); + 
ExpressionNode::CP f1(new ConstantNode(new FloatResultNode(1.1109876))); + ExpressionNode::CP f2(new ConstantNode(new FloatResultNode(9.767681239))); + testAdd(i1, i2, 3469774562ull, 3469774562ull); + testAdd(i1, f2, 1793253251ull, 1793253250.767681239); + testAdd(f1, f2, 11, 10.878668839 ); + testMultiply(i1, i2, 3006427292488851361ull, 3006427292488851361ull); + testMultiply(i1, f2, 17515926039ull, 1793253241.0*9.767681239); + testMultiply(f1, f2, 11, 10.8517727372816364 ); + + std::vector<double> a(5), b(7); + a[0] = b[0] = 1; + a[1] = b[1] = 2; + a[2] = b[2] = 3; + a[3] = b[3] = 4; + a[4] = b[4] = 5; + b[5] = 6; + b[6] = 7; + std::vector<double> r(7); + { + r[0] = a[0] + b[0]; + r[1] = a[1] + b[1]; + r[2] = a[2] + b[2]; + r[3] = a[3] + b[3]; + r[4] = a[4] + b[4]; + r[5] = a[0] + b[5]; + r[6] = a[1] + b[6]; + AddFunctionNode f; + testArithmeticArguments(f, a, b, r, a[0]+a[1]+a[2]+a[3]+a[4]); + } + { + r[0] = a[0] * b[0]; + r[1] = a[1] * b[1]; + r[2] = a[2] * b[2]; + r[3] = a[3] * b[3]; + r[4] = a[4] * b[4]; + r[5] = a[0] * b[5]; + r[6] = a[1] * b[6]; + MultiplyFunctionNode f; + testArithmeticArguments(f, a, b, r, a[0]*a[1]*a[2]*a[3]*a[4]); + } +} + +TEST("testAggregatorsInExpressions") { + CountAggregationResult *c = new CountAggregationResult(); + c->setCount(3); + SumAggregationResult *s = new SumAggregationResult(); + ResultNode::CP r1(new Int64ResultNode(7)), + r2(new Int64ResultNode(22)); + ExpressionNode::CP i1(new ConstantNode(new Int64ResultNode(7))), + i2(c), + i3(s), + i4(new ConstantNode(new Int64ResultNode(22))); + AggregationResult::Configure conf; + s->setExpression(i4).select(conf, conf); + s->aggregate(0, 0); + + testAdd(i1, i2, 10, 10); + testMultiply(i1, i2, 21, 21); + testMultiply(i2, i3, 66, 66); + testDivide(i3, i2, 7, 7); + testDivide(i3, i1, 3, 3); + testModulo(i3, i2, 1, 1); + testModulo(i3, i1, 1, 1); + + MinAggregationResult *min = new MinAggregationResult(); + min->setResult(r2); + ExpressionNode::CP imin(min); + testAdd(imin, i1, 29, 
29); + + MaxAggregationResult *max = new MaxAggregationResult(); + max->setResult(r1); + ExpressionNode::CP imax(max); + testAdd(imin, imax, 29, 29); + + XorAggregationResult *x = new XorAggregationResult(); + x->setExpression(i4).select(conf, conf); + x->aggregate(0, 0); + ExpressionNode::CP ix(x); + testAdd(ix, i1, 29, 29); + + AverageAggregationResult *avg = new AverageAggregationResult(); + avg->setExpression(i4).select(conf, conf); + avg->aggregate(0, 0); + ExpressionNode::CP iavg(avg); + testAdd(iavg, i1, 29, 29); +} + +void testAggregationResult(AggregationResult & aggr, const AggrGetter & g, + const ResultNode & v, const ResultNode & i, + const ResultNode & m, const ResultNode & s) { + ExpressionNode::CP scalarInt1(new ConstantNode(v)); + AggregationResult::Configure conf; + aggr.setExpression(scalarInt1).select(conf, conf); + EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id())); + EXPECT_EQUAL(0, i.cmp(g(aggr))); + aggr.aggregate(0,0); + EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id())); + EXPECT_EQUAL(0, m.cmp(g(aggr))); + aggr.aggregate(1,0); + EXPECT_TRUE(g(aggr).getClass().equal(i.getClass().id())); + EXPECT_EQUAL(0, s.cmp(g(aggr))); +} + +TEST("testAggregationResults") { + struct SumGetter : AggrGetter { + virtual const ResultNode &operator()(const AggregationResult & r) const + { return static_cast<const SumAggregationResult &>(r).getSum(); } + }; + SumAggregationResult sum; + testAggregationResult(sum, SumGetter(), Int64ResultNode(7), + Int64ResultNode(0), Int64ResultNode(7), + Int64ResultNode(14)); + testAggregationResult(sum, SumGetter(), FloatResultNode(7.77), + FloatResultNode(0), FloatResultNode(7.77), + FloatResultNode(15.54)); + IntegerResultNodeVector v; + v.push_back(Int64ResultNode(7)).push_back(Int64ResultNode(8)); + testAggregationResult(sum, SumGetter(), v, Int64ResultNode(0), + Int64ResultNode(15), Int64ResultNode(30)); + testAggregationResult(sum, SumGetter(), FloatResultNode(7.77), + FloatResultNode(0), 
FloatResultNode(7.77), + FloatResultNode(15.54)); +} + +TEST("testGrouping") { + AttributeGuard attr1 = createInt64Attribute(); + ExpressionNode::CP select1(new AttributeNode(*attr1)); + ExpressionNode::CP result1(new CountAggregationResult()); + (static_cast<AggregationResult &>(*result1)).setExpression(select1); + ExpressionNode::CP result2( new SumAggregationResult()); + (static_cast<AggregationResult &>(*result2)).setExpression(select1); + + Grouping grouping = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .addLevel(GroupingLevel() + .setExpression(select1) + .addResult(result1) + .addResult(result2)); + + grouping.configureStaticStuff(ConfigureStaticParams(0, 0)); + grouping.aggregate(0u, 10u); + const Group::GroupList &groups = grouping.getRoot().groups(); + EXPECT_EQUAL(grouping.getRoot().getChildrenSize(), 9u); + ASSERT_TRUE(groups[0]->getAggregationResult(0).getClass().id() == + CountAggregationResult::classId); + ASSERT_TRUE(groups[0]->getAggregationResult(1).getClass().id() == + SumAggregationResult::classId); + EXPECT_EQUAL(groups[0]->getId().getInteger(), 6u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[0]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[0]->getAggregationResult(1)).getSum().getInteger(), + 6); + EXPECT_EQUAL(groups[1]->getId().getInteger(), 7u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[1]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[1]->getAggregationResult(1)).getSum().getInteger(), + 7); + EXPECT_EQUAL(groups[2]->getId().getInteger(), 11u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[2]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[2]->getAggregationResult(1)).getSum().getInteger(), + 11); + EXPECT_EQUAL(groups[3]->getId().getInteger(), 13u); + 
EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[3]->getAggregationResult(0)).getCount(), 2u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[3]->getAggregationResult(1)).getSum().getInteger(), + 26); + EXPECT_EQUAL(groups[4]->getId().getInteger(), 17u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[4]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[4]->getAggregationResult(1)).getSum().getInteger(), + 17); + EXPECT_EQUAL(groups[5]->getId().getInteger(), 27u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[5]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[5]->getAggregationResult(1)).getSum().getInteger(), + 27); + EXPECT_EQUAL(groups[6]->getId().getInteger(), 34u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[6]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[6]->getAggregationResult(1)).getSum().getInteger(), + 34); + EXPECT_EQUAL(groups[7]->getId().getInteger(), 67891u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[7]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[7]->getAggregationResult(1)).getSum().getInteger(), + 67891); + EXPECT_EQUAL(groups[8]->getId().getInteger(), 67892u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[8]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(static_cast<const SumAggregationResult &>( + groups[8]->getAggregationResult(1)).getSum().getInteger(), + 67892); + testStreaming(grouping); +} + +TEST("testGrouping2") { + AttributeGuard attr1 = createInt64Attribute(); + + RangeBucketPreDefFunctionNode *predef( + new RangeBucketPreDefFunctionNode(AttributeNode(*attr1))); + IntegerBucketResultNodeVector prevec; + 
prevec.getVector().push_back(IntegerBucketResultNode(6,7)); + prevec.getVector().push_back(IntegerBucketResultNode(7,14)); + prevec.getVector().push_back(IntegerBucketResultNode(18,50)); //30 + prevec.getVector() + .push_back(IntegerBucketResultNode(80,50000000000ull)); //30 + predef->setBucketList(prevec); + ExpressionNode::CP select1(predef); + ExpressionNode::CP result1( new CountAggregationResult()); + (static_cast<AggregationResult &>(*result1)).setExpression(select1); + + Grouping grouping = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .addLevel(GroupingLevel() + .setExpression(select1) + .addResult(result1)); + + grouping.configureStaticStuff(ConfigureStaticParams(0, 0)); + grouping.aggregate(0u, 10u); + const Group::GroupList &groups = grouping.getRoot().groups(); + EXPECT_EQUAL(grouping.getRoot().getChildrenSize(), 5u); + ASSERT_TRUE(groups[0]->getAggregationResult(0).getClass().id() + == CountAggregationResult::classId); + EXPECT_EQUAL(groups[0]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[0]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(groups[1]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[1]->getAggregationResult(0)).getCount(), 1u); + EXPECT_EQUAL(groups[2]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[2]->getAggregationResult(0)).getCount(), 4u); + EXPECT_EQUAL(groups[3]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[3]->getAggregationResult(0)).getCount(), 2u); + EXPECT_EQUAL(groups[4]->getId().getInteger(), 0u); + EXPECT_EQUAL(static_cast<const CountAggregationResult &>( + groups[4]->getAggregationResult(0)).getCount(), 2u); + testStreaming(grouping); +} + +AttributeGuard createInt64Attribute() { + SingleInt64ExtAttribute *selectAttr1( + new SingleInt64ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); 
+ selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(34); + selectAttr1->addDoc(docId); + selectAttr1->add(67891); + selectAttr1->addDoc(docId); + selectAttr1->add(67892); + + AttributeVector::SP spSelectAttr1(selectAttr1); + AttributeGuard attr1( spSelectAttr1 ); + return attr1; +} + +AttributeGuard createInt32Attribute() { + SingleInt32ExtAttribute *selectAttr1( + new SingleInt32ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); + selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(34); + selectAttr1->addDoc(docId); + selectAttr1->add(67891); + selectAttr1->addDoc(docId); + selectAttr1->add(67892); + + AttributeVector::SP spSelectAttr1(selectAttr1); + AttributeGuard attr1( spSelectAttr1 ); + return attr1; +} + +AttributeGuard createInt16Attribute() { + SingleInt16ExtAttribute *selectAttr1( + new SingleInt16ExtAttribute("selectAttr1")); + DocId docId(0); + selectAttr1->addDoc(docId); + selectAttr1->add(7); + selectAttr1->addDoc(docId); + selectAttr1->add(6); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + selectAttr1->add(11); + selectAttr1->addDoc(docId); + selectAttr1->add(27); + selectAttr1->addDoc(docId); + selectAttr1->add(17); + selectAttr1->addDoc(docId); + selectAttr1->add(13); + selectAttr1->addDoc(docId); + 
selectAttr1->add(34);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(67891);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(67892);
+
+    AttributeVector::SP spSelectAttr1(selectAttr1);
+    AttributeGuard attr1( spSelectAttr1 );
+    return attr1;
+}
+/*
+ * Builds a single-value int8 attribute named "selectAttr1" containing ten
+ * documents with one value each, and returns it wrapped in an AttributeGuard.
+ * The raw pointer from `new` is handed to an AttributeVector::SP before the
+ * guard is created, so the shared pointer owns the attribute.
+ *
+ * NOTE(review): the last two values (67891, 67892) do not fit in 8 bits; they
+ * look copied from the wider-type builders. How add() treats them is not
+ * visible here. testIntegerTypes below only inspects result class ids, so the
+ * stored values are not checked -- confirm this is intended.
+ */
+AttributeGuard createInt8Attribute() {
+    SingleInt8ExtAttribute *selectAttr1(
+        new SingleInt8ExtAttribute("selectAttr1"));
+    DocId docId(0);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(7);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(6);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(13);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(11);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(27);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(17);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(13);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(34);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(67891);
+    selectAttr1->addDoc(docId);
+    selectAttr1->add(67892);
+
+    AttributeVector::SP spSelectAttr1(selectAttr1);
+    AttributeGuard attr1( spSelectAttr1 );
+    return attr1;
+}
+/*
+ * Checks which result-node class an AttributeNode selects for each integer
+ * attribute width. As asserted below:
+ *   - prepare(false) yields Int64ResultNode for every single-value width;
+ *   - prepare(true) yields the width-specific node (Int8/16/32/64ResultNode).
+ * The second half repeats the check for multi-value attributes, where the
+ * expected classes are the corresponding *ResultNodeVector types.
+ * The boolean presumably means "preserve the attribute's exact integer type";
+ * confirm against AttributeNode::prepare. The createInt16/32/64Attribute
+ * builders are defined earlier in this file, outside this excerpt.
+ */
+TEST("testIntegerTypes") {
+    EXPECT_EQUAL(AttributeNode(*createInt8Attribute()).prepare(false)
+                 .getResult().getClass().id(),
+                 uint32_t(Int64ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt8Attribute())
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int8ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt16Attribute())
+                 .prepare(false).getResult().getClass().id(),
+                 uint32_t(Int64ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt16Attribute())
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int16ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt32Attribute())
+                 .prepare(false).getResult().getClass().id(),
+                 uint32_t(Int64ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt32Attribute())
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int32ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt64Attribute())
+                 .prepare(false).getResult().getClass().id(),
+                 uint32_t(Int64ResultNode::classId));
+    EXPECT_EQUAL(AttributeNode(*createInt64Attribute())
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int64ResultNode::classId));
+    /* Multi-value variants: each case uses a fresh, empty attribute named "test". */
+    EXPECT_EQUAL(
+            AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt8ExtAttribute("test"))))
+            .prepare(false).getResult().getClass().id(),
+            uint32_t(Int64ResultNodeVector::classId));
+    EXPECT_EQUAL(
+            AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt8ExtAttribute("test"))))
+            .prepare(true).getResult().getClass().id(),
+            uint32_t(Int8ResultNodeVector::classId));
+    EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt16ExtAttribute("test"))))
+                 .prepare(false).getResult().getClass().id(),
+                 uint32_t(Int64ResultNodeVector::classId));
+    EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt16ExtAttribute("test"))))
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int16ResultNodeVector::classId));
+    EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt32ExtAttribute("test"))))
+                 .prepare(false).getResult().getClass().id(),
+                 uint32_t(Int64ResultNodeVector::classId));
+    EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt32ExtAttribute("test"))))
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int32ResultNodeVector::classId));
+    EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt64ExtAttribute("test"))))
+                 .prepare(false).getResult().getClass().id(),
+                 uint32_t(Int64ResultNodeVector::classId));
+    EXPECT_EQUAL(AttributeNode(*AttributeGuard(AttributeVector::SP(
+                                    new MultiInt64ExtAttribute("test"))))
+                 .prepare(true).getResult().getClass().id(),
+                 uint32_t(Int64ResultNodeVector::classId));
+}
+/*
+ * Passes one instance of each listed result-node and aggregation-result type
+ * to testStreaming(), which is defined earlier in this file (outside this
+ * excerpt); presumably it performs a serialize/deserialize round trip.
+ * The 30 given to RawResultNode equals the length of its string literal.
+ */
+TEST("testStreamingAll") {
+    testStreaming(Int64ResultNode(89));
+    testStreaming(FloatResultNode(89.765));
+    testStreaming(StringResultNode("Tester StringResultNode streaming"));
+    testStreaming(RawResultNode("Tester RawResultNode streaming", 30));
+    testStreaming(CountAggregationResult());
+    testStreaming(ExpressionCountAggregationResult());
+    testStreaming(SumAggregationResult());
+    testStreaming(MinAggregationResult());
+    testStreaming(MaxAggregationResult());
+    testStreaming(AverageAggregationResult());
+    testStreaming(Group());
+    testStreaming(Grouping());
+    testStreaming(HitsAggregationResult());
+}
+/* Test-kit entry point: runs every TEST() registered in this file. */
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/alignment/.gitignore b/searchlib/src/tests/alignment/.gitignore
new file mode 100644
index 00000000000..9668e4fc02c
--- /dev/null
+++ b/searchlib/src/tests/alignment/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+alignment_test
+searchlib_alignment_test_app
diff --git a/searchlib/src/tests/alignment/CMakeLists.txt b/searchlib/src/tests/alignment/CMakeLists.txt
new file mode 100644
index 00000000000..3695c600f9b
--- /dev/null
+++ b/searchlib/src/tests/alignment/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_alignment_test_app
+    SOURCES
+    alignment.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_alignment_test_app COMMAND searchlib_alignment_test_app)
diff --git a/searchlib/src/tests/alignment/DESC b/searchlib/src/tests/alignment/DESC
new file mode 100644
index 00000000000..a37dbbc1c7a
--- /dev/null
+++ b/searchlib/src/tests/alignment/DESC
@@ -0,0 +1 @@
+alignment test. Take a look at alignment.cpp for details.
diff --git a/searchlib/src/tests/alignment/FILES b/searchlib/src/tests/alignment/FILES
new file mode 100644
index 00000000000..067828da485
--- /dev/null
+++ b/searchlib/src/tests/alignment/FILES
@@ -0,0 +1 @@
+alignment.cpp
diff --git a/searchlib/src/tests/alignment/alignment.cpp b/searchlib/src/tests/alignment/alignment.cpp
new file mode 100644
index 00000000000..882e0942976
--- /dev/null
+++ b/searchlib/src/tests/alignment/alignment.cpp
@@ -0,0 +1,68 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("alignment_test");
+
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <vespa/vespalib/testkit/testapp.h>
+/*
+ * Minimal stopwatch based on getrusage(RUSAGE_SELF).
+ * start() records the current usage snapshot; stop() takes a second snapshot
+ * and returns the difference in user CPU time (ru_utime) in milliseconds:
+ * tv_sec * 1000 + tv_usec / 1000. System time and wall-clock time are not
+ * included, and the return values of getrusage() are not checked.
+ */
+struct Timer {
+    rusage usage;
+    void start() {
+        getrusage(RUSAGE_SELF, &usage);
+    }
+    double stop() {
+        rusage tmp;
+        getrusage(RUSAGE_SELF, &tmp);
+        double startMs = (((double)usage.ru_utime.tv_sec) * 1000.0) +
+                         (((double)usage.ru_utime.tv_usec) / 1000.0);
+        double stopMs = (((double)tmp.ru_utime.tv_sec) * 1000.0) +
+                        (((double)tmp.ru_utime.tv_usec) / 1000.0);
+        return (stopMs - startMs);
+    }
+};
+
+TEST_SETUP(Test);
+/*
+ * Reinterprets bufp as an array of `len` doubles and sums it 512 * 1024 times
+ * into `sum`, which is an in/out accumulator and is not reset here.
+ * Returns the user CPU time spent in the loop, in milliseconds.
+ *
+ * NOTE(review): the caller passes a uint32_t buffer, so the double reads are
+ * type punning through an incompatible pointer and, for the offset case,
+ * deliberately misaligned. That is the point of the test, but it is formally
+ * undefined behaviour in C++ -- confirm it is acceptable on all target
+ * platforms.
+ */
+double
+timeAccess(void *bufp, uint32_t len, double &sum)
+{
+    double *buf = (double *)bufp;
+    Timer timer;
+    timer.start();
+    for(uint32_t i = 0; i < 512 * 1024; ++i) {
+        for (uint32_t j = 0; j < len; ++j) {
+            sum += buf[j];
+        }
+    }
+    double ret = timer.stop();
+    return ret;
+}
+/*
+ * Times double-sized reads from the same buffer at two start addresses:
+ * &buf[0] and &buf[1], i.e. one uint32_t further in. If the first address is
+ * a multiple of sizeof(double) the second is not, and vice versa, which is
+ * why the second banner prints the inverted `aligned` flag.
+ * buf holds 129 uint32_t values so that 64 doubles starting at &buf[1]
+ * (assuming 8-byte double) still end inside the array.
+ * Each position is run twice: the first run ("warmup") accumulates into foo,
+ * the second ("real") into bar. Both accumulators receive the same sequence
+ * of additions, so the only assertion is that they end up equal; the timings
+ * are printed but not checked.
+ */
+int
+Test::Main()
+{
+    TEST_INIT("alignment_test");
+
+    uint32_t buf[129];
+    for (uint32_t i = 0; i < 129; ++i) {
+        buf[i] = i;
+    }
+
+    uintptr_t ptr = reinterpret_cast<uintptr_t>(&buf[0]);
+    bool aligned = (ptr % sizeof(double) == 0);
+    /* NOTE(review): printf is called with a non-literal format string below; harmless here, but it triggers -Wformat-security. */
+    double foo = 0, bar = 0;
+    printf(aligned ? "ALIGNED\n" : "UNALIGNED\n");
+    printf("warmup time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[0]), 64, foo));
+    printf("real time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[0]), 64, bar));
+    EXPECT_EQUAL(foo, bar);
+
+    printf(!aligned ? "ALIGNED\n" : "UNALIGNED\n");
+    printf("warmup time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[1]), 64, foo));
+    printf("real time = %.2f\n", timeAccess(reinterpret_cast<void*>(&buf[1]), 64, bar));
+    EXPECT_EQUAL(foo, bar);
+
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/attribute/.gitignore b/searchlib/src/tests/attribute/.gitignore
new file mode 100644
index 00000000000..732912ab981
--- /dev/null
+++ b/searchlib/src/tests/attribute/.gitignore
@@ -0,0 +1,11 @@
+*.dat
+*.idx
+*.weight
+.depend
+Makefile
+attribute_test
+attributebenchmark
+searchlib_attribute_test_app
+searchlib_attributeguard_test_app
+searchlib_changevector_test_app
+searchlib_attributebenchmark_app
diff --git a/searchlib/src/tests/attribute/CMakeLists.txt b/searchlib/src/tests/attribute/CMakeLists.txt
new file mode 100644
index 00000000000..0598b5776a8
--- /dev/null
+++ b/searchlib/src/tests/attribute/CMakeLists.txt
@@ -0,0 +1,29 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+# AttributeGuard test: built from attributeguard.cpp against searchlib and
+# launched through the wrapper script attributeguard_test.sh.
+vespa_add_executable(searchlib_attributeguard_test_app
+    SOURCES
+    attributeguard.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_attributeguard_test_app COMMAND sh attributeguard_test.sh)
+# Main attribute vector test: built from attribute_test.cpp and launched
+# through attribute_test.sh.
+vespa_add_executable(searchlib_attribute_test_app
+    SOURCES
+    attribute_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_attribute_test_app COMMAND sh attribute_test.sh)
+# Change vector test: built from changevector_test.cpp and launched through
+# changevector_test.sh.
+vespa_add_executable(searchlib_changevector_test_app
+    SOURCES
+    changevector_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_changevector_test_app COMMAND sh changevector_test.sh)
+# Attribute benchmark: the binary is run directly rather than via a script.
+# The trailing BENCHMARK keyword presumably marks it as a benchmark instead of
+# a regular unit test -- confirm against the vespa_add_test definition.
+vespa_add_executable(searchlib_attributebenchmark_app
+    SOURCES
+    attributebenchmark.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_attributebenchmark_app COMMAND searchlib_attributebenchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/attribute/DESC b/searchlib/src/tests/attribute/DESC
new file mode 100644
index 00000000000..6a9215b1a3b
--- /dev/null
+++ b/searchlib/src/tests/attribute/DESC
@@ -0,0 +1 @@
+Unit tests for attribute use.
diff --git a/searchlib/src/tests/attribute/FILES b/searchlib/src/tests/attribute/FILES
new file mode 100644
index 00000000000..b742644b750
--- /dev/null
+++ b/searchlib/src/tests/attribute/FILES
@@ -0,0 +1,2 @@
+attribute.cpp
+attributebenchmark.cpp
diff --git a/searchlib/src/tests/attribute/attribute_test.cpp b/searchlib/src/tests/attribute/attribute_test.cpp
new file mode 100644
index 00000000000..b1d4e675e23
--- /dev/null
+++ b/searchlib/src/tests/attribute/attribute_test.cpp
@@ -0,0 +1,2200 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/searchlib/attribute/attribute.h>
+#include <vespa/searchlib/attribute/attributefile.h>
+#include <vespa/searchlib/attribute/attributeguard.h>
+#include <vespa/searchlib/attribute/attributefactory.h>
+#include <vespa/searchlib/attribute/attributememorysavetarget.h>
+#include <vespa/searchlib/attribute/singlenumericattribute.h>
+#include <vespa/searchlib/attribute/multinumericattribute.h>
+#include <vespa/searchlib/attribute/singlestringattribute.h>
+#include <vespa/searchlib/attribute/multistringattribute.h>
+#include <vespa/searchlib/attribute/attrvector.h>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/index/dummyfileheadercontext.h>
+#include <vespa/searchlib/util/randomgenerator.h>
+#include <vespa/vespalib/io/fileutil.h>
+
+#include <vespa/searchlib/attribute/attributevector.hpp>
+
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_test");
+
+
+using namespace document;
+using std::shared_ptr;
+using search::common::FileHeaderContext;
+using search::index::DummyFileHeaderContext;
+using search::attribute::BasicType;
+using search::attribute::IAttributeVector;
+/* File-local helpers shared by the AttributeTest member functions below. */
+namespace
+{
+
+
+vespalib::string empty;
+vespalib::string clstmp("clstmp");
+vespalib::string asuDir("asutmp");
+/* True for the sub-byte unsigned integer basic types (UINT1, UINT2, UINT4). */
+bool
+isUnsignedSmallIntAttribute(const BasicType::Type &type)
+{
+    switch (type)
+    {
+    case BasicType::UINT1:
+    case BasicType::UINT2:
+    case BasicType::UINT4:
+        return true;
+    default:
+        return false;
+    }
+}
+/* Convenience overload: applies the check above to an attribute's basic type. */
+bool
+isUnsignedSmallIntAttribute(const AttributeVector &a)
+{
+    return isUnsignedSmallIntAttribute(a.getBasicType());
+}
+/* Expects a value read from a cleared document to compare equal to 0. */
+template <typename BufferType>
+void
+expectZero(const BufferType &b)
+{
+    EXPECT_EQUAL(0, b);
+}
+/* String specialisation: the "zero" value is the empty string. */
+template <>
+void
+expectZero(const vespalib::string &b)
+{
+    EXPECT_EQUAL(empty, b);
+}
+/*
+ * Returns the size of fileName as reported by FastOS_File::Stat.
+ * A failed stat is recorded as a test failure and 0 is returned.
+ */
+uint64_t
+statSize(const vespalib::string &fileName)
+{
+    FastOS_StatInfo statInfo;
+    if (EXPECT_TRUE(FastOS_File::Stat(fileName.c_str(), &statInfo))) {
+        return statInfo._size;
+    } else {
+        return 0u;
+    }
+}
+/*
+ * Total on-disk size of a saved attribute: always the ".dat" file, plus
+ * ".idx" for multi-value attributes, ".weight" for weighted sets and ".udat"
+ * when the attribute has an enum store and enumerated save is enabled.
+ */
+uint64_t
+statSize(const AttributeVector &a)
+{
+    vespalib::string baseFileName = a.getBaseFileName();
+    uint64_t resultSize = statSize(baseFileName + ".dat");
+    if (a.hasMultiValue()) {
+        resultSize += statSize(baseFileName + ".idx");
+    }
+    if (a.hasWeightedSetType()) {
+        resultSize += statSize(baseFileName + ".weight");
+    }
+    if (a.hasEnum() && a.getEnumeratedSave()) {
+        resultSize += statSize(baseFileName + ".udat");
+    }
+    return resultSize;
+}
+
+/*
+ * Tells whether getEstimatedSaveByteSize() is expected to match the real file
+ * size exactly. It is not for string attributes saved without enumeration.
+ * Side effect: for string attributes this also asserts (EXPECT_TRUE) that the
+ * attribute has an enum store.
+ */
+bool
+preciseEstimatedSize(const AttributeVector &a)
+{
+    if (a.getBasicType() == BasicType::STRING &&
+        EXPECT_TRUE(a.hasEnum()) && !a.getEnumeratedSave()) {
+        return false; // Using average of string lens, can be somewhat off
+    }
+    return true;
+}
+
+}
+
+namespace search {
+
+using attribute::CollectionType;
+using attribute::Config;
+/*
+ * Test application for attribute vectors. Templated helpers take the concrete
+ * attribute interface (VectorType) and the buffer element type used to read
+ * values back (BufferType); the parameterless overloads enumerate the
+ * attribute configurations to run them on.
+ */
+class AttributeTest : public vespalib::TestApp
+{
+private:
+    typedef AttributeVector::SP AttributePtr;
+
+    void addDocs(const AttributePtr & v, size_t sz);
+    template <typename VectorType>
+    void populate(VectorType & ptr, unsigned seed);
+    template <typename VectorType, typename BufferType>
+    void compare(VectorType & a, VectorType & b);
+
+    void testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs);
+    void testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs);
+    template <typename VectorType, typename BufferType>
+    void testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c);
+    void testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs);
+    void testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs);
+    template <typename VectorType, typename BufferType>
+    void testMemorySaver(const AttributePtr & a, const AttributePtr & b);
+
+    void testReload();
+    void testHasLoadData();
+    void testMemorySaver();
+
+    void commit(const AttributePtr & ptr);
+
+    template <typename T>
+    void fillNumeric(std::vector<T> & values, uint32_t numValues);
+    void fillString(std::vector<vespalib::string> & values, uint32_t numValues);
+    template <typename VectorType, typename BufferType>
+    bool appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount,
+                        const std::vector<BufferType> & values);
+    template <typename BufferType>
+    bool checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                    uint32_t numValues, const BufferType & value);
+    template <typename BufferType>
+    bool checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                      uint32_t range, const std::vector<BufferType> & values);
+
+    // CollectionType::SINGLE
+    template <typename VectorType, typename BufferType, typename BaseType>
+    void testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values);
+    void testSingle();
+
+    // CollectionType::ARRAY
+    template <typename BufferType>
+    void printArray(const AttributePtr & ptr);
+    template <typename VectorType, typename BufferType>
+    void testArray(const AttributePtr & ptr, const std::vector<BufferType> & values);
+    void testArray();
+
+    // CollectionType::WSET
+    template <typename BufferType>
+    void printWeightedSet(const AttributePtr & ptr);
+    template <typename VectorType, typename BufferType>
+    void testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values);
+    void testWeightedSet();
+    void testBaseName();
+
+    template <typename VectorType, typename BufferType>
+    void testArithmeticValueUpdate(const AttributePtr & ptr);
+    void testArithmeticValueUpdate();
+
+    template <typename VectorType, typename BaseType, typename BufferType>
+    void testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after);
+    void testArithmeticWithUndefinedValue();
+
+    template <typename VectorType, typename BufferType>
+    void testMapValueUpdate(const AttributePtr & ptr, BufferType initValue,
+                            const FieldValue & initFieldValue, const FieldValue & nonExistant,
+                            bool removeIfZero, bool createIfNonExistant);
+    void testMapValueUpdate();
+
+    void testStatus();
+    void testNullProtection();
+    void testGeneration(const AttributePtr & attr, bool exactStatus);
+    void testGeneration();
+
+    void
+    testCreateSerialNum(void);
+
+    template <typename VectorType, typename BufferType>
+    void
+    testCompactLidSpace(const Config &config,
+                        bool fs,
+                        bool es);
+
+    template <typename VectorType, typename BufferType>
+    void
+    testCompactLidSpace(const Config &config);
+
+    void
+    testCompactLidSpace(const Config &config);
+
+    void
+    testCompactLidSpace(void);
+
+    template <typename AttributeType>
+    void requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch);
+    template <typename AttributeType>
+    void requireThatAddressSpaceUsageIsReported(const Config &config);
+    void requireThatAddressSpaceUsageIsReported();
+
+public:
+    AttributeTest() { }
+    int Main();
+};
+/*
+ * Verifies how AttributeVector::BaseName splits a base file name into
+ * attribute name, snapshot name, index name and directory name, for a bare
+ * name, relative and absolute paths, and paths with or without
+ * "snapshot-X" and "index.N" components.
+ */
+void AttributeTest::testBaseName()
+{
+    AttributeVector::BaseName v("attr1");
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_TRUE(v.getSnapshotName().empty());
+    // EXPECT_TRUE(v.getIndexName().empty());
+    EXPECT_EQUAL("", v.getIndexName());
+    EXPECT_TRUE(v.getDirName().empty());
+    v = "attribute/attr1/attr1";
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_TRUE(v.getSnapshotName().empty());
+    // EXPECT_TRUE(v.getIndexName().empty());
+    EXPECT_EQUAL("", v.getIndexName());
+    EXPECT_EQUAL(v.getDirName(), "attribute/attr1");
+    v = "attribute/attr1/snapshot-X/attr1";
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+    // EXPECT_TRUE(v.getIndexName().empty());
+    EXPECT_EQUAL("", v.getIndexName());
+    EXPECT_EQUAL(v.getDirName(), "attribute/attr1/snapshot-X");
+    v = "/attribute/attr1/snapshot-X/attr1";
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+    // EXPECT_TRUE(v.getIndexName().empty());
+    EXPECT_EQUAL("", v.getIndexName());
+    EXPECT_EQUAL(v.getDirName(), "/attribute/attr1/snapshot-X");
+    v = "index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+    EXPECT_EQUAL(v.getIndexName(), "index.1");
+    EXPECT_EQUAL(v.getDirName(), "index.1/1.ready/attribute/attr1/snapshot-X");
+    v = "/index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+    EXPECT_EQUAL(v.getIndexName(), "index.1");
+    EXPECT_EQUAL(v.getDirName(),
+                 "/index.1/1.ready/attribute/attr1/snapshot-X");
+    v = "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X/attr1";
+    EXPECT_EQUAL(v.getAttributeName(), "attr1");
+    EXPECT_EQUAL(v.getSnapshotName(), "snapshot-X");
+    EXPECT_EQUAL(v.getIndexName(), "index.1");
+    EXPECT_EQUAL(v.getDirName(),
+                 "xxxyyyy/zzz/index.1/1.ready/attribute/attr1/snapshot-X");
+}
+/*
+ * Adds sz documents to v and commits; does nothing when sz is 0.
+ * The check `docId+1 == sz` assumes addDoc() hands out consecutive ids
+ * starting at 0, i.e. that v had no documents before the call -- presumably
+ * true for every caller in this file; verify if reused.
+ */
+void AttributeTest::addDocs(const AttributePtr & v, size_t sz)
+{
+    if (sz) {
+        AttributeVector::DocId docId;
+        for(size_t i(0); i< sz; i++) {
+            EXPECT_TRUE( v->addDoc(docId) );
+        }
+        EXPECT_TRUE( docId+1 == sz );
+        EXPECT_TRUE( v->getNumDocs() == sz );
+        commit(v);
+    }
+}
+
+/*
+ * Fills every document of an integer attribute with rand() values seeded by
+ * `seed`, so two attributes populated with the same seed get the same
+ * content. Multi-value attributes get i + 1 values for document i; weighted
+ * sets draw one weight per document in [-128, 127], other types use weight 1.
+ */
+template <>
+void AttributeTest::populate(IntegerAttribute & v, unsigned seed)
+{
+    srand(seed);
+    int weight = 1;
+    for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+        v.clearDoc(i);
+        if (v.hasMultiValue()) {
+            if (v.hasWeightedSetType()) {
+                weight = (rand() % 256) - 128;
+            }
+            for (size_t j(0); j <= i; j++) {
+                EXPECT_TRUE( v.append(i, rand(), weight) );
+            }
+        } else {
+            EXPECT_TRUE( v.update(i, rand()) );
+        }
+    }
+    v.commit();
+}
+/* Floating point variant of populate(): same scheme, values are rand() * 1.25. */
+template <>
+void AttributeTest::populate(FloatingPointAttribute & v, unsigned seed)
+{
+    srand(seed);
+    int weight = 1;
+    for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+        v.clearDoc(i);
+        if (v.hasMultiValue()) {
+            if (v.hasWeightedSetType()) {
+                weight = (rand() % 256) - 128;
+            }
+            for (size_t j(0); j <= i; j++) {
+                EXPECT_TRUE( v.append(i, rand() * 1.25, weight) );
+            }
+        } else {
+            EXPECT_TRUE( v.update(i, rand() * 1.25) );
+        }
+    }
+    v.commit();
+}
+/*
+ * String variant of populate(): values come from
+ * RandomGenerator::getRandomString(2, 50) (presumably min/max length --
+ * confirm) and weights from rnd.rand(0, 256) - 128, using a generator seeded
+ * with `seed` instead of the global srand()/rand() state.
+ */
+template <>
+void AttributeTest::populate(StringAttribute & v, unsigned seed)
+{
+    RandomGenerator rnd(seed);
+    int weight = 1;
+    for(size_t i(0), m(v.getNumDocs()); i < m; i++) {
+        v.clearDoc(i);
+        if (v.hasMultiValue()) {
+            if (v.hasWeightedSetType()) {
+                weight = rnd.rand(0, 256) - 128;
+            }
+            for (size_t j(0); j <= i; j++) {
+                EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) );
+            }
+        } else {
+            EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) );
+        }
+    }
+    v.commit();
+}
+/*
+ * Asserts that a and b hold the same number of documents and, per document,
+ * the same number of values with pairwise equal content (element j of a
+ * against element j of b, in the order get() returns them).
+ *
+ * NOTE(review): av/bv are raw new[] buffers released only at the end; if
+ * ASSERT_TRUE leaves the function early on failure they are leaked. A
+ * std::vector<BufferType> would avoid that -- confirm ASSERT_TRUE semantics.
+ */
+template <typename VectorType, typename BufferType>
+void AttributeTest::compare(VectorType & a, VectorType & b)
+{
+    EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs());
+    ASSERT_TRUE(a.getNumDocs() == b.getNumDocs());
+    uint32_t asz(a.getMaxValueCount());
+    uint32_t bsz(b.getMaxValueCount());
+    BufferType *av = new BufferType[asz];
+    BufferType *bv = new BufferType[bsz];
+
+    for (size_t i(0), m(a.getNumDocs()); i < m; i++) {
+        ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i)));
+        ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i)));
+        EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i));
+        ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i));
+        EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av, asz), static_cast<uint32_t>(a.getValueCount(i)));
+        EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv, bsz), static_cast<uint32_t>(b.getValueCount(i)));
+        for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); j < k; j++) {
+            EXPECT_TRUE(av[j] == bv[j]);
+        }
+    }
+    delete [] bv;
+    delete [] av;
+}
+/*
+ * Prepares a and b with numDocs documents and identical content (seed 17)
+ * and runs the save/load round-trip test with the buffer type matching the
+ * collection type. c receives no documents here; it is only filled by
+ * loading what a saved.
+ */
+void AttributeTest::testReloadInt(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs)
+{
+    addDocs(a, numDocs);
+    addDocs(b, numDocs);
+    populate(static_cast<IntegerAttribute &>(*a.get()), 17);
+    populate(static_cast<IntegerAttribute &>(*b.get()), 17);
+    if (a->hasWeightedSetType()) {
+        testReload<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b, c);
+    } else {
+        testReload<IntegerAttribute, IntegerAttribute::largeint_t>(a, b, c);
+    }
+}
+
+/* String counterpart of testReloadInt(). */
+void AttributeTest::testReloadString(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c, size_t numDocs)
+{
+    addDocs(a, numDocs);
+    addDocs(b, numDocs);
+    populate(static_cast<StringAttribute &>(*a.get()), 17);
+    populate(static_cast<StringAttribute &>(*b.get()), 17);
+    if (a->hasWeightedSetType()) {
+        testReload<StringAttribute, StringAttribute::WeightedString>(a, b, c);
+    } else {
+        testReload<StringAttribute, vespalib::string>(a, b, c);
+    }
+}
+/*
+ * Save/load round trip. Steps, in order:
+ *  1. a and b must start out equal.
+ *  2. a is saved under b's and then c's base file name; the size on disk is
+ *     checked against a's estimate (exactly, or within -30%/+30% when the
+ *     estimate is known to be imprecise -- first save only).
+ *  3. b and c are loaded and must equal a; b must also carry the create
+ *     serial number (43) that was set on a before saving.
+ *  4. Unless the type is a small unsigned int, b and c are repopulated in
+ *     memory with another seed (700) and must equal each other.
+ *  5. c's files are copied record by record onto b's files through
+ *     ReadAttributeFile/WriteAttributeFile, once with default I/O and once
+ *     with direct I/O enabled; after each copy b is reloaded and must equal
+ *     a again.
+ */
+template <typename VectorType, typename BufferType>
+void AttributeTest::testReload(const AttributePtr & a, const AttributePtr & b, const AttributePtr & c)
+{
+    LOG(info, "testReload: vector '%s'", a->getName().c_str());
+
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+    a->setCreateSerialNum(43u);
+    EXPECT_TRUE( a->saveAs(b->getBaseFileName()) );
+    if (preciseEstimatedSize(*a)) {
+        EXPECT_EQUAL(statSize(*b), a->getEstimatedSaveByteSize());
+    } else {
+        double estSize = a->getEstimatedSaveByteSize();
+        double actSize = statSize(*b);
+        EXPECT_LESS_EQUAL(actSize * 1.0, estSize * 1.3);
+        EXPECT_GREATER_EQUAL(actSize * 1.0, estSize * 0.7);
+    }
+    EXPECT_TRUE( a->saveAs(c->getBaseFileName()) );
+    if (preciseEstimatedSize(*a)) {
+        EXPECT_EQUAL(statSize(*c), a->getEstimatedSaveByteSize());
+    }
+    EXPECT_TRUE( b->load() );
+    EXPECT_EQUAL(43u, b->getCreateSerialNum());
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+    EXPECT_TRUE( c->load() );
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(c.get())));
+    /* The record-level copy checks below are skipped for UINT1/UINT2/UINT4 attributes. */
+    if (isUnsignedSmallIntAttribute(*a))
+        return;
+    populate(static_cast<VectorType &>(*b.get()), 700);
+    populate(static_cast<VectorType &>(*c.get()), 700);
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(b.get())), *(static_cast<VectorType *>(c.get())));
+    /* c's files on disk still hold a's content: the seed-700 data above was never saved. */
+    {
+        ReadAttributeFile readC(c->getBaseFileName(), c->getConfig());
+        WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(),
+                                  DummyFileHeaderContext(),
+                                  c->getNumDocs());
+        std::unique_ptr<AttributeFile::Record> record(readC.getRecord());
+        ASSERT_TRUE(record.get());
+        for (size_t i(0), m(c->getNumDocs()); i < m; i++) {
+            EXPECT_TRUE(readC.read(*record));
+            EXPECT_TRUE(writeC.write(*record));
+        }
+        EXPECT_TRUE( ! readC.read(*record));
+    }
+    EXPECT_TRUE( b->load() );
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(a.get())),
+         *(static_cast<VectorType *>(b.get())));
+    {
+        ReadAttributeFile readC(c->getBaseFileName(), c->getConfig());
+        WriteAttributeFile writeC(b->getBaseFileName(), b->getConfig(),
+                                  DummyFileHeaderContext(),
+                                  c->getNumDocs());
+        readC.enableDirectIO();
+        writeC.enableDirectIO();
+        std::unique_ptr<AttributeFile::Record> record(readC.getRecord());
+        ASSERT_TRUE(record.get());
+        for (size_t i(0), m(c->getNumDocs()); i < m; i++) {
+            EXPECT_TRUE(readC.read(*record));
+            EXPECT_TRUE(writeC.write(*record));
+        }
+        EXPECT_TRUE( ! readC.read(*record));
+    }
+    EXPECT_TRUE( b->load() );
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+}
+
+/*
+ * Runs the reload round trip over a matrix of configurations: integer
+ * attributes (INT32, UINT4, UINT2, UINT1, INT8 "flag") and string attributes,
+ * as single / array / weighted set, with and without fast-search.
+ * Each configuration is exercised twice on the same three attributes: first
+ * with 0 documents (empty save/load), then with 100.
+ */
+void AttributeTest::testReload()
+{
+    // IntegerAttribute
+    // CollectionType::SINGLE
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1", Config(BasicType::INT32, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2", Config(BasicType::INT32, CollectionType::SINGLE));
+        AttributePtr iv3 = AttributeFactory::createAttribute("sint32_3", Config(BasicType::INT32, CollectionType::SINGLE));
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1", Config(BasicType::UINT4, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2", Config(BasicType::UINT4, CollectionType::SINGLE));
+        AttributePtr iv3 = AttributeFactory::createAttribute("suint4_3", Config(BasicType::UINT4, CollectionType::SINGLE));
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("suint2_1", Config(BasicType::UINT2, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("suint2_2", Config(BasicType::UINT2, CollectionType::SINGLE));
+        AttributePtr iv3 = AttributeFactory::createAttribute("suint2_3", Config(BasicType::UINT2, CollectionType::SINGLE));
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("suint1_1", Config(BasicType::UINT1, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("suint1_2", Config(BasicType::UINT1, CollectionType::SINGLE));
+        AttributePtr iv3 = AttributeFactory::createAttribute("suint1_3", Config(BasicType::UINT1, CollectionType::SINGLE));
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        Config cfg(BasicType::INT32, CollectionType::SINGLE);
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("sfsint32_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("sfsint32_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("sfsint32_3", cfg);
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    // CollectionType::ARRAY
+    {
+        Config cfg(BasicType::INT8, CollectionType::ARRAY);
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("flag_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("flag_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("flag_3", cfg);
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1", Config(BasicType::INT32, CollectionType::ARRAY));
+        AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2", Config(BasicType::INT32, CollectionType::ARRAY));
+        AttributePtr iv3 = AttributeFactory::createAttribute("aint32_3", Config(BasicType::INT32, CollectionType::ARRAY));
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        Config cfg(BasicType::INT32, CollectionType::ARRAY);
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("afsint32_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("afsint32_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("afsint32_3", cfg);
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    // CollectionType::WSET
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1", Config(BasicType::INT32, CollectionType::WSET));
+        AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2", Config(BasicType::INT32, CollectionType::WSET));
+        AttributePtr iv3 = AttributeFactory::createAttribute("wint32_3", Config(BasicType::INT32, CollectionType::WSET));
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+    {
+        Config cfg(BasicType::INT32, CollectionType::WSET);
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("wfsint32_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("wfsint32_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("wfsint32_3", cfg);
+        testReloadInt(iv1, iv2, iv3, 0);
+        testReloadInt(iv1, iv2, iv3, 100);
+    }
+
+
+    // StringAttribute
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("sstring_1", Config(BasicType::STRING, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("sstring_2", Config(BasicType::STRING, CollectionType::SINGLE));
+        AttributePtr iv3 = AttributeFactory::createAttribute("sstring_3", Config(BasicType::STRING, CollectionType::SINGLE));
+        testReloadString(iv1, iv2, iv3, 0);
+        testReloadString(iv1, iv2, iv3, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("astring_1", Config(BasicType::STRING, CollectionType::ARRAY));
+        AttributePtr iv2 = AttributeFactory::createAttribute("astring_2", Config(BasicType::STRING, CollectionType::ARRAY));
+        AttributePtr iv3 = AttributeFactory::createAttribute("astring_3", Config(BasicType::STRING, CollectionType::ARRAY));
+        testReloadString(iv1, iv2, iv3, 0);
+        testReloadString(iv1, iv2, iv3, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("wstring_1", Config(BasicType::STRING, CollectionType::WSET));
+        AttributePtr iv2 = AttributeFactory::createAttribute("wstring_2", Config(BasicType::STRING, CollectionType::WSET));
+        AttributePtr iv3 = AttributeFactory::createAttribute("wstring_3", Config(BasicType::STRING, CollectionType::WSET));
+        testReloadString(iv1, iv2, iv3, 0);
+        testReloadString(iv1, iv2, iv3, 100);
+    }
+    {
+        Config cfg(Config(BasicType::STRING, CollectionType::SINGLE));
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("sfsstring_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("sfsstring_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("sfsstring_3", cfg);
+        testReloadString(iv1, iv2, iv3, 0);
+        testReloadString(iv1, iv2, iv3, 100);
+    }
+    {
+        Config cfg(Config(BasicType::STRING, CollectionType::ARRAY));
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("afsstring_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("afsstring_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("afsstring_3", cfg);
+        testReloadString(iv1, iv2, iv3, 0);
+        testReloadString(iv1, iv2, iv3, 100);
+    }
+    {
+        Config cfg(Config(BasicType::STRING, CollectionType::WSET));
+        cfg.setFastSearch(true);
+        AttributePtr iv1 = AttributeFactory::createAttribute("wsfsstring_1", cfg);
+        AttributePtr iv2 = AttributeFactory::createAttribute("wsfsstring_2", cfg);
+        AttributePtr iv3 = AttributeFactory::createAttribute("wsfsstring_3", cfg);
+        testReloadString(iv1, iv2, iv3, 0);
+        testReloadString(iv1, iv2, iv3, 100);
+    }
+}
+/*
+ * Checks hasLoadData() before and after saving, for single, array and
+ * weighted-set INT32 attributes. Each section ends by saving under the base
+ * name the next section starts with ("loaddata3", "loaddata5"), and the next
+ * section then expects hasLoadData() to be false for a different collection
+ * type. So hasLoadData() apparently requires files matching the attribute's
+ * own collection type -- confirm against the implementation.
+ */
+void AttributeTest::testHasLoadData()
+{
+    { // single value
+        AttributePtr av = AttributeFactory::createAttribute("loaddata1", Config(BasicType::INT32));
+        EXPECT_TRUE(!av->hasLoadData());
+        av->save();
+        EXPECT_TRUE(av->hasLoadData());
+        av->saveAs("loaddata2");
+        av = AttributeFactory::createAttribute("loaddata2", Config(BasicType::INT32));
+        EXPECT_TRUE(av->hasLoadData());
+        av->saveAs("loaddata3");
+    }
+    { // array
+        AttributePtr av = AttributeFactory::createAttribute("loaddata3", Config(BasicType::INT32, CollectionType::ARRAY));
+        EXPECT_TRUE(!av->hasLoadData());
+        av->save();
+        EXPECT_TRUE(av->hasLoadData());
+        av->saveAs("loaddata4");
+        av = AttributeFactory::createAttribute("loaddata4", Config(BasicType::INT32, CollectionType::ARRAY));
+        EXPECT_TRUE(av->hasLoadData());
+        av->saveAs("loaddata5");
+    }
+    { // wset
+        AttributePtr av = AttributeFactory::createAttribute("loaddata5", Config(BasicType::INT32, CollectionType::WSET));
+        EXPECT_TRUE(!av->hasLoadData());
+        av->save();
+        EXPECT_TRUE(av->hasLoadData());
+        av->saveAs("loaddata6");
+        av = AttributeFactory::createAttribute("loaddata6", Config(BasicType::INT32, CollectionType::WSET));
+        EXPECT_TRUE(av->hasLoadData());
+    }
+}
+/*
+ * Fills a with numDocs documents (seed 21) and runs the in-memory save test
+ * with the buffer type matching the collection type. b is left empty and only
+ * used as the load target.
+ */
+void
+AttributeTest::testMemorySaverInt(const AttributePtr & a, const AttributePtr & b, size_t numDocs)
+{
+    addDocs(a, numDocs);
+    populate(static_cast<IntegerAttribute &>(*a.get()), 21);
+    if (a->hasWeightedSetType()) {
+        testMemorySaver<IntegerAttribute, IntegerAttribute::WeightedInt>(a, b);
+    } else {
+        testMemorySaver<IntegerAttribute, IntegerAttribute::largeint_t>(a, b);
+    }
+}
+/* String counterpart of testMemorySaverInt(). */
+void
+AttributeTest::testMemorySaverString(const AttributePtr & a, const AttributePtr & b, size_t numDocs)
+{
+    addDocs(a, numDocs);
+    populate(static_cast<StringAttribute &>(*a.get()), 21);
+    if (a->hasWeightedSetType()) {
+        testMemorySaver<StringAttribute, StringAttribute::WeightedString>(a, b);
+    } else {
+        testMemorySaver<StringAttribute, vespalib::string>(a, b);
+    }
+}
+/*
+ * Saves a into an AttributeMemorySaveTarget under b's base file name and
+ * verifies that nothing reaches disk until writeToFile() is called: b's
+ * ".dat" file must not exist after saveAs() and must exist after
+ * writeToFile(). b is then loaded from those files and must equal a.
+ */
+template <typename VectorType, typename BufferType>
+void
+AttributeTest::testMemorySaver(const AttributePtr & a, const AttributePtr & b)
+{
+    LOG(info, "testMemorySaver: vector '%s'", a->getName().c_str());
+
+    AttributeMemorySaveTarget saveTarget;
+    EXPECT_TRUE(a->saveAs(b->getBaseFileName(), saveTarget));
+    FastOS_StatInfo statInfo;
+    vespalib::string datFile = vespalib::make_string("%s.dat", b->getBaseFileName().c_str());
+    EXPECT_TRUE(!FastOS_File::Stat(datFile.c_str(), &statInfo));
+    EXPECT_TRUE(saveTarget.writeToFile(TuneFileAttributes(),
+                                       DummyFileHeaderContext()));
+    EXPECT_TRUE(FastOS_File::Stat(datFile.c_str(), &statInfo));
+    EXPECT_TRUE(b->load());
+    compare<VectorType, BufferType>
+        (*(static_cast<VectorType *>(a.get())), *(static_cast<VectorType *>(b.get())));
+}
+/*
+ * Runs the in-memory save test with 100 documents for INT32, UINT4 (single
+ * only) and STRING attributes across single / array / weighted-set types.
+ */
+void
+AttributeTest::testMemorySaver()
+{
+    // CollectionType::SINGLE
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("sint32_1ms", Config(BasicType::INT32, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("sint32_2ms", Config(BasicType::INT32, CollectionType::SINGLE));
+        testMemorySaverInt(iv1, iv2, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("suint4_1ms", Config(BasicType::UINT4, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("suint4_2ms", Config(BasicType::UINT4, CollectionType::SINGLE));
+        testMemorySaverInt(iv1, iv2, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("sstr_1ms", Config(BasicType::STRING, CollectionType::SINGLE));
+        AttributePtr iv2 = AttributeFactory::createAttribute("sstr_2ms", Config(BasicType::STRING, CollectionType::SINGLE));
+        testMemorySaverString(iv1, iv2, 100);
+    }
+    // CollectionType::ARRAY
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("aint32_1ms", Config(BasicType::INT32, CollectionType::ARRAY));
+        AttributePtr iv2 = AttributeFactory::createAttribute("aint32_2ms", Config(BasicType::INT32, CollectionType::ARRAY));
+        testMemorySaverInt(iv1, iv2, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("astr_1ms", Config(BasicType::STRING, CollectionType::ARRAY));
+        AttributePtr iv2 = AttributeFactory::createAttribute("astr_2ms", Config(BasicType::STRING, CollectionType::ARRAY));
+        testMemorySaverString(iv1, iv2, 100);
+    }
+    // CollectionType::WSET
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("wint32_1ms", Config(BasicType::INT32, CollectionType::WSET));
+        AttributePtr iv2 = AttributeFactory::createAttribute("wint32_2ms", Config(BasicType::INT32, CollectionType::WSET));
+        testMemorySaverInt(iv1, iv2, 100);
+    }
+    {
+        AttributePtr iv1 = AttributeFactory::createAttribute("wstr_1ms", Config(BasicType::STRING, CollectionType::WSET));
+        AttributePtr iv2 = AttributeFactory::createAttribute("wstr_2ms", Config(BasicType::STRING, CollectionType::WSET));
+        testMemorySaverString(iv1, iv2, 100);
+    }
+}
+
+/* Replaces the content of `values` with 0, 1, ..., numValues - 1 converted to T. */
+template <typename T>
+void
+AttributeTest::fillNumeric(std::vector<T> & values, uint32_t numValues)
+{
+    values.clear();
+    values.reserve(numValues);
+    for (uint32_t i = 0; i < numValues; ++i) {
+        values.push_back(static_cast<T>(i));
+    }
+}
+/*
+ * Replaces the content of `values` with "string00", "string01", ...; indices
+ * below 10 are zero-padded to two digits, larger ones are written as-is.
+ */
+void
+AttributeTest::fillString(std::vector<vespalib::string> & values, uint32_t numValues)
+{
+    values.clear();
+    values.reserve(numValues);
+    for (uint32_t i = 0; i < numValues; ++i) {
+        vespalib::asciistream ss;
+        ss << "string" << (i < 10 ? "0" : "") << i;
+        values.push_back(ss.str());
+    }
+}
+/*
+ * Appends values[0 .. valueCount) to document `doc` with weight 1.
+ * Returns true only if every append succeeded. Because of the `retval &&`
+ * short-circuit, no further append is attempted after the first failure
+ * (each remaining iteration still records a failed expectation).
+ * The caller must ensure valueCount <= values.size().
+ */
+template <typename VectorType, typename BufferType>
+bool
+AttributeTest::appendToVector(VectorType & v, uint32_t doc, uint32_t valueCount,
+                              const std::vector<BufferType> & values)
+{
+    bool retval = true;
+    for (uint32_t i = 0; i < valueCount; ++i) {
+        EXPECT_TRUE((retval = retval && v.append(doc, values[i], 1)));
+    }
+    return retval;
+}
+/*
+ * Checks that document `doc` holds exactly valueCount values and that
+ * exactly numValues of them compare equal to `value`. Returns false at the
+ * first failed expectation.
+ * NOTE(review): &buffer[0] is taken even when valueCount is 0, i.e. on an
+ * empty vector; buffer.data() would be the safe spelling if that case occurs.
+ */
+template <typename BufferType>
+bool
+AttributeTest::checkCount(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                          uint32_t numValues, const BufferType & value)
+{
+    std::vector<BufferType> buffer(valueCount);
+    if (!EXPECT_EQUAL(valueCount, ptr->getValueCount(doc))) return false;
+    if (!EXPECT_EQUAL(valueCount, ptr->get(doc, &buffer[0], buffer.size()))) return false;
+    if (!EXPECT_EQUAL(numValues, static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), value)))) return false;
+    return true;
+}
+/*
+ * Checks that document `doc` holds valueCount values and that value i equals
+ * values[i % range], i.e. the first `range` entries of `values` repeated
+ * cyclically. Returns true only if all checks passed; after the first
+ * failure the remaining comparisons are short-circuited.
+ * NOTE(review): same &buffer[0]-on-empty-vector caveat as checkCount.
+ */
+template <typename BufferType>
+bool
+AttributeTest::checkContent(const AttributePtr & ptr, uint32_t doc, uint32_t valueCount,
+                            uint32_t range, const std::vector<BufferType> & values)
+{
+    std::vector<BufferType> buffer(valueCount);
+    bool retval = true;
+    EXPECT_TRUE((retval = retval && (static_cast<uint32_t>(ptr->getValueCount(doc)) == valueCount)));
+    EXPECT_TRUE((retval = retval && (ptr->get(doc, &buffer[0], buffer.size()) == valueCount)));
+    for (uint32_t i = 0; i < valueCount; ++i) {
+        EXPECT_TRUE((retval = retval && (buffer[i] == values[i % range])));
+    }
+    return retval;
+}
+
+
+//----------------------------------------------------------------------------- +// CollectionType::SINGLE +//----------------------------------------------------------------------------- + +template <typename VectorType, typename BufferType, typename BaseType> +void +AttributeTest::testSingle(const AttributePtr & ptr, const std::vector<BufferType> & values) +{ + LOG(info, "testSingle: vector '%s' with %u documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size())); + + VectorType & v = *(static_cast<VectorType *>(ptr.get())); + uint32_t numUniques = values.size(); + std::vector<BufferType> buffer(1); + + // test update() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(ptr->getValueCount(doc) == 1); + uint32_t i = doc % numUniques; + uint32_t j = (doc + 1) % numUniques; + + EXPECT_TRUE(v.update(doc, values[i])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i])); + + EXPECT_TRUE(v.update(doc, values[j])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[j])); + } + EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0])); + + // test append() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(!v.append(doc, values[0], 1)); + } + EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1)); + + // test remove() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + EXPECT_TRUE(!v.remove(doc, values[0], 1)); + } + EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1)); + + bool smallUInt = isUnsignedSmallIntAttribute(*ptr); + // test clearDoc() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t i = (doc + 2) % numUniques; + + EXPECT_TRUE(v.update(doc, values[i])); + if (doc % 2 == 0) { // alternate clearing + ptr->clearDoc(doc); + } + ptr->commit(); + EXPECT_EQUAL(1u, ptr->get(doc, &buffer[0], buffer.size())); + if (doc % 2 == 0) { + if (smallUInt) { + expectZero(buffer[0]); + } else { + 
EXPECT_TRUE(attribute::isUndefined<BaseType>(buffer[0])); + } + } else { + EXPECT_TRUE(!attribute::isUndefined<BaseType>(buffer[0])); + EXPECT_EQUAL(values[i], buffer[0]); + } + } + EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs())); +} + +void +AttributeTest::testSingle() +{ + uint32_t numDocs = 1000; + uint32_t numUniques = 50; + uint32_t numUniqueNibbles = 9; + { + std::vector<AttributeVector::largeint_t> values; + fillNumeric(values, numUniques); + std::vector<AttributeVector::largeint_t> nibbleValues; + fillNumeric(nibbleValues, numUniqueNibbles); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-int32", Config(BasicType::INT32, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-uint4", Config(BasicType::UINT4, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<IntegerAttribute, AttributeVector::largeint_t, int8_t>(ptr, nibbleValues); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-post-int32", cfg); + addDocs(ptr, numDocs); + testSingle<IntegerAttribute, AttributeVector::largeint_t, int32_t>(ptr, values); + } + } + { + std::vector<double> values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("sv-float", Config(BasicType::FLOAT, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<FloatingPointAttribute, double, float>(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-post-float", cfg); + addDocs(ptr, numDocs); + testSingle<FloatingPointAttribute, double, float>(ptr, values); + } + + } + { + std::vector<vespalib::string> values; + fillString(values, numUniques); + { + AttributePtr ptr = 
AttributeFactory::createAttribute("sv-string", Config(BasicType::STRING, CollectionType::SINGLE)); + addDocs(ptr, numDocs); + testSingle<StringAttribute, vespalib::string, vespalib::string>(ptr, values); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sv-fs-string", cfg); + addDocs(ptr, numDocs); + testSingle<StringAttribute, vespalib::string, vespalib::string>(ptr, values); + } + } +} + + +//----------------------------------------------------------------------------- +// CollectionType::ARRAY +//----------------------------------------------------------------------------- + +template <typename VectorType, typename BufferType> +void +AttributeTest::testArray(const AttributePtr & ptr, const std::vector<BufferType> & values) +{ + LOG(info, "testArray: vector '%s' with %i documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size())); + + VectorType & v = *(static_cast<VectorType *>(ptr.get())); + uint32_t numUniques = values.size(); + ASSERT_TRUE(numUniques >= 6); + + + // test update() + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + size_t sumAppends(0); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + ptr->clearDoc(doc); + + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + sumAppends += valueCount; + + uint32_t i = doc % numUniques; + EXPECT_TRUE(v.update(doc, values[i])); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 1, 1, values[i])); + } + EXPECT_TRUE(!v.update(ptr->getNumDocs(), values[0])); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), (1 + 2)*ptr->getNumDocs() + sumAppends); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), sumAppends); + + + // test append() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + 
uint32_t valueCount = doc % numUniques; + ptr->clearDoc(doc); + + // append unique values + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + EXPECT_TRUE(checkContent(ptr, doc, valueCount, valueCount, values)); + + // append duplicates + EXPECT_TRUE(appendToVector(v, doc, valueCount, values)); + ptr->commit(); + EXPECT_TRUE(checkContent(ptr, doc, valueCount * 2, valueCount, values)); + } + EXPECT_TRUE(!v.append(ptr->getNumDocs(), values[0], 1)); + + + // test remove() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + ptr->clearDoc(doc); + + EXPECT_TRUE(v.append(doc, values[1], 1)); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_TRUE(v.append(doc, values[3], 1)); + } + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(v.append(doc, values[5], 1)); + } + + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[0], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 9, 1, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 9, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[1], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 8, 0, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 8, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 8, 5, values[5])); + + EXPECT_TRUE(v.remove(doc, values[5], 1)); + ptr->commit(); + EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[1])); + EXPECT_TRUE(checkCount(ptr, doc, 3, 3, values[3])); + EXPECT_TRUE(checkCount(ptr, doc, 3, 0, values[5])); + } + EXPECT_TRUE(!v.remove(ptr->getNumDocs(), values[0], 1)); + + + // test clearDoc() + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = doc % numUniques; + + ptr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[0], 1)); + } + ptr->clearDoc(doc); + for (uint32_t j 
= 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[1], 1)); + } + ptr->commit(); + + EXPECT_TRUE(checkCount(ptr, doc, valueCount, valueCount, values[1])); + } + EXPECT_TRUE(!v.clearDoc(ptr->getNumDocs())); +} + +template <typename BufferType> +void +AttributeTest::printArray(const AttributePtr & ptr) +{ + uint32_t bufferSize = ptr->getMaxValueCount(); + std::vector<BufferType> buffer(bufferSize); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + std::cout << "doc[" << doc << "][" << i << "] = " << buffer[i] + << std::endl; + } + } +} + +void +AttributeTest::testArray() +{ + uint32_t numDocs = 100; + uint32_t numUniques = 50; + { // IntegerAttribute + std::vector<AttributeVector::largeint_t> values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-int32", Config(BasicType::INT32, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + addDocs(ptr, numDocs); + testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); + addDocs(ptr, numDocs); + testArray<IntegerAttribute, AttributeVector::largeint_t>(ptr, values); + } + } + { // FloatingPointAttribute + std::vector<double> values; + fillNumeric(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-float", Config(BasicType::FLOAT, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray<FloatingPointAttribute, double>(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + 
cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("a-fs-float", cfg); + addDocs(ptr, numDocs); + testArray<FloatingPointAttribute, double>(ptr, values); + } + } + { // StringAttribute + std::vector<vespalib::string> values; + fillString(values, numUniques); + { + AttributePtr ptr = AttributeFactory::createAttribute("a-string", Config(BasicType::STRING, CollectionType::ARRAY)); + addDocs(ptr, numDocs); + testArray<StringAttribute, vespalib::string>(ptr, values); + } + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("afs-string", cfg); + addDocs(ptr, numDocs); + testArray<StringAttribute, vespalib::string>(ptr, values); + } + } +} + + +//----------------------------------------------------------------------------- +// CollectionType::WSET +//----------------------------------------------------------------------------- + +template <typename BufferType> +void +AttributeTest::printWeightedSet(const AttributePtr & ptr) +{ + std::vector<BufferType> buffer(ptr->getMaxValueCount()); + for (uint32_t doc = 0; doc < ptr->getNumDocs(); ++doc) { + uint32_t valueCount = ptr->get(doc, &buffer[0], buffer.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + std::cout << "doc[" << doc << "][" << i << "] = {" << buffer[i].getValue() + << ", " << buffer[i].getWeight() << "}" << std::endl; + } + } +} + +template <typename VectorType, typename BufferType> +void +AttributeTest::testWeightedSet(const AttributePtr & ptr, const std::vector<BufferType> & values) +{ + LOG(info, "testWeightedSet: vector '%s' with %u documents and %lu values", + ptr->getName().c_str(), ptr->getNumDocs(), static_cast<unsigned long>(values.size())); + + VectorType & v = *(static_cast<VectorType *>(ptr.get())); + uint32_t numDocs = v.getNumDocs(); + ASSERT_TRUE(values.size() >= numDocs + 10); + uint32_t bufferSize = numDocs + 10; + std::vector<BufferType> buffer(bufferSize); + + // fill and 
check + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + v.clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(v.append(doc, values[j].getValue(), values[j].getWeight())); + } + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(buffer[j].getValue() == values[j].getValue()); + EXPECT_TRUE(buffer[j].getWeight() == values[j].getWeight()); + } + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + // test append() + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + + // append non-existent value + EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight())); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue()); + EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight()); + + // append existent value + EXPECT_TRUE(v.append(doc, values[doc].getValue(), values[doc].getWeight() + 10)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + EXPECT_TRUE(buffer[doc].getValue() == values[doc].getValue()); + EXPECT_TRUE(buffer[doc].getWeight() == values[doc].getWeight() + 10); + + // append non-existent value two times + EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight())); + EXPECT_TRUE(v.append(doc, values[doc + 1].getValue(), values[doc + 1].getWeight() + 10)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue()); + EXPECT_TRUE(buffer[doc + 1].getWeight() == values[doc + 1].getWeight() 
+ 10); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + // test remove() + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + + // remove non-existent value + EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2); + EXPECT_TRUE(v.remove(doc, values[doc + 2].getValue(), 0)); + commit(ptr); + EXPECT_TRUE(static_cast<uint32_t>(v.getValueCount(doc)) == valueCount + 2); + + // remove existent value + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 2); + EXPECT_TRUE(buffer[doc + 1].getValue() == values[doc + 1].getValue()); + EXPECT_TRUE(v.remove(doc, values[doc + 1].getValue(), 0)); + commit(ptr); + EXPECT_TRUE(ptr->get(doc, &buffer[0], buffer.size()) == valueCount + 1); + for (uint32_t i = 0; i < valueCount + 1; ++i) { + EXPECT_TRUE(buffer[i].getValue() != values[doc + 1].getValue()); + } + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), numDocs + (numDocs*(numDocs-1))/2 + numDocs*4 + numDocs * 2); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); +} + +void +AttributeTest::testWeightedSet() +{ + uint32_t numDocs = 100; + uint32_t numValues = numDocs + 10; + { // IntegerAttribute + std::vector<AttributeVector::WeightedInt> values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(AttributeVector::WeightedInt(i, i + numValues)); + } + + { + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType::WSET)); + addDocs(ptr, numDocs); + testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(ptr, values); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); + addDocs(ptr, numDocs); + testWeightedSet<IntegerAttribute, AttributeVector::WeightedInt>(ptr, 
values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("1", e)); + } + } + { // FloatingPointAttribute + std::vector<AttributeVector::WeightedFloat> values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(AttributeVector::WeightedFloat(i, i + numValues)); + } + + { + Config cfg(BasicType::FLOAT, CollectionType::WSET); + AttributePtr ptr = AttributeFactory::createAttribute("ws-float", cfg); + addDocs(ptr, numDocs); + testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(ptr, values); + } + { + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("ws-fs-float", cfg); + addDocs(ptr, numDocs); + testWeightedSet<FloatingPointAttribute, AttributeVector::WeightedFloat>(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("1", e)); + } + } + { // StringAttribute + std::vector<AttributeVector::WeightedString> values; + values.reserve(numValues); + for (uint32_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? 
"0" : "") << i; + values.push_back(AttributeVector::WeightedString(ss.str(), i + numValues)); + } + + { + AttributePtr ptr = AttributeFactory::createAttribute + ("wsstr", Config(BasicType::STRING, CollectionType::WSET)); + addDocs(ptr, numDocs); + testWeightedSet<StringAttribute, AttributeVector::WeightedString>(ptr, values); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", cfg); + addDocs(ptr, numDocs); + testWeightedSet<StringAttribute, AttributeVector::WeightedString>(ptr, values); + IAttributeVector::EnumHandle e; + EXPECT_TRUE(ptr->findEnum("string00", e)); + } + } +} + +template <typename VectorType, typename BufferType> +void +AttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) +{ + LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + VectorType & vec = static_cast<VectorType &>(*ptr.get()); + addDocs(ptr, 13); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 0u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + for (uint32_t doc = 0; doc < 13; ++doc) { + ASSERT_TRUE(vec.update(doc, 100)); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + ptr->commit(); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Add, -10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Sub, -10))); + EXPECT_TRUE(vec.apply(4, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(5, Arith(Arith::Mul, -10))); + EXPECT_TRUE(vec.apply(6, Arith(Arith::Div, 10))); + EXPECT_TRUE(vec.apply(7, Arith(Arith::Div, -10))); + EXPECT_TRUE(vec.apply(8, Arith(Arith::Add, 10.5))); + EXPECT_TRUE(vec.apply(9, Arith(Arith::Sub, 10.5))); + EXPECT_TRUE(vec.apply(10, Arith(Arith::Mul, 1.2))); + EXPECT_TRUE(vec.apply(11, 
Arith(Arith::Mul, 0.8))); + EXPECT_TRUE(vec.apply(12, Arith(Arith::Div, 0.8))); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 26u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u); + ptr->commit(); + + std::vector<BufferType> buf(1); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(1, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + ptr->get(2, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + ptr->get(3, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(4, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1000); + ptr->get(5, &buf[0], 1); + EXPECT_EQUAL(buf[0], -1000); + ptr->get(6, &buf[0], 1); + EXPECT_EQUAL(buf[0], 10); + ptr->get(7, &buf[0], 1); + EXPECT_EQUAL(buf[0], -10); + if (ptr->getBasicType() == BasicType::INT32) { + ptr->get(8, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110); + ptr->get(9, &buf[0], 1); + EXPECT_EQUAL(buf[0], 90); + } else if (ptr->getBasicType() == BasicType::FLOAT || + ptr->getBasicType() == BasicType::DOUBLE) + { + ptr->get(8, &buf[0], 1); + EXPECT_EQUAL(buf[0], 110.5); + ptr->get(9, &buf[0], 1); + EXPECT_EQUAL(buf[0], 89.5); + } else { + ASSERT_TRUE(false); + } + ptr->get(10, &buf[0], 1); + EXPECT_EQUAL(buf[0], 120); + ptr->get(11, &buf[0], 1); + EXPECT_EQUAL(buf[0], 80); + ptr->get(12, &buf[0], 1); + EXPECT_EQUAL(buf[0], 125); + + + // try several arithmetic operations on the same document in a single commit + ASSERT_TRUE(vec.update(0, 1100)); + ASSERT_TRUE(vec.update(1, 1100)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 28u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 13u); + for (uint32_t i = 0; i < 10; ++i) { + ASSERT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + ASSERT_TRUE(vec.apply(1, Arith(Arith::Add, 10))); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 48u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u); + ptr->commit(); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1200); + ptr->get(1, &buf[0], 1); + EXPECT_EQUAL(buf[0], 1200); + + 
ASSERT_TRUE(vec.update(0, 10)); + ASSERT_TRUE(vec.update(1, 10)); + ASSERT_TRUE(vec.update(2, 10)); + ASSERT_TRUE(vec.update(3, 10)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 52u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 33u); + for (uint32_t i = 0; i < 8; ++i) { + EXPECT_TRUE(vec.apply(0, Arith(Arith::Mul, 1.2))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Mul, 2.3))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 3.4))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Mul, 5.6))); + ptr->commit(); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 84u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + + + // try divide by zero + ASSERT_TRUE(vec.update(0, 100)); + EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0))); + ptr->commit(); + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 86u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 66u); + } else { // does not apply for interger attributes + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + } + ptr->get(0, &buf[0], 1); + if (ptr->getBasicType() == BasicType::INT32) { + EXPECT_EQUAL(buf[0], 100); + } + + // try divide by zero with empty change vector + EXPECT_TRUE(vec.apply(0, Arith(Arith::Div, 0))); + ptr->commit(); + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 87u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 67u); + } else { // does not apply for interger attributes + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 85u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 65u); + } +} + +void +AttributeTest::testArithmeticValueUpdate() +{ + { + AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE)); + testArithmeticValueUpdate<IntegerAttribute, 
IntegerAttribute::largeint_t>(ptr); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE)); + testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsint32", cfg); + testArithmeticValueUpdate<IntegerAttribute, IntegerAttribute::largeint_t>(ptr); + } + { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsfloat", cfg); + testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr); + } + { + Config cfg(BasicType::DOUBLE, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfsdouble", cfg); + testArithmeticValueUpdate<FloatingPointAttribute, double>(ptr); + } +} + + +template <typename VectorType, typename BaseType, typename BufferType> +void +AttributeTest::testArithmeticWithUndefinedValue(const AttributePtr & ptr, BaseType before, BaseType after) +{ + LOG(info, "testArithmeticWithUndefinedValue: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + VectorType & vec = static_cast<VectorType &>(*ptr.get()); + addDocs(ptr, 1); + ASSERT_TRUE(vec.update(0, before)); + ptr->commit(); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + ptr->commit(); + + std::vector<BufferType> buf(1); + ptr->get(0, &buf[0], 1); + + if (ptr->getClass().inherits(FloatingPointAttribute::classId)) { + EXPECT_TRUE(std::isnan(buf[0])); + } else { + EXPECT_EQUAL(buf[0], after); + } +} + +void +AttributeTest::testArithmeticWithUndefinedValue() +{ + { + AttributePtr ptr = AttributeFactory::createAttribute("sint32", Config(BasicType::INT32, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue<IntegerAttribute, int32_t, IntegerAttribute::largeint_t> + (ptr, 
std::numeric_limits<int32_t>::min(), std::numeric_limits<int32_t>::min()); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", Config(BasicType::FLOAT, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue<FloatingPointAttribute, float, double> + (ptr, std::numeric_limits<float>::quiet_NaN(), std::numeric_limits<float>::quiet_NaN()); + } + { + AttributePtr ptr = AttributeFactory::createAttribute("sdouble", Config(BasicType::DOUBLE, CollectionType::SINGLE)); + testArithmeticWithUndefinedValue<FloatingPointAttribute, double, double> + (ptr, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()); + } +} + + +template <typename VectorType, typename BufferType> +void +AttributeTest::testMapValueUpdate(const AttributePtr & ptr, BufferType initValue, + const FieldValue & initFieldValue, const FieldValue & nonExistant, + bool removeIfZero, bool createIfNonExistant) +{ + LOG(info, "testMapValueUpdate: vector '%s'", ptr->getName().c_str()); + typedef MapValueUpdate MapVU; + typedef ArithmeticValueUpdate ArithVU; + VectorType & vec = static_cast<VectorType &>(*ptr.get()); + + addDocs(ptr, 6); + for (uint32_t doc = 0; doc < 6; ++doc) { + ASSERT_TRUE(vec.append(doc, initValue.getValue(), 100)); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 6u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 0u); + + EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, + ArithVU(ArithVU::Add, 10)))); + EXPECT_TRUE(ptr->apply(1, MapVU(initFieldValue, + ArithVU(ArithVU::Sub, 10)))); + EXPECT_TRUE(ptr->apply(2, MapVU(initFieldValue, + ArithVU(ArithVU::Mul, 10)))); + EXPECT_TRUE(ptr->apply(3, MapVU(initFieldValue, + ArithVU(ArithVU::Div, 10)))); + ptr->commit(); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 10u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 4u); + + std::vector<BufferType> buf(2); + ptr->get(0, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 110); + ptr->get(1, &buf[0], 2); + 
EXPECT_EQUAL(buf[0].getWeight(), 90); + ptr->get(2, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 1000); + ptr->get(3, &buf[0], 2); + EXPECT_EQUAL(buf[0].getWeight(), 10); + + // removeifzero + EXPECT_TRUE(ptr->apply(4, MapVU(initFieldValue, + ArithVU(ArithVU::Sub, 100)))); + ptr->commit(); + if (removeIfZero) { + EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(0)); + } else { + EXPECT_EQUAL(ptr->get(4, &buf[0], 2), uint32_t(1)); + EXPECT_EQUAL(buf[0].getWeight(), 0); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 11u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 5u); + + // createifnonexistant + EXPECT_TRUE(ptr->apply(5, MapVU(nonExistant, + ArithVU(ArithVU::Add, 10)))); + ptr->commit(); + if (createIfNonExistant) { + EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(2)); + EXPECT_EQUAL(buf[0].getWeight(), 100); + EXPECT_EQUAL(buf[1].getWeight(), 10); + } else { + EXPECT_EQUAL(ptr->get(5, &buf[0], 2), uint32_t(1)); + EXPECT_EQUAL(buf[0].getWeight(), 100); + } + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 12u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u); + + + // try divide by zero (should be ignored) + vec.clearDoc(0); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 13u); + ASSERT_TRUE(vec.append(0, initValue.getValue(), 12345)); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u); + EXPECT_TRUE(ptr->apply(0, MapVU(initFieldValue, ArithVU(ArithVU::Div, 0)))); + EXPECT_EQUAL(ptr->getStatus().getUpdateCount(), 14u); + EXPECT_EQUAL(ptr->getStatus().getNonIdempotentUpdateCount(), 6u); + ptr->commit(); + ptr->get(0, &buf[0], 1); + EXPECT_EQUAL(buf[0].getWeight(), 12345); +} + +void +AttributeTest::testMapValueUpdate() +{ + { // regular set + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType::WSET)); + testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt> + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), false, 
false); + } + { // remove if zero + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, true, false))); + testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt> + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), true, false); + } + { // create if non existant + AttributePtr ptr = AttributeFactory::createAttribute + ("wsint32", Config(BasicType::INT32, CollectionType(CollectionType::WSET, false, true))); + testMapValueUpdate<IntegerAttribute, AttributeVector::WeightedInt> + (ptr, AttributeVector::WeightedInt(64, 1), IntFieldValue(64), + IntFieldValue(32), false, true); + } + + Config setCfg(Config(BasicType::STRING, CollectionType::WSET)); + Config setRemoveCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, true, false))); + Config setCreateCfg(Config(BasicType::STRING, CollectionType(CollectionType::WSET, false, true))); + + { // regular set + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, false); + } + { // remove if zero + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setRemoveCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), true, false); + } + { // create if non existant + AttributePtr ptr = AttributeFactory::createAttribute("wsstr", setCreateCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, true); + } + + // fast-search - posting lists + { // regular set + setCfg.setFastSearch(true); + AttributePtr ptr = 
AttributeFactory::createAttribute("wsfsstr", setCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, false); + } + { // remove if zero + setRemoveCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setRemoveCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), true, false); + } + { // create if non existant + setCreateCfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("wsfsstr", setCreateCfg); + testMapValueUpdate<StringAttribute, AttributeVector::WeightedString> + (ptr, AttributeVector::WeightedString("first", 1), StringFieldValue("first"), + StringFieldValue("second"), false, true); + } +} + + + +void +AttributeTest::commit(const AttributePtr & ptr) +{ + ptr->commit(); +} + + +void +AttributeTest::testStatus() +{ + std::vector<vespalib::string> values; + fillString(values, 16); + uint32_t numDocs = 100; + // No posting list + static constexpr size_t LeafNodeSize = + 4 + sizeof(EnumStoreBase::Index) * EnumTreeTraits::LEAF_SLOTS; + static constexpr size_t InternalNodeSize = + 8 + (sizeof(EnumStoreBase::Index) + + sizeof(btree::EntryRef)) * EnumTreeTraits::INTERNAL_SLOTS; + static constexpr size_t NestedVectorSize = 24; // sizeof(vespalib::Array) + + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + AttributePtr ptr = AttributeFactory::createAttribute("as", cfg); + addDocs(ptr, numDocs); + StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get())); + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE(appendToVector(sa, i, 1, values)); + } + ptr->commit(true); + EXPECT_EQUAL(ptr->getStatus().getNumDocs(), 100u); + EXPECT_EQUAL(ptr->getStatus().getNumValues(), 100u); + 
EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), 1u); + size_t expUsed = 0; + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree + expUsed += 1 * 32; // enum store (uniquevalues * bytes per entry) + // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex)) + expUsed += 100 * sizeof(search::multivalue::Index32) + 100 * 4; + EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); + EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); + } + + { + Config cfg(BasicType::STRING, CollectionType::ARRAY); + AttributePtr ptr = AttributeFactory::createAttribute("as", cfg); + addDocs(ptr, numDocs); + StringAttribute & sa = *(static_cast<StringAttribute *>(ptr.get())); + const size_t numUniq(16); + const size_t numValuesPerDoc(16); + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE(appendToVector(sa, i, numValuesPerDoc, values)); + } + ptr->commit(true); + EXPECT_EQUAL(ptr->getStatus().getNumDocs(), numDocs); + EXPECT_EQUAL(ptr->getStatus().getNumValues(), numDocs*numValuesPerDoc); + EXPECT_EQUAL(ptr->getStatus().getNumUniqueValues(), numUniq); + size_t expUsed = 0; + expUsed += 1 * InternalNodeSize + 1 * LeafNodeSize; // enum store tree + expUsed += numUniq * 32; // enum store (16 unique values, 32 bytes per entry) + // multi value mapping (numdocs * sizeof(MappingIndex) + numvalues * sizeof(EnumIndex) + + // numdocs * sizeof(Array<EnumIndex>) (due to vector vector)) + expUsed += numDocs * sizeof(search::multivalue::Index32) + numDocs * numValuesPerDoc * sizeof(EnumStoreBase::Index) + ((numValuesPerDoc > search::multivalue::Index32::maxValues()) ? 
numDocs * NestedVectorSize : 0); + EXPECT_GREATER_EQUAL(ptr->getStatus().getUsed(), expUsed); + EXPECT_GREATER_EQUAL(ptr->getStatus().getAllocated(), expUsed); + } +} + +void +AttributeTest::testNullProtection() +{ + size_t len1 = strlen("evil"); + size_t len2 = strlen("string"); + size_t len = len1 + 1 + len2; + vespalib::string good("good"); + vespalib::string evil("evil string"); + vespalib::string pureEvil("evil"); + EXPECT_EQUAL(strlen(evil.data()), len); + EXPECT_EQUAL(strlen(evil.c_str()), len); + evil[len1] = 0; // replace space with '\0' + EXPECT_EQUAL(strlen(evil.data()), len1); + EXPECT_EQUAL(strlen(evil.c_str()), len1); + EXPECT_EQUAL(strlen(evil.data() + len1), 0u); + EXPECT_EQUAL(strlen(evil.c_str() + len1), 0u); + EXPECT_EQUAL(strlen(evil.data() + len1 + 1), len2); + EXPECT_EQUAL(strlen(evil.c_str() + len1 + 1), len2); + EXPECT_EQUAL(evil.size(), len); + { // string + AttributeVector::DocId docId; + std::vector<vespalib::string> buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::SINGLE)); + StringAttribute &v = static_cast<StringAttribute &>(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.update(docId, evil)); + v.commit(); + size_t n = static_cast<const AttributeVector &>(v).get(docId, &buf[0], buf.size()); + EXPECT_EQUAL(n, 1u); + EXPECT_EQUAL(buf[0], pureEvil); + } + { // string array + AttributeVector::DocId docId; + std::vector<vespalib::string> buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::ARRAY)); + StringAttribute &v = static_cast<StringAttribute &>(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.append(0, good, 1)); + EXPECT_TRUE(v.append(0, evil, 1)); + EXPECT_TRUE(v.append(0, good, 1)); + v.commit(); + size_t n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 3u); + EXPECT_EQUAL(buf[0], good); + EXPECT_EQUAL(buf[1], pureEvil); + 
EXPECT_EQUAL(buf[2], good); + } + { // string set + AttributeVector::DocId docId; + std::vector<StringAttribute::WeightedString> buf(16); + AttributePtr attr = AttributeFactory::createAttribute("string", Config(BasicType::STRING, CollectionType::WSET)); + StringAttribute &v = static_cast<StringAttribute &>(*attr.get()); + EXPECT_TRUE(v.addDoc(docId)); + EXPECT_TRUE(v.append(0, good, 10)); + EXPECT_TRUE(v.append(0, evil, 20)); + v.commit(); + size_t n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 2u); + if (buf[0].getValue() != good) { + std::swap(buf[0], buf[1]); + } + EXPECT_EQUAL(buf[0].getValue(), good); + EXPECT_EQUAL(buf[0].getWeight(), 10); + EXPECT_EQUAL(buf[1].getValue(), pureEvil); + EXPECT_EQUAL(buf[1].getWeight(), 20); + + // remove + EXPECT_TRUE(v.remove(0, evil, 20)); + v.commit(); + n = static_cast<const AttributeVector &>(v).get(0, &buf[0], buf.size()); + EXPECT_EQUAL(n, 1u); + EXPECT_EQUAL(buf[0].getValue(), good); + EXPECT_EQUAL(buf[0].getWeight(), 10); + } +} + +void +AttributeTest::testGeneration(const AttributePtr & attr, bool exactStatus) +{ + LOG(info, "testGeneration(%s)", attr->getName().c_str()); + IntegerAttribute & ia = static_cast<IntegerAttribute &>(*attr.get()); + // add docs to trigger inc generation when data vector is full + AttributeVector::DocId docId; + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(0u, ia.getCurrentGeneration()); + ia.commit(true); + EXPECT_EQUAL(1u, ia.getCurrentGeneration()); + uint64_t lastAllocated; + uint64_t lastOnHold; + if (exactStatus) { + EXPECT_EQUAL(2u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); + } else { + EXPECT_LESS(0u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + 
{ + AttributeGuard ag(attr); // guard on generation 1 + EXPECT_TRUE(ia.addDoc(docId)); // inc gen + EXPECT_EQUAL(2u, ia.getCurrentGeneration()); + ia.commit(true); + EXPECT_EQUAL(3u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(4u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(2u, ia.getStatus().getOnHold()); // no cleanup due to guard + } else { + EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + EXPECT_TRUE(ia.addDoc(docId)); + EXPECT_EQUAL(3u, ia.getCurrentGeneration()); + { + AttributeGuard ag(attr); // guard on generation 3 + ia.commit(true); + EXPECT_EQUAL(4u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(4u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of addDoc() + } else { + EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + { + AttributeGuard ag(attr); // guard on generation 4 + EXPECT_TRUE(ia.addDoc(docId)); // inc gen + EXPECT_EQUAL(5u, ia.getCurrentGeneration()); + ia.commit(); + EXPECT_EQUAL(6u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(6u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(4u, ia.getStatus().getOnHold()); // no cleanup due to guard + } else { + EXPECT_LESS(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_LESS(lastOnHold, ia.getStatus().getOnHold()); + lastAllocated = ia.getStatus().getAllocated(); + lastOnHold = ia.getStatus().getOnHold(); + } + } + ia.commit(true); + EXPECT_EQUAL(7u, ia.getCurrentGeneration()); + if (exactStatus) { + EXPECT_EQUAL(6u, ia.getStatus().getAllocated()); + EXPECT_EQUAL(0u, ia.getStatus().getOnHold()); // cleanup at end of commit() + } else { + 
EXPECT_EQUAL(lastAllocated, ia.getStatus().getAllocated()); + EXPECT_GREATER(lastOnHold, ia.getStatus().getOnHold()); + } +} + +void +AttributeTest::testGeneration() +{ + { // single value attribute + Config cfg(BasicType::INT8); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("int8", cfg); + testGeneration(attr, true); + } + { // enum attribute (with fast search) + Config cfg(BasicType::INT8); + cfg.setFastSearch(true); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("faint8", cfg); + testGeneration(attr, false); + } + { // multi value attribute + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("aint8", cfg); + testGeneration(attr, false); + } + { // multi value enum attribute (with fast search) + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + cfg.setGrowStrategy(GrowStrategy(2, 0, 2)); + AttributePtr attr = AttributeFactory::createAttribute("faaint8", cfg); + testGeneration(attr, false); + } +} + + +void +AttributeTest::testCreateSerialNum() +{ + Config cfg(BasicType::INT32); + AttributePtr attr = AttributeFactory::createAttribute("int32", cfg); + attr->setCreateSerialNum(42u); + EXPECT_TRUE(attr->save()); + AttributePtr attr2 = AttributeFactory::createAttribute("int32", cfg); + EXPECT_TRUE(attr2->load()); + EXPECT_EQUAL(42u, attr2->getCreateSerialNum()); +} + + +template <typename VectorType, typename BufferType> +void +AttributeTest::testCompactLidSpace(const Config &config, + bool fs, + bool es) +{ + uint32_t highDocs = 100; + uint32_t trimmedDocs = 30; + vespalib::string bts = config.basicType().asString(); + vespalib::string cts = config.collectionType().asString(); + vespalib::string fas = fs ? "-fs" : ""; + vespalib::string ess = es ? 
"-es" : ""; + Config cfg = config; + cfg.setFastSearch(fs); + + vespalib::string name = clstmp + "/" + bts + "-" + cts + fas + ess; + LOG(info, "testCompactLidSpace(%s)", name.c_str()); + AttributePtr attr = AttributeFactory::createAttribute(name, cfg); + VectorType &v = static_cast<VectorType &>(*attr.get()); + attr->enableEnumeratedSave(es); + attr->addDocs(highDocs); + populate(v, 17); + AttributePtr attr2 = AttributeFactory::createAttribute(name, cfg); + VectorType &v2 = static_cast<VectorType &>(*attr2.get()); + attr2->enableEnumeratedSave(es); + attr2->addDocs(trimmedDocs); + populate(v2, 17); + EXPECT_EQUAL(trimmedDocs, attr2->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr2->getCommittedDocIdLimit()); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(highDocs, attr->getCommittedDocIdLimit()); + attr->compactLidSpace(trimmedDocs); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + EXPECT_TRUE(attr->save()); + EXPECT_EQUAL(highDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + AttributePtr attr3 = AttributeFactory::createAttribute(name, cfg); + EXPECT_TRUE(attr3->load()); + EXPECT_EQUAL(trimmedDocs, attr3->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr3->getCommittedDocIdLimit()); + VectorType &v3 = static_cast<VectorType &>(*attr3.get()); + compare<VectorType, BufferType>(v2, v3); + attr->shrinkLidSpace(); + EXPECT_EQUAL(trimmedDocs, attr->getNumDocs()); + EXPECT_EQUAL(trimmedDocs, attr->getCommittedDocIdLimit()); + compare<VectorType, BufferType>(v, v3); +} + + +template <typename VectorType, typename BufferType> +void +AttributeTest::testCompactLidSpace(const Config &config) +{ + testCompactLidSpace<VectorType, BufferType>(config, false, false); + testCompactLidSpace<VectorType, BufferType>(config, false, true); + bool smallUInt = isUnsignedSmallIntAttribute(config.basicType().type()); + if (smallUInt) + return; + testCompactLidSpace<VectorType, 
BufferType>(config, true, false); + testCompactLidSpace<VectorType, BufferType>(config, true, true); +} + + +void +AttributeTest::testCompactLidSpace(const Config &config) +{ + switch (config.basicType().type()) { + case BasicType::UINT1: + case BasicType::UINT2: + case BasicType::UINT4: + case BasicType::INT8: + case BasicType::INT16: + case BasicType::INT32: + case BasicType::INT64: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace<IntegerAttribute, + IntegerAttribute::WeightedInt>(config); + } else { + testCompactLidSpace<IntegerAttribute, + IntegerAttribute::largeint_t>(config); + } + break; + case BasicType::FLOAT: + case BasicType::DOUBLE: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>(config); + } else { + testCompactLidSpace<FloatingPointAttribute, double>(config); + } + break; + case BasicType::STRING: + if (config.collectionType() == CollectionType::WSET) { + testCompactLidSpace<StringAttribute, + StringAttribute::WeightedString>(config); + } else { + testCompactLidSpace<StringAttribute, vespalib::string>(config); + } + break; + default: + abort(); + } +} + + +void +AttributeTest::testCompactLidSpace() +{ + vespalib::rmdir(clstmp, true); + vespalib::mkdir(clstmp); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT1, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT2, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::UINT4, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT8, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::ARRAY))); + 
TEST_DO(testCompactLidSpace(Config(BasicType::INT16, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT32, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::INT64, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::FLOAT, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::DOUBLE, + CollectionType::WSET))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::SINGLE))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::ARRAY))); + TEST_DO(testCompactLidSpace(Config(BasicType::STRING, + CollectionType::WSET))); + vespalib::rmdir(clstmp, true); +} + +template <typename AttributeType> +void +AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config, bool fastSearch) +{ + uint32_t numDocs = 10; + vespalib::string attrName = asuDir + "/" + config.basicType().asString() + "-" + + config.collectionType().asString() + (fastSearch ? 
"-fs" : ""); + Config cfg = config; + cfg.setFastSearch(fastSearch); + + AttributePtr attrPtr = AttributeFactory::createAttribute(attrName, cfg); + addDocs(attrPtr, numDocs); + AddressSpaceUsage before = attrPtr->getAddressSpaceUsage(); + populate(static_cast<AttributeType &>(*attrPtr.get()), 5); + AddressSpaceUsage after = attrPtr->getAddressSpaceUsage(); + if (attrPtr->hasEnum()) { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has enum", attrName.c_str()); + EXPECT_EQUAL(before.enumStoreUsage().used(), 0u); + EXPECT_GREATER(after.enumStoreUsage().used(), before.enumStoreUsage().used()); + EXPECT_EQUAL(after.enumStoreUsage().limit(), before.enumStoreUsage().limit()); + EXPECT_EQUAL(34359738368u, after.enumStoreUsage().limit()); // EnumStoreBase::DataStoreType::RefType::offsetSize() + } else { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT enum", attrName.c_str()); + EXPECT_EQUAL(before.enumStoreUsage().used(), 0u); + EXPECT_EQUAL(after.enumStoreUsage(), before.enumStoreUsage()); + EXPECT_EQUAL(AddressSpaceUsage::defaultEnumStoreUsage(), after.enumStoreUsage()); + } + if (attrPtr->hasMultiValue()) { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): Has multi-value", attrName.c_str()); + EXPECT_EQUAL(before.multiValueUsage().used(), 0u); + EXPECT_GREATER(after.multiValueUsage().used(), before.multiValueUsage().used()); + EXPECT_EQUAL(after.multiValueUsage().limit(), before.multiValueUsage().limit()); + EXPECT_EQUAL(134217728u, after.multiValueUsage().limit()); // multivalue::Index32::offsetSize() + } else { + LOG(info, "requireThatAddressSpaceUsageIsReported(%s): NOT multi-value", attrName.c_str()); + EXPECT_EQUAL(before.multiValueUsage().used(), 0u); + EXPECT_EQUAL(after.multiValueUsage(), before.multiValueUsage()); + EXPECT_EQUAL(AddressSpaceUsage::defaultMultiValueUsage(), after.multiValueUsage()); + } +} + +template <typename AttributeType> +void +AttributeTest::requireThatAddressSpaceUsageIsReported(const Config &config) +{ + 
requireThatAddressSpaceUsageIsReported<AttributeType>(config, false); + requireThatAddressSpaceUsageIsReported<AttributeType>(config, true); +} + +void +AttributeTest::requireThatAddressSpaceUsageIsReported() +{ + vespalib::rmdir(asuDir, true); + vespalib::mkdir(asuDir); + TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported<IntegerAttribute>(Config(BasicType::INT32, CollectionType::ARRAY))); + TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported<FloatingPointAttribute>(Config(BasicType::FLOAT, CollectionType::ARRAY))); + TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::SINGLE))); + TEST_DO(requireThatAddressSpaceUsageIsReported<StringAttribute>(Config(BasicType::STRING, CollectionType::ARRAY))); +} + +int AttributeTest::Main() +{ + TEST_INIT("attribute_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + testBaseName(); + testReload(); + testHasLoadData(); + testMemorySaver(); + + testSingle(); + testArray(); + testWeightedSet(); + testArithmeticValueUpdate(); + testArithmeticWithUndefinedValue(); + testMapValueUpdate(); + testStatus(); + testNullProtection(); + testGeneration(); + testCreateSerialNum(); + TEST_DO(testCompactLidSpace()); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + + TEST_DONE(); +} + +} + + +TEST_APPHOOK(search::AttributeTest); diff --git a/searchlib/src/tests/attribute/attribute_test.sh b/searchlib/src/tests/attribute/attribute_test.sh new file mode 100644 index 00000000000..89c52129b74 --- /dev/null +++ b/searchlib/src/tests/attribute/attribute_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_attribute_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git 
a/searchlib/src/tests/attribute/attributebenchmark.cpp b/searchlib/src/tests/attribute/attributebenchmark.cpp new file mode 100644 index 00000000000..88446ef71f7 --- /dev/null +++ b/searchlib/src/tests/attribute/attributebenchmark.cpp @@ -0,0 +1,678 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include "attributesearcher.h" +#include "attributeupdater.h" +#include <vespa/searchlib/util/randomgenerator.h> +#include "runnable.h" +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/vespalib/util/sync.h> +#include <iostream> +#include <fstream> +#include <vespa/log/log.h> + +LOG_SETUP("attributebenchmark"); + +#include <vespa/searchlib/attribute/attributevector.hpp> + +using vespalib::Monitor; +using vespalib::MonitorGuard; +using std::shared_ptr; + +typedef std::vector<uint32_t> NumVector; +typedef std::vector<vespalib::string> StringVector; +typedef AttributeVector::SP AttributePtr; +typedef AttributeVector::DocId DocId; +typedef search::attribute::Config AttrConfig; +using search::attribute::BasicType; +using search::attribute::CollectionType; + +namespace search { + +class AttributeBenchmark : public FastOS_Application +{ +private: + class Config { + public: + vespalib::string _attribute; + uint32_t _numDocs; + uint32_t _numUpdates; + uint32_t _numValues; + uint32_t _numSearchers; + uint32_t _numQueries; + bool _searchersOnly; + bool _validate; + uint32_t _populateRuns; + uint32_t _updateRuns; + uint32_t _commitFreq; + uint32_t 
_minValueCount; + uint32_t _maxValueCount; + uint32_t _minStringLen; + uint32_t _maxStringLen; + uint32_t _seed; + bool _writeAttribute; + int64_t _rangeStart; + int64_t _rangeEnd; + int64_t _rangeDelta; + bool _rangeSearch; + uint32_t _prefixLength; + bool _prefixSearch; + + + Config() : _attribute(""), _numDocs(0), _numUpdates(0), _numValues(0), + _numSearchers(0), _numQueries(0), _searchersOnly(true), _validate(false), _populateRuns(0), _updateRuns(0), + _commitFreq(0), _minValueCount(0), _maxValueCount(0), _minStringLen(0), _maxStringLen(0), _seed(0), + _writeAttribute(false), _rangeStart(0), _rangeEnd(0), _rangeDelta(0), _rangeSearch(false), + _prefixLength(0), _prefixSearch(false) {} + void printXML() const; + }; + + class Resource { + private: + std::vector<struct rusage> _usages; + struct rusage _reset; + + public: + Resource() : _usages(), _reset() { reset(); }; + void reset() { + getrusage(0, &_reset); + } + void saveUsage() { + struct rusage now; + getrusage(0, &now); + struct rusage usage = computeDifference(_reset, now); + _usages.push_back(usage); + } + void printLastXML(uint32_t opCount) { + (void) opCount; + struct rusage & usage = _usages.back(); + std::cout << "<ru_utime>" << usage.ru_utime.tv_sec * 1000 + usage.ru_utime.tv_usec / 1000 + << "</ru_utime>" << std::endl; + std::cout << "<ru_stime>" << usage.ru_stime.tv_sec * 1000 + usage.ru_stime.tv_usec / 1000 + << "</ru_stime>" << std::endl; + std::cout << "<ru_nvcsw>" << usage.ru_nvcsw << "</ru_nvcsw>" << std::endl; + std::cout << "<ru_nivcsw>" << usage.ru_nivcsw << "</ru_nivcsw>" << std::endl; + } + static struct rusage computeDifference(struct rusage & first, struct rusage & second); + }; + + FastOS_ThreadPool * _threadPool; + Config _config; + RandomGenerator _rndGen; + + void init(const Config & config); + void usage(); + + // benchmark helper methods + void addDocs(const AttributePtr & ptr, uint32_t numDocs); + template <typename Vector, typename T, typename BT> + void benchmarkPopulate(const 
AttributePtr & ptr, const std::vector<T> & values, uint32_t id); + template <typename Vector, typename T, typename BT> + void benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id); + + template <typename T> + std::vector<vespalib::string> prepareForPrefixSearch(const std::vector<T> & values) const; + template <typename T> + void benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values); + template <typename Vector, typename T, typename BT> + void benchmarkSearchWithUpdater(const AttributePtr & ptr, + const std::vector<T> & values); + + template <typename Vector, typename T, typename BT> + void benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values); + + // Numeric Attribute + void benchmarkNumeric(const AttributePtr & ptr); + + // String Attribute + void benchmarkString(const AttributePtr & ptr); + + +public: + AttributeBenchmark() : _threadPool(NULL), _config(), _rndGen() {} + ~AttributeBenchmark() { + if (_threadPool != NULL) { + delete _threadPool; + } + } + int Main(); +}; + + +void +AttributeBenchmark::Config::printXML() const +{ + std::cout << "<config>" << std::endl; + std::cout << "<attribute>" << _attribute << "</attribute>" << std::endl; + std::cout << "<num-docs>" << _numDocs << "</num-docs>" << std::endl; + std::cout << "<num-updates>" << _numUpdates << "</num-updates>" << std::endl; + std::cout << "<num-values>" << _numValues << "</num-values>" << std::endl; + std::cout << "<num-searchers>" << _numSearchers << "</num-searchers>" << std::endl; + std::cout << "<num-queries>" << _numQueries << "</num-queries>" << std::endl; + std::cout << "<searchers-only>" << (_searchersOnly ? "true" : "false") << "</searchers-only>" << std::endl; + std::cout << "<validate>" << (_validate ? 
"true" : "false") << "</validate>" << std::endl; + std::cout << "<populate-runs>" << _populateRuns << "</populate-runs>" << std::endl; + std::cout << "<update-runs>" << _updateRuns << "</update-runs>" << std::endl; + std::cout << "<commit-freq>" << _commitFreq << "</commit-freq>" << std::endl; + std::cout << "<min-value-count>" << _minValueCount << "</min-value-count>" << std::endl; + std::cout << "<max-value-count>" << _maxValueCount << "</max-value-count>" << std::endl; + std::cout << "<min-string-len>" << _minStringLen << "</min-string-len>" << std::endl; + std::cout << "<max-string-len>" << _maxStringLen << "</max-string-len>" << std::endl; + std::cout << "<seed>" << _seed << "</seed>" << std::endl; + std::cout << "<range-start>" << _rangeStart << "</range-start>" << std::endl; + std::cout << "<range-end>" << _rangeEnd << "</range-end>" << std::endl; + std::cout << "<range-delta>" << _rangeDelta << "</range-delta>" << std::endl; + std::cout << "<range-search>" << (_rangeSearch ? "true" : "false") << "</range-search>" << std::endl; + std::cout << "<prefix-length>" << _prefixLength << "</range-length>" << std::endl; + std::cout << "<prefix-search>" << (_prefixSearch ? 
"true" : "false") << "</prefix-search>" << std::endl; + std::cout << "</config>" << std::endl; +} + +void +AttributeBenchmark::init(const Config & config) +{ + _config = config; + _rndGen.srand(_config._seed); +} + + +//----------------------------------------------------------------------------- +// Benchmark helper methods +//----------------------------------------------------------------------------- +void +AttributeBenchmark::addDocs(const AttributePtr & ptr, uint32_t numDocs) +{ + DocId startDoc; + DocId lastDoc; + bool success = ptr->addDocs(startDoc, lastDoc, numDocs); + assert(success); + (void) success; + assert(startDoc == 0); + assert(lastDoc + 1 == numDocs); + assert(ptr->getNumDocs() == numDocs); +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkPopulate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id) +{ + std::cout << "<!-- Populate " << _config._numDocs << " documents -->" << std::endl; + AttributeUpdater<Vector, T, BT> + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + updater.populate(); + std::cout << "<populate id='" << id << "'>" << std::endl; + updater.getStatus().printXML(); + std::cout << "</populate>" << std::endl; + if (_config._validate) { + std::cout << "<!-- All " << updater.getValidator().getTotalCnt() + << " asserts passed -->" << std::endl; + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkUpdate(const AttributePtr & ptr, const std::vector<T> & values, uint32_t id) +{ + std::cout << "<!-- Apply " << _config._numUpdates << " updates -->" << std::endl; + AttributeUpdater<Vector, T, BT> + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + updater.update(_config._numUpdates); + std::cout << "<update id='" << id << "'>" << std::endl; + updater.getStatus().printXML(); + std::cout << 
"</update>" << std::endl; + if (_config._validate) { + std::cout << "<!-- All " << updater.getValidator().getTotalCnt() + << " asserts passed -->" << std::endl; + } +} + +template <typename T> +std::vector<vespalib::string> +AttributeBenchmark::prepareForPrefixSearch(const std::vector<T> & values) const +{ + (void) values; + return std::vector<vespalib::string>(); +} + +template <> +std::vector<vespalib::string> +AttributeBenchmark::prepareForPrefixSearch(const std::vector<AttributeVector::WeightedString> & values) const +{ + std::vector<vespalib::string> retval; + retval.reserve(values.size()); + for (size_t i = 0; i < values.size(); ++i) { + retval.push_back(values[i].getValue().substr(0, _config._prefixLength)); + } + return retval; +} + +template <typename T> +void +AttributeBenchmark::benchmarkSearch(const AttributePtr & ptr, const std::vector<T> & values) +{ + std::vector<AttributeSearcher *> searchers; + if (_config._numSearchers > 0) { + std::cout << "<!-- Starting " << _config._numSearchers << " searcher threads with " + << _config._numQueries << " queries each -->" << std::endl; + + std::vector<vespalib::string> prefixStrings = prepareForPrefixSearch(values); + + for (uint32_t i = 0; i < _config._numSearchers; ++i) { + if (_config._rangeSearch) { + RangeSpec spec(_config._rangeStart, _config._rangeEnd, _config._rangeDelta); + searchers.push_back(new AttributeRangeSearcher(i, ptr, spec, _config._numQueries)); + } else if (_config._prefixSearch) { + searchers.push_back(new AttributePrefixSearcher(i, ptr, prefixStrings, _config._numQueries)); + } else { + searchers.push_back(new AttributeFindSearcher<T>(i, ptr, values, _config._numQueries)); + } + _threadPool->NewThread(searchers.back()); + } + + for (uint32_t i = 0; i < searchers.size(); ++i) { + searchers[i]->join(); + } + + AttributeSearcherStatus totalStatus; + for (uint32_t i = 0; i < searchers.size(); ++i) { + std::cout << "<searcher-summary id='" << i << "'>" << std::endl; + 
searchers[i]->getStatus().printXML(); + std::cout << "</searcher-summary>" << std::endl; + totalStatus.merge(searchers[i]->getStatus()); + delete searchers[i]; + } + std::cout << "<total-searcher-summary>" << std::endl; + totalStatus.printXML(); + std::cout << "</total-searcher-summary>" << std::endl; + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkSearchWithUpdater(const AttributePtr & ptr, + const std::vector<T> & values) +{ + if (_config._numSearchers > 0) { + std::cout << "<!-- Starting 1 updater thread -->" << std::endl; + AttributeUpdaterThread<Vector, T, BT> + updater(ptr, values, _rndGen, _config._validate, _config._commitFreq, + _config._minValueCount, _config._maxValueCount); + _threadPool->NewThread(&updater); + benchmarkSearch(ptr, values); + updater.stop(); + updater.join(); + std::cout << "<updater-summary>" << std::endl; + updater.getStatus().printXML(); + std::cout << "</updater-summary>" << std::endl; + if (_config._validate) { + std::cout << "<!-- All " << updater.getValidator().getTotalCnt() + << " asserts passed -->" << std::endl; + } + } +} + +template <typename Vector, typename T, typename BT> +void +AttributeBenchmark::benchmarkAttribute(const AttributePtr & ptr, const std::vector<T> & values) +{ + addDocs(ptr, _config._numDocs); + + // populate + for (uint32_t i = 0; i < _config._populateRuns; ++i) { + benchmarkPopulate<Vector, T, BT>(ptr, values, i); + } + + // update + if (_config._numUpdates > 0) { + for (uint32_t i = 0; i < _config._updateRuns; ++i) { + benchmarkUpdate<Vector, T, BT>(ptr, values, i); + } + } + + // search + if (_config._searchersOnly) { + benchmarkSearch(ptr, values); + } else { + benchmarkSearchWithUpdater<Vector, T, BT>(ptr, values); + } + + _threadPool->Close(); +} + + +//----------------------------------------------------------------------------- +// Numeric Attribute +//----------------------------------------------------------------------------- +void 
+AttributeBenchmark::benchmarkNumeric(const AttributePtr & ptr) +{ + NumVector values; + if (_config._rangeSearch) { + values.reserve(_config._numValues); + for (uint32_t i = 0; i < _config._numValues; ++i) { + values.push_back(i); + } + } else { + _rndGen.fillRandomIntegers(values, _config._numValues); + } + + std::vector<int32_t> weights; + _rndGen.fillRandomIntegers(weights, _config._numValues); + + std::vector<AttributeVector::WeightedInt> weightedVector; + weightedVector.reserve(values.size()); + for (size_t i = 0; i < values.size(); ++i) { + if (!ptr->hasWeightedSetType()) { + weightedVector.push_back(AttributeVector::WeightedInt(values[i])); + } else { + weightedVector.push_back(AttributeVector::WeightedInt(values[i], weights[i])); + } + } + benchmarkAttribute<IntegerAttribute, AttributeVector::WeightedInt, AttributeVector::WeightedInt> + (ptr, weightedVector); +} + + +//----------------------------------------------------------------------------- +// String Attribute +//----------------------------------------------------------------------------- +void +AttributeBenchmark::benchmarkString(const AttributePtr & ptr) +{ + StringVector strings; + _rndGen.fillRandomStrings(strings, _config._numValues, _config._minStringLen, _config._maxStringLen); + + std::vector<int32_t> weights; + _rndGen.fillRandomIntegers(weights, _config._numValues); + + std::vector<AttributeVector::WeightedString> weightedVector; + weightedVector.reserve(strings.size()); + for (size_t i = 0; i < strings.size(); ++i) { + if (!ptr->hasWeightedSetType()) { + weightedVector.push_back(AttributeVector::WeightedString(strings[i])); + } else { + weightedVector.push_back(AttributeVector::WeightedString(strings[i], weights[i])); + } + } + benchmarkAttribute<StringAttribute, AttributeVector::WeightedString, AttributeVector::WeightedString> + (ptr, weightedVector); +} + + +//----------------------------------------------------------------------------- +// Resource utilization 
+//----------------------------------------------------------------------------- +struct rusage +AttributeBenchmark::Resource::computeDifference(struct rusage & first, struct rusage & second) +{ + struct rusage result; + // utime + uint64_t firstutime = first.ru_utime.tv_sec * 1000000 + first.ru_utime.tv_usec; + uint64_t secondutime = second.ru_utime.tv_sec * 1000000 + second.ru_utime.tv_usec; + uint64_t resultutime = secondutime - firstutime; + result.ru_utime.tv_sec = resultutime / 1000000; + result.ru_utime.tv_usec = resultutime % 1000000; + + // stime + uint64_t firststime = first.ru_stime.tv_sec * 1000000 + first.ru_stime.tv_usec; + uint64_t secondstime = second.ru_stime.tv_sec * 1000000 + second.ru_stime.tv_usec; + uint64_t resultstime = secondstime - firststime; + result.ru_stime.tv_sec = resultstime / 1000000; + result.ru_stime.tv_usec = resultstime % 1000000; + + result.ru_maxrss = second.ru_maxrss; // - first.ru_maxrss; + result.ru_ixrss = second.ru_ixrss; // - first.ru_ixrss; + result.ru_idrss = second.ru_idrss; // - first.ru_idrss; + result.ru_isrss = second.ru_isrss; // - first.ru_isrss; + result.ru_minflt = second.ru_minflt - first.ru_minflt; + result.ru_majflt = second.ru_majflt - first.ru_majflt; + result.ru_nswap = second.ru_nswap - first.ru_nswap; + result.ru_inblock = second.ru_inblock - first.ru_inblock; + result.ru_oublock = second.ru_oublock - first.ru_oublock; + result.ru_msgsnd = second.ru_msgsnd - first.ru_msgsnd; + result.ru_msgrcv = second.ru_msgrcv - first.ru_msgrcv; + result.ru_nsignals = second.ru_nsignals - first.ru_nsignals; + result.ru_nvcsw = second.ru_nvcsw - first.ru_nvcsw; + result.ru_nivcsw = second.ru_nivcsw - first.ru_nivcsw; + + return result; +} + + +void +AttributeBenchmark::usage() +{ + std::cout << "usage: attributebenchmark [-n numDocs] [-u numUpdates] [-v numValues]" << std::endl; + std::cout << " [-s numSearchers] [-q numQueries] [-p populateRuns] [-r updateRuns]" << std::endl; + std::cout << " [-c commitFrequency] 
[-l minValueCount] [-h maxValueCount]" << std::endl; + std::cout << " [-i minStringLen] [-a maxStringLen] [-e seed]" << std::endl; + std::cout << " [-S rangeStart] [-E rangeEnd] [-D rangeDelta] [-L prefixLength]" << std::endl; + std::cout << " [-b (searchers with updater)] [-R (range search)] [-P (prefix search)]" << std::endl; + std::cout << " [-t (validate updates)] [-w (write attribute to disk)]" << std::endl; + std::cout << " <attribute>" << std::endl; + std::cout << " <attribute> : s-uint32, a-uint32, ws-uint32" << std::endl; + std::cout << " s-fa-uint32, a-fa-uint32, ws-fa-uint32" << std::endl; + std::cout << " s-fs-uint32, a-fs-uint32, ws-fs-uint32 ws-frs-uint32" << std::endl; + std::cout << " s-string, a-string, ws-string" << std::endl; + std::cout << " s-fs-string, a-fs-string, ws-fs-string ws-frs-string" << std::endl; +} + +int +AttributeBenchmark::Main() +{ + Config dc; + dc._numDocs = 50000; + dc._numUpdates = 50000; + dc._numValues = 1000; + dc._numSearchers = 0; + dc._numQueries = 1000; + dc._searchersOnly = true; + dc._validate = false; + dc._populateRuns = 1; + dc._updateRuns = 1; + dc._commitFreq = 1000; + dc._minValueCount = 0; + dc._maxValueCount = 20; + dc._minStringLen = 1; + dc._maxStringLen = 50; + dc._seed = 555; + dc._writeAttribute = false; + dc._rangeStart = 0; + dc._rangeEnd = 1000; + dc._rangeDelta = 10; + dc._rangeSearch = false; + dc._prefixLength = 2; + dc._prefixSearch = false; + + int idx = 1; + char opt; + const char * arg; + bool optError = false; + while ((opt = GetOpt("n:u:v:s:q:p:r:c:l:h:i:a:e:S:E:D:L:bRPtw", arg, idx)) != -1) { + switch (opt) { + case 'n': + dc._numDocs = atoi(arg); + break; + case 'u': + dc._numUpdates = atoi(arg); + break; + case 'v': + dc._numValues = atoi(arg); + break; + case 's': + dc._numSearchers = atoi(arg); + break; + case 'q': + dc._numQueries = atoi(arg); + break; + case 'p': + dc._populateRuns = atoi(arg); + break; + case 'r': + dc._updateRuns = atoi(arg); + break; + case 'c': + dc._commitFreq = 
atoi(arg); + break; + case 'l': + dc._minValueCount = atoi(arg); + break; + case 'h': + dc._maxValueCount = atoi(arg); + break; + case 'i': + dc._minStringLen = atoi(arg); + break; + case 'a': + dc._maxStringLen = atoi(arg); + break; + case 'e': + dc._seed = atoi(arg); + break; + case 'S': + dc._rangeStart = strtoll(arg, NULL, 10); + break; + case 'E': + dc._rangeEnd = strtoll(arg, NULL, 10); + break; + case 'D': + dc._rangeDelta = strtoll(arg, NULL, 10); + break; + case 'L': + dc._prefixLength = atoi(arg); + break; + case 'b': + dc._searchersOnly = false; + break; + case 'R': + dc._rangeSearch = true; + break; + case 'P': + dc._prefixSearch = true; + break; + case 't': + dc._validate = true; + break; + case 'w': + dc._writeAttribute = true; + break; + default: + optError = true; + break; + } + } + + if (_argc != (idx + 1) || optError) { + usage(); + return -1; + } + + dc._attribute = vespalib::string(_argv[idx]); + + _threadPool = new FastOS_ThreadPool(256000); + + std::cout << "<attribute-benchmark>" << std::endl; + init(dc); + _config.printXML(); + + AttributePtr ptr; + + if (_config._attribute == "s-int32") { + std::cout << "<!-- Benchmark SingleValueNumericAttribute<int32_t> -->" << std::endl; + ptr = AttributeFactory::createAttribute("s-int32", AttrConfig(BasicType::INT32, CollectionType::SINGLE)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "a-int32") { + std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (array) -->" << std::endl; + ptr = AttributeFactory::createAttribute("a-int32", AttrConfig(BasicType::INT32, CollectionType::ARRAY)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "ws-int32") { + std::cout << "<!-- Benchmark MultiValueNumericAttribute<int32_t> (wset) -->" << std::endl; + ptr = AttributeFactory::createAttribute("ws-int32", AttrConfig(BasicType::INT32, CollectionType::WSET)); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "s-fs-int32") { + std::cout << "<!-- Benchmark 
SingleValueNumericPostingAttribute<int32_t> -->" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("s-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "a-fs-int32") { + std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (array) -->" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("a-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "ws-fs-int32") { + std::cout << "<!-- Benchmark MultiValueNumericPostingAttribute<int32_t> (wset) -->" << std::endl; + AttrConfig cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("ws-fs-int32", cfg); + benchmarkNumeric(ptr); + + } else if (_config._attribute == "s-string") { + std::cout << "<!-- Benchmark SingleValueStringAttribute -->" << std::endl; + ptr = AttributeFactory::createAttribute("s-string", AttrConfig(BasicType::STRING, CollectionType::SINGLE)); + benchmarkString(ptr); + + } else if (_config._attribute == "a-string") { + std::cout << "<!-- Benchmark ArrayStringAttribute (array) -->" << std::endl; + ptr = AttributeFactory::createAttribute("a-string", AttrConfig(BasicType::STRING, CollectionType::ARRAY)); + benchmarkString(ptr); + + } else if (_config._attribute == "ws-string") { + std::cout << "<!-- Benchmark WeightedSetStringAttribute (wset) -->" << std::endl; + ptr = AttributeFactory::createAttribute("ws-string", AttrConfig(BasicType::STRING, CollectionType::WSET)); + benchmarkString(ptr); + + } else if (_config._attribute == "s-fs-string") { + std::cout << "<!-- Benchmark SingleValueStringPostingAttribute (single fast search) -->" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("s-fs-string", cfg); + 
benchmarkString(ptr); + + } else if (_config._attribute == "a-fs-string") { + std::cout << "<!-- Benchmark ArrayStringPostingAttribute (array fast search) -->" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("a-fs-string", cfg); + benchmarkString(ptr); + + } else if (_config._attribute == "ws-fs-string") { + std::cout << "<!-- Benchmark WeightedSetStringPostingAttribute (wset fast search) -->" << std::endl; + AttrConfig cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + ptr = AttributeFactory::createAttribute("ws-fs-string", cfg); + benchmarkString(ptr); + + } + + if (dc._writeAttribute) { + std::cout << "<!-- Writing attribute to disk -->" << std::endl; + ptr->saveAs(ptr->getBaseFileName()); + } + + std::cout << "</attribute-benchmark>" << std::endl; + + return 0; +} +} + +int main(int argc, char ** argv) +{ + search::AttributeBenchmark myapp; + return myapp.Entry(argc, argv); +} + diff --git a/searchlib/src/tests/attribute/attributebenchmark.rb b/searchlib/src/tests/attribute/attributebenchmark.rb new file mode 100644 index 00000000000..44b08ec4389 --- /dev/null +++ b/searchlib/src/tests/attribute/attributebenchmark.rb @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vectors = ["sv-num-new", "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"] +num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000] +unique_percent = [0.001, 0.01, 0.05, 0.20, 0.50] + +vectors.each do |vector| + num_docs.each do |num| + unique_percent.each do |percent| + unique = num * percent + command = "./attributebenchmark -n #{num} -u 1000000 -v #{unique} -p 2 -r 1 -s 1 -q 1000 #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-p2-r1-s1-q1000.log 2>&1" + puts command + `#{command}` + s = 1 + 5.times do + command = "./attributebenchmark -n #{num} -v #{unique} -p 1 -r 0 -s #{s} -q 100 -b #{vector} > 03-27-full/#{vector}-n#{num}-v#{unique}-s#{s}-q100-b.log 2>&1" + puts command + `#{command}` + s = s*2; + end + end + end +end diff --git a/searchlib/src/tests/attribute/attributefilewriter/.gitignore b/searchlib/src/tests/attribute/attributefilewriter/.gitignore new file mode 100644 index 00000000000..ea6a0e03bf2 --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/.gitignore @@ -0,0 +1 @@ +searchlib_attributefilewriter_test_app diff --git a/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt new file mode 100644 index 00000000000..a1d859bbfb9 --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attributefilewriter_test_app + SOURCES + attributefilewriter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributefilewriter_test_app COMMAND searchlib_attributefilewriter_test_app) diff --git a/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp new file mode 100644 index 00000000000..acf61cd58bb --- /dev/null +++ b/searchlib/src/tests/attribute/attributefilewriter/attributefilewriter_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attributefilewriter_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/attribute/attributefilewriter.h> +#include <vespa/searchlib/attribute/attributefilebufferwriter.h> +#include <vespa/searchlib/util/fileutil.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/common/tunefileinfo.h> +#include <vespa/searchlib/common/fileheadercontext.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> + +using search::index::DummyFileHeaderContext; + +namespace search +{ + +namespace +{ + +vespalib::string testFileName("test.dat"); +vespalib::string hello("Hello world"); + +void removeTestFile() { FastOS_File::Delete(testFileName.c_str()); } + +struct Fixture { + TuneFileAttributes _tuneFileAttributes; + DummyFileHeaderContext _fileHeaderContext; + IAttributeSaveTarget::Config _cfg; + const vespalib::string _desc; + AttributeFileWriter _writer; + + Fixture() + : _tuneFileAttributes(), + _fileHeaderContext(), + _cfg(), + _desc("Attribute file sample description"), + _writer(_tuneFileAttributes, + _fileHeaderContext, + _cfg, + _desc) + { + removeTestFile(); + } + + ~Fixture() { + removeTestFile(); + } + +}; + +} + + 
+TEST_F("Test that we can write empty attribute file", Fixture) +{ + EXPECT_TRUE(f._writer.open(testFileName)); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(0u, loaded->size()); +} + + +TEST_F("Test that we destroy writer without calling close", Fixture) +{ + EXPECT_TRUE(f._writer.open(testFileName)); +} + + +TEST_F("Test that buffer writer passes on written data", Fixture) +{ + std::vector<int> a; + const size_t mysize = 3000000; + const size_t writerBufferSize = AttributeFileBufferWriter::BUFFER_SIZE; + EXPECT_GREATER(mysize * sizeof(int), writerBufferSize); + a.reserve(mysize); + search::Rand48 rnd; + for (uint32_t i = 0; i < mysize; ++i) { + a.emplace_back(rnd.lrand48()); + } + EXPECT_TRUE(f._writer.open(testFileName)); + std::unique_ptr<BufferWriter> writer(f._writer.allocBufferWriter()); + writer->write(&a[0], a.size() * sizeof(int)); + writer->flush(); + writer.reset(); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(a.size() * sizeof(int), loaded->size()); + EXPECT_TRUE(memcmp(&a[0], loaded->buffer(), loaded->size()) == 0); +} + + +TEST_F("Test that we can pass buffer directly", Fixture) +{ + using Buffer = IAttributeFileWriter::Buffer; + Buffer buf = f._writer.allocBuf(hello.size()); + buf->writeBytes(hello.c_str(), hello.size()); + EXPECT_TRUE(f._writer.open(testFileName)); + f._writer.writeBuf(std::move(buf)); + f._writer.close(); + FileUtil::LoadedBuffer::UP loaded(FileUtil::loadFile(testFileName)); + EXPECT_EQUAL(hello.size(), loaded->size()); + EXPECT_TRUE(memcmp(hello.c_str(), loaded->buffer(), loaded->size()) == 0); +} + + +} + + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/attribute/attributeguard.cpp b/searchlib/src/tests/attribute/attributeguard.cpp new file mode 100644 index 00000000000..5c90caa094b --- /dev/null +++ b/searchlib/src/tests/attribute/attributeguard.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attributeguard_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/extendableattributes.h> + +namespace search { + +class AttributeGuardTest : public vespalib::TestApp +{ +public: + int Main(); +}; + +int +AttributeGuardTest::Main() +{ + TEST_INIT("attributeguard_test"); + + + AttributeVector::SP ssattr(new SingleStringExtAttribute("ss1")); + AttributeEnumGuard guard(ssattr); + EXPECT_TRUE(guard.valid()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::AttributeGuardTest); diff --git a/searchlib/src/tests/attribute/attributeguard_test.sh b/searchlib/src/tests/attribute/attributeguard_test.sh new file mode 100644 index 00000000000..6a9557e7da7 --- /dev/null +++ b/searchlib/src/tests/attribute/attributeguard_test.sh @@ -0,0 +1,7 @@ +#!/bin/bahs +$VALGRIND ./searchlib_attributeguard_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git a/searchlib/src/tests/attribute/attributemanager/.gitignore b/searchlib/src/tests/attribute/attributemanager/.gitignore new file mode 100644 index 00000000000..6fa89f09572 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +attributemanager_test +searchlib_attributemanager_test_app diff --git a/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt new file mode 100644 index 00000000000..ed3eeee1065 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_attributemanager_test_app + SOURCES + attributemanager_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributemanager_test_app COMMAND searchlib_attributemanager_test_app) diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp new file mode 100644 index 00000000000..bf247668843 --- /dev/null +++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp @@ -0,0 +1,422 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attribute_test"); +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributemanager.h> +#include <vespa/searchlib/attribute/configconverter.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.hpp> +#include <vespa/searchlib/attribute/stringattribute.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <algorithm> + +using namespace config; +using namespace vespa::config::search; +using namespace search; +using namespace search::attribute; +using vespalib::tensor::TensorType; +using std::shared_ptr; + +typedef BasicType BT; +typedef CollectionType CT; +typedef AttributeVector::SP AVSP; + +namespace search { + +class AttributeManagerTest : public vespalib::TestApp +{ +private: + void verifyLoad(AttributeVector & v); + void testLoad(); + void testGuards(); + void testConfigConvert(); + void testContext(); + + bool + assertDataType(BT::Type exp, + AttributesConfig::Attribute::Datatype in); + + bool + assertCollectionType(CollectionType exp, + AttributesConfig::Attribute::Collectiontype in, + bool removeIfZ = false, + bool 
createIfNe = false); + +public: + AttributeManagerTest() + { + } + int Main(); +}; + + +typedef MultiValueNumericAttribute< IntegerAttributeTemplate<int32_t>, + multivalue::MVMTemplateArg< + multivalue::Value<int32_t>, multivalue::Index32> > +TestAttributeBase; + +class TestAttribute : public TestAttributeBase +{ +public: + TestAttribute(const std::string &name) + : + TestAttributeBase(name) + { + } + + generation_t + getGen() const + { + return getCurrentGeneration(); + } + + uint32_t + getRefCount(generation_t gen) const + { + return getGenerationRefCount(gen); + } + + void + incGen() + { + incGeneration(); + } + + void + updateFirstUsedGen(void) + { + updateFirstUsedGeneration(); + } + + generation_t + getFirstUsedGen() const + { + return getFirstUsedGeneration(); + } +}; + + +void +AttributeManagerTest::testGuards() +{ + AttributeVector::SP vec(new TestAttribute("mvint") ); + TestAttribute * v = static_cast<TestAttribute *> (vec.get()); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + { + AttributeGuard g0(vec); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + { + AttributeGuard g1(vec); + EXPECT_EQUAL(v->getGen(), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(2)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(0)); + + v->incGen(); + EXPECT_EQUAL(v->getGen(), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + { + AttributeGuard g0(vec); + EXPECT_EQUAL(v->getGen(), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + 
EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + { + v->incGen(); + AttributeGuard g1(vec); + EXPECT_EQUAL(v->getGen(), unsigned(2)); + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(1)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(1)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + } + EXPECT_EQUAL(v->getRefCount(0), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(1), unsigned(0)); + EXPECT_EQUAL(v->getRefCount(2), unsigned(0)); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(1)); + v->updateFirstUsedGeneration(); + EXPECT_EQUAL(v->getFirstUsedGen(), unsigned(2)); + EXPECT_EQUAL(v->getGen(), unsigned(2)); +} + + +void +AttributeManagerTest::verifyLoad(AttributeVector & v) +{ + EXPECT_TRUE( !v.isLoaded() ); + EXPECT_TRUE( v.load() ); + EXPECT_TRUE( v.isLoaded() ); + EXPECT_EQUAL( v.getNumDocs(), size_t(100) ); +} + + +void +AttributeManagerTest::testLoad() +{ + { + TestAttributeBase v("mvint"); + EXPECT_TRUE(!v.isLoaded()); + for(size_t i(0); i < 100; i++) { + AttributeVector::DocId doc; + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_TRUE( doc == i); + } + EXPECT_TRUE( v.getNumDocs() == 100); + for(size_t i(0); i < 100; i++) { + for(size_t j(0); j < i; j++) { + EXPECT_TRUE( v.append(i, j, 1) ); + } + v.commit(); + EXPECT_TRUE(size_t(v.getValueCount(i)) == i); + EXPECT_EQUAL(v.getMaxValueCount(), std::max(size_t(1), i)); + } + EXPECT_TRUE(v.isLoaded()); + EXPECT_TRUE(v.save()); + EXPECT_TRUE(v.isLoaded()); + } + { + TestAttributeBase v("mvint"); + verifyLoad(v); + } + { + AttributeVector::Config config(BT::INT32, + CollectionType::ARRAY); + TestAttributeBase v("mvint", config); + verifyLoad(v); + } + { + AttributeManager manager; + AttributeVector::Config config(BT::INT32, + 
CollectionType::ARRAY); + EXPECT_TRUE(manager.addVector("mvint", config)); + AttributeManager::AttributeList list; + manager.getAttributeList(list); + EXPECT_TRUE(list.size() == 1); + EXPECT_TRUE( list[0]->isLoaded()); + AttributeGuard::UP attrG(manager.getAttribute("mvint")); + EXPECT_TRUE( attrG->valid() ); + } +} + + +bool +AttributeManagerTest::assertDataType(BT::Type exp, + AttributesConfig::Attribute::Datatype in) +{ + AttributesConfig::Attribute a; + a.datatype = in; + return EXPECT_EQUAL(exp, ConfigConverter::convert(a).basicType().type()); +} + + +bool +AttributeManagerTest:: +assertCollectionType(CollectionType exp, + AttributesConfig::Attribute::Collectiontype in, + bool removeIfZ, + bool createIfNe) +{ + AttributesConfig::Attribute a; + a.collectiontype = in; + a.removeifzero = removeIfZ; + a.createifnonexistent = createIfNe; + AttributeVector::Config out = ConfigConverter::convert(a); + return EXPECT_EQUAL(exp.type(), out.collectionType().type()) && + EXPECT_EQUAL(exp.removeIfZero(), out.collectionType().removeIfZero()) && + EXPECT_EQUAL(exp.createIfNonExistant(), + out.collectionType().createIfNonExistant()); +} + + +void +AttributeManagerTest::testConfigConvert() +{ + // typedef AttributeVector::Config AVC; + typedef BT AVBT; + typedef CollectionType AVCT; + typedef AttributesConfig::Attribute CACA; + typedef ConfigConverter CC; + + EXPECT_TRUE(assertDataType(AVBT::STRING, CACA::STRING)); + EXPECT_TRUE(assertDataType(AVBT::INT8, CACA::INT8)); + EXPECT_TRUE(assertDataType(AVBT::INT16, CACA::INT16)); + EXPECT_TRUE(assertDataType(AVBT::INT32, CACA::INT32)); + EXPECT_TRUE(assertDataType(AVBT::INT64, CACA::INT64)); + EXPECT_TRUE(assertDataType(AVBT::FLOAT, CACA::FLOAT)); + EXPECT_TRUE(assertDataType(AVBT::DOUBLE, CACA::DOUBLE)); + EXPECT_TRUE(assertDataType(AVBT::PREDICATE, CACA::PREDICATE)); + EXPECT_TRUE(assertDataType(AVBT::TENSOR, CACA::TENSOR)); + EXPECT_TRUE(assertDataType(AVBT::NONE, CACA::NONE)); + + EXPECT_TRUE(assertCollectionType(AVCT::SINGLE, 
CACA::SINGLE)); + EXPECT_TRUE(assertCollectionType(AVCT::ARRAY, CACA::ARRAY)); + EXPECT_TRUE(assertCollectionType(AVCT::WSET, CACA::WEIGHTEDSET)); + EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, true, false), + CACA::SINGLE, true, false)); + EXPECT_TRUE(assertCollectionType(AVCT(AVCT::SINGLE, false, true), + CACA::SINGLE, false, true)); + + { // fastsearch + CACA a; + EXPECT_TRUE(!CC::convert(a).fastSearch()); + a.fastsearch = true; + EXPECT_TRUE(CC::convert(a).fastSearch()); + } + { // huge + CACA a; + EXPECT_TRUE(!CC::convert(a).huge()); + a.huge = true; + EXPECT_TRUE(CC::convert(a).huge()); + } + { // fastAccess + CACA a; + EXPECT_TRUE(!CC::convert(a).fastAccess()); + a.fastaccess = true; + EXPECT_TRUE(CC::convert(a).fastAccess()); + } + { // tensor + CACA a; + a.datatype = CACA::TENSOR; + a.tensortype = "tensor(x[5])"; + AttributeVector::Config out = ConfigConverter::convert(a); + EXPECT_EQUAL("tensor(x[5])", out.tensorType().toSpec()); + } +} + +bool gt_attribute(const attribute::IAttributeVector * a, const attribute::IAttributeVector * b) { + return a->getName() < b->getName(); +} + +void +AttributeManagerTest::testContext() +{ + std::vector<AVSP> attrs; + // create various attributes vectors + attrs.push_back(AttributeFactory::createAttribute("sint32", + Config(BT::INT32, CT::SINGLE))); + attrs.push_back(AttributeFactory::createAttribute("aint32", + Config(BT::INT32, CT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("wsint32", + Config(BT::INT32, CT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("dontcare", + Config(BT::INT32, CT::SINGLE))); + + // add docs + for (uint32_t i = 0; i < attrs.size(); ++i) { + attrs[i]->addDocs(64); + } + + // commit all attributes (current generation -> 1); + for (uint32_t i = 0; i < attrs.size(); ++i) { + attrs[i]->commit(); + } + + AttributeManager manager; + // add to manager + for (uint32_t i = 0; i < attrs.size(); ++i) { + manager.add(attrs[i]); + } + + { + IAttributeContext::UP first = 
manager.createContext(); + + // no generation guards taken yet + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u); + } + + for (uint32_t i = 0; i < 2; ++i) { + EXPECT_TRUE(first->getAttribute("sint32") != NULL); + EXPECT_TRUE(first->getAttribute("aint32") != NULL); + EXPECT_TRUE(first->getAttribute("wsint32") != NULL); + EXPECT_TRUE(first->getAttributeStableEnum("wsint32") != NULL); + } + EXPECT_TRUE(first->getAttribute("foo") == NULL); + EXPECT_TRUE(first->getAttribute("bar") == NULL); + + // one generation guard taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 2u : 1u) : 0u); + } + + { + IAttributeContext::UP second = manager.createContext(); + + EXPECT_TRUE(second->getAttribute("sint32") != NULL); + EXPECT_TRUE(second->getAttribute("aint32") != NULL); + EXPECT_TRUE(second->getAttribute("wsint32") != NULL); + EXPECT_TRUE(second->getAttributeStableEnum("wsint32") != NULL); + + // two generation guards taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 4u : 2u) : 0u); + } + } + + // one generation guard taken per attribute asked for + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), + (i < 3) ? (i == 2 ? 
2u : 1u) : 0u); + } + } + + // no generation guards taken + for (uint32_t i = 0; i < attrs.size(); ++i) { + EXPECT_EQUAL(attrs[i]->getCurrentGeneration(), 1u); + EXPECT_EQUAL(attrs[i]->getGenerationRefCount(1u), 0u); + } + + { + IAttributeContext::UP ctx = manager.createContext(); + std::vector<const attribute::IAttributeVector *> all; + ctx->getAttributeList(all); + EXPECT_EQUAL(4u, all.size()); + std::sort(all.begin(), all.end(), gt_attribute); + EXPECT_EQUAL("aint32", all[0]->getName()); + EXPECT_EQUAL("dontcare", all[1]->getName()); + EXPECT_EQUAL("sint32", all[2]->getName()); + EXPECT_EQUAL("wsint32", all[3]->getName()); + } +} + +int AttributeManagerTest::Main() +{ + TEST_INIT("attributemanager_test"); + + testLoad(); + testGuards(); + testConfigConvert(); + testContext(); + + TEST_DONE(); +} + +} // namespace search + + +TEST_APPHOOK(search::AttributeManagerTest); diff --git a/searchlib/src/tests/attribute/attributesearcher.h b/searchlib/src/tests/attribute/attributesearcher.h new file mode 100644 index 00000000000..7456d22f306 --- /dev/null +++ b/searchlib/src/tests/attribute/attributesearcher.h @@ -0,0 +1,265 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "runnable.h" +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/queryeval/hitcollector.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/vespalib/util/compress.h> + +namespace search { + +std::unique_ptr<ResultSet> +performSearch(queryeval::SearchIterator & sb, uint32_t numDocs) +{ + queryeval::HitCollector hc(numDocs, numDocs, 0); + // assume strict toplevel search object located at start + for (sb.seek(1); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +class AttributeSearcherStatus +{ +public: + double _totalSearchTime; + uint64_t _totalHitCount; + uint64_t _numQueries; + uint64_t _numClients; + + AttributeSearcherStatus() : _totalSearchTime(0), _totalHitCount(0), _numQueries(0), _numClients(0) {} + void merge(const AttributeSearcherStatus & status) { + _totalSearchTime += status._totalSearchTime; + _totalHitCount += status._totalHitCount; + _numQueries += status._numQueries; + _numClients += status._numClients; + } + void printXML() const { + std::cout << "<total-search-time>" << _totalSearchTime << "</total-search-time>" << std::endl; // ms + std::cout << "<avg-search-time>" << avgSearchTime() << "</avg-search-time>" << std::endl; // ms + std::cout << "<search-throughput>" << searchThroughout() << "</search-throughput>" << std::endl; // per/sec + std::cout << "<total-hit-count>" << _totalHitCount << "</total-hit-count>" << std::endl; + std::cout << "<avg-hit-count>" << avgHitCount() << "</avg-hit-count>" << std::endl; + } + double avgSearchTime() const { + return _totalSearchTime / _numQueries; + } + double searchThroughout() const { + return _numClients * 1000 * _numQueries / _totalSearchTime; + } + double avgHitCount() const { + return _totalHitCount / static_cast<double>(_numQueries); + } +}; + + +class AttributeSearcher : public Runnable +{ +protected: + typedef AttributeVector::SP AttributePtr; + + const AttributePtr & _attrPtr; + FastOS_Time _timer; + AttributeSearcherStatus _status; + +public: + AttributeSearcher(uint32_t id, const AttributePtr & attrPtr) : + Runnable(id), _attrPtr(attrPtr), _timer(), _status() + { + _status._numClients = 1; + } + virtual void doRun() = 0; + AttributeSearcherStatus & getStatus() { return _status; } + void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix = false); +}; + +void 
+AttributeSearcher::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const char * term, bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = strlen(term); + uint32_t termIdx = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + uint32_t queryPacketSize = vespalib::compress::Integer::compressedPositiveLength(termIdx) + + vespalib::compress::Integer::compressedPositiveLength(indexLen) + + vespalib::compress::Integer::compressedPositiveLength(termLen) + + indexLen + termLen; + buffer.resize(queryPacketSize); + char * p = &buffer[0]; + p += vespalib::compress::Integer::compressPositive(termIdx, p); + p += vespalib::compress::Integer::compressPositive(indexLen, p); + memcpy(p, index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, p); + memcpy(p, term, termLen); + p += termLen; + assert(p == (&buffer[0] + buffer.size())); +} + + +template <typename T> +class AttributeFindSearcher : public AttributeSearcher +{ +private: + const std::vector<T> & _values; + std::vector<char> _query; + +public: + AttributeFindSearcher(uint32_t id, const AttributePtr & attrPtr, const std::vector<T> & values, + uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +template <typename T> +void +AttributeFindSearcher<T>::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple term query + vespalib::asciistream ss; + ss << _values[i % _values.size()].getValue(); + this->buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = 
searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class RangeSpec +{ +public: + int64_t _min; + int64_t _max; + int64_t _range; + RangeSpec(int64_t min, int64_t max, int64_t range) : + _min(min), _max(max), _range(range) + { + assert(_min < _max); + assert(_range <= (_max - _min)); + } +}; + +class RangeIterator +{ +private: + RangeSpec _spec; + int64_t _a; + int64_t _b; + +public: + RangeIterator(const RangeSpec & spec) : _spec(spec), _a(spec._min), _b(spec._min + _spec._range) {} + RangeIterator & operator++() { + _a += _spec._range; + _b += _spec._range; + if (_b > _spec._max) { + _a = _spec._min; + _b = _spec._min + _spec._range; + } + return *this; + } + int64_t a() const { return _a; } + int64_t b() const { return _b; } +}; + +class AttributeRangeSearcher : public AttributeSearcher +{ +private: + RangeSpec _spec; + std::vector<char> _query; + +public: + AttributeRangeSearcher(uint32_t id, const AttributePtr & attrPtr, const RangeSpec & spec, + uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _spec(spec), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +void +AttributeRangeSearcher::doRun() +{ + _timer.SetNow(); + RangeIterator iter(_spec); + for (uint32_t i = 0; i < _status._numQueries; ++i, ++iter) { + // build simple range term query + vespalib::asciistream ss; + ss << "[" << iter.a() << ";" << iter.b() << "]"; + buildTermQuery(_query, _attrPtr->getName(), ss.str().c_str()); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, 
true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + +class AttributePrefixSearcher : public AttributeSearcher +{ +private: + const std::vector<vespalib::string> & _values; + std::vector<char> _query; + +public: + AttributePrefixSearcher(uint32_t id, const AttributePtr & attrPtr, + const std::vector<vespalib::string> & values, uint32_t numQueries) : + AttributeSearcher(id, attrPtr), _values(values), _query() + { + _status._numQueries = numQueries; + } + virtual void doRun(); +}; + +void +AttributePrefixSearcher::doRun() +{ + _timer.SetNow(); + for (uint32_t i = 0; i < _status._numQueries; ++i) { + // build simple prefix term query + buildTermQuery(_query, _attrPtr->getName(), _values[i % _values.size()].c_str(), true); + + AttributeGuard guard(_attrPtr); + std::unique_ptr<AttributeVector::SearchContext> searchContext = + _attrPtr->getSearch(vespalib::stringref(&_query[0], _query.size()), + AttributeVector::SearchContext::Params()); + + searchContext->fetchPostings(true); + std::unique_ptr<queryeval::SearchIterator> iterator = searchContext->createIterator(NULL, true); + std::unique_ptr<ResultSet> results = performSearch(*iterator, _attrPtr->getNumDocs()); + + _status._totalHitCount += results->getNumHits(); + } + _status._totalSearchTime += _timer.MilliSecsToNow(); +} + + + +} // search + diff --git a/searchlib/src/tests/attribute/attributeupdater.h b/searchlib/src/tests/attribute/attributeupdater.h new file mode 100644 index 00000000000..5193ca0f873 --- /dev/null +++ b/searchlib/src/tests/attribute/attributeupdater.h @@ -0,0 +1,299 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#pragma once + +#include <vespa/searchlib/util/randomgenerator.h> +#include "runnable.h" +#include <vespa/searchlib/attribute/attribute.h> + +#define VALIDATOR_STR(str) #str +#define VALIDATOR_ASSERT(rc) reportAssert(rc, __FILE__, __LINE__, VALIDATOR_STR(rc)) +#define VALIDATOR_ASSERT_EQUAL(a, b) reportAssertEqual(__FILE__, __LINE__, VALIDATOR_STR(a), VALIDATOR_STR(b), a, b) + +namespace search { + +class AttributeValidator +{ +private: + uint32_t _totalCnt; + +public: + AttributeValidator() : _totalCnt(0) {} + uint32_t getTotalCnt() const { return _totalCnt; } + bool reportAssert(bool rc, const vespalib::string & file, uint32_t line, const vespalib::string & str) { + _totalCnt++; + if (!rc) { + std::cout << "Assert " << _totalCnt << " failed: \"" << str << "\" (" + << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } + template <class A, class B> + bool reportAssertEqual(const vespalib::string & file, uint32_t line, + const vespalib::string & aStr, const vespalib::string & bStr, + const A & a, const B & b) { + _totalCnt++; + if (!(a == b)) { + std::cout << "Assert equal failed: " << std::endl; + std::cout << aStr << ": " << a << std::endl; + std::cout << bStr << ": " << b << std::endl; + std::cout << "(" << file << ":" << line << ")" << std::endl; + abort(); + } + return true; + } +}; + +class AttributeUpdaterStatus +{ +public: + double _totalUpdateTime; + uint64_t _numDocumentUpdates; + uint64_t _numValueUpdates; + + AttributeUpdaterStatus() : + _totalUpdateTime(0), _numDocumentUpdates(0), _numValueUpdates(0) {} + void reset() { + _totalUpdateTime = 0; + _numDocumentUpdates = 0; + _numValueUpdates = 0; + } + void printXML() const { + std::cout << "<total-update-time>" << _totalUpdateTime << "</total-update-time>" << std::endl; + std::cout << "<documents-updated>" << _numDocumentUpdates << "</documents-updated>" << std::endl; + std::cout << "<document-update-throughput>" << documentUpdateThroughput() << "</document-update-throughput>" 
<< std::endl; + std::cout << "<avg-document-update-time>" << avgDocumentUpdateTime() << "</avg-document-update-time>" << std::endl; + std::cout << "<values-updated>" << _numValueUpdates << "</values-updated>" << std::endl; + std::cout << "<value-update-throughput>" << valueUpdateThroughput() << "</value-update-throughput>" << std::endl; + std::cout << "<avg-value-update-time>" << avgValueUpdateTime() << "</avg-value-update-time>" << std::endl; + } + double documentUpdateThroughput() const { + return _numDocumentUpdates * 1000 / _totalUpdateTime; + } + double avgDocumentUpdateTime() const { + return _totalUpdateTime / _numDocumentUpdates; + } + double valueUpdateThroughput() const { + return _numValueUpdates * 1000 / _totalUpdateTime; + } + double avgValueUpdateTime() const { + return _totalUpdateTime / _numValueUpdates; + } +}; + +// AttributeVectorInstance, AttributeVectorType, AttributeVectorBufferType +template <typename Vector, typename T, typename BT> +class AttributeUpdater +{ +protected: + typedef AttributeVector::SP AttributePtr; + typedef std::map<uint32_t, std::vector<T> > AttributeCommit; + + const AttributePtr & _attrPtr; + Vector & _attrVec; + const std::vector<T> & _values; + std::vector<T> _buffer; + std::vector<BT> _getBuffer; + RandomGenerator & _rndGen; + AttributeCommit _expected; + FastOS_Time _timer; + AttributeUpdaterStatus _status; + AttributeValidator _validator; + + // config + bool _validate; + uint32_t _commitFreq; + uint32_t _minValueCount; + uint32_t _maxValueCount; + + uint32_t getRandomCount() { + return _rndGen.rand(_minValueCount, _maxValueCount); + } + uint32_t getRandomDoc() { + return _rndGen.rand(0, _attrPtr->getNumDocs() - 1); + } + const T & getRandomValue() { + return _values[_rndGen.rand(0, _values.size() - 1)]; + } + void updateValues(uint32_t doc); + void commit(); + +public: + AttributeUpdater(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + 
uint32_t minValueCount, uint32_t maxValueCount) : + _attrPtr(attrPtr), _attrVec(*(static_cast<Vector *>(attrPtr.get()))), + _values(values), _buffer(), _getBuffer(), _rndGen(rndGen), _expected(), _timer(), _status(), _validator(), + _validate(validate), _commitFreq(commitFreq), _minValueCount(minValueCount), _maxValueCount(maxValueCount) + { + } + void resetStatus() { + _status.reset(); + } + const AttributeUpdaterStatus & getStatus() const { + return _status; + } + const AttributeValidator & getValidator() const { + return _validator; + } + void populate(); + void update(uint32_t numUpdates); +}; + +template <typename Vector, typename T, typename BT> +class AttributeUpdaterThread : public AttributeUpdater<Vector, T, BT>, public Runnable +{ +private: + typedef AttributeVector::SP AttributePtr; + +public: + AttributeUpdaterThread(const AttributePtr & attrPtr, const std::vector<T> & values, + RandomGenerator & rndGen, bool validate, uint32_t commitFreq, + uint32_t minValueCount, uint32_t maxValueCount) : + AttributeUpdater<Vector, T, BT>(attrPtr, values, rndGen, validate, commitFreq, minValueCount, maxValueCount), + Runnable(0) {} + + virtual void doRun(); +}; + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::updateValues(uint32_t doc) +{ + uint32_t valueCount = getRandomCount(); + + if (_validate) { + _buffer.clear(); + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + if (_attrPtr->hasWeightedSetType()) { + bool exists = false; + for (typename std::vector<T>::iterator iter = _buffer.begin(); iter != _buffer.end(); ++iter) { + if (iter->getValue() == value.getValue()) { + exists = true; + iter->setWeight(value.getWeight()); + break; + } + } + if (!exists) { + _buffer.push_back(value); + } + } else { + _buffer.push_back(value); + } + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + 
_buffer.push_back(getRandomValue()); + _attrVec.update(doc, _buffer.back().getValue()); + } + _expected[doc] = _buffer; + + } else { + if (_attrPtr->hasMultiValue()) { + _attrPtr->clearDoc(doc); + for (uint32_t j = 0; j < valueCount; ++j) { + T value = getRandomValue(); + _attrVec.append(doc, value.getValue(), value.getWeight()); + } + } else { + _attrVec.update(doc, getRandomValue().getValue()); + } + } + + _status._numDocumentUpdates++; + _status._numValueUpdates += (_attrPtr->hasMultiValue() ? valueCount: 1); +} + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::commit() +{ + AttributeGuard guard(this->_attrPtr); + if (_validate) { + _attrPtr->commit(); + _getBuffer.resize(_maxValueCount); + for (typename AttributeCommit::iterator iter = _expected.begin(); + iter != _expected.end(); ++iter) + { + uint32_t valueCount = _attrPtr->get(iter->first, &_getBuffer[0], _getBuffer.size()); + _validator.VALIDATOR_ASSERT(_minValueCount <= valueCount && valueCount <= _maxValueCount); + if (valueCount != iter->second.size()) { + std::cout << "validate(" << iter->first << ")" << std::endl; + std::cout << "expected(" << iter->second.size() << ")" << std::endl; + for (size_t i = 0; i < iter->second.size(); ++i) { + std::cout << " [" << iter->second[i].getValue() << ", " << iter->second[i].getWeight() << "]" << std::endl; + } + std::cout << "actual(" << valueCount << ")" << std::endl; + for (size_t i = 0; i < valueCount; ++i) { + std::cout << " [" << _getBuffer[i].getValue() << ", " << _getBuffer[i].getWeight() << "]" << std::endl; + } + } + _validator.VALIDATOR_ASSERT_EQUAL(valueCount, iter->second.size()); + for (uint32_t i = 0; i < valueCount; ++i) { + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getValue(), iter->second[i].getValue()); + _validator.VALIDATOR_ASSERT_EQUAL(_getBuffer[i].getWeight(), iter->second[i].getWeight()); + } + } + _expected.clear(); + } else { + _attrPtr->commit(); + } +} + +template <typename Vector, 
typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::populate() +{ + _timer.SetNow(); + for (uint32_t doc = 0; doc < _attrPtr->getNumDocs(); ++doc) { + updateValues(doc); + if (doc % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdater<Vector, T, BT>::update(uint32_t numUpdates) +{ + _timer.SetNow(); + for (uint32_t i = 0; i < numUpdates; ++i) { + uint32_t doc = getRandomDoc(); + updateValues(doc); + if (i % _commitFreq == (_commitFreq - 1)) { + commit(); + } + } + commit(); + _status._totalUpdateTime += _timer.MilliSecsToNow(); +} + + +template <typename Vector, typename T, typename BT> +void +AttributeUpdaterThread<Vector, T, BT>::doRun() +{ + this->_timer.SetNow(); + while(!_done) { + uint32_t doc = this->getRandomDoc(); + this->updateValues(doc); + if (this->_status._numDocumentUpdates % this->_commitFreq == (this->_commitFreq - 1)) { + this->commit(); + } + } + this->commit(); + this->_status._totalUpdateTime += this->_timer.MilliSecsToNow(); +} + + +} // search + diff --git a/searchlib/src/tests/attribute/benchmarkplotter.rb b/searchlib/src/tests/attribute/benchmarkplotter.rb new file mode 100644 index 00000000000..d77c92c8acd --- /dev/null +++ b/searchlib/src/tests/attribute/benchmarkplotter.rb @@ -0,0 +1,134 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
require 'rexml/document'

# Writes a gnuplot command file ("plot_graph.cmd" in the current directory)
# that plots one line per entry of graph_titles from the columns of
# plot_data (column 1 is the x axis, columns 2.. are the graphs), then runs
# gnuplot on it and prints gnuplot's output.
#
# plot_data    - path of the whitespace separated data file
# plot_png     - path of the PNG file gnuplot should produce
# title        - title of the whole plot
# xlabel       - label of the x axis
# ylabel       - label of the y axis
# graph_titles - one legend title per data column, in column order
def plot_graph(plot_data, plot_png, title, xlabel, ylabel, graph_titles)
  plot_cmd = ""
  plot_cmd += "set terminal png\n"
  plot_cmd += "set output \"#{plot_png}\"\n"
  plot_cmd += "set title \"#{title}\"\n"
  plot_cmd += "set xlabel \"#{xlabel}\"\n"
  plot_cmd += "set ylabel \"#{ylabel}\"\n"

  # Column 1 holds the x values, so the first graph reads column 2.
  # (The block variable no longer shadows the title parameter.)
  plots = []
  graph_titles.each_with_index do |graph_title, idx|
    plots.push("\"#{plot_data}\" using 1:#{idx + 2} title \"#{graph_title}\" with linespoints")
  end
  plot_cmd += "plot "
  plot_cmd += plots.join(", ")

  # Block form closes the command file even if the write raises.
  File.open("plot_graph.cmd", "w") { |cmd_file| cmd_file.write(plot_cmd) }

  cmd = "gnuplot plot_graph.cmd"
  puts cmd
  puts `#{cmd}`
end

# Parses the benchmark log +filename+ as XML and returns what the method
# named by +xml_getter+ extracts from its root element. A log that is not
# well-formed XML is reported on stdout and yields 0. Any other error (for
# example a missing file) propagates, as it always did.
def read_benchmark_value(filename, xml_getter)
  # Block form closes the log file once it has been parsed.
  xml_root = File.open(filename) { |log| REXML::Document.new(log).root }
  send(xml_getter, xml_root)
rescue REXML::ParseException
  puts "Could not parse file: #{filename}"
  0
end

# Builds the data file +output+ with one line per document count:
#   "<num> <value for percentages[0]> <value for percentages[1]> ... "
#
# For every combination the log file name is derived from +input+ by
# replacing "#N" with the document count and "#V" with
# document count * percentage.
def extract_alpha(num_docs, percentages, input, output, xml_getter)
  File.open(output, "w") do |plot_data|
    num_docs.each do |num|
      data_line = "#{num} "
      percentages.each do |prc|
        unique = num * prc
        filename = input.sub("#N", "#{num}").sub("#V", "#{unique}")
        data_line += "#{read_benchmark_value(filename, xml_getter)} "
      end
      plot_data.write(data_line + "\n")
    end
  end
  nil
end

# Builds the data file +output+ with one line per document count:
#   "<num> <value for num_threads[0]> <value for num_threads[1]> ... "
#
# The log file name is derived from +input+ by replacing "#N" with the
# document count, "#V" with document count * percentage and "#S" with the
# thread count.
def extract_beta(num_docs, percentage, num_threads, input, output, xml_getter)
  File.open(output, "w") do |plot_data|
    num_docs.each do |num|
      data_line = "#{num} "
      unique = num * percentage
      num_threads.each do |thread|
        filename = input.sub("#N", "#{num}").sub("#V", "#{unique}").sub("#S", "#{thread}")
        data_line += "#{read_benchmark_value(filename, xml_getter)} "
      end
      plot_data.write(data_line + "\n")
    end
  end
  nil
end

# Returns the text of <throughput> inside the <update id='0'> element.
def xml_getter_update_0_throughput(xml_root)
  return xml_root.elements["update[@id='0']"].elements["throughput"].text
end
+def xml_getter_search_throughput(xml_root) + return xml_root.elements["total-searcher-summary"].elements["search-throughput"].text +end + +def xml_getter_updater_thread_throughput(xml_root) + return throughput = xml_root.elements["updater-summary"].elements["throughput"].text +end + + +vectors = ["mv-num-new"]#, "mv-num-new", "sv-string-new", "mv-string-new"]#, "sv-num-old", "mv-num-old", "sv-string-old", "mv-string-old"] +num_docs = [500000, 1000000, 2000000, 4000000, 8000000, 16000000] +unique_percentages = [0.001, 0.01, 0.05, 0.20, 0.50] +num_threads = [1, 2, 4, 8, 16] + +inputs = ["03-27-full/#AV-n#N-v#V-p2-r1-s1-q1000.log", + "03-27-full/#AV-n#N-v#V-s#S-q100-b.log"] +graph_titles = [[], []] +unique_percentages.each do |percentage| + graph_titles[0].push("#{percentage * 100} % uniques") +end +num_threads.each do |thread| + graph_titles[1].push("#{thread} searcher thread(s)") +end + +vectors.each do |vector| + extract_alpha(num_docs, unique_percentages, + inputs[0].sub("#AV", vector), + "#{vector}-update-speed.dat", + :xml_getter_update_0_throughput) + plot_graph("#{vector}-update-speed.dat", + "#{vector}-update-speed.png", + "Update speed when applying 1M updates", + "Number of documents", "Updates per/sec", graph_titles[0]) + + extract_alpha(num_docs, unique_percentages, + inputs[0].sub("#AV", vector), + "#{vector}-search-speed.dat", + :xml_getter_search_throughput) + plot_graph("#{vector}-search-speed.dat", + "#{vector}-search-speed.png", + "Search speed with 1 searcher thread", + "Number of documents", "Queries per/sec", graph_titles[0]) + + extract_beta(num_docs, 0.01, num_threads, + inputs[1].sub("#AV", vector), + "#{vector}-search-speed-multiple.dat", + :xml_getter_search_throughput) + plot_graph("#{vector}-search-speed-multiple.dat", + "#{vector}-search-speed-multiple.png", + "Search speed with 1 update thread and X searcher threads", + "Number of documents", "Queries per/sec", graph_titles[1]) + + extract_beta(num_docs, 0.01, num_threads, + 
inputs[1].sub("#AV", vector), + "#{vector}-update-speed-multiple.dat", + :xml_getter_updater_thread_throughput) + plot_graph("#{vector}-update-speed-multiple.dat", + "#{vector}-update-speed-multiple.png", + "Update speed with 1 update thread and X searcher threads", + "Number of documents", "Updates per/sec", graph_titles[1]) +end diff --git a/searchlib/src/tests/attribute/bitvector/.gitignore b/searchlib/src/tests/attribute/bitvector/.gitignore new file mode 100644 index 00000000000..05ec0a4df59 --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/.gitignore @@ -0,0 +1 @@ +searchlib_bitvector_test_app diff --git a/searchlib/src/tests/attribute/bitvector/CMakeLists.txt b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..bc65fc04dc4 --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bitvector_test_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_bitvector_test_app COMMAND searchlib_bitvector_test_app) diff --git a/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..85f83d217eb --- /dev/null +++ b/searchlib/src/tests/attribute/bitvector/bitvector_test.cpp @@ -0,0 +1,632 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bitvector_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/i_document_weight_attribute.h> +#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/common/bitvectoriterator.h> + +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::AttributeFactory; +using search::FloatingPointAttribute; +using search::IntegerAttribute; +using search::StringAttribute; +using search::AttributeVector; +using search::ParseItem; +using search::fef::TermFieldMatchData; +using search::BitVector; +using search::BitVectorIterator; +using search::queryeval::SearchIterator; + +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr; + +struct BitVectorTest +{ + typedef AttributeVector::SP AttributePtr; + + BitVectorTest() { } + + ~BitVectorTest() { } + + template <typename VectorType> + VectorType & as(AttributePtr &v); + IntegerAttribute & asInt(AttributePtr &v); + StringAttribute & asString(AttributePtr &v); + FloatingPointAttribute & asFloat(AttributePtr &v); + + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter); + + void + addDocs(const AttributePtr &v, size_t sz); + + template <typename VectorType> + void populate(VectorType &v, + uint32_t low, + uint32_t 
high, + bool set); + + template <typename VectorType> + void populateAll(VectorType &v, + uint32_t low, + uint32_t high, + bool set); + + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template <typename V> + vespalib::string + getSearchStr(); + + template <typename V, typename T> + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix, bool useBitVector); + + template <typename V> + SearchContextPtr + getSearch(const V & vec, bool useBitVector); + + void + checkSearch(AttributePtr v, + SearchBasePtr sb, + TermFieldMatchData &md, + uint32_t expFirstDocId, + uint32_t expFastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride); + + void + checkSearch(AttributePtr v, + SearchContextPtr sc, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride); + + template <typename VectorType, typename BufferType> + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter); + + template <typename VectorType, typename BufferType> + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref); +}; + + +template <typename VectorType> +VectorType & +BitVectorTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast<VectorType *>(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +BitVectorTest::asInt(AttributePtr &v) +{ + return as<IntegerAttribute>(v); +} + + +StringAttribute & +BitVectorTest::asString(AttributePtr &v) +{ + return as<StringAttribute>(v); +} + + +FloatingPointAttribute & +BitVectorTest::asFloat(AttributePtr &v) +{ + return as<FloatingPointAttribute>(v); +} + + +void +BitVectorTest::buildTermQuery(std::vector<char> &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t 
termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <> +vespalib::string +BitVectorTest::getSearchStr<IntegerAttribute>() +{ + return "[-42;-42]"; +} + +template <> +vespalib::string +BitVectorTest::getSearchStr<FloatingPointAttribute>() +{ + return "[-42.0;-42.0]"; +} + +template <> +vespalib::string +BitVectorTest::getSearchStr<StringAttribute>() +{ + return "foo"; +} + + +template <typename V, typename T> +SearchContextPtr +BitVectorTest::getSearch(const V &vec, const T &term, bool prefix, + bool useBitVector) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast<const AttributeVector &>(vec)). 
+ getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params().useBitVector(useBitVector)); +} + + +template <> +SearchContextPtr +BitVectorTest::getSearch<IntegerAttribute>(const IntegerAttribute &v, + bool useBitVector) +{ + return getSearch<IntegerAttribute>(v, "[-42;-42]", false, useBitVector); +} + +template <> +SearchContextPtr +BitVectorTest:: +getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v, + bool useBitVector) +{ + return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false, + useBitVector); +} + +template <> +SearchContextPtr +BitVectorTest::getSearch<StringAttribute>(const StringAttribute &v, + bool useBitVector) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false, useBitVector); +} + + +BitVectorTest::AttributePtr +BitVectorTest::make(Config cfg, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter) +{ + cfg.setFastSearch(fastSearch); + cfg.setEnableBitVectors(enableBitVectors); + cfg.setEnableOnlyBitVector(enableOnlyBitVector); + cfg.setIsFilter(filter); + AttributePtr v = AttributeFactory::createAttribute(pref, cfg); + return v; +} + + +void +BitVectorTest::addDocs(const AttributePtr &v, size_t sz) +{ + while (v->getNumDocs() < sz) { + AttributeVector::DocId docId = 0; + EXPECT_TRUE(v->addDoc(docId)); + v->clearDoc(docId); + } + EXPECT_TRUE(v->getNumDocs() == sz); + v->commit(true); +} + + +template <> +void +BitVectorTest::populate(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE(v.update(i, -42)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populate(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); 
i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE(v.update(i, -42.0)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populate(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; i+= 5) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE(v.update(i, "foo")); + } + } + v.commit(); +} + +template <> +void +BitVectorTest::populateAll(IntegerAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE(v.update(i, -42)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populateAll(FloatingPointAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE(v.update(i, -42.0)); + } + } + v.commit(); +} + + +template <> +void +BitVectorTest::populateAll(StringAttribute &v, + uint32_t low, uint32_t high, + bool set) +{ + for(size_t i(low), m(high); i < m; ++i) { + if (!set) { + v.clearDoc(i); + } else if (v.hasMultiValue()) { + v.clearDoc(i); + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE(v.update(i, "foo")); + } + } + v.commit(); +} + + +void +BitVectorTest::checkSearch(AttributePtr v, + SearchBasePtr sb, + TermFieldMatchData &md, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride) +{ + 
sb->initFullRange(); + sb->seek(1u); + uint32_t docId = sb->getDocId(); + uint32_t lastDocId = 0; + uint32_t docFreq = 0; + EXPECT_EQUAL(expFirstDocId, docId); + while (docId != search::endDocId) { + lastDocId = docId; + ++docFreq, + assert(!checkStride || (docId % 5) == 2u); + sb->unpack(docId); + EXPECT_EQUAL(md.getDocId(), docId); + if (v->getCollectionType() == CollectionType::SINGLE || + !weights) { + EXPECT_EQUAL(1, md.getWeight()); + } else if (v->getCollectionType() == CollectionType::ARRAY) { + EXPECT_EQUAL(2, md.getWeight()); + } else { + if (v->getBasicType() == BasicType::STRING) { + EXPECT_EQUAL(24, md.getWeight()); + } else { + EXPECT_EQUAL(-3, md.getWeight()); + } + } + sb->seek(docId + 1); + docId = sb->getDocId(); + } + EXPECT_EQUAL(expLastDocId, lastDocId); + EXPECT_EQUAL(expDocFreq, docFreq); +} + + +void +BitVectorTest::checkSearch(AttributePtr v, + SearchContextPtr sc, + uint32_t expFirstDocId, + uint32_t expLastDocId, + uint32_t expDocFreq, + bool weights, + bool checkStride) +{ + TermFieldMatchData md; + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + checkSearch(v, std::move(sb), md, + expFirstDocId, expLastDocId, expDocFreq, weights, + checkStride); +} + + +template <typename VectorType, typename BufferType> +void +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref, + bool fastSearch, + bool enableBitVectors, + bool enableOnlyBitVector, + bool filter) +{ + Config cfg(bt, ct); + AttributePtr v = make(cfg, pref, fastSearch, + enableBitVectors, enableOnlyBitVector, filter); + addDocs(v, 1024); + VectorType &tv = as<VectorType>(v); + populate(tv, 2, 1023, true); + + SearchContextPtr sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, + true); + sc = getSearch<VectorType>(tv, false); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && + !filter, true); + const search::IDocumentWeightAttribute *dwa = + 
v->asDocumentWeightAttribute(); + if (dwa != NULL) { + search::IDocumentWeightAttribute::LookupResult lres = + dwa->lookup(getSearchStr<VectorType>()); + typedef search::queryeval::DocumentWeightSearchIterator DWSI; + typedef search::queryeval::SearchIterator SI; + TermFieldMatchData md; + SI::UP dwsi(new DWSI(md, *dwa, lres)); + if (!enableOnlyBitVector) { + checkSearch(v, std::move(dwsi), md, 2, 1022, 205, !filter, true); + } else { + dwsi->initFullRange(); + EXPECT_TRUE(dwsi->isAtEnd()); + } + } + populate(tv, 2, 973, false); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 977, 1022, 10, !enableOnlyBitVector && + !filter, true); + populate(tv, 2, 973, true); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableBitVectors && !filter, + true); + addDocs(v, 15000); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 1022, 205, !enableOnlyBitVector && + !filter, true); + populateAll(tv, 10, 15000, true); + sc = getSearch<VectorType>(tv, true); + checkSearch(v, std::move(sc), 2, 14999, 14992, + !enableBitVectors && !filter, + false); +} + + +template <typename VectorType, typename BufferType> +void +BitVectorTest::test(BasicType bt, + CollectionType ct, + const vespalib::string &pref) +{ + LOG(info, + "test run, pref is %s", + pref.c_str()); + test<VectorType, BufferType>(bt, ct, pref, + false, false, false, false); + test<VectorType, BufferType>(bt, ct, pref, + false, false, false, true); + test<VectorType, BufferType>(bt, ct, pref, + true, false, false, false); + test<VectorType, BufferType>(bt, ct, pref, + true, false, false, true); + test<VectorType, BufferType>(bt, ct, pref, + true, true, false, false); + test<VectorType, BufferType>(bt, ct, pref, + true, true, false, true); + test<VectorType, BufferType>(bt, ct, pref, + true, true, true, false); + test<VectorType, BufferType>(bt, ct, pref, + true, true, true, true); +} + + +TEST_F("Test bitvectors with single value int32", 
BitVectorTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); +} + +TEST_F("Test bitvectors with array value int32", BitVectorTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); +} + +TEST_F("Test bitvectors with weighted set value int32", BitVectorTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT32, + CollectionType::WSET, + "int32_sv"); +} + +TEST_F("Test bitvectors with single value double", BitVectorTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); +} + +TEST_F("Test bitvectors with array value double", BitVectorTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::ARRAY, + "double_a"); +} + +TEST_F("Test bitvectors with weighted set value double", BitVectorTest) +{ + f.template test<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>(BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); +} + +TEST_F("Test bitvectors with single value string", BitVectorTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::SINGLE, + "string_sv"); +} + +TEST_F("Test bitvectors with array value string", BitVectorTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::ARRAY, + "string_a"); +} + +TEST_F("Test bitvectors with weighted set value string", BitVectorTest) +{ + f.template test<StringAttribute, + StringAttribute::WeightedString>(BasicType::STRING, + CollectionType::WSET, + "string_ws"); +} + +TEST("Test bitvector iterators adheres to initRange") { + search::test::InitRangeVerifier initRangeTest; + BitVector::UP bv = BitVector::create(initRangeTest.getDocIdLimit()); + for (uint32_t docId: initRangeTest.getExpectedDocIds()) { + bv->setBit(docId); + 
} + TermFieldMatchData tfmd; + initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, false)); + initRangeTest.verify(*BitVectorIterator::create(bv.get(), initRangeTest.getDocIdLimit(), tfmd, true)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/changevector_test.cpp b/searchlib/src/tests/attribute/changevector_test.cpp new file mode 100644 index 00000000000..9f0a796fd3e --- /dev/null +++ b/searchlib/src/tests/attribute/changevector_test.cpp @@ -0,0 +1,92 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("changevector_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/attribute/changevector.h> + +using namespace search; + +template <typename T> +void verifyStrictOrdering(const T & v) { + long count(0); + for (const auto & c : v) { + count++; + EXPECT_EQUAL(count, c._data.get()); + } + EXPECT_EQUAL(v.size(), size_t(count)); +} + +class Accessor { +public: + Accessor(const std::vector<long> & v) : _size(v.size()), _current(v.begin()), _end(v.end()) { } + size_t size() const { return _size; } + void next() { _current++; } + long value() const { return *_current; } + int weight() const { return *_current; } +private: + size_t _size; + std::vector<long>::const_iterator _current; + std::vector<long>::const_iterator _end; +}; + +TEST("require insert ordering is preserved for same doc") +{ + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 7, 2)); + EXPECT_EQUAL(2u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require insert ordering is preserved ") +{ + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + 
a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 5, 2)); + EXPECT_EQUAL(2u, a.size()); + a.push_back(Change(Change::NOOP, 6, 3)); + EXPECT_EQUAL(3u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require insert ordering is preserved with mix") +{ + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + a.push_back(Change(Change::NOOP, 7, 1)); + EXPECT_EQUAL(1u, a.size()); + a.push_back(Change(Change::NOOP, 5, 2)); + EXPECT_EQUAL(2u, a.size()); + a.push_back(Change(Change::NOOP, 5, 3)); + EXPECT_EQUAL(3u, a.size()); + a.push_back(Change(Change::NOOP, 6, 10)); + EXPECT_EQUAL(4u, a.size()); + std::vector<long> v({4,5,6,7,8}); + Accessor ac(v); + a.push_back(5, ac); + EXPECT_EQUAL(9u, a.size()); + a.push_back(Change(Change::NOOP, 5, 9)); + EXPECT_EQUAL(10u, a.size()); + verifyStrictOrdering(a); +} + +TEST("require that inserting empty vector does not affect the vector.") { + typedef ChangeTemplate<NumericChangeData<long>> Change; + typedef ChangeVectorT<Change> CV; + CV a; + std::vector<long> v; + Accessor ac(v); + a.push_back(1, ac); + EXPECT_EQUAL(0u, a.size()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/changevector_test.sh b/searchlib/src/tests/attribute/changevector_test.sh new file mode 100644 index 00000000000..cb70f5465a4 --- /dev/null +++ b/searchlib/src/tests/attribute/changevector_test.sh @@ -0,0 +1,7 @@ +#!/bin/bash +$VALGRIND ./searchlib_changevector_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight +rm -rf clstmp +rm -rf alstmp diff --git a/searchlib/src/tests/attribute/comparator/.gitignore b/searchlib/src/tests/attribute/comparator/.gitignore new file mode 100644 index 00000000000..51c5b5944c9 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +comparator_test +searchlib_comparator_test_app diff --git 
a/searchlib/src/tests/attribute/comparator/CMakeLists.txt b/searchlib/src/tests/attribute/comparator/CMakeLists.txt new file mode 100644 index 00000000000..4a14181db3c --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_comparator_test_app + SOURCES + comparator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_comparator_test_app COMMAND searchlib_comparator_test_app) diff --git a/searchlib/src/tests/attribute/comparator/DESC b/searchlib/src/tests/attribute/comparator/DESC new file mode 100644 index 00000000000..6b3ba01c89b --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/DESC @@ -0,0 +1 @@ +comparator test. Take a look at comparator_test.cpp for details. diff --git a/searchlib/src/tests/attribute/comparator/FILES b/searchlib/src/tests/attribute/comparator/FILES new file mode 100644 index 00000000000..b4c23c09022 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/FILES @@ -0,0 +1 @@ +comparator_test.cpp diff --git a/searchlib/src/tests/attribute/comparator/comparator_test.cpp b/searchlib/src/tests/attribute/comparator/comparator_test.cpp new file mode 100644 index 00000000000..2a4c3c6fb87 --- /dev/null +++ b/searchlib/src/tests/attribute/comparator/comparator_test.cpp @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("comparator_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/enumcomparator.h> +#include <vespa/searchlib/btree/btreeroot.h> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> + +namespace search { + +using namespace btree; + +typedef EnumStoreT<NumericEntryType<int32_t> > NumericEnumStore; +typedef EnumStoreComparatorT<NumericEntryType<int32_t> > NumericComparator; + +typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore; +typedef EnumStoreComparatorT<NumericEntryType<float> > FloatComparator; + +typedef EnumStoreT<StringEntryType> StringEnumStore; +typedef EnumStoreComparatorT<StringEntryType> StringComparator; +typedef EnumStoreFoldedComparatorT<StringEntryType> FoldedStringComparator; + +typedef EnumStoreBase::Index EnumIndex; + +typedef BTreeRoot<EnumIndex, BTreeNoLeafData, + btree::NoAggregated, + const EnumStoreComparatorWrapper> TreeType; +typedef TreeType::NodeAllocatorType NodeAllocator; + +class Test : public vespalib::TestApp { +private: + void requireThatNumericComparatorIsWorking(); + void requireThatFloatComparatorIsWorking(); + void requireThatStringComparatorIsWorking(); + void requireThatComparatorWithTreeIsWorking(); + void requireThatFoldedComparatorIsWorking(); + +public: + Test() {} + int Main(); +}; + +void +Test::requireThatNumericComparatorIsWorking() +{ + NumericEnumStore es(1024, false); + EnumIndex e1, e2; + es.addEnum(10, e1); + es.addEnum(30, e2); + NumericComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + NumericComparator cmp2(es, 20); + EXPECT_TRUE(cmp2(EnumIndex(), e2)); + EXPECT_TRUE(!cmp2(e2, EnumIndex())); +} + +void +Test::requireThatFloatComparatorIsWorking() +{ + FloatEnumStore es(1024, false); 
+ EnumIndex e1, e2, e3; + es.addEnum(10.5, e1); + es.addEnum(30.5, e2); + es.addEnum(std::numeric_limits<float>::quiet_NaN(), e3); + FloatComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + EXPECT_TRUE(cmp1(e3, e1)); // nan + EXPECT_TRUE(!cmp1(e1, e3)); // nan + EXPECT_TRUE(!cmp1(e3, e3)); // nan + FloatComparator cmp2(es, 20.5); + EXPECT_TRUE(cmp2(EnumIndex(), e2)); + EXPECT_TRUE(!cmp2(e2, EnumIndex())); +} + +void +Test::requireThatStringComparatorIsWorking() +{ + StringEnumStore es(1024, false); + EnumIndex e1, e2, e3; + es.addEnum("Aa", e1); + es.addEnum("aa", e2); + es.addEnum("aB", e3); + StringComparator cmp1(es); + EXPECT_TRUE(cmp1(e1, e2)); // similar folded, fallback to regular + EXPECT_TRUE(!cmp1(e2, e1)); + EXPECT_TRUE(!cmp1(e1, e1)); + EXPECT_TRUE(cmp1(e2, e3)); // folded compare + EXPECT_TRUE(strcmp("aa", "aB") > 0); // regular + StringComparator cmp2(es, "AB"); + EXPECT_TRUE(cmp2(EnumIndex(), e3)); + EXPECT_TRUE(!cmp2(e3, EnumIndex())); +} + +void +Test::requireThatComparatorWithTreeIsWorking() +{ + NumericEnumStore es(2048, false); + vespalib::GenerationHandler g; + TreeType t; + NodeAllocator m; + EnumIndex ei; + for (int32_t v = 100; v > 0; --v) { + NumericComparator cmp(es, v); + EXPECT_TRUE(!t.find(EnumIndex(), m, cmp).valid()); + es.addEnum(v, ei); + t.insert(ei, BTreeNoLeafData(), m, cmp); + } + EXPECT_EQUAL(100u, t.size(m)); + int32_t exp = 1; + for (TreeType::Iterator itr = t.begin(m); itr.valid(); ++itr) { + EXPECT_EQUAL(exp++, es.getValue(itr.getKey())); + } + EXPECT_EQUAL(101, exp); + t.clear(m); + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +void +Test::requireThatFoldedComparatorIsWorking() +{ + StringEnumStore es(1024, false); + EnumIndex e1, e2, e3, e4; + es.addEnum("Aa", e1); + es.addEnum("aa", e2); + es.addEnum("aB", e3); + es.addEnum("Folded", e4); + FoldedStringComparator cmp1(es); + 
EXPECT_TRUE(!cmp1(e1, e2)); // similar folded + EXPECT_TRUE(!cmp1(e2, e1)); // similar folded + EXPECT_TRUE(cmp1(e2, e3)); // folded compare + EXPECT_TRUE(!cmp1(e3, e2)); // folded compare + FoldedStringComparator cmp2(es, "fol", false); + FoldedStringComparator cmp3(es, "fol", true); + EXPECT_TRUE(cmp2(EnumIndex(), e4)); + EXPECT_TRUE(!cmp2(e4, EnumIndex())); + EXPECT_TRUE(!cmp3(EnumIndex(), e4)); // similar when prefix + EXPECT_TRUE(!cmp3(e4, EnumIndex())); // similar when prefix +} + +int +Test::Main() +{ + TEST_INIT("comparator_test"); + + requireThatNumericComparatorIsWorking(); + requireThatFloatComparatorIsWorking(); + requireThatStringComparatorIsWorking(); + requireThatComparatorWithTreeIsWorking(); + requireThatFoldedComparatorIsWorking(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::Test); + diff --git a/searchlib/src/tests/attribute/document_weight_iterator/.gitignore b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore new file mode 100644 index 00000000000..08cae9a48df --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/.gitignore @@ -0,0 +1 @@ +searchlib_document_weight_iterator_test_app diff --git a/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt new file mode 100644 index 00000000000..2a1b36a626d --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_weight_iterator_test_app + SOURCES + document_weight_iterator_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_document_weight_iterator_test_app COMMAND searchlib_document_weight_iterator_test_app) diff --git a/searchlib/src/tests/attribute/document_weight_iterator/FILES b/searchlib/src/tests/attribute/document_weight_iterator/FILES new file mode 100644 index 00000000000..9bb94dc8770 --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/FILES @@ -0,0 +1 @@ +document_weight_iterator_test.cpp diff --git a/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp new file mode 100644 index 00000000000..fbe62f80843 --- /dev/null +++ b/searchlib/src/tests/attribute/document_weight_iterator/document_weight_iterator_test.cpp @@ -0,0 +1,189 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchlib/attribute/i_document_weight_attribute.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefile.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/multinumericpostattribute.hpp> +#include <vespa/searchlib/attribute/multistringpostattribute.hpp> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> + +using namespace search; +using namespace search::attribute; + +AttributeVector::SP make_attribute(BasicType type, CollectionType collection, bool fast_search) { + Config cfg(type, collection); + cfg.setFastSearch(fast_search); + return AttributeFactory::createAttribute("my_attribute", cfg); +} + +void add_docs(AttributeVector::SP attr_ptr, size_t limit = 1000) { + AttributeVector::DocId docid; + for (size_t i = 0; i < limit; ++i) { + attr_ptr->addDoc(docid); + } + attr_ptr->commit(); + ASSERT_EQUAL((limit - 1), docid); +} + +template <typename ATTR, typename KEY> 
+void set_doc(ATTR *attr, uint32_t docid, KEY key, int32_t weight) { + attr->clearDoc(docid); + attr->append(docid, key, weight); + attr->commit(); +} + +void populate_long(AttributeVector::SP attr_ptr) { + IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); + set_doc(attr, 1, int64_t(111), 20); + set_doc(attr, 5, int64_t(111), 5); + set_doc(attr, 7, int64_t(111), 10); +} + +void populate_string(AttributeVector::SP attr_ptr) { + StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); + set_doc(attr, 1, "foo", 20); + set_doc(attr, 5, "foo", 5); + set_doc(attr, 7, "foo", 10); +} + +struct LongFixture { + AttributeVector::SP attr; + const IDocumentWeightAttribute *api; + LongFixture() : attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)), + api(attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(api != nullptr); + add_docs(attr); + populate_long(attr); + } +}; + +struct StringFixture { + AttributeVector::SP attr; + const IDocumentWeightAttribute *api; + StringFixture() : attr(make_attribute(BasicType::STRING, CollectionType::WSET, true)), + api(attr->asDocumentWeightAttribute()) + { + ASSERT_TRUE(api != nullptr); + add_docs(attr); + populate_string(attr); + } +}; + +TEST("require that appropriate attributes support the document weight attribute interface") { + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, true)->asDocumentWeightAttribute() != nullptr); +} + +TEST("require that inappropriate attributes do not support the document weight attribute interface") { + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::WSET, 
false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT64, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::WSET, false)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::SINGLE, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::STRING, CollectionType::ARRAY, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::INT32, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr); + EXPECT_TRUE(make_attribute(BasicType::DOUBLE, CollectionType::WSET, true)->asDocumentWeightAttribute() == nullptr); +} + +void verify_valid_lookup(IDocumentWeightAttribute::LookupResult result) { + EXPECT_TRUE(result.posting_idx.valid()); + EXPECT_EQUAL(3u, result.posting_size); + EXPECT_EQUAL(5, result.min_weight); + EXPECT_EQUAL(20, result.max_weight); +} + +void verify_invalid_lookup(IDocumentWeightAttribute::LookupResult result) { + EXPECT_FALSE(result.posting_idx.valid()); + EXPECT_EQUAL(0u, result.posting_size); + EXPECT_EQUAL(0, result.min_weight); + EXPECT_EQUAL(0, result.max_weight); +} + +TEST_F("require that integer lookup works correctly", LongFixture) { + verify_valid_lookup(f1.api->lookup("111")); + verify_invalid_lookup(f1.api->lookup("222")); +} + +TEST_F("require string lookup works correctly", StringFixture) { + verify_valid_lookup(f1.api->lookup("foo")); + verify_invalid_lookup(f1.api->lookup("bar")); +} + +void verify_posting(const IDocumentWeightAttribute 
&api, const char *term) { + auto result = api.lookup(term); + ASSERT_TRUE(result.posting_idx.valid()); + std::vector<DocumentWeightIterator> itr_store; + api.create(result.posting_idx, itr_store); + ASSERT_EQUAL(1u, itr_store.size()); + { + DocumentWeightIterator &itr = itr_store[0]; + if (itr.valid() && itr.getKey() < 1) { + itr.linearSeek(1); + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1u, itr.getKey()); // docid + EXPECT_EQUAL(20, itr.getData()); // weight + itr.linearSeek(2); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(5u, itr.getKey()); // docid + EXPECT_EQUAL(5, itr.getData()); // weight + itr.linearSeek(6); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(7u, itr.getKey()); // docid + EXPECT_EQUAL(10, itr.getData()); // weight + itr.linearSeek(8); + EXPECT_FALSE(itr.valid()); + } +} + +TEST_F("require that integer iterators are created correctly", LongFixture) { + verify_posting(*f1.api, "111"); +} + +TEST_F("require that string iterators are created correctly", StringFixture) { + verify_posting(*f1.api, "foo"); +} + +TEST("verify init range for document weight search iterator") { + search::test::InitRangeVerifier ir; + AttributeVector::SP attr(make_attribute(BasicType::INT64, CollectionType::WSET, true)); + add_docs(attr, ir.getDocIdLimit()); + auto docids = ir.getExpectedDocIds(); + IntegerAttribute *int_attr = static_cast<IntegerAttribute *>(attr.get()); + for (auto docid: docids) { + set_doc(int_attr, docid, int64_t(123), 1); + } + const IDocumentWeightAttribute *api(attr->asDocumentWeightAttribute()); + ASSERT_TRUE(api != nullptr); + auto dict_entry = api->lookup("123"); + ASSERT_TRUE(dict_entry.posting_idx.valid()); + fef::TermFieldMatchData tfmd; + queryeval::DocumentWeightSearchIterator itr(tfmd, *api, dict_entry); + ir.verify(itr); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/enumeratedsave/.gitignore b/searchlib/src/tests/attribute/enumeratedsave/.gitignore new file mode 100644 index 00000000000..a4680f95f72 --- 
/dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/.gitignore @@ -0,0 +1,127 @@ +/double_a0_e.udat +/double_a0_ee.udat +/double_a1_e.udat +/double_a1_ee.udat +/double_a2_e.udat +/double_a2_ee.udat +/double_sv0_e.udat +/double_sv0_ee.udat +/double_sv1_e.udat +/double_sv1_ee.udat +/double_sv2_e.udat +/double_sv2_ee.udat +/double_ws0_e.udat +/double_ws0_ee.udat +/double_ws1_e.udat +/double_ws1_ee.udat +/double_ws2_e.udat +/double_ws2_ee.udat +/float_a0_e.udat +/float_a0_ee.udat +/float_a1_e.udat +/float_a1_ee.udat +/float_a2_e.udat +/float_a2_ee.udat +/float_sv0_e.udat +/float_sv0_ee.udat +/float_sv1_e.udat +/float_sv1_ee.udat +/float_sv2_e.udat +/float_sv2_ee.udat +/float_ws0_e.udat +/float_ws0_ee.udat +/float_ws1_e.udat +/float_ws1_ee.udat +/float_ws2_e.udat +/float_ws2_ee.udat +/int16_a0_e.udat +/int16_a0_ee.udat +/int16_a1_e.udat +/int16_a1_ee.udat +/int16_a2_e.udat +/int16_a2_ee.udat +/int16_sv0_e.udat +/int16_sv0_ee.udat +/int16_sv1_e.udat +/int16_sv1_ee.udat +/int16_sv2_e.udat +/int16_sv2_ee.udat +/int16_ws0_e.udat +/int16_ws0_ee.udat +/int16_ws1_e.udat +/int16_ws1_ee.udat +/int16_ws2_e.udat +/int16_ws2_ee.udat +/int32_a0_e.udat +/int32_a0_ee.udat +/int32_a1_e.udat +/int32_a1_ee.udat +/int32_a2_e.udat +/int32_a2_ee.udat +/int32_sv0_e.udat +/int32_sv0_ee.udat +/int32_sv1_e.udat +/int32_sv1_ee.udat +/int32_sv2_e.udat +/int32_sv2_ee.udat +/int32_ws0_e.udat +/int32_ws0_ee.udat +/int32_ws1_e.udat +/int32_ws1_ee.udat +/int32_ws2_e.udat +/int32_ws2_ee.udat +/int64_a0_e.udat +/int64_a0_ee.udat +/int64_a1_e.udat +/int64_a1_ee.udat +/int64_a2_e.udat +/int64_a2_ee.udat +/int64_sv0_e.udat +/int64_sv0_ee.udat +/int64_sv1_e.udat +/int64_sv1_ee.udat +/int64_sv2_e.udat +/int64_sv2_ee.udat +/int64_ws0_e.udat +/int64_ws0_ee.udat +/int64_ws1_e.udat +/int64_ws1_ee.udat +/int64_ws2_e.udat +/int64_ws2_ee.udat +/int8_a0_e.udat +/int8_a0_ee.udat +/int8_a1_e.udat +/int8_a1_ee.udat +/int8_a2_e.udat +/int8_a2_ee.udat +/int8_sv0_e.udat +/int8_sv0_ee.udat +/int8_sv1_e.udat 
+/int8_sv1_ee.udat +/int8_sv2_e.udat +/int8_sv2_ee.udat +/int8_ws0_e.udat +/int8_ws0_ee.udat +/int8_ws1_e.udat +/int8_ws1_ee.udat +/int8_ws2_e.udat +/int8_ws2_ee.udat +/str_a0_e.udat +/str_a0_ee.udat +/str_a1_e.udat +/str_a1_ee.udat +/str_a2_e.udat +/str_a2_ee.udat +/str_sv0_e.udat +/str_sv0_ee.udat +/str_sv1_e.udat +/str_sv1_ee.udat +/str_sv2_e.udat +/str_sv2_ee.udat +/str_ws0_e.udat +/str_ws0_ee.udat +/str_ws1_e.udat +/str_ws1_ee.udat +/str_ws2_e.udat +/str_ws2_ee.udat +searchlib_enumeratedsave_test_app diff --git a/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt new file mode 100644 index 00000000000..0dbb59043c1 --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_enumeratedsave_test_app + SOURCES + enumeratedsave_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_enumeratedsave_test_app COMMAND searchlib_enumeratedsave_test_app) diff --git a/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp new file mode 100644 index 00000000000..312814eb55a --- /dev/null +++ b/searchlib/src/tests/attribute/enumeratedsave/enumeratedsave_test.cpp @@ -0,0 +1,944 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/document/fieldvalue/intfieldvalue.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefile.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributememorysavetarget.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/attrvector.h> +#include <vespa/searchlib/attribute/attributefilesavetarget.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/util/randomgenerator.h> +#include <vespa/searchlib/util/bufferwriter.h> +#include <vespa/searchlib/attribute/attributememoryfilebufferwriter.h> + +#include <vespa/searchlib/attribute/attributevector.hpp> + +#include <vespa/log/log.h> +LOG_SETUP("enumeratedsave_test"); +#include <limits> +#include <string> +#include <iostream> + + +using search::attribute::BasicType; +using search::attribute::CollectionType; +using search::attribute::Config; +using search::AttributeFactory; +using search::FloatingPointAttribute; +using search::IntegerAttribute; +using search::StringAttribute; +using search::RandomGenerator; +using search::ParseItem; +using search::fef::TermFieldMatchData; +using search::IAttributeFileWriter; +using search::BufferWriter; +using search::AttributeMemoryFileBufferWriter; + +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr; + +bool +FastOS_UNIX_File::Sync(void) +{ + // LOG(info, "Skip sync"); + return true; +} + 
+ +class MemAttrFileWriter : public IAttributeFileWriter +{ +private: + Buffer _buf; + +public: + MemAttrFileWriter() + : _buf() + { + } + + virtual Buffer allocBuf(size_t size) override { + return std::make_unique<BufferBuf>(size, 4096); + } + + virtual void writeBuf(Buffer buf_in) override { + if (!_buf) { + _buf = std::move(buf_in); + } else { + _buf->writeBytes(buf_in->getData(), buf_in->getDataLen()); + } + } + + const Buffer &buf() const { return _buf; } + + std::unique_ptr<BufferWriter> allocBufferWriter() override; +}; + +std::unique_ptr<BufferWriter> +MemAttrFileWriter::allocBufferWriter() +{ + if (!_buf) { + _buf = allocBuf(1); + } + return std::make_unique<AttributeMemoryFileBufferWriter>(*this); +} + +class MemAttr : public search::IAttributeSaveTarget +{ +private: + MemAttrFileWriter _datWriter; + MemAttrFileWriter _idxWriter; + MemAttrFileWriter _weightWriter; + MemAttrFileWriter _udatWriter; + +public: + typedef std::shared_ptr<MemAttr> SP; + + MemAttr(void) + : _datWriter(), + _idxWriter(), + _weightWriter(), + _udatWriter() + { + } + + // Implements IAttributeSaveTarget + virtual bool setup() { return true; } + virtual void close() {} + virtual IAttributeFileWriter &datWriter() override { return _datWriter; } + virtual IAttributeFileWriter &idxWriter() override { return _idxWriter; } + virtual IAttributeFileWriter &weightWriter() override { + return _weightWriter; + } + virtual IAttributeFileWriter &udatWriter() override { return _udatWriter; } + + bool + bufEqual(const Buffer &lhs, const Buffer &rhs) const; + + bool + operator==(const MemAttr &rhs) const; +}; + +class EnumeratedSaveTest +{ +private: + typedef AttributeVector::SP AttributePtr; + + template <typename VectorType> + VectorType & + as(AttributePtr &v); + + IntegerAttribute & + asInt(AttributePtr &v); + + StringAttribute & + asString(AttributePtr &v); + + FloatingPointAttribute & + asFloat(AttributePtr &v); + + void + addDocs(const AttributePtr &v, size_t sz); + + template <typename 
VectorType> + void populate(VectorType &v, unsigned seed, BasicType bt); + + template <typename VectorType, typename BufferType> + void compare(VectorType &a, VectorType &b); + + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template <typename V, typename T> + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix); + + template <typename V> + SearchContextPtr + getSearch(const V & vec); + + MemAttr::SP + saveMem(AttributeVector &v); + + void + checkMem(AttributeVector &v, const MemAttr &e, bool enumerated); + + MemAttr::SP + saveBoth(AttributePtr v); + + AttributePtr + make(Config cfg, + const vespalib::string &pref, + bool fastSearch = false); + + void + load(AttributePtr v, const vespalib::string &name); + + template <typename VectorType, typename BufferType> + void + checkLoad(AttributePtr v, + const vespalib::string &name, + AttributePtr ev); + + template <typename VectorType, typename BufferType> + void + testReload(AttributePtr v0, + AttributePtr v1, + AttributePtr v2, + MemAttr::SP mv0, + MemAttr::SP mv1, + MemAttr::SP mv2, + MemAttr::SP emv0, + MemAttr::SP emv1, + MemAttr::SP emv2, + Config cfg, + const vespalib::string &pref, + bool fastSearch); + +public: + template <typename VectorType, typename BufferType> + void + test(BasicType bt, CollectionType ct, const vespalib::string &pref); + + EnumeratedSaveTest() + { + } +}; + + +bool +MemAttr::bufEqual(const Buffer &lhs, const Buffer &rhs) const +{ + if (!EXPECT_TRUE((lhs.get() != NULL) == (rhs.get() != NULL))) + return false; + if (lhs.get() == NULL) + return true; + if (!EXPECT_TRUE(lhs->getDataLen() == rhs->getDataLen())) + return false; + if (!EXPECT_TRUE(memcmp(lhs->getData(), rhs->getData(), + lhs->getDataLen()) == 0)) + return false; + return true; +} + +bool +MemAttr::operator==(const MemAttr &rhs) const +{ + if (!EXPECT_TRUE(bufEqual(_datWriter.buf(), rhs._datWriter.buf()))) + return false; + 
if (!EXPECT_TRUE(bufEqual(_idxWriter.buf(), rhs._idxWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_weightWriter.buf(), rhs._weightWriter.buf()))) + return false; + if (!EXPECT_TRUE(bufEqual(_udatWriter.buf(), rhs._udatWriter.buf()))) + return false; + return true; +} + + +void +EnumeratedSaveTest::addDocs(const AttributePtr &v, size_t sz) +{ + if (sz) { + AttributeVector::DocId docId; + for(size_t i(0); i< sz; i++) { + EXPECT_TRUE( v->addDoc(docId) ); + } + EXPECT_TRUE( docId+1 == sz ); + EXPECT_TRUE( v->getNumDocs() == sz ); + v->commit(true); + } +} + + +template <> +void +EnumeratedSaveTest::populate(IntegerAttribute &v, unsigned seed, + BasicType bt) +{ + search::Rand48 rnd; + IntegerAttribute::largeint_t mask(std::numeric_limits + <IntegerAttribute::largeint_t>::max()); + switch (bt.type()) { + case BasicType::INT8: + mask = 0x7f; + break; + case BasicType::INT16: + mask = 0x7fff; + break; + default: + ; + } + rnd.srand48(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -42) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.lrand48() & mask, weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1); + ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) == + i + 1); + } + } else { + EXPECT_TRUE( v.update(i, lrand48() & mask) ); + } + } + v.commit(); +} + + +template <> +void +EnumeratedSaveTest::populate(FloatingPointAttribute &v, unsigned seed, + BasicType bt) +{ + (void) bt; + search::Rand48 rnd; + rnd.srand48(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i 
== 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42.0, 27); + v.append(i, -43.0, 14); + v.append(i, -42.0, -3); + } else { + EXPECT_TRUE( v.update(i, -42.0) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = (rand() % 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.lrand48(), weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1); + ASSERT_TRUE(static_cast<uint32_t>(v.getValueCount(i)) == + i + 1); + } + } else { + EXPECT_TRUE( v.update(i, lrand48()) ); + } + } + v.commit(); +} + + +template <> +void +EnumeratedSaveTest::populate(StringAttribute &v, unsigned seed, + BasicType bt) +{ + (void) bt; + RandomGenerator rnd(seed); + int weight = 1; + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foO", -3); + } else { + EXPECT_TRUE( v.update(i, "foo") ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + if (v.hasWeightedSetType()) { + weight = rnd.rand(0, 256) - 128; + } + for (size_t j(0); j <= i; j++) { + EXPECT_TRUE( v.append(i, rnd.getRandomString(2, 50), weight) ); + } + v.commit(); + if (!v.hasWeightedSetType()) { + EXPECT_EQUAL(static_cast<uint32_t>(v.getValueCount(i)), i + 1); + } + } else { + EXPECT_TRUE( v.update(i, rnd.getRandomString(2, 50)) ); + } + } + v.commit(); +} + +namespace +{ + +template <typename T> +inline bool +equalsHelper(const T &lhs, const T &rhs) +{ + return lhs == rhs; +} + +template <> +inline bool +equalsHelper<float>(const float &lhs, const float &rhs) +{ + if (std::isnan(lhs)) + return std::isnan(rhs); + if (std::isnan(rhs)) + return false; + return lhs == rhs; +} + +template <> +inline bool +equalsHelper<double>(const double &lhs, const double &rhs) +{ + if (std::isnan(lhs)) + return 
std::isnan(rhs); + if (std::isnan(rhs)) + return false; + return lhs == rhs; +} + +} + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::compare(VectorType &a, VectorType &b) +{ + EXPECT_EQUAL(a.getNumDocs(), b.getNumDocs()); + ASSERT_TRUE(a.getNumDocs() == b.getNumDocs()); + // EXPECT_EQUAL(a.getMaxValueCount(), b.getMaxValueCount()); + EXPECT_EQUAL(a.getCommittedDocIdLimit(), b.getCommittedDocIdLimit()); + uint32_t asz(a.getMaxValueCount()); + uint32_t bsz(b.getMaxValueCount()); + BufferType *av = new BufferType[asz]; + BufferType *bv = new BufferType[bsz]; + + for (size_t i(0), m(a.getNumDocs()); i < m; i++) { + ASSERT_TRUE(asz >= static_cast<uint32_t>(a.getValueCount(i))); + ASSERT_TRUE(bsz >= static_cast<uint32_t>(b.getValueCount(i))); + EXPECT_EQUAL(a.getValueCount(i), b.getValueCount(i)); + ASSERT_TRUE(a.getValueCount(i) == b.getValueCount(i)); + EXPECT_EQUAL(static_cast<const AttributeVector &>(a).get(i, av, asz), + static_cast<uint32_t>(a.getValueCount(i))); + EXPECT_EQUAL(static_cast<const AttributeVector &>(b).get(i, bv, bsz), + static_cast<uint32_t>(b.getValueCount(i))); + for(size_t j(0), k(std::min(a.getValueCount(i), b.getValueCount(i))); + j < k; j++) { + EXPECT_TRUE(equalsHelper(av[j], bv[j])); + } + } + delete [] bv; + delete [] av; +} + + +template <typename VectorType> +VectorType & +EnumeratedSaveTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast<VectorType *>(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +EnumeratedSaveTest::asInt(AttributePtr &v) +{ + return as<IntegerAttribute>(v); +} + + +StringAttribute & +EnumeratedSaveTest::asString(AttributePtr &v) +{ + return as<StringAttribute>(v); +} + + +FloatingPointAttribute & +EnumeratedSaveTest::asFloat(AttributePtr &v) +{ + return as<FloatingPointAttribute>(v); +} + + +void +EnumeratedSaveTest::buildTermQuery(std::vector<char> &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + 
uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <typename V, typename T> +SearchContextPtr +EnumeratedSaveTest::getSearch(const V &vec, const T &term, bool prefix) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast<const AttributeVector &>(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch<IntegerAttribute>(const IntegerAttribute &v) +{ + return getSearch<IntegerAttribute>(v, "[-42;-42]", false); +} + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch<FloatingPointAttribute>(const FloatingPointAttribute &v) +{ + return getSearch<FloatingPointAttribute>(v, "[-42.0;-42.0]", false); +} + +template <> +SearchContextPtr +EnumeratedSaveTest::getSearch<StringAttribute>(const StringAttribute &v) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false); +} + +MemAttr::SP +EnumeratedSaveTest::saveMem(AttributeVector &v) +{ + MemAttr::SP res(new MemAttr); + EXPECT_TRUE(v.save(*res)); + return res; +} + + +void +EnumeratedSaveTest::checkMem(AttributeVector &v, const MemAttr &e, + bool enumerated) +{ + MemAttr m; + v.enableEnumeratedSave(enumerated); + EXPECT_TRUE(v.save(m)); + v.enableEnumeratedSave(false); + ASSERT_TRUE(m == e); +} + + +MemAttr::SP +EnumeratedSaveTest::saveBoth(AttributePtr v) +{ + 
EXPECT_TRUE(v->save()); + vespalib::string basename = v->getBaseFileName(); + AttributePtr v2 = make(v->getConfig(), basename, true); + EXPECT_TRUE(v2->load()); + v2->enableEnumeratedSave(true); + EXPECT_TRUE(v2->saveAs(basename + "_e")); + if ((v->getConfig().basicType() == BasicType::INT32 && + v->getConfig().collectionType() == CollectionType::WSET) || true) { + search::AttributeMemorySaveTarget ms; + search::TuneFileAttributes tune; + search::index::DummyFileHeaderContext fileHeaderContext; + EXPECT_TRUE(v2->saveAs(basename + "_ee", ms)); + EXPECT_TRUE(ms.writeToFile(tune, fileHeaderContext)); + } + return saveMem(*v2); +} + + +EnumeratedSaveTest::AttributePtr +EnumeratedSaveTest::make(Config cfg, + const vespalib::string &pref, + bool fastSearch) +{ + cfg.setFastSearch(fastSearch); + AttributePtr v = AttributeFactory::createAttribute(pref, cfg); + return v; +} + + +void +EnumeratedSaveTest::load(AttributePtr v, const vespalib::string &name) +{ + v->setBaseFileName(name); + EXPECT_TRUE(v->load()); +} + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::checkLoad(AttributePtr v, const vespalib::string &name, + AttributePtr ev) +{ + v->setBaseFileName(name); + EXPECT_TRUE(v->load()); + compare<VectorType, BufferType>(as<VectorType>(v), as<VectorType>(ev)); +} + + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::testReload(AttributePtr v0, + AttributePtr v1, + AttributePtr v2, + MemAttr::SP mv0, + MemAttr::SP mv1, + MemAttr::SP mv2, + MemAttr::SP emv0, + MemAttr::SP emv1, + MemAttr::SP emv2, + Config cfg, + const vespalib::string &pref, + bool fastSearch) +{ + // typedef AttributePtr AVP; + + bool flagAttr = + cfg.collectionType() == CollectionType::ARRAY && + cfg.basicType() == BasicType::INT8 && + fastSearch; + bool supportsEnumerated = (fastSearch || + cfg.basicType() == BasicType::STRING) && + !flagAttr; + + + AttributePtr v = make(cfg, pref, fastSearch); + TEST_DO((checkLoad<VectorType, 
BufferType>(v, pref + "0", v0))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2", v2))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0))); + + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0", v0))); + TEST_DO(checkMem(*v, *mv0, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1", v1))); + TEST_DO(checkMem(*v, *mv1, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2", v2))); + TEST_DO(checkMem(*v, *mv2, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv2 : *mv2, true)); + + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1))); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0))); + + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "0_e", v0))); + TEST_DO(checkMem(*v, *mv0, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv0 : *mv0, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "1_e", v1))); + TEST_DO(checkMem(*v, *mv1, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? *emv1 : *mv1, true)); + TEST_DO((checkLoad<VectorType, BufferType>(v, pref + "2_e", v2))); + TEST_DO(checkMem(*v, *mv2, false)); + TEST_DO(checkMem(*v, supportsEnumerated ? 
*emv2 : *mv2, true)); + + TermFieldMatchData md; + SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(v)); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + sb->initFullRange(); + sb->seek(1u); + EXPECT_EQUAL(7u, sb->getDocId()); + sb->unpack(7u); + EXPECT_EQUAL(md.getDocId(), 7u); + if (v->getCollectionType() == CollectionType::SINGLE || + flagAttr) { + EXPECT_EQUAL(md.getWeight(), 1); + } else if (v->getCollectionType() == CollectionType::ARRAY) { + EXPECT_EQUAL(md.getWeight(), 2); + } else { + if (cfg.basicType() == BasicType::STRING) { + EXPECT_EQUAL(md.getWeight(), 24); + } else { + EXPECT_EQUAL(md.getWeight(), -3); + } + } +} + + +template <typename VectorType, typename BufferType> +void +EnumeratedSaveTest::test(BasicType bt, CollectionType ct, + const vespalib::string &pref) +{ + Config cfg(bt, ct); + AttributePtr v0 = AttributeFactory::createAttribute(pref + "0", cfg); + AttributePtr v1 = AttributeFactory::createAttribute(pref + "1", cfg); + AttributePtr v2 = AttributeFactory::createAttribute(pref + "2", cfg); + + addDocs(v0, 0); + addDocs(v1, 10); + addDocs(v2, 30); + + populate(as<VectorType>(v0), 0, bt); + populate(as<VectorType>(v1), 10, bt); + populate(as<VectorType>(v2), 30, bt); + + MemAttr::SP mv0 = saveMem(*v0); + MemAttr::SP mv1 = saveMem(*v1); + MemAttr::SP mv2 = saveMem(*v2); + + MemAttr::SP emv0 = saveBoth(v0); + MemAttr::SP emv1 = saveBoth(v1); + MemAttr::SP emv2 = saveBoth(v2); + + AttributePtr v = make(cfg, pref, true); + checkLoad<VectorType, BufferType>(v, pref + "0_ee", v0); + checkLoad<VectorType, BufferType>(v, pref + "1_ee", v1); + checkLoad<VectorType, BufferType>(v, pref + "2_ee", v2); + v.reset(); + + TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, + false))); + TEST_DO((testReload<VectorType, BufferType>(v0, v1, v2, + mv0, mv1, mv2, + emv0, emv1, emv2, + cfg, pref, + true))); +} + +TEST_F("Test enumerated save with single value int8", 
EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT8, + CollectionType::SINGLE, + "int8_sv"); +} + +TEST_F("Test enumerated save with array value int8", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT8, + CollectionType::ARRAY, + "int8_a"); +} + +TEST_F("Test enumerated save with weighted set value int8", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT8, + CollectionType::WSET, + "int8_ws"); +} + +TEST_F("Test enumerated save with single value int16", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT16, + CollectionType::SINGLE, + "int16_sv"); +} + +TEST_F("Test enumerated save with array value int16", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT16, + CollectionType::ARRAY, + "int16_a"); +} + +TEST_F("Test enumerated save with weighted set value int16", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT16, + CollectionType::WSET, + "int16_ws"); +} + +TEST_F("Test enumerated save with single value int32", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::SINGLE, + "int32_sv"); +} + +TEST_F("Test enumerated save with array value int32", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT32, + CollectionType::ARRAY, + "int32_a"); +} + +TEST_F("Test enumerated save with weighted set value int32", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT32, + CollectionType::WSET, + "int32_ws"); +} + +TEST_F("Test enumerated save with single value int64", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + 
IntegerAttribute::largeint_t>(BasicType::INT64, + CollectionType::SINGLE, + "int64_sv"); +} + +TEST_F("Test enumerated save with array value int64", EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::largeint_t>(BasicType::INT64, + CollectionType::ARRAY, + "int64_a"); +} + +TEST_F("Test enumerated save with weighted set value int64", + EnumeratedSaveTest) +{ + f.template test<IntegerAttribute, + IntegerAttribute::WeightedInt>(BasicType::INT64, + CollectionType::WSET, + "int64_ws"); +} + +TEST_F("Test enumerated save with single value float", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::FLOAT, + CollectionType::SINGLE, + "float_sv"); +} + +TEST_F("Test enumerated save with array value float", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::FLOAT, + CollectionType::ARRAY, + "float_a"); +} + +TEST_F("Test enumerated save with weighted set value float", + EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>( + BasicType::FLOAT, + CollectionType::WSET, + "float_ws"); +} + + +TEST_F("Test enumerated save with single value double", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::SINGLE, + "double_sv"); +} + +TEST_F("Test enumerated save with array value double", EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + double>(BasicType::DOUBLE, + CollectionType::ARRAY, + "double_a"); +} + +TEST_F("Test enumerated save with weighted set value double", + EnumeratedSaveTest) +{ + f.template test<FloatingPointAttribute, + FloatingPointAttribute::WeightedFloat>( + BasicType::DOUBLE, + CollectionType::WSET, + "double_ws"); +} + + +TEST_F("Test enumerated save with single value string", EnumeratedSaveTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::SINGLE, + "str_sv"); +} + +TEST_F("Test enumerated 
save with array value string", EnumeratedSaveTest) +{ + f.template test<StringAttribute, + vespalib::string>(BasicType::STRING, + CollectionType::ARRAY, + "str_a"); +} + +TEST_F("Test enumerated save with weighted set value string", + EnumeratedSaveTest) +{ + f.template test<StringAttribute, + StringAttribute::WeightedString>( + BasicType::STRING, + CollectionType::WSET, + "str_ws"); +} + +TEST_MAIN() +{ + AttributeVector::enableEnumeratedLoad(); + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/attribute/enumstore/.gitignore b/searchlib/src/tests/attribute/enumstore/.gitignore new file mode 100644 index 00000000000..c58a018bbd9 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +enumstore_test +searchlib_enumstore_test_app diff --git a/searchlib/src/tests/attribute/enumstore/CMakeLists.txt b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt new file mode 100644 index 00000000000..33190553747 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_enumstore_test_app + SOURCES + enumstore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_enumstore_test_app COMMAND searchlib_enumstore_test_app) diff --git a/searchlib/src/tests/attribute/enumstore/DESC b/searchlib/src/tests/attribute/enumstore/DESC new file mode 100644 index 00000000000..514c9a47caf --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/DESC @@ -0,0 +1 @@ +This is a test for the EnumStore class. 
diff --git a/searchlib/src/tests/attribute/enumstore/FILES b/searchlib/src/tests/attribute/enumstore/FILES new file mode 100644 index 00000000000..6fdb2381292 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/FILES @@ -0,0 +1 @@ +enumstore.cpp diff --git a/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp new file mode 100644 index 00000000000..e63889bbeb8 --- /dev/null +++ b/searchlib/src/tests/attribute/enumstore/enumstore_test.cpp @@ -0,0 +1,879 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("enumstore_test"); +#include <vespa/vespalib/testkit/testapp.h> +//#define LOG_ENUM_STORE +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <limits> +#include <string> +#include <iostream> + +namespace search { + +size_t enumStoreAlign(size_t size) +{ + return (size + 15) & -UINT64_C(16); +} + +// EnumStoreBase::Index(0,0) is reserved thus 16 bytes are reserved in buffer 0 +const uint32_t RESERVED_BYTES = 16u; +typedef EnumStoreT<NumericEntryType<uint32_t> > NumericEnumStore; + +class EnumStoreTest : public vespalib::TestApp +{ +private: + typedef EnumStoreT<StringEntryType> StringEnumStore; + typedef EnumStoreT<NumericEntryType<float> > FloatEnumStore; + typedef EnumStoreT<NumericEntryType<double> > DoubleEnumStore; + + typedef EnumStoreBase::Index EnumIndex; + typedef vespalib::GenerationHandler::generation_t generation_t; + + void testIndex(); + void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string); + void fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value); + void testStringEntry(); + void testNumericEntry(); + + template <typename EnumStoreType, typename T> + void testFloatEnumStore(EnumStoreType & es); + void testFloatEnumStore(); + + void testAddEnum(); + 
template <typename EnumStoreType> + void testAddEnum(bool hasPostings); + + template <typename EnumStoreType, typename Dictionary> + void + testUniques(const EnumStoreType &ses, + const std::vector<std::string> &unique); + + + void testCompaction(); + template <typename EnumStoreType> + void testCompaction(bool hasPostings, bool disableReEnumerate); + + void testReset(); + template <typename EnumStoreType> + void testReset(bool hasPostings); + + void testHoldListAndGeneration(); + void testMemoryUsage(); + void requireThatAddressSpaceUsageIsReported(); + void testBufferLimit(); + + // helper methods + typedef std::vector<std::string> StringVector; + template <typename T> + T random(T low, T high); + std::string getRandomString(uint32_t minLen, uint32_t maxLen); + StringVector fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen); + StringVector sortRandomStrings(StringVector & strings); + + struct StringEntry { + StringEntry(uint32_t e, uint32_t r, const std::string & s) : + _enum(e), _refCount(r), _string(s) {} + uint32_t _enum; + uint32_t _refCount; + std::string _string; + }; + + struct Reader { + typedef StringEnumStore::Index Index; + typedef std::vector<Index> IndexVector; + typedef std::vector<StringEntry> ExpectedVector; + uint32_t _generation; + IndexVector _indices; + ExpectedVector _expected; + Reader(uint32_t generation, const IndexVector & indices, + const ExpectedVector & expected) : + _generation(generation), _indices(indices), _expected(expected) {} + }; + + void + checkReaders(const StringEnumStore &ses, + generation_t sesGen, + const std::vector<Reader> &readers); + +public: + EnumStoreTest() {} + int Main(); +}; + +void +EnumStoreTest::testIndex() +{ + { + StringEnumStore::Index idx; + EXPECT_TRUE( ! 
idx.valid()); + EXPECT_EQUAL(idx.offset(), 0u); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx(enumStoreAlign(1000), 0); + EXPECT_TRUE(idx.offset() == enumStoreAlign(1000)); + EXPECT_TRUE(idx.bufferId() == 0); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 31)- RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 31) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 33) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 33) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + StringEnumStore::Index idx((UINT64_C(1) << 35) - RESERVED_BYTES, 1); + EXPECT_TRUE(idx.offset() == (UINT64_C(1) << 35) - RESERVED_BYTES); + EXPECT_TRUE(idx.bufferId() == 1); + } + { + // Change offsets when alignment changes. + StringEnumStore::Index idx1(48, 0); + StringEnumStore::Index idx2(80, 0); + StringEnumStore::Index idx3(48, 0); + EXPECT_TRUE(!(idx1 == idx2)); + EXPECT_TRUE(idx1 == idx3); + } + { + EXPECT_TRUE(StringEnumStore::Index::numBuffers() == 2); + } +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + const std::string & string) +{ + StringEnumStore::insertEntry(data, enumValue, refCount, string.c_str()); +} + +void +EnumStoreTest::fillDataBuffer(char * data, uint32_t enumValue, uint32_t refCount, + uint32_t value) +{ + NumericEnumStore::insertEntry(data, enumValue, refCount, value); +} + +void +EnumStoreTest::testStringEntry() +{ + { + char data[9]; + fillDataBuffer(data, 0, 0, ""); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("") == + StringEnumStore::alignEntrySize(8 + 1)); + + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 0); + EXPECT_TRUE(e.getRefCount() == 1); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() 
== 0); + EXPECT_TRUE(e.getRefCount() == 0); + EXPECT_TRUE(strcmp(e.getValue(), "") == 0); + } + { + char data[18]; + fillDataBuffer(data, 10, 5, "enumstore"); + StringEnumStore::Entry e(data); + EXPECT_TRUE(StringEnumStore::getEntrySize("enumstore") == + StringEnumStore::alignEntrySize(8 + 1 + 9)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 6); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 5); + EXPECT_TRUE(strcmp(e.getValue(), "enumstore") == 0); + } +} + +void +EnumStoreTest::testNumericEntry() +{ + { + char data[12]; + fillDataBuffer(data, 10, 20, 30); + NumericEnumStore::Entry e(data); + EXPECT_TRUE(NumericEnumStore::getEntrySize(30) == + NumericEnumStore::alignEntrySize(8 + 4)); + + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + + e.incRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 21); + EXPECT_TRUE(e.getValue() == 30); + e.decRefCount(); + EXPECT_TRUE(e.getEnum() == 10); + EXPECT_TRUE(e.getRefCount() == 20); + EXPECT_TRUE(e.getValue() == 30); + } +} + +template <typename EnumStoreType, typename T> +void +EnumStoreTest::testFloatEnumStore(EnumStoreType & es) +{ + EnumIndex idx; + + T a[5] = {-20.5f, -10.5f, -0.5f, 9.5f, 19.5f}; + T b[5] = {-25.5f, -15.5f, -5.5f, 4.5f, 14.5f}; + + for (uint32_t i = 0; i < 5; ++i) { + es.addEnum(a[i], idx); + } + + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } + + es.addEnum(std::numeric_limits<T>::quiet_NaN(), idx); + EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx)); + EXPECT_TRUE(es.findIndex(std::numeric_limits<T>::quiet_NaN(), idx)); + + for (uint32_t i = 0; i < 5; ++i) { + 
EXPECT_TRUE(es.findIndex(a[i], idx)); + EXPECT_TRUE(!es.findIndex(b[i], idx)); + } +} + +void +EnumStoreTest::testFloatEnumStore() +{ + { + FloatEnumStore fes(1000, false); + testFloatEnumStore<FloatEnumStore, float>(fes); + } + { + DoubleEnumStore des(1000, false); + testFloatEnumStore<DoubleEnumStore, double>(des); + } +} + +void +EnumStoreTest::testAddEnum() +{ + testAddEnum<StringEnumStore>(false); + + testAddEnum<StringEnumStore>(true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testAddEnum(bool hasPostings) +{ + EnumStoreType ses(100, hasPostings); + EXPECT_EQUAL(enumStoreAlign(100u) + RESERVED_BYTES, + ses.getBuffer(0).capacity()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(enumStoreAlign(100u), ses.getBuffer(0).remaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + EnumIndex idx; + uint64_t offset = ses.getBuffer(0).size(); + std::vector<EnumIndex> indices; + std::vector<std::string> unique; + unique.push_back(""); + unique.push_back("add"); + unique.push_back("enumstore"); + unique.push_back("unique"); + + for (uint32_t i = 0; i < unique.size(); ++i) { + ses.addEnum(unique[i].c_str(), idx); + EXPECT_EQUAL(offset, idx.offset()); + EXPECT_EQUAL(0u, idx.bufferId()); + ses.incRefCount(idx); + EXPECT_EQUAL(1u, ses.getRefCount(idx)); + indices.push_back(idx); + offset += EnumStoreType::alignEntrySize(unique[i].size() + 1 + 8); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(ses.getLastEnum() == i); + } + ses.freezeTree(); + + for (uint32_t i = 0; i < indices.size(); ++i) { + uint32_t e = ses.getEnum(indices[i]); + EXPECT_EQUAL(i, e); + EXPECT_TRUE(ses.findEnum(unique[i].c_str(), e)); + EXPECT_TRUE(ses.getEnum(btree::EntryRef(e)) == i); + EXPECT_TRUE(ses.findIndex(unique[i].c_str(), idx)); + EXPECT_TRUE(idx == indices[i]); + EXPECT_EQUAL(1u, ses.getRefCount(indices[i])); + StringEntryType::Type value = 0; + EXPECT_TRUE(ses.getValue(indices[i], value)); + 
EXPECT_TRUE(strcmp(unique[i].c_str(), value) == 0); + } + + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, unique); + } else { + testUniques<EnumStoreType, EnumTree>(ses, unique); + } +} + +template <typename EnumStoreType, typename Dictionary> +void +EnumStoreTest::testUniques +(const EnumStoreType &ses, const std::vector<std::string> &unique) +{ + const EnumStoreDict<Dictionary> *enumDict = + dynamic_cast<const EnumStoreDict<Dictionary> *> + (&ses.getEnumStoreDict()); + assert(enumDict != NULL); + const Dictionary &dict = enumDict->getDictionary(); + uint32_t i = 0; + EnumIndex idx; + for (typename Dictionary::Iterator iter = dict.begin(); + iter.valid(); ++iter, ++i) { + idx = iter.getKey(); + EXPECT_TRUE(strcmp(unique[i].c_str(), ses.getValue(idx)) == 0); + } + EXPECT_EQUAL(static_cast<uint32_t>(unique.size()), i); +} + + +void +EnumStoreTest::testCompaction() +{ + testCompaction<StringEnumStore>(false, false); + testCompaction<StringEnumStore>(true, false); + testCompaction<StringEnumStore>(false, true); + testCompaction<StringEnumStore>(true, true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testCompaction(bool hasPostings, bool disableReEnumerate) +{ + // entrySize = 15 before alignment + uint32_t entrySize = EnumStoreType::alignEntrySize(15); + uint32_t bufferSize = entrySize * 5; + EnumStoreType ses(bufferSize, hasPostings); + EnumIndex idx; + std::vector<EnumIndex> indices; + typename EnumStoreType::Type t = "foo"; + std::vector<std::string> uniques; + uniques.push_back("enum00"); + uniques.push_back("enum01"); + uniques.push_back("enum02"); + uniques.push_back("enum03"); + uniques.push_back("enum04"); + + // fill with unique values + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.getRemaining() == bufferSize - i * entrySize); + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + indices.push_back(idx); + } + EXPECT_EQUAL(0u, ses.getRemaining()); + 
EXPECT_EQUAL(0u, ses.getBuffer(0).remaining()); + EXPECT_EQUAL(entrySize * 5 + RESERVED_BYTES, ses.getBuffer(0).size()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + uint32_t failEntrySize = ses.getEntrySize("enum05"); + EXPECT_TRUE(failEntrySize > ses.getRemaining()); + + // change from enum00 -> enum01 + ses.decRefCount(indices[0]); + ses.incRefCount(indices[1]); + indices[0] = indices[1]; + + // check correct refcount + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + uint32_t refCount = ses.getRefCount(idx); + if (i == 0) { + EXPECT_TRUE(refCount == 0); + } else if (i == 1) { + EXPECT_TRUE(refCount == 2); + } else { + EXPECT_TRUE(refCount == 1); + } + } + + // free unused enums + ses.freeUnusedEnums(true); + EXPECT_TRUE(!ses.findIndex("enum00", idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + if (disableReEnumerate) { + ses.disableReEnumerate(); + } + EXPECT_TRUE(ses.performCompaction(3 * entrySize)); + if (disableReEnumerate) { + ses.enableReEnumerate(); + } + EXPECT_TRUE(ses.getRemaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).remaining() >= 3 * entrySize); + EXPECT_TRUE(ses.getBuffer(1).size() == entrySize * 4); + EXPECT_TRUE(ses.getBuffer(1)._deadElems == 0); + + EXPECT_EQUAL((disableReEnumerate ? 4u : 3u), ses.getLastEnum()); + + // add new unique strings + ses.addEnum("enum05", idx); + EXPECT_EQUAL((disableReEnumerate ? 5u : 4u), ses.getEnum(idx)); + ses.addEnum("enum06", idx); + EXPECT_EQUAL((disableReEnumerate ? 6u : 5u), ses.getEnum(idx)); + ses.addEnum("enum00", idx); + EXPECT_EQUAL((disableReEnumerate ? 7u : 6u), ses.getEnum(idx)); + + EXPECT_EQUAL((disableReEnumerate ? 
7u : 6u), ses.getLastEnum()); + + // compare old and new indices + for (uint32_t i = 0; i < indices.size(); ++i) { + EXPECT_TRUE(ses.getCurrentIndex(indices[i], idx)); + EXPECT_TRUE(indices[i].bufferId() == 0); + EXPECT_TRUE(idx.bufferId() == 1); + EXPECT_TRUE(ses.getValue(indices[i], t)); + typename EnumStoreType::Type s = "bar"; + EXPECT_TRUE(ses.getValue(idx, s)); + EXPECT_TRUE(strcmp(t, s) == 0); + } + // EnumIndex(0,0) is reserved so we have 4 bytes extra at the start of buffer 0 + EXPECT_TRUE(ses.getCurrentIndex(indices[0], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[0].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[1], idx)); + EXPECT_EQUAL(entrySize + RESERVED_BYTES, indices[1].offset()); + EXPECT_EQUAL(0u, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[2], idx)); + EXPECT_EQUAL(2 * entrySize + RESERVED_BYTES, indices[2].offset()); + EXPECT_EQUAL(entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[3], idx)); + EXPECT_EQUAL(3 * entrySize + RESERVED_BYTES, indices[3].offset()); + EXPECT_EQUAL(2 * entrySize, idx.offset()); + EXPECT_TRUE(ses.getCurrentIndex(indices[4], idx)); + EXPECT_EQUAL(4 * entrySize + RESERVED_BYTES, indices[4].offset()); + EXPECT_EQUAL(3 * entrySize, idx.offset()); +} + +void +EnumStoreTest::testReset() +{ + testReset<StringEnumStore>(false); + + testReset<StringEnumStore>(true); +} + +template <typename EnumStoreType> +void +EnumStoreTest::testReset(bool hasPostings) +{ + uint32_t numUniques = 10000; + srand(123456789); + StringVector rndStrings = fillRandomStrings(numUniques, 10, 15); + EXPECT_EQUAL(rndStrings.size(), size_t(numUniques)); + StringVector uniques = sortRandomStrings(rndStrings); + EXPECT_EQUAL(uniques.size(), size_t(numUniques)); + // max entrySize = 25 before alignment + uint32_t maxEntrySize = EnumStoreType::alignEntrySize(8 + 1 + 16); + EnumStoreType ses(numUniques * maxEntrySize, hasPostings); + EnumIndex idx; + + uint32_t cnt = 0; + // 
add new unique strings + for (StringVector::reverse_iterator iter = uniques.rbegin(); iter != uniques.rend(); ++iter) { + ses.addEnum(iter->c_str(), idx); + EXPECT_EQUAL(ses.getNumUniques(), ++cnt); + } + + // check for unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, uniques); + } else { + testUniques<EnumStoreType, EnumTree>(ses, uniques); + } + + rndStrings = fillRandomStrings(numUniques, 15, 20); + StringVector newUniques = sortRandomStrings(rndStrings); + + typename EnumStoreType::Builder builder; + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + builder.insert(iter->c_str()); + } + + ses.reset(builder); + EXPECT_EQUAL(RESERVED_BYTES, ses.getRemaining()); + + // check for old unique strings + for (StringVector::iterator iter = uniques.begin(); iter != uniques.end(); ++iter) { + EXPECT_TRUE(!ses.findIndex(iter->c_str(), idx)); + } + + // check for new unique strings + for (StringVector::iterator iter = newUniques.begin(); iter != newUniques.end(); ++iter) { + EXPECT_TRUE(ses.findIndex(iter->c_str(), idx)); + } + + EXPECT_EQUAL(ses.getNumUniques(), numUniques); + if (hasPostings) { + testUniques<EnumStoreType, EnumPostingTree>(ses, newUniques); + } else { + testUniques<EnumStoreType, EnumTree>(ses, newUniques); + } +} + +void +EnumStoreTest::testHoldListAndGeneration() +{ + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 6); + StringEnumStore ses(100 * entrySize, false); + StringEnumStore::Index idx; + StringVector uniques; + generation_t sesGen = 0u; + uniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? 
"enum0%u" : "enum%u", i); + uniques.push_back(tmp); + } + StringVector newUniques; + newUniques.reserve(100); + for (uint32_t i = 0; i < 100; ++i) { + char tmp[16]; + sprintf(tmp, i < 10 ? "unique0%u" : "unique%u", i); + newUniques.push_back(tmp); + } + uint32_t generation = 0; + std::vector<Reader> readers; + + // insert first batch of unique strings + for (uint32_t i = 0; i < 100; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + + // associate readers + if (i % 10 == 9) { + Reader::IndexVector indices; + Reader::ExpectedVector expected; + for (uint32_t j = i - 9; j <= i; ++j) { + EXPECT_TRUE(ses.findIndex(uniques[j].c_str(), idx)); + indices.push_back(idx); + StringEnumStore::Entry entry = ses.getEntry(idx); + EXPECT_TRUE(entry.getEnum() == j); + EXPECT_TRUE(entry.getRefCount() == 1); + EXPECT_TRUE(strcmp(entry.getValue(), uniques[j].c_str()) == 0); + expected.push_back(StringEntry(entry.getEnum(), entry.getRefCount(), + std::string(entry.getValue()))); + } + EXPECT_TRUE(indices.size() == 10); + EXPECT_TRUE(expected.size() == 10); + sesGen = generation++; + readers.push_back(Reader(sesGen, indices, expected)); + checkReaders(ses, sesGen, readers); + } + } + + EXPECT_EQUAL(0u, ses.getRemaining()); + EXPECT_EQUAL(RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // remove all uniques + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(ses.findIndex(uniques[i].c_str(), idx)); + ses.decRefCount(idx); + EXPECT_EQUAL(0u, ses.getRefCount(idx)); + } + ses.freeUnusedEnums(true); + EXPECT_EQUAL(100 * entrySize + RESERVED_BYTES, ses.getBuffer(0)._deadElems); + + // perform compaction + uint32_t newEntrySize = StringEnumStore::alignEntrySize(8 + 1 + 8); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + + // check readers again + checkReaders(ses, sesGen, readers); + + // fill up buffer + uint32_t i = 0; + while (ses.getRemaining() >= newEntrySize) { + //LOG(info, "fill: %s", newUniques[i].c_str()); + 
ses.addEnum(newUniques[i++].c_str(), idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + EXPECT_LESS(ses.getRemaining(), newEntrySize); + // buffer on hold list + EXPECT_TRUE(!ses.performCompaction(5 * newEntrySize)); + + checkReaders(ses, sesGen, readers); + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // buffer no longer on hold list + EXPECT_LESS(ses.getRemaining(), newEntrySize); + EXPECT_TRUE(ses.performCompaction(5 * newEntrySize)); + EXPECT_TRUE(ses.getRemaining() >= 5 * newEntrySize); +} + +void +EnumStoreTest::testMemoryUsage() +{ + StringEnumStore ses(200, false); + StringEnumStore::Index idx; + uint32_t num = 8; + std::vector<StringEnumStore::Index> indices; + std::vector<std::string> uniques; + for (uint32_t i = 0; i < num; ++i) { + std::stringstream ss; + ss << "enum" << i; + uniques.push_back(ss.str()); + } + generation_t sesGen = 0u; + uint32_t entrySize = StringEnumStore::alignEntrySize(8 + 1 + 5); // enum(4) + refcount(4) + 1(\0) + strlen("enumx") + + // usage before inserting enums + MemoryUsage usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), uint32_t(0)); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + for (uint32_t i = 0; i < num; ++i) { + ses.addEnum(uniques[i].c_str(), idx); + indices.push_back(idx); + ses.incRefCount(idx); + EXPECT_TRUE(ses.getRefCount(idx)); + } + + // usage after inserting enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL(RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + // assign new enum for num / 2 of indices + for (uint32_t i = 0; i < num / 2; 
++i) { + ses.decRefCount(indices[i]); + EXPECT_TRUE(ses.findIndex(uniques.back().c_str(), idx)); + ses.incRefCount(idx); + indices[i] = idx; + } + ses.freeUnusedEnums(true); + + // usage after removing enums + usage = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(enumStoreAlign(200u) + RESERVED_BYTES, usage.allocatedBytes()); + EXPECT_EQUAL(num * entrySize + RESERVED_BYTES, usage.usedBytes()); + EXPECT_EQUAL((num / 2) * entrySize + RESERVED_BYTES, usage.deadBytes()); + EXPECT_EQUAL(0u, usage.allocatedBytesOnHold()); + + ses.performCompaction(400); + + // usage after compaction + MemoryUsage usage2 = ses.getMemoryUsage(); + EXPECT_EQUAL(ses.getNumUniques(), num / 2); + EXPECT_EQUAL(usage.usedBytes() + (num / 2) * entrySize, usage2.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), usage2.deadBytes()); + EXPECT_EQUAL(usage.usedBytes() - usage.deadBytes(), usage2.allocatedBytesOnHold()); + + ses.transferHoldLists(sesGen); + ses.trimHoldLists(sesGen + 1); + + // usage after hold list trimming + MemoryUsage usage3 = ses.getMemoryUsage(); + EXPECT_EQUAL((num / 2) * entrySize, usage3.usedBytes()); + EXPECT_EQUAL(0u, usage3.deadBytes()); + EXPECT_EQUAL(0u, usage3.allocatedBytesOnHold()); +} + +namespace { + +NumericEnumStore::Index +addEnum(NumericEnumStore &store, uint32_t value) +{ + NumericEnumStore::Index result; + store.addEnum(value, result); + store.incRefCount(result); + return result; +} + +void +decRefCount(NumericEnumStore &store, NumericEnumStore::Index idx) +{ + store.decRefCount(idx); + store.freeUnusedEnums(false); +} + +} + +void +EnumStoreTest::requireThatAddressSpaceUsageIsReported() +{ + const size_t ADDRESS_LIMIT = 34359738368; // NumericEnumStore::DataStoreType::RefType::offsetSize() + NumericEnumStore store(200, false); + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), store.getAddressSpaceUsage()); + NumericEnumStore::Index idx1 = addEnum(store, 10); + EXPECT_EQUAL(AddressSpace(16, ADDRESS_LIMIT), 
/**
 * Returns the number of decimal digits needed to print num.
 * Zero counts as one digit.
 */
size_t
digits(size_t num)
{
    size_t count = 1;
    // Every further division by ten that leaves a non-zero quotient
    // corresponds to one more digit.
    for (size_t rest = num / 10; rest != 0; rest /= 10) {
        ++count;
    }
    return count;
}
+EnumStoreTest::fillRandomStrings(uint32_t numStrings, uint32_t minLen, uint32_t maxLen) +{ + StringVector retval; + retval.reserve(numStrings); + for (uint32_t i = 0; i < numStrings; ++i) { + retval.push_back(getRandomString(minLen, maxLen)); + } + return retval; +} + +EnumStoreTest::StringVector +EnumStoreTest::sortRandomStrings(StringVector & strings) +{ + std::sort(strings.begin(), strings.end()); + std::vector<std::string> retval; + retval.reserve(strings.size()); + std::vector<std::string>::iterator pos = std::unique(strings.begin(), strings.end()); + std::copy(strings.begin(), pos, std::back_inserter(retval)); + return retval; +} + +void +EnumStoreTest::checkReaders(const StringEnumStore & ses, + generation_t sesGen, + const std::vector<Reader> & readers) +{ + (void) sesGen; + //uint32_t refCount = 1000; + StringEnumStore::Type t = ""; + for (uint32_t i = 0; i < readers.size(); ++i) { + const Reader & r = readers[i]; + for (uint32_t j = 0; j < r._indices.size(); ++j) { + EXPECT_EQUAL(r._expected[j]._enum, ses.getEnum(r._indices[j])); + EXPECT_TRUE(ses.getValue(r._indices[j], t)); + EXPECT_TRUE(r._expected[j]._string == std::string(t)); + } + } +} + + +int +EnumStoreTest::Main() +{ + TEST_INIT("enumstore_test"); + + testIndex(); + testStringEntry(); + testNumericEntry(); + testFloatEnumStore(); + testAddEnum(); + testCompaction(); + testReset(); + testHoldListAndGeneration(); + testMemoryUsage(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + if (_argc > 1) { + testBufferLimit(); // large test with 8 GB buffer + } + + TEST_DONE(); +} +} + + +TEST_APPHOOK(search::EnumStoreTest); diff --git a/searchlib/src/tests/attribute/extendattributes/.gitignore b/searchlib/src/tests/attribute/extendattributes/.gitignore new file mode 100644 index 00000000000..4018a7d4f5b --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +extendattribute_test +searchlib_extendattribute_test_app diff --git 
a/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt new file mode 100644 index 00000000000..b0803f0a232 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_extendattribute_test_app + SOURCES + extendattribute.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_extendattribute_test_app COMMAND sh extendattribute_test.sh) diff --git a/searchlib/src/tests/attribute/extendattributes/DESC b/searchlib/src/tests/attribute/extendattributes/DESC new file mode 100644 index 00000000000..4f88189a1d7 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/DESC @@ -0,0 +1 @@ +Unit tests for extendable attributes. diff --git a/searchlib/src/tests/attribute/extendattributes/FILES b/searchlib/src/tests/attribute/extendattributes/FILES new file mode 100644 index 00000000000..930039cae19 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/FILES @@ -0,0 +1 @@ +extendattribute.cpp diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp new file mode 100644 index 00000000000..0bb751d26ee --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute.cpp @@ -0,0 +1,176 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("extendattribute_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/extendableattributes.h> + +namespace search { + +class ExtendAttributeTest : public vespalib::TestApp +{ +private: + template <typename Attribute> + void testExtendInteger(Attribute & attr); + template <typename Attribute> + void testExtendFloat(Attribute & attr); + template <typename Attribute> + void testExtendString(Attribute & attr); + +public: + int Main(); +}; + +template <typename Attribute> +void ExtendAttributeTest::testExtendInteger(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add(1, 10); + EXPECT_EQUAL(attr.getInt(0), 1); + attr.add(2, 20); + EXPECT_EQUAL(attr.getInt(0), attr.hasMultiValue() ? 1 : 2); + if (attr.hasMultiValue()) { + AttributeVector::WeightedInt v[2]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), 1); + EXPECT_EQUAL(v[1].getValue(), 2); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add(3, 30); + EXPECT_EQUAL(attr.getInt(1), 3); + if (attr.hasMultiValue()) { + AttributeVector::WeightedInt v[1]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), 3); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +template <typename Attribute> +void ExtendAttributeTest::testExtendFloat(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add(1.7, 10); + EXPECT_EQUAL(attr.getInt(0), 1); + EXPECT_EQUAL(attr.getFloat(0), 
1.7); + attr.add(2.3, 20); + EXPECT_EQUAL(attr.getFloat(0), attr.hasMultiValue() ? 1.7 : 2.3); + if (attr.hasMultiValue()) { + AttributeVector::WeightedFloat v[2]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), 1.7); + EXPECT_EQUAL(v[1].getValue(), 2.3); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add(3.6, 30); + EXPECT_EQUAL(attr.getFloat(1), 3.6); + if (attr.hasMultiValue()) { + AttributeVector::WeightedFloat v[1]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), 3.6); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +template <typename Attribute> +void ExtendAttributeTest::testExtendString(Attribute & attr) +{ + uint32_t docId(0); + EXPECT_EQUAL(attr.getNumDocs(), 0u); + attr.addDoc(docId); + EXPECT_EQUAL(docId, 0u); + EXPECT_EQUAL(attr.getNumDocs(), 1u); + attr.add("1.7", 10); + EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), "1.7"); + attr.add("2.3", 20); + EXPECT_EQUAL(std::string(attr.getString(0, NULL, 0)), attr.hasMultiValue() ? 
"1.7" : "2.3"); + if (attr.hasMultiValue()) { + AttributeVector::WeightedString v[2]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(0, v, 2), 2u); + EXPECT_EQUAL(v[0].getValue(), "1.7"); + EXPECT_EQUAL(v[1].getValue(), "2.3"); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 10); + EXPECT_EQUAL(v[1].getWeight(), 20); + } + } + attr.addDoc(docId); + EXPECT_EQUAL(docId, 1u); + EXPECT_EQUAL(attr.getNumDocs(), 2u); + attr.add("3.6", 30); + EXPECT_EQUAL(std::string(attr.getString(1, NULL, 0)), "3.6"); + if (attr.hasMultiValue()) { + AttributeVector::WeightedString v[1]; + EXPECT_EQUAL((static_cast<AttributeVector &>(attr)).get(1, v, 1), 1u); + EXPECT_EQUAL(v[0].getValue(), "3.6"); + if (attr.hasWeightedSetType()) { + EXPECT_EQUAL(v[0].getWeight(), 30); + } + } +} + +int +ExtendAttributeTest::Main() +{ + TEST_INIT("extendattribute_test"); + + SingleIntegerExtAttribute siattr("si1"); + MultiIntegerExtAttribute miattr("mi1"); + WeightedSetIntegerExtAttribute wsiattr("wsi1"); + EXPECT_TRUE( ! siattr.hasMultiValue() ); + EXPECT_TRUE( miattr.hasMultiValue() ); + EXPECT_TRUE( wsiattr.hasWeightedSetType() ); + testExtendInteger(siattr); + testExtendInteger(miattr); + testExtendInteger(wsiattr); + + SingleFloatExtAttribute sdattr("sd1"); + MultiFloatExtAttribute mdattr("md1"); + WeightedSetFloatExtAttribute wsdattr("wsd1"); + EXPECT_TRUE( ! sdattr.hasMultiValue() ); + EXPECT_TRUE( mdattr.hasMultiValue() ); + EXPECT_TRUE( wsdattr.hasWeightedSetType() ); + testExtendFloat(sdattr); + testExtendFloat(mdattr); + testExtendFloat(wsdattr); + + SingleStringExtAttribute ssattr("ss1"); + MultiStringExtAttribute msattr("ms1"); + WeightedSetStringExtAttribute wssattr("wss1"); + EXPECT_TRUE( ! 
ssattr.hasMultiValue() ); + EXPECT_TRUE( msattr.hasMultiValue() ); + EXPECT_TRUE( wssattr.hasWeightedSetType() ); + testExtendString(ssattr); + testExtendString(msattr); + testExtendString(wssattr); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::ExtendAttributeTest); diff --git a/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh new file mode 100755 index 00000000000..6f335b18229 --- /dev/null +++ b/searchlib/src/tests/attribute/extendattributes/extendattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_extendattribute_test_app +rm -rf *.dat diff --git a/searchlib/src/tests/attribute/gidmapattribute/.gitignore b/searchlib/src/tests/attribute/gidmapattribute/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchlib/src/tests/attribute/gidmapattribute/.gitignore diff --git a/searchlib/src/tests/attribute/multivaluemapping/.gitignore b/searchlib/src/tests/attribute/multivaluemapping/.gitignore new file mode 100644 index 00000000000..743c738a0a2 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +multivaluemapping_test +searchlib_multivaluemapping_test_app diff --git a/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt new file mode 100644 index 00000000000..36c66b09966 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_multivaluemapping_test_app + SOURCES + multivaluemapping_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multivaluemapping_test_app COMMAND searchlib_multivaluemapping_test_app) diff --git a/searchlib/src/tests/attribute/multivaluemapping/DESC b/searchlib/src/tests/attribute/multivaluemapping/DESC new file mode 100644 index 00000000000..44c27ec9926 --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/DESC @@ -0,0 +1 @@ +This is a test for the MultivalueMapping class. diff --git a/searchlib/src/tests/attribute/multivaluemapping/FILES b/searchlib/src/tests/attribute/multivaluemapping/FILES new file mode 100644 index 00000000000..bf22403a5fe --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/FILES @@ -0,0 +1 @@ +multivaluemapping.cpp diff --git a/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp new file mode 100644 index 00000000000..e78e180856b --- /dev/null +++ b/searchlib/src/tests/attribute/multivaluemapping/multivaluemapping_test.cpp @@ -0,0 +1,836 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("multivaluemapping_test"); +#include <vespa/vespalib/testkit/testapp.h> +//#define DEBUG_MULTIVALUE_MAPPING +//#define LOG_MULTIVALUE_MAPPING +#include <vespa/searchlib/attribute/multivaluemapping.h> +#include <algorithm> +#include <limits> + +namespace search { + +namespace +{ + +uint32_t dummyCommittedDocIdLimit = std::numeric_limits<uint32_t>::max(); + +} + +typedef MultiValueMappingT<uint32_t> MvMapping; +typedef MvMapping::Index Index; +typedef multivalue::Index64 Index64; +typedef multivalue::Index32 Index32; +typedef MvMapping::Histogram Histogram; + +class MultiValueMappingTest : public vespalib::TestApp +{ +private: + typedef std::vector<Index> IndexVector; + typedef std::vector<std::vector<uint32_t> > ExpectedVector; + typedef vespalib::GenerationHandler::generation_t generation_t; + + class Reader { + public: + uint32_t _startGen; + uint32_t _endGen; + IndexVector _indices; + ExpectedVector _expected; + uint32_t numKeys() { return _indices.size(); } + Reader(uint32_t startGen, uint32_t endGen, const IndexVector & indices, + const ExpectedVector & expected) : + _startGen(startGen), _endGen(endGen), _indices(indices), _expected(expected) {} + }; + + typedef std::vector<Reader> ReaderVector; + + void testIndex32(); + void testIndex64(); + void testSimpleSetAndGet(); + void testChangingValueCount(); + + void + checkReaders(MvMapping &mvm, + generation_t mvmGen, + ReaderVector &readers); + + void testHoldListAndGeneration(); + void testManualCompaction(); + void testVariousGets(); + void testReplace(); + void testMemoryUsage(); + void testShrink(); + void testHoldElem(); + void requireThatAddressSpaceUsageIsReported(); + void requireThatDeadIsNotAccountedInAddressSpaceUsage(); + +public: + int Main(); +}; + +void +MultiValueMappingTest::testIndex32() +{ + { + Index32 idx; + EXPECT_EQUAL(idx.values(), 0u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 0u); + 
EXPECT_EQUAL(idx.offset(), 0u); + } + { + Index32 idx(3, 0, 1000); + EXPECT_EQUAL(idx.values(), 3u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 6u); + EXPECT_EQUAL(idx.offset(), 1000u); + EXPECT_EQUAL(idx.idx(), 0x300003e8u); + } + { + Index32 idx(15, 1, 134217727); + EXPECT_EQUAL(idx.values(), 15u); + EXPECT_EQUAL(idx.alternative(), 1u); + EXPECT_EQUAL(idx.vectorIdx(), 31u); + EXPECT_EQUAL(idx.offset(), 134217727u); + EXPECT_EQUAL(idx.idx(), 0xffffffffu); + } + { + EXPECT_EQUAL(Index32::maxValues(), 15u); + EXPECT_EQUAL(Index32::alternativeSize(), 2u); + } +} + +void +MultiValueMappingTest::testIndex64() +{ + { + Index64 idx; + EXPECT_EQUAL(idx.values(), 0u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 0u); + EXPECT_EQUAL(idx.offset(), 0u); + } + { + Index64 idx(3, 0, 1000); + EXPECT_EQUAL(idx.values(), 3u); + EXPECT_EQUAL(idx.alternative(), 0u); + EXPECT_EQUAL(idx.vectorIdx(), 6u); + EXPECT_EQUAL(idx.offset(), 1000u); + EXPECT_EQUAL(idx.idx(), 0x3000003e8ull); + } + { + Index64 idx(15, 1, 134217727); + EXPECT_EQUAL(idx.values(), 15u); + EXPECT_EQUAL(idx.alternative(), 1u); + EXPECT_EQUAL(idx.vectorIdx(), 31u); + EXPECT_EQUAL(idx.offset(), 134217727u); + EXPECT_EQUAL(idx.idx(), 0xf87ffffffull); + } + { + EXPECT_EQUAL(Index64::maxValues(), 1023u); + EXPECT_EQUAL(Index64::alternativeSize(), 2u); + } +} + +void +MultiValueMappingTest::testSimpleSetAndGet() +{ + uint32_t maxValueCount = Index::maxValues() * 2; + uint32_t numKeys = maxValueCount * 2; + MvMapping mvm(dummyCommittedDocIdLimit, numKeys); + EXPECT_EQUAL(mvm.getNumKeys(), numKeys); + Index idx; + + // insert values + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t valueCount = key / maxValueCount; + std::vector<uint32_t> values(valueCount, key); + Histogram needed(Index::maxValues()); + needed[valueCount] = 1; + if (!mvm.enoughCapacity(needed)) { + mvm.trimHoldLists(1); + mvm.performCompaction(needed); + } + mvm.set(key, values); + 
EXPECT_EQUAL(mvm.getValueCount(key), valueCount); + idx = mvm._indices[key]; + if (valueCount < Index::maxValues()) { + EXPECT_EQUAL(idx.values(), valueCount); + } else { + EXPECT_EQUAL(idx.values(), Index::maxValues()); + } +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "------------------------------------------------------------"); +#endif + } + EXPECT_TRUE(!mvm.hasKey(numKeys)); + + // check for expected values + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t valueCount = key / maxValueCount; + EXPECT_EQUAL(mvm.getValueCount(key), valueCount); + std::vector<uint32_t> buffer(valueCount); + EXPECT_EQUAL(mvm.get(key, buffer), valueCount); + EXPECT_TRUE(buffer.size() == valueCount); + EXPECT_EQUAL(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)), valueCount); + uint32_t value; + const uint32_t * handle = NULL; + EXPECT_EQUAL(mvm.get(key, handle), valueCount); + EXPECT_TRUE(valueCount == 0 ? handle == NULL : handle != NULL); + for (uint32_t i = 0; i < valueCount; ++i) { + EXPECT_TRUE(mvm.get(key, i, value)); + EXPECT_EQUAL(value, key); + EXPECT_TRUE(handle[i] == key); + } + EXPECT_TRUE(!mvm.get(key, valueCount, value)); + } + + // reset + mvm.reset(10); + EXPECT_TRUE(mvm.getNumKeys() == 10); + EXPECT_TRUE(!mvm.hasKey(10)); + EXPECT_TRUE(mvm._genHolder.getHeldBytes() == 0); + for (uint32_t key = 0; key < 10; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == 0); + std::vector<uint32_t> buffer; + EXPECT_TRUE(mvm.get(key, buffer) == 0); + EXPECT_TRUE(buffer.size() == 0); + } + + // add more keys + for (uint32_t i = 0; i < 5; ++i) { + uint32_t key; + mvm.addKey(key); + EXPECT_TRUE(key == 10 + i); + EXPECT_TRUE(mvm.getNumKeys() == 11 + i); + } +} + +void +MultiValueMappingTest::testChangingValueCount() +{ + uint32_t numKeys = 10; + uint32_t maxCount = Index::maxValues() + 1; + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 0; i < Index::maxValues(); ++i) { + initCapacity[i] = numKeys; + } + initCapacity[Index::maxValues()] = 
numKeys * 2; + MvMapping mvm(dummyCommittedDocIdLimit, numKeys, initCapacity); + + // Increasing the value count for some keys + for (uint32_t valueCount = 1; valueCount <= maxCount; ++valueCount) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "########################### %u ##############################", valueCount); +#endif + uint32_t lastValueCount = valueCount - 1; + // set values + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector<uint32_t> buffer(valueCount, key); + mvm.set(key, buffer); + } + + Histogram remaining = mvm.getRemaining(); + if (valueCount < Index::maxValues()) { + EXPECT_TRUE(remaining[valueCount] == 0); + } else { + EXPECT_TRUE(remaining[Index::maxValues()] == numKeys * (maxCount - valueCount)); + } + + if (valueCount < Index::maxValues()) { + MvMapping::SingleVectorPtr current = mvm.getSingleVector(valueCount, MvMapping::ACTIVE); + EXPECT_TRUE(current.first->used() == numKeys * (valueCount)); + EXPECT_TRUE(current.first->dead() == 0); + + if (lastValueCount != 0) { + MvMapping::SingleVectorPtr last = mvm.getSingleVector(lastValueCount, MvMapping::ACTIVE); + EXPECT_TRUE(last.first->used() == numKeys * (lastValueCount)); + EXPECT_TRUE(last.first->dead() == numKeys * (lastValueCount)); + } + } else { + MvMapping::VectorVectorPtr current = mvm.getVectorVector(MvMapping::ACTIVE); + EXPECT_TRUE(current.first->used() == numKeys * (valueCount - Index::maxValues() + 1)); + EXPECT_TRUE(current.first->dead() == numKeys * (valueCount - Index::maxValues())); + } + + // check values + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector<uint32_t> buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)) == valueCount); + } + } +} + +void +MultiValueMappingTest::checkReaders(MvMapping &mvm, + generation_t mvmGen, + ReaderVector &readers) +{ + for (ReaderVector::iterator iter = readers.begin(); + iter != readers.end(); ) { + if (iter->_endGen <= 
mvmGen) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "check and remove reader: start = %u, end = %u", + iter->_startGen, iter->_endGen); +#endif + for (uint32_t key = 0; key < iter->numKeys(); ++key) { + Index idx = iter->_indices[key]; + uint32_t valueCount = iter->_expected[key].size(); + if (valueCount < Index::maxValues()) { + EXPECT_TRUE(idx.values() == valueCount); + for (uint32_t i = idx.offset() * idx.values(), j = 0; + i < (idx.offset() + 1) * idx.values() && j < iter->_expected[key].size(); + ++i, ++j) + { + EXPECT_TRUE(mvm._singleVectors[idx.vectorIdx()][i] == iter->_expected[key][j]); + } + } else { + EXPECT_TRUE(mvm._vectorVectors[idx.alternative()][idx.offset()].size() == + valueCount); + EXPECT_TRUE(std::equal(mvm._vectorVectors[idx.alternative()][idx.offset()].begin(), + mvm._vectorVectors[idx.alternative()][idx.offset()].end(), + iter->_expected[key].begin())); + } + } + iter = readers.erase(iter); + } else { + ++iter; + } + } +} + +void +MultiValueMappingTest::testHoldListAndGeneration() +{ + uint32_t numKeys = 10; + uint32_t maxCount = Index::maxValues() + 1; + uint32_t maxKeys = numKeys * 2; + + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 1; i < maxCount; ++i) { + initCapacity[i] = numKeys; // make enough capacity for 1/2 of the keys + } + MvMapping mvm(dummyCommittedDocIdLimit, maxKeys, initCapacity); + EXPECT_TRUE(mvm.enoughCapacity(initCapacity)); + + ReaderVector readers; + uint32_t safeGen = std::numeric_limits<uint32_t>::max(); + uint32_t readDuration = 2; + generation_t mvmGen = 0u; + + for (uint32_t valueCount = 1; valueCount < maxCount; ++valueCount) { +#ifdef LOG_MULTIVALUE_MAPPING + LOG(info, "#################### count(%u) - gen(%u) ####################", + valueCount, mvm.getGeneration()); +#endif + + // check and remove readers + checkReaders(mvm, mvmGen, readers); + + // update safe generation and removeOldGenerations + safeGen = std::numeric_limits<uint32_t>::max(); + for (ReaderVector::iterator iter = 
readers.begin(); iter != readers.end(); ++iter) { + if ((*iter)._startGen < safeGen) { + safeGen= (*iter)._startGen; + } + } + mvm.trimHoldLists(safeGen); + + // set new values for 1/2 of the keys + for (uint32_t key = 0; key < numKeys; ++key) { + std::vector<uint32_t> values(valueCount, valueCount * numKeys + key); + mvm.set(key, values); + } + // check new values + for (uint32_t key = 0; key < numKeys; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == valueCount); + std::vector<uint32_t> buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * numKeys + key)) == valueCount); + } + mvm.transferHoldLists(mvmGen); + ++mvmGen; + + // associate reader with current generation + IndexVector indices; + ExpectedVector expected; + for (uint32_t key = 0; key < numKeys; ++key) { + indices.push_back(mvm._indices[key]); + expected.push_back(std::vector<uint32_t>(valueCount, valueCount * numKeys + key)); + } + readers.push_back(Reader(mvmGen, mvmGen + readDuration, + indices, expected)); + readDuration = (readDuration % 4) + 2; + + // perform compaction + Histogram needed(Index::maxValues()); + needed[valueCount] = maxKeys; + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + mvm.performCompaction(needed); + + // set new value for all keys (the associated reader should see the old values) + for (uint32_t key = 0; key < maxKeys; ++key) { + std::vector<uint32_t> values(valueCount, valueCount * maxKeys + key); + mvm.set(key, values); + } + // check new values + for (uint32_t key = 0; key < maxKeys; ++key) { + EXPECT_TRUE(mvm.getValueCount(key) == valueCount); + std::vector<uint32_t> buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), valueCount * maxKeys + key)) == valueCount); + } + + mvm.transferHoldLists(mvmGen); + ++mvmGen; + } + while (!readers.empty()) { + checkReaders(mvm, mvmGen, 
readers); + mvm.transferHoldLists(mvmGen); + ++mvmGen; + } +} + +void +MultiValueMappingTest::testManualCompaction() +{ + Histogram initCapacity(Index::maxValues()); + uint32_t maxCount = Index::maxValues() + 1; + for (uint32_t i = 1; i < maxCount; ++i) { + initCapacity[i] = 1; + } + MvMapping mvm(dummyCommittedDocIdLimit, maxCount * 2, initCapacity); + EXPECT_TRUE(mvm.enoughCapacity(initCapacity)); + + // first update pass. use all capacity + for (uint32_t key = 1; key < maxCount; ++key) { + std::vector<uint32_t> values(key, key); + Histogram needed(Index::maxValues()); + needed[key] = 1; + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + } + // second update pass. must perform compaction + for (uint32_t key = maxCount + 1; key < maxCount * 2; ++key) { + uint32_t valueCount = key % maxCount; + std::vector<uint32_t> values(valueCount, key); + Histogram needed(Index::maxValues()); + needed[valueCount] = 1; + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + mvm.performCompaction(needed); + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + } + // check for correct buffer values + for (uint32_t key = 0; key < maxCount * 2; ++key) { + uint32_t valueCount = key % maxCount; + EXPECT_TRUE(mvm.getValueCount(key) == valueCount); + std::vector<uint32_t> buffer(valueCount); + EXPECT_TRUE(mvm.get(key, buffer) == valueCount); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), key)) == valueCount); + } + + // reset + mvm.reset(maxCount, initCapacity); + EXPECT_TRUE(mvm.getNumKeys() == maxCount); + EXPECT_TRUE(mvm.enoughCapacity(initCapacity)); + + // new update pass. 
use all capacity + for (uint32_t key = 1; key < maxCount; ++key) { + std::vector<uint32_t> values(key, key); + Histogram needed(Index::maxValues()); + needed[key] = 1; + EXPECT_EQUAL(mvm.getValueCount(key), 0u); + EXPECT_TRUE(mvm.enoughCapacity(needed)); + mvm.set(key, values); + EXPECT_TRUE(!mvm.enoughCapacity(needed)); + } +} + +void +MultiValueMappingTest::testVariousGets() +{ + MvMapping::Histogram initCapacity(Index::maxValues()); + initCapacity[5] = 1; + initCapacity[Index::maxValues()] = 1; + MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity); + Index idx; + + mvm.set(1, std::vector<uint32_t>(5, 50)); + mvm.set(2, std::vector<uint32_t>(25, 250)); + EXPECT_TRUE(25 >= Index::maxValues()); + + { + std::vector<uint32_t> buffer(5); + EXPECT_TRUE(mvm.get(0, &buffer[0], 0) == 0); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0); + } + { + std::vector<uint32_t> buffer(5); + EXPECT_TRUE(mvm.get(0, &buffer[0], 5) == 0); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)5)) == 0); + } + { + std::vector<uint32_t> buffer(10); + EXPECT_TRUE(mvm.get(1, &buffer[0], 3) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 3); + } + { + std::vector<uint32_t> buffer(10); + EXPECT_TRUE(mvm.get(1, &buffer[0], 10) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5); + } + { + std::vector<uint32_t> buffer(30); + EXPECT_TRUE(mvm.get(2, &buffer[0], 23) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 23); + } + { + std::vector<uint32_t> buffer(30); + EXPECT_TRUE(mvm.get(2, &buffer[0], 30) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)250)) == 25); + } +} + +void +MultiValueMappingTest::testReplace() +{ + MvMapping::Histogram initCapacity(Index::maxValues()); + initCapacity[5] = 1; + 
initCapacity[Index::maxValues()] = 1; + MvMapping mvm(dummyCommittedDocIdLimit, 3, initCapacity); + Index idx; + + mvm.set(1, std::vector<uint32_t>(5, 50)); + mvm.set(2, std::vector<uint32_t>(25, 100)); + EXPECT_TRUE(25 >= Index::maxValues()); + + { + EXPECT_TRUE(mvm.getValueCount(0) == 0); + std::vector<uint32_t> replace(5, 50); + mvm.replace(0, replace); + EXPECT_TRUE(mvm.getValueCount(0) == 0); + } + { + EXPECT_TRUE(mvm.getValueCount(1) == 5); + std::vector<uint32_t> buffer(5); + EXPECT_TRUE(mvm.get(1, buffer) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)50)) == 5); + + std::vector<uint32_t> replace(5, 55); + mvm.replace(1, replace); + EXPECT_TRUE(mvm.getValueCount(1) == 5); + EXPECT_TRUE(mvm.get(1, buffer) == 5); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)55)) == 5); + } + { + EXPECT_TRUE(mvm.getValueCount(2) == 25); + std::vector<uint32_t> buffer(25); + EXPECT_TRUE(mvm.get(2, buffer) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)100)) == 25); + + std::vector<uint32_t> replace(25, 200); + mvm.replace(2, replace); + EXPECT_TRUE(mvm.getValueCount(2) == 25); + EXPECT_TRUE(mvm.get(2, buffer) == 25); + EXPECT_TRUE(static_cast<uint32_t>(std::count(buffer.begin(), buffer.end(), (uint32_t)200)) == 25); + } +} + +void +MultiValueMappingTest::testMemoryUsage() +{ + uint32_t numKeys = Index::maxValues() + 4; + MemoryUsage exp; + exp.incAllocatedBytes(numKeys * sizeof(Index)); + exp.incUsedBytes(numKeys * sizeof(Index)); + uint32_t totalCnt = 0; + + Histogram initCapacity(Index::maxValues()); + for (uint32_t i = 0; i < Index::maxValues(); ++i) { + initCapacity[i] = 2; + exp.incAllocatedBytes(i * 2 * sizeof(uint32_t)); + } + initCapacity[Index::maxValues()] = 12; + exp.incAllocatedBytes(12 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector + + MvMapping mvm(dummyCommittedDocIdLimit, + numKeys, initCapacity, 
GrowStrategy(numKeys)); + + // usage before inserting values + MemoryUsage usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0)); + + // insert values for all keys + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t cnt = key + 1; + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), cnt); + totalCnt += cnt; + exp.incUsedBytes(cnt * sizeof(uint32_t)); + if (cnt >= Index::maxValues()) { + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector + } + } + + // usage after inserting values + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), uint32_t(0)); + + totalCnt = 0; + // insert new values for all keys making dead bytes + for (uint32_t key = 0; key < numKeys; ++key) { + uint32_t cnt = key + 2; + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + EXPECT_EQUAL(mvm.getValueCount(key), cnt); + totalCnt += cnt; + exp.incUsedBytes(cnt * sizeof(uint32_t)); + if ((cnt - 1) < Index::maxValues()) { + exp.incDeadBytes((cnt - 1) * sizeof(uint32_t)); // the previous values are marked dead + } else { + exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + } + if (cnt >= Index::maxValues()) { + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(sizeof(vespalib::Array<uint32_t>)); // due to vector vector + } + } + + // usage after inserting new values making dead bytes + usage = mvm.getMemoryUsage(); + 
EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + + // make sure all internal vectors are put on hold list + mvm.performCompaction(initCapacity); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytes() - numKeys * sizeof(Index) + exp.allocatedBytesOnHold()); + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), totalCnt); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes() - exp.deadBytes() - exp.allocatedBytesOnHold()); + EXPECT_EQUAL(usage.deadBytes(), uint32_t(0)); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u); +} + + +void +MultiValueMappingTest::testShrink() +{ + uint32_t committedDocIdLimit = dummyCommittedDocIdLimit; + MvMapping mvm(committedDocIdLimit); + for (uint32_t i = 0; i < 10; ++i) { + uint32_t k; + mvm.addKey(k); + EXPECT_EQUAL(i, k); + } + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + uint32_t shrinkTarget = 4; + committedDocIdLimit = shrinkTarget; + mvm.shrinkKeys(shrinkTarget); + mvm.transferHoldLists(1); + mvm.trimHoldLists(2); + EXPECT_EQUAL(shrinkTarget, mvm.getNumKeys()); + EXPECT_EQUAL(shrinkTarget, mvm.getCapacityKeys()); +} + + +void +MultiValueMappingTest::testHoldElem() +{ + uint32_t numKeys = 1; + MemoryUsage exp; + exp.incAllocatedBytes(numKeys * sizeof(Index)); + exp.incUsedBytes(numKeys * sizeof(Index)); + + Histogram initCapacity(Index::maxValues()); + initCapacity[Index::maxValues()] = 3; + exp.incAllocatedBytes(3 * sizeof(vespalib::Array<uint32_t>)); // due to vector vector + + 
MvMapping mvm(dummyCommittedDocIdLimit, + numKeys, initCapacity, GrowStrategy(numKeys)); + + // usage before inserting values + MemoryUsage usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), 0u); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + + uint32_t key = 0; + uint32_t cnt = Index::maxValues() + 3; + { + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(cnt * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + } + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + ++cnt; + { + std::vector<uint32_t> values(cnt, key); + mvm.set(key, values); + exp.incAllocatedBytes(cnt * sizeof(uint32_t)); + exp.incUsedBytes(cnt * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + exp.incAllocatedBytesOnHold((cnt - 1) * sizeof(uint32_t) + + sizeof(vespalib::Array<uint32_t>)); + } + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), exp.allocatedBytesOnHold()); + mvm.transferHoldLists(0); + mvm.trimHoldLists(1); + exp.incDeadBytes(sizeof(vespalib::Array<uint32_t>)); + exp.decAllocatedBytes((cnt - 1) * sizeof(uint32_t)); + usage = mvm.getMemoryUsage(); + EXPECT_EQUAL(mvm.getTotalValueCnt(), cnt); + EXPECT_EQUAL(usage.allocatedBytes(), exp.allocatedBytes()); + 
EXPECT_EQUAL(usage.usedBytes(), exp.usedBytes()); + EXPECT_EQUAL(usage.deadBytes(), exp.deadBytes()); + EXPECT_EQUAL(usage.allocatedBytesOnHold(), 0u); +} + +namespace { + +void +insertValues(MvMapping &mvm, uint32_t key, uint32_t count) +{ + std::vector<uint32_t> values(count, 13); + mvm.set(key, values); +} + +Histogram +createHistogram(uint32_t numValuesPerValueClass) +{ + Histogram result(Index32::maxValues()); + for (uint32_t i = 0; i <= Index32::maxValues(); ++i) { + result[i] = numValuesPerValueClass; + } + return result; +} + +const size_t ADDRESS_LIMIT = 134217728; // Index32::offsetSize() + +struct AddressSpaceFixture +{ + MvMapping mvm; + AddressSpaceFixture() + : mvm(dummyCommittedDocIdLimit, 20, createHistogram(4), GrowStrategy(20)) + {} +}; + +} + +void +MultiValueMappingTest::requireThatAddressSpaceUsageIsReported() +{ + AddressSpaceFixture f; + MvMapping &mvm = f.mvm; + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 1); + EXPECT_EQUAL(AddressSpace(1, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 2, 2); + insertValues(mvm, 3, 2); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 4, 13); + insertValues(mvm, 5, 13); + insertValues(mvm, 6, 13); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 7, 14); + insertValues(mvm, 8, 14); + insertValues(mvm, 9, 14); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 15); + insertValues(mvm, 11, 16); + insertValues(mvm, 12, 17); + insertValues(mvm, 13, 18); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); +} + +void +MultiValueMappingTest::requireThatDeadIsNotAccountedInAddressSpaceUsage() +{ + AddressSpaceFixture f; + MvMapping &mvm = f.mvm; + + EXPECT_EQUAL(AddressSpace(0, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 3); + insertValues(mvm, 2, 3); + 
insertValues(mvm, 3, 3); + insertValues(mvm, 4, 3); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 1, 4); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 2, 5); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 15); + insertValues(mvm, 11, 15); + insertValues(mvm, 12, 15); + insertValues(mvm, 13, 15); + EXPECT_EQUAL(AddressSpace(4, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 10, 14); + EXPECT_EQUAL(AddressSpace(3, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); + insertValues(mvm, 11, 14); + EXPECT_EQUAL(AddressSpace(2, ADDRESS_LIMIT), mvm.getAddressSpaceUsage()); +} + +int +MultiValueMappingTest::Main() +{ + TEST_INIT("multivaluemapping_test"); + + testIndex32(); + testIndex64(); + testSimpleSetAndGet(); + testChangingValueCount(); + testHoldListAndGeneration(); + testManualCompaction(); + testVariousGets(); + testReplace(); + testMemoryUsage(); + testShrink(); + testHoldElem(); + TEST_DO(requireThatAddressSpaceUsageIsReported()); + TEST_DO(requireThatDeadIsNotAccountedInAddressSpaceUsage()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::MultiValueMappingTest); diff --git a/searchlib/src/tests/attribute/postinglist/.gitignore b/searchlib/src/tests/attribute/postinglist/.gitignore new file mode 100644 index 00000000000..8cf10f7f9dc --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +postinglist_test +searchlib_postinglist_test_app diff --git a/searchlib/src/tests/attribute/postinglist/CMakeLists.txt b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt new file mode 100644 index 00000000000..a22d1ae2fdc --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_postinglist_test_app + SOURCES + postinglist.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_postinglist_test_app COMMAND searchlib_postinglist_test_app) diff --git a/searchlib/src/tests/attribute/postinglist/DESC b/searchlib/src/tests/attribute/postinglist/DESC new file mode 100644 index 00000000000..1499e3070fb --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/DESC @@ -0,0 +1 @@ +This is a test for the AttributePostingList class. diff --git a/searchlib/src/tests/attribute/postinglist/FILES b/searchlib/src/tests/attribute/postinglist/FILES new file mode 100644 index 00000000000..268f6c09f1e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/FILES @@ -0,0 +1 @@ +postinglist.cpp diff --git a/searchlib/src/tests/attribute/postinglist/postinglist.cpp b/searchlib/src/tests/attribute/postinglist/postinglist.cpp new file mode 100644 index 00000000000..ab95ce27a0e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglist/postinglist.cpp @@ -0,0 +1,707 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("postinglist_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/util/rand48.h> +#include <algorithm> +#include <limits> +#include <map> +#include <set> + +#include <vespa/searchlib/btree/datastore.h> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreestore.hpp> + +namespace search { + +using vespalib::GenerationHandler; + +/* + * TODO: Make it pass MALLOC_OPTIONS=AJ on freebsd and valgrind on Linux. 
+ */ + +class AttributePostingListTest : public vespalib::TestApp +{ +private: + /* Limited STL version for validation of full version */ + typedef std::set<uint32_t> STLPostingList; + typedef std::map<int, STLPostingList> STLValueTree; + + class RandomValue + { + public: + uint32_t _docId; + int _value; + uint32_t _order; + + RandomValue(void) + : _docId(0), + _value(0u), + _order(0u) + { + } + + RandomValue(uint32_t docId, uint32_t value, uint32_t order) + : _docId(docId), + _value(value), + _order(order) + { + } + + bool + operator<(const RandomValue &rhs) const + { + return (_value < rhs._value || + (_value == rhs._value && + (_docId < rhs._docId || + (_docId == rhs._docId && + _order < rhs._order)))); + } + + bool + operator>(const RandomValue &rhs) const + { + return (_value > rhs._value || + (_value == rhs._value && + (_docId > rhs._docId || + (_docId == rhs._docId && + _order > rhs._order)))); + } + + bool + operator==(const RandomValue &rhs) const + { + return (_value == rhs._value && + _docId == rhs._docId && + _order == rhs._order); + } + }; + + class CompareOrder + { + public: + bool + operator()(const RandomValue &a, const RandomValue &b) + { + return (a._order < b._order || + (a._order == b._order && + (a._value < b._value || + (a._value == b._value && + a._docId < b._docId)))); + } + }; + std::vector<RandomValue> _randomValues; + +public: + typedef btree::DataStore<int> IntKeyStore; + typedef btree::BTreeKeyData<uint32_t, btree::BTreeNoLeafData> + AttributePosting; + typedef btree::BTreeStore<uint32_t, + btree::BTreeNoLeafData, + btree::NoAggregated, + std::less<uint32_t>, + btree::BTreeDefaultTraits> + PostingList; + typedef PostingList::NodeAllocatorType PostingListNodeAllocator; + typedef btree::EntryRef PostingIdx; + typedef btree::EntryRef StoreIndex; + + class IntComp { + private: + const IntKeyStore & _store; + int _value; + int getValue(const StoreIndex & idx) const { + if (idx.valid()) { + return _store.getEntry(idx); + } + return _value; + 
} + public: + IntComp(const IntKeyStore & store) : _store(store), _value(0) {} + IntComp(const IntKeyStore & store, int value) : _store(store), _value(value) {} + bool operator() (const StoreIndex & lhs, const StoreIndex & rhs) const { + return getValue(lhs) < getValue(rhs); + } + }; + + typedef btree::BTreeRoot<StoreIndex, PostingIdx, + btree::NoAggregated, + const IntComp &> IntEnumTree; + typedef IntEnumTree::NodeAllocatorType IntEnumNodeAllocator; + typedef IntEnumTree Tree; + typedef IntEnumNodeAllocator TreeManager; + typedef IntKeyStore ValueHandle; + typedef std::vector<RandomValue> RandomValuesVector; +private: + GenerationHandler _handler; + IntKeyStore *_intKeyStore; + IntEnumNodeAllocator *_intNodeAlloc; + IntEnumTree *_intTree; + PostingList *_intPostings; + STLValueTree *_stlTree; + + Rand48 _randomGenerator; + uint32_t _generation; + + void + allocTree(void); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count, + unsigned int mvcount); + + void + insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values); + + void + sortRandomValues(void); + + void + doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle); + + void + doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + void + removeOldGenerations(Tree &tree, + ValueHandle &valueHandle, 
+ PostingList &postings, + PostingListNodeAllocator &postingsAlloc); + + static const char * + frozenName(bool frozen) + { + return frozen ? "frozen" : "thawed"; + } +public: + AttributePostingListTest(void) + : vespalib::TestApp(), + _randomValues(), + _handler(), + _intKeyStore(NULL), + _intNodeAlloc(NULL), + _intTree(NULL), + _intPostings(NULL), + _stlTree(NULL), + _randomGenerator() + { + } + + int Main(void); +}; + + + +void +AttributePostingListTest::allocTree(void) +{ + _intKeyStore = new IntKeyStore; + _intNodeAlloc = new IntEnumNodeAllocator(); + _intTree = new IntEnumTree(); + _intPostings = new PostingList(); + _stlTree = new STLValueTree; +} + + +void +AttributePostingListTest::freeTree(bool verbose) +{ + (void) verbose; + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %zu leaves", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold()), + _intTree->size(*_intNodeAlloc)); + _intTree->clear(*_intNodeAlloc); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + _intNodeAlloc->freeze(); + _intPostings->freeze(); + _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + _intPostings->clearBuilder(); + _intPostings->transferHoldLists(_handler.getCurrentGeneration()); + _handler.incGeneration(); + _intNodeAlloc->trimHoldLists(_handler.getFirstUsedGeneration()); + _intPostings->trimHoldLists(_handler.getFirstUsedGeneration()); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytes()), + static_cast<uint64_t>(_intNodeAlloc->getMemoryUsage().allocatedBytesOnHold())); + delete _stlTree; + _stlTree = NULL; + delete _intTree; + _intTree = NULL; + delete _intNodeAlloc; + 
_intNodeAlloc = NULL; + delete _intKeyStore; + _intKeyStore = NULL; + delete _intPostings; + _intPostings = NULL; +} + + +void +AttributePostingListTest:: +fillRandomValues(unsigned int count, + unsigned int mvcount) +{ + unsigned int i; + unsigned int j; + unsigned int mv; + unsigned int mvmax; + unsigned int mvcount2; + unsigned int mvcount3; + + mvmax = 100; + mvcount2 = mvcount * (mvmax * (mvmax - 1)) / 2; + LOG(info, + "Filling %u+%u random values", count, mvcount2); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i <count; i++) { + uint32_t docId = _randomGenerator.lrand48(); + uint32_t val = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + for (mv = 1; mv < mvmax; mv++) { + for (i = 0; i < mvcount; i++) { + for (j = 0; j < mv; j++) { + uint32_t docId = _randomGenerator.lrand48(); + uint32_t val = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + } + } + mvcount3 = 0; + for (mv = 10; mv < 4000; mv = mv * 3) + { + mvcount3 += mv * 2; + for (j = 0; j < mv; j++) { + uint32_t val = _randomGenerator.lrand48(); + uint32_t docId = _randomGenerator.lrand48(); + uint32_t order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + val = _randomGenerator.lrand48(); + docId = _randomGenerator.lrand48(); + order = _randomGenerator.lrand48(); + _randomValues.push_back(RandomValue(docId, val, order)); + } + } + std::sort(_randomValues.begin(), + _randomValues.end(), + CompareOrder()); + + EXPECT_TRUE(_randomValues.size() == count + mvcount2 + mvcount3); +} + + +void +AttributePostingListTest:: +insertRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector & + values) +{ + RandomValuesVector::iterator i; + 
RandomValuesVector::iterator ie; + + LOG(info, "insertRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + if (!itr.valid()) { +#if 0 + if (valueHandle.needResize()) + doCompactEnumStore(tree, treeMgr, valueHandle); +#endif + StoreIndex idx = valueHandle.addEntry(i->_value); + if (tree.insert(idx, PostingIdx(), treeMgr, IntComp(valueHandle))) { + itr = tree.find(idx, treeMgr, IntComp(valueHandle)); + } + } else { + } + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(i->_value, valueHandle.getEntry(itr.getKey())); + + /* TODO: Insert docid to postinglist */ + PostingIdx oldIdx = itr.getData(); + PostingIdx newIdx = oldIdx; + AttributePosting newPosting(i->_docId, + btree::BTreeNoLeafData()); + std::vector<AttributePosting> additions; + std::vector<uint32_t> removals; + additions.push_back(newPosting); + postings.apply(newIdx, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + if (it == stlTree->end()) { + std::pair<STLValueTree::iterator,bool> ir = + stlTree->insert(std::make_pair(i->_value, + STLPostingList())); + ASSERT_TRUE(ir.second && ir.first != stlTree->end() && + ir.first->first == i->_value); + it = ir.first; + } + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + it->second.insert(i->_docId); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = 
it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "insertRandomValues done"); +} + + +void +AttributePostingListTest:: +removeRandomValues(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "removeRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + PostingIdx newIdx; + /* + * TODO: Remove docid from postinglist, and only remove + * value from tree if postinglist is empty + */ + if (itr.valid()) { + PostingIdx oldIdx = itr.getData(); + newIdx = oldIdx; + std::vector<AttributePosting> additions; + std::vector<uint32_t> removals; + removals.push_back(i->_docId); + postings.apply(newIdx, &additions[0], &additions[0]+additions.size(), + &removals[0], &removals[0] + removals.size()); + if (newIdx != oldIdx) { + std::atomic_thread_fence(std::memory_order_release); + itr.writeData(newIdx); + } + if (!newIdx.valid()) { + if (tree.remove(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value))) { + itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + } + } + } + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + STLPostingList::iterator it2; + it2 = it->second.find(i->_docId); + ASSERT_TRUE(it2 != it->second.end() && + *it2 == i->_docId); + it->second.erase(it2); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = 
postings.size(newIdx); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(newIdx); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + ASSERT_TRUE(tree.isValid(treeMgr, IntComp(valueHandle))); + LOG(info, "removeRandomValues done"); +} + + +void +AttributePostingListTest:: +lookupRandomValues(Tree &tree, + TreeManager &treeMgr, + const ValueHandle &valueHandle, + PostingList &postings, + STLValueTree *stlTree, + RandomValuesVector &values) +{ + RandomValuesVector::iterator i; + RandomValuesVector::iterator ie; + + LOG(info, "lookupRandomValues start"); + ie = values.end(); + for (i = values.begin(); i != ie; ++i) { + Tree::Iterator itr = tree.find(StoreIndex(), treeMgr, IntComp(valueHandle, i->_value)); + ASSERT_TRUE(itr.valid() && + valueHandle.getEntry(itr.getKey()) == i->_value); + if (stlTree != NULL) { + STLValueTree::iterator it; + it = stlTree->find(i->_value); + ASSERT_TRUE(it != stlTree->end() && it->first == i->_value); + + if (it->second.empty()) { + stlTree->erase(it); + ASSERT_TRUE(!itr.valid()); + } else { + size_t postingsize; + + ASSERT_TRUE(itr.valid()); + postingsize = postings.size(itr.getData()); + ASSERT_TRUE(postingsize > 0 && + postingsize == it->second.size()); + STLPostingList::iterator it3; + STLPostingList::iterator it3b; + STLPostingList::iterator it3e; + + PostingList::Iterator it0; + + it3b = it->second.begin(); + it3e = it->second.end(); + it0 = postings.begin(itr.getData()); + it3 = it3b; + + while (it3 != it3e) { + ASSERT_TRUE(it0.valid()); + ASSERT_TRUE(*it3 == it0.getKey()); + ++it3; + ++it0; + } + ASSERT_TRUE(!it0.valid()); + } + } + } + LOG(info, "lookupRandomValues done"); +} + + +void 
+AttributePostingListTest::doCompactEnumStore(Tree &tree, + TreeManager &treeMgr, + ValueHandle &valueHandle) +{ + LOG(info, + "doCompactEnumStore start"); + + Tree::Iterator i = tree.begin(treeMgr); + + uint32_t numBuffers = valueHandle.getNumBuffers(); + std::vector<uint32_t> toHold; + + for (uint32_t bufferId = 0; bufferId < numBuffers; ++bufferId) { + btree::BufferState &state = valueHandle.getBufferState(bufferId); + if (state._state == btree::BufferState::ACTIVE) { + toHold.push_back(bufferId); + // Freelists already disabled due to variable sized data + } + } + valueHandle.switchActiveBuffer(0, 0u); + + for (; i.valid(); ++i) + { + StoreIndex ov = i.getKey(); + StoreIndex nv = valueHandle.addEntry(valueHandle.getEntry(ov)); + + std::atomic_thread_fence(std::memory_order_release); + i.writeKey(nv); + } + typedef GenerationHandler::generation_t generation_t; + for (std::vector<uint32_t>::const_iterator + it = toHold.begin(), ite = toHold.end(); it != ite; ++it) { + valueHandle.holdBuffer(*it); + } + generation_t generation = _handler.getCurrentGeneration(); + valueHandle.transferHoldLists(generation); + _handler.incGeneration(); + valueHandle.trimHoldLists(_handler.getFirstUsedGeneration()); + + LOG(info, + "doCompactEnumStore done"); +} + + +void +AttributePostingListTest:: +doCompactPostingList(Tree &tree, + TreeManager &treeMgr, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + LOG(info, + "doCompactPostingList start"); + +#if 0 + Tree::Iterator i(tree.begin(treeMgr)); + + postings.performCompaction(i, capacityNeeded); +#else + (void) tree; + (void) treeMgr; + (void) postings; + (void) postingsAlloc; +#endif + + LOG(info, + "doCompactPostingList done"); +} + + +void +AttributePostingListTest:: +bumpGeneration(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + (void) tree; + (void) valueHandle; + postingsAlloc.freeze(); + 
postingsAlloc.transferHoldLists(_handler.getCurrentGeneration()); + postings.transferHoldLists(_handler.getCurrentGeneration()); + _handler.incGeneration(); +} + +void +AttributePostingListTest:: +removeOldGenerations(Tree &tree, + ValueHandle &valueHandle, + PostingList &postings, + PostingListNodeAllocator &postingsAlloc) +{ + (void) tree; + (void) valueHandle; + postingsAlloc.trimHoldLists(_handler.getFirstUsedGeneration()); + postings.trimHoldLists(_handler.getFirstUsedGeneration()); +} + +int +AttributePostingListTest::Main() +{ + TEST_INIT("postinglist_test"); + + fillRandomValues(1000, 10); + + allocTree(); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + lookupRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + _intNodeAlloc->freeze(); + _intNodeAlloc->transferHoldLists(_handler.getCurrentGeneration()); + doCompactEnumStore(*_intTree, *_intNodeAlloc, *_intKeyStore); + removeRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + insertRandomValues(*_intTree, *_intNodeAlloc, *_intKeyStore, *_intPostings, + _stlTree, _randomValues); + freeTree(true); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::AttributePostingListTest); diff --git a/searchlib/src/tests/attribute/postinglistattribute/.gitignore b/searchlib/src/tests/attribute/postinglistattribute/.gitignore new file mode 100644 index 00000000000..9614cdd7626 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +postinglistattribute_test +searchlib_postinglistattribute_test_app diff --git a/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt new file mode 100644 index 00000000000..77d137c7b6e --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_postinglistattribute_test_app + SOURCES + postinglistattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_postinglistattribute_test_app COMMAND sh postinglistattribute_test.sh) diff --git a/searchlib/src/tests/attribute/postinglistattribute/DESC b/searchlib/src/tests/attribute/postinglistattribute/DESC new file mode 100644 index 00000000000..04c97a729a0 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/DESC @@ -0,0 +1 @@ +Unit tests for subclasses of PostingListAttribute. diff --git a/searchlib/src/tests/attribute/postinglistattribute/FILES b/searchlib/src/tests/attribute/postinglistattribute/FILES new file mode 100644 index 00000000000..56029570a21 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/FILES @@ -0,0 +1 @@ +postinglistattribute.cpp diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp new file mode 100644 index 00000000000..5e248dc8758 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.cpp @@ -0,0 +1,1021 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("postinglistattribute_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/asciistream.h> + +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/postinglistattribute.h> +#include <vespa/searchlib/attribute/singlenumericpostattribute.h> +#include <vespa/searchlib/attribute/multinumericpostattribute.h> +#include <vespa/searchlib/attribute/singlestringpostattribute.h> +#include <vespa/searchlib/attribute/multistringpostattribute.h> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/vespalib/util/compress.h> + +using std::shared_ptr; + +bool +FastOS_UNIX_File::Sync(void) +{ + // LOG(info, "Skip sync"); + return true; +} + +namespace search { + +using attribute::CollectionType; +using attribute::BasicType; +using attribute::Config; +using queryeval::PostingInfo; +using queryeval::MinMaxPostingInfo; +using search::fef::TermFieldMatchData; +using search::queryeval::SearchIterator; + +typedef std::unique_ptr<AttributeVector::SearchContext> SearchContextPtr; +typedef std::unique_ptr<search::queryeval::SearchIterator> SearchBasePtr; + +void +toStr(std::stringstream &ss, SearchIterator &it) +{ + it.initFullRange(); + it.seek(1u); + bool first = true; + while ( !it.isAtEnd()) { + if (first) + first = false; + else + ss << ","; + ss << it.getDocId(); + it.seek(it.getDocId() + 1); + } +} + + +bool +assertIterator(const std::string &exp, SearchIterator &it) +{ + std::stringstream ss; + toStr(ss, it); + if (!EXPECT_EQUAL(exp, ss.str())) + return false; + return true; +} + + +class PostingListAttributeTest : public vespalib::TestApp +{ +private: + typedef IntegerAttribute::largeint_t largeint_t; + typedef AttributeVector::SP AttributePtr; + typedef std::set<AttributeVector::DocId> DocSet; + + typedef 
SingleValueNumericPostingAttribute< + EnumAttribute<IntegerAttributeTemplate<int32_t> > > + Int32PostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<IntegerAttributeTemplate<int32_t> >, + multivalue::MVMTemplateArg< + multivalue::Value<EnumStoreBase::Index>, + multivalue::Index32> > Int32ArrayPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<IntegerAttributeTemplate<int32_t> >, + multivalue::MVMTemplateArg< + multivalue::WeightedValue<EnumStoreBase::Index>, + multivalue::Index32> > Int32WsetPostingListAttribute; + + typedef SingleValueNumericPostingAttribute< + EnumAttribute<FloatingPointAttributeTemplate<float> > > + FloatPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<FloatingPointAttributeTemplate<float> >, + multivalue::MVMTemplateArg< + multivalue::Value<EnumStoreBase::Index>, + multivalue::Index32> > FloatArrayPostingListAttribute; + typedef MultiValueNumericPostingAttribute< + EnumAttribute<FloatingPointAttributeTemplate<float> >, + multivalue::MVMTemplateArg< + multivalue::WeightedValue<EnumStoreBase::Index>, + multivalue::Index32> > FloatWsetPostingListAttribute; + + typedef SingleValueStringPostingAttribute StringPostingListAttribute; + typedef ArrayStringPostingAttribute StringArrayPostingListAttribute; + typedef WeightedSetStringPostingAttribute StringWsetPostingListAttribute; + + template <typename VectorType> + void + populate(VectorType &v); + + template <typename VectorType> + VectorType & + as(AttributePtr &v); + + IntegerAttribute & + asInt(AttributePtr &v); + + StringAttribute & + asString(AttributePtr &v); + + void + buildTermQuery(std::vector<char> & buffer, + const vespalib::string & index, + const vespalib::string & term, bool prefix); + + template <typename V, typename T> + SearchContextPtr + getSearch(const V & vec, const T & term, bool prefix); + + template <typename V> + SearchContextPtr + getSearch(const V & vec); + + template <typename V> 
+ SearchContextPtr + getSearch2(const V & vec); + + bool + assertSearch(const std::string &exp, StringAttribute &sa); + + void addDocs(const AttributePtr & ptr, uint32_t numDocs); + + template <typename VectorType, typename BufferType, typename Range> + void checkPostingList(const VectorType & vec, const std::vector<BufferType> & values, const Range & range); + + template <typename VectorType, typename BufferType> + void testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, + uint32_t numDocs, const std::vector<BufferType> & values); + void testPostingList(); + + template <typename AttributeType, typename ValueType> + void checkPostingList(AttributeType & vec, ValueType value, DocSet expected); + template <typename AttributeType, typename ValueType> + void checkNonExistantPostingList(AttributeType & vec, ValueType value); + template <typename AttributeType, typename ValueType> + void testArithmeticValueUpdate(const AttributePtr & ptr); + void testArithmeticValueUpdate(); + + template <typename VectorType, typename ValueType> + void testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value); + void testReload(); + + template <typename VectorType> + void + testMinMax(AttributePtr &ptr1, uint32_t trimmed); + + template <typename VectorType> + void + testMinMax(AttributePtr &ptr1, AttributePtr &ptr2); + + void + testMinMax(void); + + void + testStringFold(void); +public: + int Main(); +}; + +template <> +void +PostingListAttributeTest::populate<IntegerAttribute>(IntegerAttribute &v) +{ + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 0) + continue; + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (i == 20) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 14); + v.append(i, -42, -3); + } else { + 
EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (i == 25) { + if (v.hasMultiValue()) { + v.append(i, -42, 27); + v.append(i, -43, 12); + v.append(i, -42, -3); + } else { + EXPECT_TRUE( v.update(i, -43) ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + v.append(i, -42, 3); + } else { + v.update(i, -42); + } + v.commit(); + } + v.commit(); +} + +template <> +void +PostingListAttributeTest::populate<StringAttribute>(StringAttribute &v) +{ + for(size_t i(0), m(v.getNumDocs()); i < m; i++) { + v.clearDoc(i); + if (i == 0) + continue; + if (i == 9) + continue; + if (i == 7) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (i == 20) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 14); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (i == 25) { + if (v.hasMultiValue()) { + v.append(i, "foo", 27); + v.append(i, "bar", 12); + v.append(i, "foo", -3); + } else { + EXPECT_TRUE( v.update(i, "bar") ); + } + v.commit(); + continue; + } + if (v.hasMultiValue()) { + v.append(i, "foo", 3); + } else { + v.update(i, "foo"); + } + v.commit(); + } +} + + +template <typename VectorType> +VectorType & +PostingListAttributeTest::as(AttributePtr &v) +{ + VectorType *res = dynamic_cast<VectorType *>(v.get()); + assert(res != NULL); + return *res; +} + + +IntegerAttribute & +PostingListAttributeTest::asInt(AttributePtr &v) +{ + return as<IntegerAttribute>(v); +} + + +StringAttribute & +PostingListAttributeTest::asString(AttributePtr &v) +{ + return as<StringAttribute>(v); +} + + +void +PostingListAttributeTest::buildTermQuery(std::vector<char> &buffer, + const vespalib::string &index, + const vespalib::string &term, + bool prefix) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t 
queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + buffer[p++] = prefix ? ParseItem::ITEM_PREFIXTERM : ParseItem::ITEM_TERM; + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + + +template <typename V, typename T> +SearchContextPtr +PostingListAttributeTest::getSearch(const V &vec, const T &term, bool prefix) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), prefix); + + return (static_cast<const AttributeVector &>(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch<IntegerAttribute>(const IntegerAttribute &v) +{ + return getSearch<IntegerAttribute>(v, "[-42;-42]", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch<StringAttribute>(const StringAttribute &v) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "foo", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch2<IntegerAttribute>(const IntegerAttribute &v) +{ + return getSearch<IntegerAttribute>(v, "[-43;-43]", false); +} + + +template <> +SearchContextPtr +PostingListAttributeTest::getSearch2<StringAttribute>(const StringAttribute &v) +{ + return getSearch<StringAttribute, const vespalib::string &> + (v, "bar", false); +} + + +bool +PostingListAttributeTest::assertSearch(const std::string &exp, + StringAttribute &sa) +{ + TermFieldMatchData md; + SearchContextPtr sc = getSearch<StringAttribute>(sa); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + if (!EXPECT_TRUE(assertIterator(exp, *sb))) + return 
/**
 * Maps value index i to the half-open document range
 * [i * part, (i + 1) * part), i.e. consecutive blocks of `part` documents
 * in ascending value order.
 */
class RangeAlpha {
private:
    uint32_t _part;   // number of documents per value
public:
    // explicit: a bare integer must not silently convert into a range generator.
    explicit RangeAlpha(uint32_t part) : _part(part) { }
    /// First document id (inclusive) belonging to value index @p i.
    uint32_t getBegin(uint32_t i) const { return i * _part; }
    /// One past the last document id belonging to value index @p i.
    uint32_t getEnd(uint32_t i) const { return (i + 1) * _part; }
};
+PostingListAttributeTest::testPostingList(const AttributePtr & ptr1, const AttributePtr & ptr2, + uint32_t numDocs, const std::vector<BufferType> & values) +{ + LOG(info, "testPostingList: vector '%s'", ptr1->getName().c_str()); + + VectorType & vec1 = static_cast<VectorType &>(*ptr1.get()); + VectorType & vec2 = static_cast<VectorType &>(*ptr2.get()); + addDocs(ptr1, numDocs); + + uint32_t part = numDocs / values.size(); + + // insert values + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t idx = doc / part; + EXPECT_TRUE(vec1.update(doc, values[idx])); + } + vec1.commit(); + +#if 0 + std::cout << "***** printBuffer 0 ***** " << std::endl; + vec1.getEnumStore().printBuffer(std::cout, 0); + std::cout << "***** printBuffer 1 ***** " << std::endl; + vec1.getEnumStore().printBuffer(std::cout, 1); + std::cout << "***** printCurrentContent ***** " << std::endl; + vec1.getEnumStore().printCurrentContent(std::cout); + std::cout << "***** printPostingListContent *****" << std::endl; + vec1.printPostingListContent(std::cout); +#endif + + // check posting list for correct content + checkPostingList(vec1, values, RangeAlpha(part)); + + // load and save vector + ptr1->saveAs(ptr2->getBaseFileName()); + ptr2->load(); +#if 0 + std::cout << "***** vec2.printPostingListContent *****" << std::endl; + vec2.printPostingListContent(std::cout); +#endif + checkPostingList(vec2, values, RangeAlpha(part)); + + // insert values in another order + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t idx = values.size() - 1 - (doc / part); + EXPECT_TRUE(vec1.update(doc, values[idx])); + } + vec1.commit(); + + // check posting list again for correct content + checkPostingList(vec1, values, RangeBeta(part, values.size())); + + // load and save vector + ptr1->saveAs(ptr2->getBaseFileName()); + ptr2->load(); + checkPostingList(vec2, values, RangeBeta(part, values.size())); +} + +void +PostingListAttributeTest::testPostingList() +{ + uint32_t numDocs = 1000; + uint32_t numValues = 
50; + + { // IntegerAttribute + std::vector<largeint_t> values; + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(i); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testPostingList<Int32PostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("aint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("aint32_2", cfg); + testPostingList<Int32ArrayPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsint32_2", cfg); + testPostingList<Int32WsetPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + } + + { // FloatingPointAttribute + std::vector<double> values; + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(i); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testPostingList<FloatPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("afloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("afloat_2", cfg); + testPostingList<FloatArrayPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + { + Config cfg(Config(BasicType::FLOAT, CollectionType::WSET)); + cfg.setFastSearch(true); + 
AttributePtr ptr1 = AttributeFactory::createAttribute("wsfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsfloat_2", cfg); + testPostingList<FloatWsetPostingListAttribute>(ptr1, ptr2, numDocs, values); + } + } + + { // StringAttribute + std::vector<vespalib::string> values; + std::vector<const char *> charValues; + values.reserve(numValues); + charValues.reserve(numValues); + values.push_back(""); + charValues.push_back(values.back().c_str()); + for (uint32_t i = 1; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << i; + values.push_back(ss.str()); + charValues.push_back(values.back().c_str()); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testPostingList<StringPostingListAttribute>(ptr1, ptr2, numDocs, charValues); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::ARRAY)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("astr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("astr_2", cfg); + testPostingList<StringArrayPostingListAttribute>(ptr1, ptr2, numDocs, charValues); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); + testPostingList<StringWsetPostingListAttribute>(ptr1, ptr2, numDocs, charValues); + } + } +} + +template <typename AttributeType, typename ValueType> +void +PostingListAttributeTest::checkPostingList(AttributeType & vec, ValueType value, DocSet expected) +{ + const typename AttributeType::EnumStore & enumStore = vec.getEnumStore(); + const typename AttributeType::Dictionary & dict = + enumStore.getPostingDictionary(); + const typename 
AttributeType::PostingList & postingList = vec.getPostingList(); + typename AttributeType::DictionaryIterator itr = + dict.find(typename AttributeType::EnumIndex(), + typename AttributeType::ComparatorType(vec.getEnumStore(), value)); + ASSERT_TRUE(itr.valid()); + + typename AttributeType::PostingList::Iterator postings; + postings = postingList.begin(itr.getData()); + + DocSet::iterator docBegin = expected.begin(); + DocSet::iterator docEnd = expected.end(); + for (; postings.valid(); ++postings) { + EXPECT_EQUAL(*docBegin++, postings.getKey()); + } + EXPECT_TRUE(docBegin == docEnd); +} + +template <typename AttributeType, typename ValueType> +void +PostingListAttributeTest::checkNonExistantPostingList(AttributeType & vec, ValueType value) +{ + const typename AttributeType::Dictionary & dict = + vec.getEnumStore().getPostingDictionary(); + typename AttributeType::DictionaryIterator itr = + dict.find(typename AttributeType::EnumIndex(), + typename AttributeType::ComparatorType(vec.getEnumStore(), value)); + EXPECT_TRUE(!itr.valid()); +} + +template <typename AttributeType, typename ValueType> +void +PostingListAttributeTest::testArithmeticValueUpdate(const AttributePtr & ptr) +{ + LOG(info, "testArithmeticValueUpdate: vector '%s'", ptr->getName().c_str()); + + typedef document::ArithmeticValueUpdate Arith; + AttributeType & vec = static_cast<AttributeType &>(*ptr.get()); + + addDocs(ptr, 4); + + uint32_t allDocs[] = {0, 1, 2, 3}; + checkNonExistantPostingList<AttributeType, ValueType>(vec, 0); + + for (uint32_t doc = 0; doc < 4; ++doc) { + ASSERT_TRUE(vec.update(doc, 100)); + } + ptr->commit(); + + checkNonExistantPostingList<AttributeType, ValueType>(vec, 0); + checkPostingList<AttributeType, ValueType>(vec, 100, DocSet(allDocs, allDocs + 4)); + + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + 
ptr->commit(); + + { + uint32_t docs[] = {0}; + checkPostingList<AttributeType, ValueType>(vec, 110, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {1}; + checkPostingList<AttributeType, ValueType>(vec, 90, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {2}; + checkPostingList<AttributeType, ValueType>(vec, 1000, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {3}; + checkPostingList<AttributeType, ValueType>(vec, 10, DocSet(docs, docs + 1)); + } + + + // several inside a single commit + for (uint32_t doc = 0; doc < 4; ++doc) { + ASSERT_TRUE(vec.update(doc, 2000)); + } + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(0, Arith(Arith::Add, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(1, Arith(Arith::Sub, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(2, Arith(Arith::Mul, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + EXPECT_TRUE(vec.apply(3, Arith(Arith::Div, 10))); + ptr->commit(); + + vespalib::asciistream ss; + vec.printPostingListContent(ss); + std::cout << ss.str(); + { + uint32_t docs[] = {0}; + checkPostingList<AttributeType, ValueType>(vec, 2020, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {1}; + checkPostingList<AttributeType, ValueType>(vec, 1980, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {2}; + checkPostingList<AttributeType, ValueType>(vec, 200000, DocSet(docs, docs + 1)); + } + { + uint32_t docs[] = {3}; + checkPostingList<AttributeType, ValueType>(vec, 20, DocSet(docs, docs + 1)); + } + checkNonExistantPostingList<AttributeType, ValueType>(vec, 100); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 110); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 90); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 1000); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 10); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 2000); + 
checkNonExistantPostingList<AttributeType, ValueType>(vec, 2010); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 1990); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 20000); + checkNonExistantPostingList<AttributeType, ValueType>(vec, 200); +} + +void +PostingListAttributeTest::testArithmeticValueUpdate() +{ + { // IntegerAttribute + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sint32", cfg); + testArithmeticValueUpdate<Int32PostingListAttribute, largeint_t>(ptr); + } + + { // FloatingPointAttribute + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfloat", cfg); + testArithmeticValueUpdate<FloatPostingListAttribute, double>(ptr); + } +} + + +template <typename VectorType, typename ValueType> +void +PostingListAttributeTest::testReload(const AttributePtr & ptr1, const AttributePtr & ptr2, const ValueType & value) +{ + LOG(info, "testReload: vector '%s'", ptr1->getName().c_str()); + + VectorType & vec1 = static_cast<VectorType &>(*ptr1.get()); + + addDocs(ptr1, 5); + for (uint32_t doc = 0; doc < 5; ++doc) { + EXPECT_TRUE(vec1.update(doc, value)); + } + ptr1->commit(); + + ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName())); + ASSERT_TRUE(ptr2->load()); + + EXPECT_TRUE(ptr2->getNumDocs() == 5); + ValueType buffer[1]; + for (uint32_t doc = 0; doc < 5; ++doc) { + EXPECT_TRUE(ptr2->get(doc, buffer, 1) == 1); + EXPECT_EQUAL(buffer[0], value); + } +} + +void +PostingListAttributeTest::testReload() +{ + { // IntegerAttribute + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testReload<Int32PostingListAttribute, largeint_t>(ptr1, ptr2, 100); + } + { + 
AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testReload<Int32PostingListAttribute, largeint_t>(ptr1, ptr2, 0); + } + } + + { // FloatingPointAttribute + Config cfg(Config(BasicType::FLOAT, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testReload<FloatPostingListAttribute, double>(ptr1, ptr2, 100); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sfloat_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sfloat_2", cfg); + testReload<FloatPostingListAttribute, double>(ptr1, ptr2, 0); + } + } + + { // StringAttribute + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testReload<StringPostingListAttribute, vespalib::string>(ptr1, ptr2, "unique"); + } + { + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testReload<StringPostingListAttribute, vespalib::string>(ptr1, ptr2, ""); + } + } +} + +template <typename VectorType> +void +PostingListAttributeTest::testMinMax(AttributePtr &ptr1, uint32_t trimmed) +{ + TermFieldMatchData md; + SearchContextPtr sc = getSearch<VectorType>(as<VectorType>(ptr1)); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&md, true); + sb->initFullRange(); + + const PostingInfo *pi = sb->getPostingInfo(); + ASSERT_TRUE(pi != NULL); + const MinMaxPostingInfo *mmpi = + dynamic_cast<const MinMaxPostingInfo *>(pi); + ASSERT_TRUE(mmpi != NULL); + + if (ptr1->hasMultiValue()) { + if (trimmed == 2u) { + EXPECT_EQUAL(3, mmpi->getMinWeight()); + } else { + 
EXPECT_EQUAL(-3, mmpi->getMinWeight()); + } + EXPECT_EQUAL(3, mmpi->getMaxWeight()); + } else { + EXPECT_EQUAL(1, mmpi->getMinWeight()); + EXPECT_EQUAL(1, mmpi->getMaxWeight()); + } + + sb->seek(1u); + EXPECT_EQUAL(1u, sb->getDocId()); + + sc = getSearch2<VectorType>(as<VectorType>(ptr1)); + sc->fetchPostings(true); + sb = sc->createIterator(&md, true); + sb->initFullRange(); + + pi = sb->getPostingInfo(); + if (trimmed == 2) { + ASSERT_TRUE(pi == NULL); + } else { + ASSERT_TRUE(pi != NULL); + mmpi = dynamic_cast<const MinMaxPostingInfo *>(pi); + ASSERT_TRUE(mmpi != NULL); + + if (ptr1->hasMultiValue()) { + if (trimmed == 0) { + EXPECT_EQUAL(12, mmpi->getMinWeight()); + } else { + EXPECT_EQUAL(14, mmpi->getMinWeight()); + } + EXPECT_EQUAL(14, mmpi->getMaxWeight()); + } else { + EXPECT_EQUAL(1, mmpi->getMinWeight()); + EXPECT_EQUAL(1, mmpi->getMaxWeight()); + } + } + + sb->seek(1u); + if (trimmed == 2u) { + EXPECT_TRUE(sb->isAtEnd()); + } else { + EXPECT_EQUAL(7u, sb->getDocId()); + } +} + +template <typename VectorType> +void +PostingListAttributeTest::testMinMax(AttributePtr &ptr1, AttributePtr &ptr2) +{ + uint32_t numDocs = 100; + addDocs(ptr1, numDocs); + populate(as<VectorType>(ptr1)); + + TEST_DO(testMinMax<VectorType>(ptr1, 0u)); + ASSERT_TRUE(ptr1->saveAs(ptr2->getBaseFileName())); + ASSERT_TRUE(ptr2->load()); + TEST_DO(testMinMax<VectorType>(ptr2, 0u)); + + ptr2->clearDoc(20); + ptr2->clearDoc(25); + ptr2->commit(); + TEST_DO(testMinMax<VectorType>(ptr2, 1u)); + + ptr2->clearDoc(7); + ptr2->commit(); + TEST_DO(testMinMax<VectorType>(ptr2, 2u)); + +} + +void +PostingListAttributeTest::testMinMax(void) +{ + { + Config cfg(Config(BasicType::INT32, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sint32_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sint32_2", cfg); + testMinMax<IntegerAttribute>(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::INT32, CollectionType::WSET)); +
cfg.setFastSearch(true); + AttributePtr ptr1 = + AttributeFactory::createAttribute("wsint32_1", cfg); + AttributePtr ptr2 = + AttributeFactory::createAttribute("wsint32_2", cfg); + testMinMax<IntegerAttribute>(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("sstr_2", cfg); + testMinMax<StringAttribute>(ptr1, ptr2); + } + { + Config cfg(Config(BasicType::STRING, CollectionType::WSET)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("wsstr_1", cfg); + AttributePtr ptr2 = AttributeFactory::createAttribute("wsstr_2", cfg); + testMinMax<StringAttribute>(ptr1, ptr2); + } +} + + +void +PostingListAttributeTest::testStringFold(void) +{ + Config cfg(Config(BasicType::STRING, CollectionType::SINGLE)); + cfg.setFastSearch(true); + AttributePtr ptr1 = AttributeFactory::createAttribute("sstr_1", cfg); + + addDocs(ptr1, 6); + + StringAttribute &sa(asString(ptr1)); + + sa.update(1, "a"); + sa.commit(); + sa.update(3, "FOo"); + sa.commit(); + sa.update(4, "foo"); + sa.commit(); + sa.update(5, "z"); + sa.commit(); + + EXPECT_TRUE(assertSearch("3,4", sa)); + + sa.update(2, "FOO"); + sa.commit(); + + EXPECT_TRUE(assertSearch("2,3,4", sa)); + + sa.update(4, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("2,3", sa)); + + sa.update(2, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("3", sa)); + + sa.update(3, ""); + sa.commit(); + + EXPECT_TRUE(assertSearch("", sa)); +} + + +int +PostingListAttributeTest::Main() +{ + TEST_INIT("postinglistattribute_test"); + + testPostingList(); + testArithmeticValueUpdate(); + testReload(); + testMinMax(); + testStringFold(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::PostingListAttributeTest); diff --git a/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh 
b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh new file mode 100755 index 00000000000..e6f9c214cb9 --- /dev/null +++ b/searchlib/src/tests/attribute/postinglistattribute/postinglistattribute_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +$VALGRIND ./searchlib_postinglistattribute_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight diff --git a/searchlib/src/tests/attribute/runnable.h b/searchlib/src/tests/attribute/runnable.h new file mode 100644 index 00000000000..418230a2fc5 --- /dev/null +++ b/searchlib/src/tests/attribute/runnable.h @@ -0,0 +1,43 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/util/sync.h> + +namespace search { + +class Runnable : public FastOS_Runnable +{ +protected: + uint32_t _id; + vespalib::Monitor _cond; + bool _done; + bool _stopped; + +public: + Runnable(uint32_t id) : + _id(id), _cond(), _done(false), _stopped(false) + { } + void Run(FastOS_ThreadInterface *, void *) { + doRun(); + + vespalib::MonitorGuard guard(_cond); + _stopped = true; + guard.broadcast(); + } + virtual void doRun() = 0; + void stop() { + vespalib::MonitorGuard guard(_cond); + _done = true; + } + void join() { + vespalib::MonitorGuard guard(_cond); + while (!_stopped) { + guard.wait(); + } + } +}; + +} // search + diff --git a/searchlib/src/tests/attribute/searchable/.gitignore b/searchlib/src/tests/attribute/searchable/.gitignore new file mode 100644 index 00000000000..663692907f6 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/.gitignore @@ -0,0 +1,4 @@ +/my_logctl_file +searchlib_attribute_blueprint_test_app +searchlib_attribute_searchable_adapter_test_app +searchlib_attribute_weighted_set_blueprint_test_app diff --git a/searchlib/src/tests/attribute/searchable/CMakeLists.txt b/searchlib/src/tests/attribute/searchable/CMakeLists.txt new file mode 100644 index 
00000000000..ed76520af29 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attribute_searchable_adapter_test_app + SOURCES + attribute_searchable_adapter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_searchable_adapter_test_app COMMAND sh attribute_searchable_adapter_test.sh) +vespa_add_executable(searchlib_attribute_weighted_set_blueprint_test_app + SOURCES + attribute_weighted_set_blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_weighted_set_blueprint_test_app COMMAND searchlib_attribute_weighted_set_blueprint_test_app) +vespa_add_executable(searchlib_attribute_blueprint_test_app + SOURCES + attributeblueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attribute_blueprint_test_app COMMAND searchlib_attribute_blueprint_test_app) diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp new file mode 100644 index 00000000000..1d69f516b52 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.cpp @@ -0,0 +1,689 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchlib/attribute/attribute_blueprint_factory.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributecontext.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/attribute/predicate_attribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.hpp> +#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/predicate_query_term.h> +#include <vespa/searchlib/query/tree/rectangle.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h> +#include <memory> + +using search::AttributeEnumGuard; +using search::AttributeFactory; +using search::AttributeGuard; +using search::AttributeVector; +using search::IAttributeManager; +using search::IntegerAttribute; +using search::SingleStringExtAttribute; +using search::attribute::IAttributeContext; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchData; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::PredicateQueryTerm; +using 
search::query::Rectangle; +using search::query::SimpleDotProduct; +using search::query::SimpleLocationTerm; +using search::query::SimplePredicateQuery; +using search::query::SimplePrefixTerm; +using search::query::SimpleRangeTerm; +using search::query::SimpleSuffixTerm; +using search::query::SimpleSubstringTerm; +using search::query::SimpleStringTerm; +using search::query::SimpleWandTerm; +using search::query::SimpleWeightedSetTerm; +using search::query::Weight; +using search::queryeval::Blueprint; +using search::queryeval::FieldSpec; +using search::queryeval::FakeRequestContext; +using search::queryeval::MinMaxPostingInfo; +using search::queryeval::ParallelWeakAndSearch; +using search::queryeval::PostingInfo; +using search::queryeval::SearchIterator; +using std::vector; +using vespalib::string; +using namespace search::attribute; +using namespace search; + +namespace { + +const string field = "field"; +const string other = "other"; +const int32_t weight = 1; +const uint32_t num_docs = 1000; + +class MyAttributeManager : public IAttributeManager { + AttributeVector::SP _attribute_vector; + AttributeVector::SP _other; + +public: + explicit MyAttributeManager(AttributeVector *attr) + : _attribute_vector(attr), _other() {} + + explicit MyAttributeManager(AttributeVector::SP attr) + : _attribute_vector(attr), _other() {} + + void set_other(AttributeVector::SP attr) { + _other = attr; + } + + virtual AttributeGuard::UP getAttribute(const string &name) const { + if (name == field) { + return AttributeGuard::UP(new AttributeGuard(_attribute_vector)); + } else if (name == other) { + return AttributeGuard::UP(new AttributeGuard(_other)); + } else { + return AttributeGuard::UP(nullptr); + } + } + + virtual AttributeGuard::UP + getAttributeStableEnum(const string &name) const { + if (name == field) { + return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector)); + } else if (name == other) { + return AttributeGuard::UP(new AttributeEnumGuard(_other)); + } else { + 
return AttributeGuard::UP(nullptr); + } + } + + virtual void getAttributeList(vector<AttributeGuard> &) const { + assert(!"Not implemented"); + } + virtual IAttributeContext::UP createContext() const { + assert(!"Not implemented"); + return IAttributeContext::UP(); + } +}; + +struct Result { + struct Hit { + uint32_t docid; + double raw_score; + int32_t match_weight; + Hit(uint32_t id, double raw, int32_t match_weight_in) + : docid(id), raw_score(raw), match_weight(match_weight_in) {} + }; + size_t est_hits; + bool est_empty; + bool has_minmax; + int32_t min_weight; + int32_t max_weight; + size_t wand_hits; + int64_t wand_initial_threshold; + double wand_boost_factor; + std::vector<Hit> hits; + vespalib::string iterator_dump; + + Result(size_t est_hits_in, bool est_empty_in) + : est_hits(est_hits_in), est_empty(est_empty_in), + has_minmax(false), min_weight(0), max_weight(0), + wand_hits(0), wand_initial_threshold(0), wand_boost_factor(0.0), + hits(), iterator_dump() {} + + void set_minmax(int32_t min, int32_t max) { + has_minmax = true; + min_weight = min; + max_weight = max; + } +}; + +void extract_posting_info(Result &result, const PostingInfo *postingInfo) { + if (postingInfo != NULL) { + const MinMaxPostingInfo *minMax = dynamic_cast<const MinMaxPostingInfo *>(postingInfo); + if (minMax != NULL) { + result.set_minmax(minMax->getMinWeight(), minMax->getMaxWeight()); + } + } +} + +void extract_wand_params(Result &result, ParallelWeakAndSearch *wand) { + if (wand != nullptr) { + result.wand_hits = wand->getMatchParams().scores.getScoresToTrack(); + result.wand_initial_threshold = wand->getMatchParams().scoreThreshold; + result.wand_boost_factor = wand->getMatchParams().thresholdBoostFactor; + } +} + +Result do_search(IAttributeManager &attribute_manager, const Node &node, bool strict) { + uint32_t fieldId = 0; + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + AttributeBlueprintFactory source; + MatchDataLayout mdl; + 
TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + Blueprint::UP bp = source.createBlueprint(requestContext, FieldSpec(field, fieldId, handle), node); + ASSERT_TRUE(bp.get() != nullptr); + Result result(bp->getState().estimate().estHits, bp->getState().estimate().empty); + bp->fetchPostings(strict); + SearchIterator::UP iterator = bp->createSearch(*match_data, strict); + ASSERT_TRUE(iterator.get() != nullptr); + iterator->initFullRange(); + extract_posting_info(result, iterator->getPostingInfo()); + extract_wand_params(result, dynamic_cast<ParallelWeakAndSearch*>(iterator.get())); + result.iterator_dump = iterator->asString(); + for (uint32_t docid = 1; docid < num_docs; ++docid) { + if (iterator->seek(docid)) { + iterator->unpack(docid); + result.hits.emplace_back(docid, + match_data->resolveTermField(handle)->getRawScore(), + match_data->resolveTermField(handle)->getWeight()); + } + } + return result; +} + +bool search(const Node &node, IAttributeManager &attribute_manager, + bool fast_search = false, bool strict = true) +{ + Result result = do_search(attribute_manager, node, strict); + if (fast_search) { + EXPECT_LESS(result.est_hits, num_docs / 10); + } else { + EXPECT_TRUE(!result.est_empty); + EXPECT_EQUAL(num_docs, result.est_hits); + } + return (result.hits.size() == 1) && (result.hits[0].docid == (num_docs - 1)); +} + +bool search(const string &term, IAttributeManager &attribute_manager, + bool fast_search = false, bool strict = true) +{ + TEST_STATE(term.c_str()); + SimpleStringTerm node(term, "field", 0, Weight(0)); + return search(node, attribute_manager, fast_search, strict); +} + +template <typename T> struct AttributeVectorTypeFinder { + //typedef search::SingleValueStringAttribute Type; + typedef SingleStringExtAttribute Type; + static void add(Type & a, const T & v) { a.add(v, weight); } +}; +template <> struct AttributeVectorTypeFinder<int64_t> { + typedef 
search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type; + static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); } +}; + +void add_docs(AttributeVector *attr, size_t n) { + AttributeVector::DocId docid = 0; + for (size_t i = 0; i < n; ++i) { + attr->addDoc(docid); + if (attr->inherits(PredicateAttribute::classId)) { + const_cast<uint8_t *>(static_cast<PredicateAttribute *>(attr)->getMinFeatureVector().first)[docid] = 0; + } + } + ASSERT_EQUAL(n - 1, docid); +} + +template <typename T> +MyAttributeManager makeAttributeManager(T value) { + typedef AttributeVectorTypeFinder<T> AT; + typedef typename AT::Type AttributeVectorType; + AttributeVectorType *attr = new AttributeVectorType(field); + add_docs(attr, num_docs); + AT::add(*attr, value); + MyAttributeManager attribute_manager(attr); + return attribute_manager; +} + +MyAttributeManager makeFastSearchLongAttributeManager(int64_t value) { + Config cfg(BasicType::INT64, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg); + IntegerAttribute *attr = static_cast<IntegerAttribute *>(attr_ptr.get()); + add_docs(attr, num_docs); + attr->update(num_docs - 1, value); + attr->commit(); + MyAttributeManager attribute_manager(attr_ptr); + return attribute_manager; +} + +TEST("requireThatIteratorsCanBeCreated") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + EXPECT_TRUE(search("foo", attribute_manager)); +} + +TEST("requireThatRangeTermsWorkToo") { + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); + + EXPECT_TRUE(search("[23;46]", attribute_manager)); + EXPECT_TRUE(!search("[10;23]", attribute_manager)); + EXPECT_TRUE(!search(">43", attribute_manager)); + EXPECT_TRUE(search("[10;]", attribute_manager)); +} + +TEST("requireThatPrefixTermsWork") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + SimplePrefixTerm node("fo",
"field", 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +TEST("requireThatLocationTermsWork") { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +TEST("requireThatOptimizedLocationTermsWork") { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager, true)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager, true)); +} + +TEST("require that optimized location search works with wrapped bounding box (no hits)") { + // 0xcc is z-curve for (10, 10). 
+ MyAttributeManager attribute_manager = makeFastSearchLongAttributeManager(int64_t(0xcc)); + SimpleLocationTerm term1(Location(Rectangle(5, 5, 15, 15)), field, 0, Weight(0)); // unwrapped + SimpleLocationTerm term2(Location(Rectangle(15, 5, 5, 15)), field, 0, Weight(0)); // wrapped x + SimpleLocationTerm term3(Location(Rectangle(5, 15, 15, 5)), field, 0, Weight(0)); // wrapped y + Result result1 = do_search(attribute_manager, term1, true); + Result result2 = do_search(attribute_manager, term2, true); + Result result3 = do_search(attribute_manager, term3, true); + EXPECT_EQUAL(1u, result1.hits.size()); + EXPECT_EQUAL(0u, result2.hits.size()); + EXPECT_EQUAL(0u, result3.hits.size()); + EXPECT_TRUE(result1.iterator_dump.find("LocationPreFilterIterator") != vespalib::string::npos); + EXPECT_TRUE(result2.iterator_dump.find("EmptySearch") != vespalib::string::npos); + EXPECT_TRUE(result3.iterator_dump.find("EmptySearch") != vespalib::string::npos); +} + +void set_weights(StringAttribute *attr, uint32_t docid, + int32_t foo_weight, int32_t bar_weight, int32_t baz_weight) +{ + attr->clearDoc(docid); + if (foo_weight > 0) attr->append(docid, "foo", foo_weight); + if (bar_weight > 0) attr->append(docid, "bar", bar_weight); + if (baz_weight > 0) attr->append(docid, "baz", baz_weight); + attr->commit(); +} + +MyAttributeManager make_weighted_string_attribute_manager(bool fast_search) { + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(fast_search); + AttributeVector::SP attr_ptr = AttributeFactory::createAttribute(field, cfg); + StringAttribute *attr = static_cast<StringAttribute *>(attr_ptr.get()); + add_docs(attr, num_docs); + set_weights(attr, 10, 0, 200, 0); + set_weights(attr, 20, 100, 200, 300); + set_weights(attr, 30, 0, 0, 300); + set_weights(attr, 40, 100, 0, 0); + set_weights(attr, 50, 1000, 0, 300); + MyAttributeManager attribute_manager(attr_ptr); + return attribute_manager; +} + +TEST("require that attribute dot product works") { + for 
(int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleDotProduct node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + ASSERT_EQUAL(5u, result.hits.size()); + if (fast_search) { + EXPECT_EQUAL(8u, result.est_hits); + } else { + // 'fox' is detected to produce no hits since it has no enum value + EXPECT_EQUAL(num_docs * 3, result.est_hits); + } + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(200.0, result.hits[0].raw_score); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(600.0, result.hits[1].raw_score); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(300.0, result.hits[2].raw_score); + EXPECT_EQUAL(40u, result.hits[3].docid); + EXPECT_EQUAL(100.0, result.hits[3].raw_score); + EXPECT_EQUAL(50u, result.hits[4].docid); + EXPECT_EQUAL(1300.0, result.hits[4].raw_score); + } +} + +TEST("require that attribute dot product can produce no hits") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleDotProduct node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("notfoo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notbar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notbaz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("notfox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, 
strict); + ASSERT_EQUAL(0u, result.hits.size()); + EXPECT_EQUAL(0u, result.est_hits); + EXPECT_TRUE(result.est_empty); + } +} + +TEST("require that direct attribute iterators work") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleStringTerm empty_node("notfoo", "", 0, Weight(1)); + Result empty_result = do_search(attribute_manager, empty_node, strict); + EXPECT_EQUAL(0u, empty_result.hits.size()); + SimpleStringTerm node("foo", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, strict); + if (fast_search) { + EXPECT_EQUAL(3u, result.est_hits); + EXPECT_TRUE(result.has_minmax); + EXPECT_EQUAL(100, result.min_weight); + EXPECT_EQUAL(1000, result.max_weight); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") + != vespalib::string::npos); + } else { + EXPECT_EQUAL(num_docs, result.est_hits); + EXPECT_FALSE(result.has_minmax); + EXPECT_TRUE(result.iterator_dump.find("DocumentWeightSearchIterator") + == vespalib::string::npos); + } + ASSERT_EQUAL(3u, result.hits.size()); + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(40u, result.hits[1].docid); + EXPECT_EQUAL(50u, result.hits[2].docid); + } +} + +const char *as_str(bool flag) { return flag? 
"true" : "false"; } + +TEST("require that attribute parallel wand works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleWandTerm node(field, 0, Weight(1), 10, 500, 1.5); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("baz", "", 0, Weight(1)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(1)))); + Result result = do_search(attribute_manager, node, strict); + EXPECT_FALSE(result.est_empty); + if (fast_search) { + EXPECT_EQUAL(8u, result.est_hits); + } else { + // 'fox' is detected to produce no hits since it has no enum value + EXPECT_EQUAL(num_docs * 3, result.est_hits); + } + if (EXPECT_EQUAL(2u, result.hits.size())) { + if (result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { + EXPECT_EQUAL(10u, result.wand_hits); + EXPECT_EQUAL(500, result.wand_initial_threshold); + EXPECT_EQUAL(1.5, result.wand_boost_factor); + } + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(600.0, result.hits[0].raw_score); + EXPECT_EQUAL(50u, result.hits[1].docid); + EXPECT_EQUAL(1300.0, result.hits[1].raw_score); + } else { + fprintf(stderr, " (fast_search: %s, strict: %s)\n", + as_str(fast_search), as_str(strict)); + assert(false); + } + } +} + +TEST("require that attribute weighted set term works") { + for (int i = 0; i <= 0x3; ++i) { + bool fast_search = ((i & 0x1) != 0); + bool strict = ((i & 0x2) != 0); + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(fast_search); + SimpleWeightedSetTerm node(field, 0, Weight(1)); + node.append(Node::UP(new SimpleStringTerm("foo", "", 0, Weight(10)))); + node.append(Node::UP(new SimpleStringTerm("bar", "", 0, Weight(20)))); + node.append(Node::UP(new 
SimpleStringTerm("baz", "", 0, Weight(30)))); + node.append(Node::UP(new SimpleStringTerm("fox", "", 0, Weight(40)))); + Result result = do_search(attribute_manager, node, strict); + EXPECT_FALSE(result.est_empty); + ASSERT_EQUAL(5u, result.hits.size()); + if (fast_search && result.iterator_dump.find("MonitoringDumpIterator") == vespalib::string::npos) { + fprintf(stderr, "DUMP: %s\n", result.iterator_dump.c_str()); + EXPECT_TRUE(result.iterator_dump.find("AttributeIteratorPack") != vespalib::string::npos); + } + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(20, result.hits[0].match_weight); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(30, result.hits[1].match_weight); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(30, result.hits[2].match_weight); + EXPECT_EQUAL(40u, result.hits[3].docid); + EXPECT_EQUAL(10, result.hits[3].match_weight); + EXPECT_EQUAL(50u, result.hits[4].docid); + EXPECT_EQUAL(30, result.hits[4].match_weight); + } +} + +TEST("require that predicate query in non-predicate field yields empty.") { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + PredicateQueryTerm::UP term(new PredicateQueryTerm); + SimplePredicateQuery node(std::move(term), field, 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + EXPECT_TRUE(result.est_empty); + EXPECT_EQUAL(0u, result.hits.size()); +} + +TEST("require that predicate query in predicate field yields results.") { + PredicateAttribute *attr = + new PredicateAttribute( + field, Config(BasicType::PREDICATE, + CollectionType::SINGLE)); + add_docs(attr, num_docs); + attr->getIndex().indexEmptyDocument(2); // matches anything + attr->getIndex().commit(); + const_cast<PredicateAttribute::IntervalRange *>(attr->getIntervalRangeVector())[2] = 1u; + MyAttributeManager attribute_manager(attr); + + PredicateQueryTerm::UP term(new PredicateQueryTerm); + SimplePredicateQuery node(std::move(term), field, 0, Weight(1)); + Result result = 
do_search(attribute_manager, node, true); + EXPECT_FALSE(result.est_empty); + EXPECT_EQUAL(1u, result.hits.size()); +} + +TEST("require that substring terms work") { + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true); + SimpleSubstringTerm node("a", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + ASSERT_EQUAL(4u, result.hits.size()); + EXPECT_EQUAL(10u, result.hits[0].docid); + EXPECT_EQUAL(20u, result.hits[1].docid); + EXPECT_EQUAL(30u, result.hits[2].docid); + EXPECT_EQUAL(50u, result.hits[3].docid); +} + +TEST("require that suffix terms work") { + MyAttributeManager attribute_manager = make_weighted_string_attribute_manager(true); + SimpleSuffixTerm node("oo", "", 0, Weight(1)); + Result result = do_search(attribute_manager, node, true); + ASSERT_EQUAL(3u, result.hits.size()); + EXPECT_EQUAL(20u, result.hits[0].docid); + EXPECT_EQUAL(40u, result.hits[1].docid); + EXPECT_EQUAL(50u, result.hits[2].docid); +} + +void set_attr_value(AttributeVector &attr, uint32_t docid, size_t value) { + IntegerAttribute *int_attr = dynamic_cast<IntegerAttribute *>(&attr); + FloatingPointAttribute *float_attr = dynamic_cast<FloatingPointAttribute *>(&attr); + StringAttribute *string_attr = dynamic_cast<StringAttribute *>(&attr); + if (int_attr != nullptr) { + int_attr->update(docid, value); + int_attr->commit(); + } else if (float_attr != nullptr) { + float_attr->update(docid, value); + float_attr->commit(); + } else if (string_attr != nullptr) { + ASSERT_LESS(value, size_t(27*26 + 26)); + vespalib::string str; + str.push_back('a' + value / 27); + str.push_back('a' + value % 27); + string_attr->update(docid, str); + string_attr->commit(); + } else { + ASSERT_TRUE(false); + } +} + +MyAttributeManager make_diversity_setup(BasicType::Type field_type, + bool field_fast_search, + BasicType::Type other_type, + bool other_fast_search) +{ + Config field_cfg(field_type, CollectionType::SINGLE); + 
field_cfg.setFastSearch(field_fast_search); + AttributeVector::SP field_attr = AttributeFactory::createAttribute(field, field_cfg); + Config other_cfg(other_type, CollectionType::SINGLE); + other_cfg.setFastSearch(other_fast_search); + AttributeVector::SP other_attr = AttributeFactory::createAttribute(other, other_cfg); + add_docs(&*field_attr, num_docs); + add_docs(&*other_attr, num_docs); + for (size_t i = 1; i < num_docs; ++i) { + set_attr_value(*field_attr, i, i / 5); + set_attr_value(*other_attr, i, i / 10); + } + MyAttributeManager attribute_manager(field_attr); + attribute_manager.set_other(other_attr); + return attribute_manager; +} + +size_t diversity_hits(IAttributeManager &manager, const vespalib::string &term, bool strict) { + SimpleRangeTerm node(term, "", 0, Weight(1)); + Result result = do_search(manager, node, strict); + return result.hits.size(); +} + +std::pair<size_t,size_t> diversity_docid_range(IAttributeManager &manager, const vespalib::string &term, bool strict) { + SimpleRangeTerm node(term, "", 0, Weight(1)); + Result result = do_search(manager, node, strict); + std::pair<size_t, size_t> range(0, 0); + for (const Result::Hit &hit: result.hits) { + if (range.first == 0) { + range.first = hit.docid; + range.second = hit.docid; + } else { + EXPECT_GREATER(size_t(hit.docid), range.second); + range.second = hit.docid; + } + } + return range; +} + +TEST("require that diversity range searches work for various types") { + for (auto field_type: std::vector<BasicType::Type>({BasicType::INT32, BasicType::DOUBLE})) { + for (auto other_type: std::vector<BasicType::Type>({BasicType::INT16, BasicType::INT32, BasicType::INT64, + BasicType::FLOAT, BasicType::DOUBLE, BasicType::STRING})) + { + for (bool other_fast_search: std::vector<bool>({true, false})) { + MyAttributeManager manager = make_diversity_setup(field_type, true, other_type, other_fast_search); + for (bool strict: std::vector<bool>({true, false})) { + 
// Diversity must also work when the range covers exactly one field value.
//
// make_diversity_setup() stores i / 5 in the range field and i / 10 in the
// "other" attribute for every docid i it fills, so the docs with field
// value 2 (docids 10..14, presuming num_docs is larger than that) all carry
// other == 1, i.e. they fall into a single diversity group.
//
// NOTE(review): the term is presumably laid out as
// [lower;upper;hit limit;diversity attribute;max hits per group], with a
// negative limit selecting from the opposite end - confirm against the
// range term parser.
//
// Exactly 2 hits are expected for both limit signs, with strict == true
// as well as strict == false (last argument of diversity_hits()).
TEST("require that diversity also works for a single unique value") {
    MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true);
    EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", true));
    EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", true));
    EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;100;other;2]", false));
    EXPECT_EQUAL(2u, diversity_hits(manager, "[2;2;-100;other;2]", false));
}
diversity_hits(manager, "[;;-1000;bogus;10]", true)); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;1000;;10]", true)); + EXPECT_EQUAL(0u, diversity_hits(manager, "[;;-1000;;10]", true)); +} + +TEST("require that loose diversity gives enough diversity and hits while doing less work") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;1000;other;10;4;loose]", true)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).first); + EXPECT_EQUAL(16u, diversity_docid_range(manager, "[;;10;other;3;2;loose]", true).second); +} + +TEST("require that strict diversity gives enough diversity and hits while doing less work, even though more than loose, but more correct than loose") { + MyAttributeManager manager = make_diversity_setup(BasicType::INT32, true, BasicType::INT32, true); + EXPECT_EQUAL(999u, diversity_hits(manager, "[;;-1000;other;10;4;strict]", true)); + EXPECT_EQUAL(1u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).first); + EXPECT_EQUAL(23u, diversity_docid_range(manager, "[;;10;other;3;2;strict]", true).second); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh new file mode 100755 index 00000000000..9fcee4b1ebb --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_searchable_adapter_test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +$VALGRIND ./searchlib_attribute_searchable_adapter_test_sh +rm -f ./my_logctl_file +VESPA_LOG_CONTROL_FILE=./my_logctl_file VESPA_LOG_LEVEL=all $VALGRIND ./searchlib_attribute_searchable_adapter_test_app diff --git a/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp new file mode 100644 index 
00000000000..bd781a37a5b --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attribute_weighted_set_blueprint_test.cpp @@ -0,0 +1,231 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/attribute/attribute_blueprint_factory.h> +#include <vespa/searchlib/attribute/attribute_weighted_set_blueprint.h> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/attribute/attributecontext.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <memory> +#include <string> +#include <map> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/attribute/singlestringattribute.h> + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using namespace search::attribute; + +namespace { + +class FakeAttributeManager : public IAttributeManager +{ +private: + typedef std::map<std::string, AttributeVector::SP> Map; + Map _map; + + AttributeVector::SP lookup(const std::string &name) const { + Map::const_iterator pos = _map.find(name); + if (pos == _map.end()) { + return AttributeVector::SP(); + } + return pos->second; + } + +public: + FakeAttributeManager() : _map() {} + + void 
addAttribute(AttributeVector::SP attr) { + _map[attr->getName()] = attr; + } + + virtual AttributeGuard::UP getAttribute(const vespalib::string &name) const { + return AttributeGuard::UP(new AttributeGuard(lookup(name))); + } + + virtual AttributeGuard::UP getAttributeStableEnum(const vespalib::string &name) const { + return AttributeGuard::UP(new AttributeEnumGuard(lookup(name))); + } + + virtual void getAttributeList(std::vector<AttributeGuard> &list) const { + Map::const_iterator pos = _map.begin(); + for (; pos != _map.end(); ++pos) { + list.push_back(pos->second); + } + } + + virtual IAttributeContext::UP createContext() const { + return IAttributeContext::UP(new AttributeContext(*this)); + } +}; + +void +setupAttributeManager(FakeAttributeManager &manager) +{ + AttributeVector::DocId docId; + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "integer", Config(BasicType("int64"))); + IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->update(docId, i); + attr->commit(); + } + manager.addAttribute(attr_sp); + } + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "string", Config(BasicType("string"))); + StringAttribute *attr = (StringAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->update(i, std::string(1, '1' + i - 1).c_str()); + attr->commit(); + } + manager.addAttribute(attr_sp); + } + { + AttributeVector::SP attr_sp = AttributeFactory::createAttribute( + "multi", Config(BasicType("int64"), search::attribute::CollectionType("array"))); + IntegerAttribute *attr = (IntegerAttribute*)(attr_sp.get()); + attr->addDoc(docId); + assert(0u == docId); + for (size_t i = 1; i < 10; ++i) { + attr->addDoc(docId); + assert(i == docId); + attr->append(docId, i, 0); + 
attr->append(docId, i + 10, 1); + attr->commit(); + } + manager.addAttribute(attr_sp); + } +} + +struct WS { + static const uint32_t fieldId = 42; + IAttributeManager & attribute_manager; + MatchDataLayout layout; + TermFieldHandle handle; + std::vector<std::pair<std::string, uint32_t> > tokens; + + WS(IAttributeManager & manager) : attribute_manager(manager), layout(), handle(layout.allocTermField(fieldId)), tokens() { + MatchData::UP tmp = layout.createMatchData(); + ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); + } + + WS &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode() const { + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + 
bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + FakeResult result; + sb->initFullRange(); + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + TermFieldMatchData &data = *md->resolveTermField(handle); + FieldPositionsIterator itr = data.getIterator(); + for (; itr.valid(); itr.next()) { + result.elem(itr.getElementId()); + result.weight(itr.getElementWeight()); + result.pos(itr.getPosition()); + } + } + } + return result; + } +}; + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("attribute_weighted_set_test"); + { + FakeAttributeManager manager; + setupAttributeManager(manager); + AttributeBlueprintFactory adapter; + + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS(manager).add("7", 70).add("5", 50).add("3", 30); + + EXPECT_TRUE(ws.isGenericSearch(adapter, "integer", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "integer", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "string", true)); + EXPECT_TRUE(!ws.isGenericSearch(adapter, "string", false)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", true)); + EXPECT_TRUE(ws.isGenericSearch(adapter, "multi", false)); + + EXPECT_EQUAL(expect, ws.search(adapter, "integer", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "integer", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "string", false)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", true)); + EXPECT_EQUAL(expect, ws.search(adapter, "multi", false)); + } + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp new file mode 100644 index 
00000000000..ed851d872e1 --- /dev/null +++ b/searchlib/src/tests/attribute/searchable/attributeblueprint_test.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attributeblueprint_test"); + +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchlib/attribute/attribute_blueprint_factory.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributecontext.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.hpp> +#include <vespa/searchlib/attribute/singlenumericpostattribute.hpp> +#include <vespa/searchlib/attribute/iattributemanager.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <memory> +#include <string> + +using search::AttributeEnumGuard; +using search::AttributeGuard; +using search::AttributeVector; +using search::IAttributeManager; +using search::SingleStringExtAttribute; +using search::attribute::IAttributeContext; +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::SimpleLocationTerm; +using search::query::SimplePrefixTerm; +using search::query::SimpleStringTerm; +using search::query::Weight; +using 
search::queryeval::Blueprint; +using search::queryeval::FieldSpec; +using search::queryeval::SearchIterator; +using search::queryeval::FakeRequestContext; +using std::string; +using std::vector; +using namespace search::attribute; +using namespace search; + +namespace { + +class Test : public vespalib::TestApp { + void requireThatIteratorsCanBeCreated(); + void requireThatRangeTermsWorkToo(); + void requireThatPrefixTermsWork(); + void requireThatLocationTermsWork(); + void requireThatFastSearchLocationTermsWork(); + + bool search(const string &term, IAttributeManager &attribute_manager); + bool search(const Node &term, IAttributeManager &attribute_manager); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("attributeblueprint_test"); + + TEST_DO(requireThatIteratorsCanBeCreated()); + TEST_DO(requireThatRangeTermsWorkToo()); + TEST_DO(requireThatPrefixTermsWork()); + TEST_DO(requireThatLocationTermsWork()); + TEST_DO(requireThatFastSearchLocationTermsWork()); + + TEST_DONE(); +} + +const string field = "field"; +const int32_t weight = 1; + +class MyAttributeManager : public IAttributeManager { + AttributeVector::SP _attribute_vector; + AttributeVector::DocId _docid; + +public: + MyAttributeManager(AttributeVector *attr) + : _attribute_vector(attr) {} + + virtual AttributeGuard::UP getAttribute(const string &) const { + return AttributeGuard::UP(new AttributeGuard(_attribute_vector)); + } + + virtual AttributeGuard::UP + getAttributeStableEnum(const string &) const { + return AttributeGuard::UP(new AttributeEnumGuard(_attribute_vector)); + } + + virtual void getAttributeList(vector<AttributeGuard> &) const { + assert(!"Not implemented"); + } + virtual IAttributeContext::UP createContext() const { + assert(!"Not implemented"); + return IAttributeContext::UP(); + } +}; + +bool Test::search(const string &term, IAttributeManager &attribute_manager) { + TEST_STATE(term.c_str()); + SimpleStringTerm node(term, "field", 0, Weight(0)); + bool ret = 
search(node, attribute_manager); + return ret; +} + +bool Test::search(const Node &node, IAttributeManager &attribute_manager) { + AttributeContext ac(attribute_manager); + FakeRequestContext requestContext(&ac); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + AttributeBlueprintFactory source; + Blueprint::UP result = source.createBlueprint(requestContext, FieldSpec(field, 0, 0), node); + ASSERT_TRUE(result.get()); + EXPECT_TRUE(!result->getState().estimate().empty); + EXPECT_EQUAL(3u, result->getState().estimate().estHits); + result->fetchPostings(true); + SearchIterator::UP iterator = result->createSearch(*md, true); + ASSERT_TRUE((bool)iterator); + iterator->initFullRange(); + EXPECT_TRUE(!iterator->seek(1)); + return iterator->seek(2); +} + +template <typename T> struct AttributeVectorTypeFinder { + typedef SingleStringExtAttribute Type; + static void add(Type & a, const T & v) { a.add(v, weight); } +}; +template <> struct AttributeVectorTypeFinder<int64_t> { + typedef search::SingleValueNumericAttribute<search::IntegerAttributeTemplate<int64_t> > Type; + static void add(Type & a, int64_t v) { a.set(a.getNumDocs()-1, v); a.commit(); } +}; + +struct FastSearchLongAttribute { + typedef search::SingleValueNumericPostingAttribute< search::EnumAttribute<search::IntegerAttributeTemplate<int64_t> > > Type; + static void add(Type & a, int64_t v) { a.update(a.getNumDocs()-1, v); a.commit(); } +}; + +template <typename AT, typename T> +MyAttributeManager fill(typename AT::Type * attr, T value) { + AttributeVector::DocId docid; + attr->addDoc(docid); + attr->addDoc(docid); + attr->addDoc(docid); + assert(2u == docid); + AT::add(*attr, value); + MyAttributeManager attribute_manager(attr); + return attribute_manager; +} + +template <typename T> +MyAttributeManager makeAttributeManager(T value) { + typedef AttributeVectorTypeFinder<T> AT; + typedef typename AT::Type AttributeVectorType; + AttributeVectorType *attr = new AttributeVectorType(field); + return 
fill<AT, T>(attr, value); +} + +MyAttributeManager makeFastSearchLongAttribute(int64_t value) { + typedef FastSearchLongAttribute::Type AttributeVectorType; + Config cfg(BasicType::fromType(int64_t()), CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributeVectorType *attr = new AttributeVectorType(field, cfg); + return fill<FastSearchLongAttribute, int64_t>(attr, value); +} + +void Test::requireThatIteratorsCanBeCreated() { + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + EXPECT_TRUE(search("foo", attribute_manager)); +} + +void Test::requireThatRangeTermsWorkToo() { + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(42)); + + EXPECT_TRUE(search("[23;46]", attribute_manager)); + EXPECT_TRUE(!search("[10;23]", attribute_manager)); + EXPECT_TRUE(!search(">43", attribute_manager)); + EXPECT_TRUE(search("[10;]", attribute_manager)); +} + +void Test::requireThatPrefixTermsWork() +{ + MyAttributeManager attribute_manager = makeAttributeManager("foo"); + + SimplePrefixTerm node("fo", "field", 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +void Test::requireThatLocationTermsWork() { + // 0xcc is z-curve for (10, 10). + MyAttributeManager attribute_manager = makeAttributeManager(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +} + +void Test::requireThatFastSearchLocationTermsWork() { + // 0xcc is z-curve for (10, 10). 
+ MyAttributeManager attribute_manager = makeFastSearchLongAttribute(int64_t(0xcc)); + + SimpleLocationTerm node(Location(Point(10, 10), 3, 0), + field, 0, Weight(0)); +#if 0 + EXPECT_TRUE(search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(100, 100), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(13, 13), 4, 0), + field, 0, Weight(0)); + EXPECT_TRUE(!search(node, attribute_manager)); + node = SimpleLocationTerm(Location(Point(10, 13), 3, 0), + field, 0, Weight(0)); + EXPECT_TRUE(search(node, attribute_manager)); +#endif +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/searchcontext/.gitignore b/searchlib/src/tests/attribute/searchcontext/.gitignore new file mode 100644 index 00000000000..61dc5e8fc8e --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +searchcontext_test +searchlib_searchcontext_test_app diff --git a/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt new file mode 100644 index 00000000000..24652373a00 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_searchcontext_test_app + SOURCES + searchcontext.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_searchcontext_test_app COMMAND sh searchcontext_test.sh) diff --git a/searchlib/src/tests/attribute/searchcontext/DESC b/searchlib/src/tests/attribute/searchcontext/DESC new file mode 100644 index 00000000000..8ce9805dbb0 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/DESC @@ -0,0 +1 @@ +Unit test for AttributeVector::SearchContext using all attribute vector implementations. 
diff --git a/searchlib/src/tests/attribute/searchcontext/FILES b/searchlib/src/tests/attribute/searchcontext/FILES new file mode 100644 index 00000000000..cebd66e863f --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/FILES @@ -0,0 +1 @@ +searchcontext.cpp diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp new file mode 100644 index 00000000000..6c69e79a93b --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext.cpp @@ -0,0 +1,1900 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributeiterators.h> +#include <vespa/searchlib/attribute/flagattribute.h> +#include <vespa/searchlib/attribute/singlenumericattribute.h> +#include <vespa/searchlib/attribute/multinumericattribute.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/fef/termfieldmatchdataposition.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/queryeval/hitcollector.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/compress.h> +#include <vespa/vespalib/stllike/asciistream.h> +#include <vespa/searchlib/test/initrange.h> +#include <iterator> +#include <set> + +#include <vespa/searchlib/attribute/attributevector.hpp> + +LOG_SETUP("searchcontext_test"); + +namespace search { + +namespace +{ + +bool 
/**
 * Ordered set of document ids with a chainable insert helper, so expected
 * hit sets can be written as DocSet().put(1).put(5)...
 */
class DocSet : public std::set<uint32_t>
{
    typedef std::set<uint32_t> Parent;
public:
    DocSet() : Parent() {}
    // Builds the set from the half-open array range [b, e).
    DocSet(const uint32_t *b, const uint32_t *e) : Parent(b, e) {}
    // Inserts 'v' (no effect if already present) and returns *this.
    DocSet & put(const uint32_t &v) {
        this->insert(v);
        return *this;
    }
};
addReservedDoc(AttributeVector &ptr); + + void addDocs(AttributeVector & ptr, uint32_t numDocs); + template <typename T> + void fillVector(std::vector<T> & values, size_t numValues); + template <typename V, typename T> + void fillAttribute(V & vec, const std::vector<T> & values); + template <typename V, typename T> + void resetAttribute(V & vec, const T & value); + template <typename V, typename T> + void fillPostingList(PostingList<V, T> & pl, const DocRange & range); + template <typename V, typename T> + void fillPostingList(PostingList<V, T> & pl); + void buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const vespalib::string & term, + QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template <typename V, typename T> + SearchContextPtr getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + ResultSetPtr performSearch(SearchIterator & sb, uint32_t numDocs); + template <typename V, typename T> + ResultSetPtr performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType=QueryTermSimple::WORD); + template <typename V> + void performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void checkResultSet(const ResultSet & rs, const DocSet & exp, bool bitVector); + + template<typename T, typename A> + void testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs); + void testInitRange(); + // test search functionality + template <typename V, typename T> + void testFind(const PostingList<V, T> & first); + + template <typename V, typename T> + void testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values); + template<typename T, typename A> + void testSearch(const ConfigMap & cfgs); + template <typename V, typename T> + void testMultiValueSearchHelper(V & vec, const std::vector<T> & values); + template <typename V, typename T> + void testMultiValueSearch(V 
& first, V & second, const std::vector<T> & values); + void testSearch(); + + class IteratorTester { + public: + virtual bool matches(const SearchIterator & base) const = 0; + virtual ~IteratorTester() { } + }; + class AttributeIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return dynamic_cast<const AttributeIterator *>(&base) != NULL; + } + }; + class FlagAttributeIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return (dynamic_cast<const FlagAttributeIterator *>(&base) != NULL) || + (dynamic_cast<const BitVectorIterator *>(&base) != NULL) || + (dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL); + } + }; + class AttributePostingListIteratorTester : public IteratorTester + { + public: + virtual bool matches(const SearchIterator & base) const { + return dynamic_cast<const AttributePostingListIterator *>(&base) != NULL || + dynamic_cast<const queryeval::EmptySearch *>(&base) != NULL; + + } + }; + + + // test search iterator functionality + void testStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester); + void testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester); + void fillForSearchIteratorTest(IntegerAttribute * ia); + void fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia); + void testSearchIterator(); + + + // test search iterator unpacking + void fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, bool extra); + void testSearchIteratorUnpacking(const AttributePtr & ptr, + SearchContext & sc, + bool extra, + bool strict); + void testSearchIteratorUnpacking(); + + + // test range search + template <typename VectorType> + void performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected); + template <typename VectorType, typename ValueType> + void 
testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values); + void testRangeSearch(); + void testRangeSearchLimited(); + + + // test case insensitive search + void performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected); + void testCaseInsensitiveSearch(const AttributePtr & ptr); + void testCaseInsensitiveSearch(); + void testRegexSearch(const AttributePtr & ptr); + void testRegexSearch(); + + + // test prefix search + void performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType); + void testPrefixSearch(const AttributePtr & ptr); + void testPrefixSearch(); + + // test that search is working after clear doc + template <typename VectorType, typename ValueType> + void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, const vespalib::string & term); + void requireThatSearchIsWorkingAfterClearDoc(); + + // test that search is working after load and clear doc + template <typename VectorType, typename ValueType> + void requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, const Config & cfg, + ValueType startValue, ValueType defaultValue, + const vespalib::string & term); + void requireThatSearchIsWorkingAfterLoadAndClearDoc(); + + template <typename VectorType, typename ValueType> + void requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2); + void requireThatSearchIsWorkingAfterUpdates(); + + void requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded(); + + template <typename VectorType, typename ValueType> + void requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value); + void requireThatInvalidSearchTermGivesZeroHits(); + + void 
requireThatFlagAttributeHandlesTheByteRange(); + + void requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue); + void requireThatOutOfBoundsSearchTermGivesZeroHits(); + + // init maps with config objects + void initIntegerConfig(); + void initFloatConfig(); + void initStringConfig(); + +public: + SearchContextTest(); + int Main(); +}; + + +void +SearchContextTest::addReservedDoc(AttributeVector &ptr) +{ + ptr.addReservedDoc(); +} + + +void +SearchContextTest::addDocs(AttributeVector & ptr, uint32_t numDocs) +{ + uint32_t docId; + addReservedDoc(ptr); + for (uint32_t i = 1; i <= numDocs; ++i) { + ptr.addDoc(docId); + EXPECT_EQUAL(docId, i); + } + ASSERT_TRUE(ptr.getNumDocs() == numDocs + 1); +} + +template <typename T> +void +SearchContextTest::fillVector(std::vector<T> & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 1; i <= numValues; ++i) { + values.push_back(static_cast<T>(i)); + } +} + +template <> +void +SearchContextTest::fillVector(std::vector<vespalib::string> & values, size_t numValues) +{ + values.clear(); + values.reserve(numValues); + for (size_t i = 0; i < numValues; ++i) { + vespalib::asciistream ss; + ss << "string" << (i < 10 ? 
"0" : "") << i; + values.push_back(ss.str()); + } +} + +template <typename V, typename T> +void +SearchContextTest::fillAttribute(V & vec, const std::vector<T> & values) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + vec.clearDoc(doc); + uint32_t valueCount = doc % (values.size() + 1); + for (uint32_t i = 0; i < valueCount; ++i) { + // std::cout << "append(" << doc << ", " << values[i] << ")" << std::endl; + EXPECT_TRUE(vec.append(doc, values[i], 1)); + } + } + vec.commit(true); +} + +template <typename V, typename T> +void +SearchContextTest::resetAttribute(V & vec, const T & value) +{ + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, value)); + } + vec.commit(true); +} + +template <typename V, typename T> +void +SearchContextTest::fillPostingList(PostingList<V, T> & pl, const DocRange & range) +{ + pl.getHits().clear(); + for (uint32_t doc = range.start; doc < range.end; ++doc) { + ASSERT_TRUE(doc < pl.getAttribute().getNumDocs()); + EXPECT_TRUE(pl.getAttribute().update(doc, pl.getValue())); + pl.getHits().insert(doc); + } + pl.getAttribute().commit(true); +} + +template <typename V, typename T> +void +SearchContextTest::fillPostingList(PostingList<V, T> & pl) +{ + AttributeVector & vec = dynamic_cast<AttributeVector &>(pl.getAttribute()); + pl.getHits().clear(); + uint32_t sz = vec.getMaxValueCount(); + T * buf = new T[sz]; + for (uint32_t doc = 1; doc < vec.getNumDocs(); ++doc) { + uint32_t valueCount = vec.get(doc, buf, sz); + EXPECT_TRUE(valueCount <= sz); + for (uint32_t i = 0; i < valueCount; ++i) { + if (buf[i] == pl.getValue()) { + //std::cout << "hit for doc(" << doc << "): buf[" << i << "] (=" << buf[i] << ") == " << pl.getValue() << std::endl; + pl.getHits().insert(doc); + break; + } + } + } + delete [] buf; +} + +void +SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::string & index, const 
vespalib::string & term, QueryTermSimple::SearchTerm termType) +{ + uint32_t indexLen = index.size(); + uint32_t termLen = term.size(); + uint32_t queryPacketSize = 1 + 2 * 4 + indexLen + termLen; + uint32_t p = 0; + buffer.resize(queryPacketSize); + switch (termType) { + case QueryTermSimple::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break; + case QueryTermSimple::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break; + default: + buffer[p++] = ParseItem::ITEM_TERM; + break; + } + p += vespalib::compress::Integer::compressPositive(indexLen, &buffer[p]); + memcpy(&buffer[p], index.c_str(), indexLen); + p += indexLen; + p += vespalib::compress::Integer::compressPositive(termLen, &buffer[p]); + memcpy(&buffer[p], term.c_str(), termLen); + p += termLen; + buffer.resize(p); +} + +template <typename V, typename T> +SearchContextPtr +SearchContextTest::getSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + std::vector<char> query; + vespalib::asciistream ss; + ss << term; + buildTermQuery(query, vec.getName(), ss.str(), termType); + + return (dynamic_cast<const AttributeVector &>(vec)). + getSearch(vespalib::stringref(&query[0], query.size()), + AttributeVector::SearchContext::Params()); +} + +ResultSetPtr +SearchContextTest::performSearch(SearchIterator & sb, uint32_t numDocs) +{ + HitCollector hc(numDocs, numDocs, 0); + sb.initFullRange(); + // assume strict toplevel search object located at start + for (sb.seek(1u); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + hc.addHit(sb.getDocId(), 0.0); + } + return hc.getResultSet(); +} + +template <typename V, typename T> +ResultSetPtr +SearchContextTest::performSearch(const V & vec, const T & term, QueryTermSimple::SearchTerm termType) +{ + TermFieldMatchData dummy; + SearchContextPtr sc = getSearch(vec, term, termType); + sc->fetchPostings(true); + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, vec.getNumDocs()); + return rs; +} + +template <typename V> +void +SearchContextTest::performSearch(const V & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ +#if 0 + std::cout << "performSearch[" << term << "]: {"; + std::copy(expected.begin(), expected.end(), std::ostream_iterator<uint32_t>(std::cout, ", ")); + std::cout << "}, prefix(" << (prefix ? "true" : "false") << ")" << std::endl; +#endif + { // strict search iterator + ResultSetPtr rs = performSearch(vec, term, termType); + checkResultSet(*rs, expected, false); + } +} + +void +SearchContextTest::checkResultSet(const ResultSet & rs, const DocSet & expected, bool bitVector) +{ + EXPECT_EQUAL(rs.getNumHits(), expected.size()); + if (bitVector) { + const BitVector * vec = rs.getBitOverflow(); + if (expected.size() != 0) { + ASSERT_TRUE(vec != NULL); + for (const auto & expect : expected) { + EXPECT_TRUE(vec->testBit(expect)); + } + } + } else { + const RankedHit * array = rs.getArray(); + if (expected.size() != 0) { + ASSERT_TRUE(array != NULL); + uint32_t i = 0; + for (DocSet::const_iterator iter = expected.begin(); + iter != expected.end(); ++iter, ++i) + { + EXPECT_TRUE(array[i]._docId == *iter); + } + } + } +} + + +//----------------------------------------------------------------------------- +// Test search functionality +//----------------------------------------------------------------------------- +template <typename V, typename T> +void +SearchContextTest::testFind(const 
PostingList<V, T> & pl) +{ + { // strict search iterator + SearchContextPtr sc = getSearch(pl.getAttribute(), pl.getValue()); + sc->fetchPostings(true); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ResultSetPtr rs = performSearch(*sb, pl.getAttribute().getNumDocs()); + checkResultSet(*rs, pl.getHits(), false); + } +} + +template <typename V, typename T> +void +SearchContextTest::testSearch(V & attribute, uint32_t numDocs, const std::vector<T> & values) +{ + LOG(info, "testSearch: vector '%s' with %u documents and %lu unique values", + attribute.getName().c_str(), numDocs, static_cast<unsigned long>(values.size())); + + // fill attribute vectors + addDocs(attribute, numDocs); + + std::vector<PostingList<V, T> > lists; + + // fill posting lists + ASSERT_TRUE((attribute.getNumDocs() - 1) % values.size() == 0); + uint32_t hitCount = attribute.getNumDocs() / values.size(); + for (uint32_t i = 0; i < values.size(); ++i) { + // for each value a range with hitCount documents will hit on that value + lists.push_back(PostingList<V, T>(attribute, values[i])); + fillPostingList(lists.back(), DocRange(i * hitCount + 1, (i + 1) * hitCount + 1)); + } + + // test find() + for (const auto & list : lists) { + testFind(list); + } +} + +template <typename V, typename T> +void +SearchContextTest::testMultiValueSearchHelper(V & vec, const std::vector<T> & values) +{ + std::vector<PostingList<V, T> > lists; + + // fill posting lists based on attribute content + for (const T & value : values) { + lists.push_back(PostingList<V, T>(vec, value)); + fillPostingList(lists.back()); + } + + // test find() + for (const auto & list : lists) { + //std::cout << "testFind(lists[" << i << "]): value = " << lists[i].getValue() + // << ", hit count = " << lists[i].getHitCount() << std::endl; + testFind(list); + } +} + +template <typename V, typename T> +void +SearchContextTest::testMultiValueSearch(V & first, V & second, const std::vector<T> & values) +{ + 
addDocs(first, second.getNumDocs()); + LOG(info, "testMultiValueSearch: vector '%s' with %u documents and %lu unique values", + first.getName().c_str(), first.getNumDocs(), static_cast<unsigned long>(values.size())); + + fillAttribute(first, values); + + testMultiValueSearchHelper(first, values); + + ASSERT_TRUE(first.saveAs(second.getBaseFileName())); + ASSERT_TRUE(second.load()); + + testMultiValueSearchHelper(second, values); + + size_t sz = values.size(); + ASSERT_TRUE(sz > 2); + std::vector<T> subset; + // values[sz - 2] is not used -> 0 hits + // values[sz - 1] is used once -> 1 hit + for (size_t i = 0; i < sz - 2; ++i) { + subset.push_back(values[i]); + } + + fillAttribute(first, subset); + + ASSERT_TRUE(1u < first.getNumDocs()); + EXPECT_TRUE(first.append(1u, values[sz - 1], 1)); + first.commit(true); + + testMultiValueSearchHelper(first, values); + + ASSERT_TRUE(first.saveAs(second.getBaseFileName())); + ASSERT_TRUE(second.load()); + + testMultiValueSearchHelper(second, values); +} + +template<typename T, typename A> +void SearchContextTest::testSearch(const ConfigMap & cfgs) { + uint32_t numDocs = 100; + uint32_t numUniques = 20; + std::vector<T> values; + fillVector(values, numUniques); + for (const auto & cfg : cfgs) { + AttributePtr second = AttributeFactory::createAttribute(cfg.first + "-2", cfg.second); + testSearch(*(dynamic_cast<A *>(second.get())), numDocs, values); + if (second->hasMultiValue()) { + AttributePtr first = AttributeFactory::createAttribute(cfg.first + "-1", cfg.second); + testMultiValueSearch(*(dynamic_cast<A *>(first.get())), + *(dynamic_cast<A *>(second.get())), values); + } + } +} + +using search::test::InitRangeVerifier; + +template<typename T, typename A> +void SearchContextTest::testInitRange(T key, const vespalib::string & keyAsString, const ConfigMap & cfgs) { + InitRangeVerifier ir; + for (const auto & cfg : cfgs) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-initrange", cfg.second); + 
addDocs(*attribute, ir.getDocIdLimit()); + for (uint32_t doc : ir.getExpectedDocIds()) { + EXPECT_TRUE(nullptr != dynamic_cast<A *>(attribute.get())); + EXPECT_TRUE(dynamic_cast<A *>(attribute.get())->update(doc, key)); + } + attribute->commit(true); + SearchContextPtr sc = getSearch(*attribute, keyAsString); + ASSERT_TRUE(sc->valid()); + sc->fetchPostings(true); + TermFieldMatchData dummy; + SearchBasePtr sb = sc->createIterator(&dummy, true); + ir.verify(*sb); + } +} + +void SearchContextTest::testInitRange() { + testInitRange<AttributeVector::largeint_t, IntegerAttribute>(42, "42", _integerCfg); + testInitRange<double, FloatingPointAttribute>(42.42, "42.42", _floatCfg); + testInitRange<vespalib::string, StringAttribute>("any-key", "any-key", _stringCfg); +} + +void +SearchContextTest::testSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numUniques = 20; + + { // IntegerAttribute + for (const auto & cfg : _integerCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + + + { // CollectionType::ARRAY Flags. 
+ std::vector<AttributeVector::largeint_t> values; + fillVector(values, numUniques); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr second = AttributeFactory::createAttribute("flags-2", cfg); + testSearch(*(dynamic_cast<IntegerAttribute *>(second.get())), numDocs, values); + AttributePtr first = AttributeFactory::createAttribute("flags-1", cfg); + testMultiValueSearch(*(dynamic_cast<IntegerAttribute *>(first.get())), + *(dynamic_cast<IntegerAttribute *>(second.get())), values); + } + } + + { // FloatingPointAttribute + for (const auto & cfg : _floatCfg) { + AttributePtr attribute = AttributeFactory::createAttribute(cfg.first + "-3", cfg.second); + SearchContextPtr sc = getSearch(*attribute, "100"); + ASSERT_TRUE(sc->valid()); + sc = getSearch(*attribute, "7.3"); + ASSERT_TRUE( sc->valid() ); + sc = getSearch(*attribute, "1A0"); + EXPECT_FALSE( sc->valid() ); + } + } + + testSearch<AttributeVector::largeint_t, IntegerAttribute>(_integerCfg); + testSearch<double, FloatingPointAttribute>(_floatCfg); + testSearch<vespalib::string, StringAttribute>(_stringCfg); +} + +//----------------------------------------------------------------------------- +// Test search iterator functionality +//----------------------------------------------------------------------------- +void +SearchContextTest::testStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with 3 hits + threeHits.fetchPostings(true); + SearchBasePtr sb = threeHits.createIterator(&dummy, true); + sb->initFullRange(); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->getDocId() == 1u); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + 
EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->isAtEnd()); + } + + { // search for value with no hits + noHits.fetchPostings(true); + SearchBasePtr sb = noHits.createIterator(&dummy, true); + sb->initFullRange(); + ASSERT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_TRUE(sb->isAtEnd()); + } +} + +void +SearchContextTest::testNonStrictSearchIterator(SearchContext & threeHits, + SearchContext & noHits, + const IteratorTester & typeTester) +{ + TermFieldMatchData dummy; + { // search for value with three hits + threeHits.fetchPostings(false); + SearchBasePtr sb = threeHits.createIterator(&dummy, false); + sb->initFullRange(); + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->seek(1)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(2)); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(sb->seek(3)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(!sb->seek(4)); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_TRUE(sb->seek(5)); + EXPECT_EQUAL(sb->getDocId(), 5u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_TRUE(sb->getDocId() == 5u || sb->isAtEnd()); + } + { // search for value with no hits + noHits.fetchPostings(false); + SearchBasePtr sb = noHits.createIterator(&dummy, false); + sb->initFullRange(); + + EXPECT_TRUE(typeTester.matches(*sb)); + EXPECT_TRUE(sb->getDocId() == sb->beginId() || + sb->isAtEnd()); + EXPECT_TRUE(!sb->seek(1)); + EXPECT_NOT_EQUAL(sb->getDocId(), 1u); + EXPECT_TRUE(!sb->seek(6)); + EXPECT_NOT_EQUAL(sb->getDocId(), 6u); + } +} + +void +SearchContextTest::fillForSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 10); + ia->update(2, 20); + ia->update(3, 10); + ia->update(4, 20); + ia->update(5, 10); + ia->commit(true); +} + +void 
+SearchContextTest::fillForSemiNibbleSearchIteratorTest(IntegerAttribute * ia) +{ + addReservedDoc(*ia); + ia->addDocs(5); + ia->update(1, 1); + ia->update(2, 2); + ia->update(3, 1); + ia->update(4, 2); + ia->update(5, 1); + ia->commit(true); +} + +void +SearchContextTest::testSearchIterator() +{ + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-int32", cfg); + fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::UINT2, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint2", cfg); + fillForSemiNibbleSearchIteratorTest(dynamic_cast<IntegerAttribute *> + (ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 1); + SearchContextPtr noHits = getSearch(*ptr.get(), 3); + AttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 1); + noHits = getSearch(*ptr.get(), 3); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("sfs-int32", cfg); + fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = 
AttributeFactory::createAttribute("sfs-string", cfg); + StringAttribute * sa = dynamic_cast<StringAttribute *>(ptr.get()); + addReservedDoc(*ptr); + ptr->addDocs(5); + sa->update(1, "three"); + sa->update(2, "two"); + sa->update(3, "three"); + sa->update(4, "two"); + sa->update(5, "three"); + ptr->commit(true); + + SearchContextPtr threeHits = getSearch(*ptr.get(), "three"); + SearchContextPtr noHits = getSearch(*ptr.get(), "none"); + AttributePostingListIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + fillForSearchIteratorTest(dynamic_cast<IntegerAttribute *>(ptr.get())); + + SearchContextPtr threeHits = getSearch(*ptr.get(), 10); + SearchContextPtr noHits = getSearch(*ptr.get(), 30); + FlagAttributeIteratorTester tester; + testStrictSearchIterator(*threeHits, *noHits, tester); + threeHits = getSearch(*ptr.get(), 10); + noHits = getSearch(*ptr.get(), 30); + testNonStrictSearchIterator(*threeHits, *noHits, tester); + } +} + + + +//----------------------------------------------------------------------------- +// Test search iterator unpacking +//----------------------------------------------------------------------------- +void +SearchContextTest::fillForSearchIteratorUnpackingTest(IntegerAttribute * ia, + bool extra) +{ + addReservedDoc(*ia); + ia->addDocs(3); + if (ia->getCollectionType() == CollectionType::SINGLE) { + ia->update(1, 10); + ia->update(2, 10); + ia->update(3, 10); + } else if (ia->getCollectionType() == CollectionType::ARRAY) { + ia->append(1, 10, 1); + ia->append(2, 10, 1); + ia->append(2, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + ia->append(3, 10, 1); + } else { // WEIGHTED SET + ia->append(1, 10, -50); + ia->append(2, 10, 0); + ia->append(3, 10, 50); + } + ia->commit(true); + if (!extra) + return; + ia->addDocs(20); + for (uint32_t d = 4; d < 
24; ++d) { + if (ia->getCollectionType() == CollectionType::SINGLE) + ia->update(d, 10); + else + ia->append(d, 10, 1); + } + ia->commit(true); +} + +void +SearchContextTest::testSearchIteratorUnpacking(const AttributePtr & attr, + SearchContext & sc, + bool extra, + bool strict) +{ + LOG(info, + "testSearchIteratorUnpacking: vector '%s'", attr->getName().c_str()); + + TermFieldMatchData md; + md.reset(100); + + TermFieldMatchDataPosition pos; + pos.setElementWeight(100); + md.appendPosition(pos); + + sc.fetchPostings(strict); + SearchBasePtr sb = sc.createIterator(&md, strict); + sb->initFullRange(); + + std::vector<int32_t> weights(3); + if (attr->getCollectionType() == CollectionType::SINGLE || + (attr->getCollectionType() == CollectionType::ARRAY && attr->getBasicType() == BasicType::INT8)) + { + weights[0] = 1; + weights[1] = 1; + weights[2] = 1; + } else if (attr->getCollectionType() == CollectionType::ARRAY) { + weights[0] = 1; + weights[1] = 2; + weights[2] = 3; + } else { + weights[0] = -50; + weights[1] = 0; + weights[2] = 50; + } + + // unpack and check weights + sb->unpack(1); + EXPECT_EQUAL(sb->getDocId(), 1u); + EXPECT_EQUAL(md.getDocId(), 1u); + EXPECT_EQUAL(md.getWeight(), weights[0]); + + sb->unpack(2); + EXPECT_EQUAL(sb->getDocId(), 2u); + EXPECT_EQUAL(md.getDocId(), 2u); + EXPECT_EQUAL(md.getWeight(), weights[1]); + + sb->unpack(3); + EXPECT_EQUAL(sb->getDocId(), 3u); + EXPECT_EQUAL(md.getDocId(), 3u); + EXPECT_EQUAL(md.getWeight(), weights[2]); + if (extra) { + sb->unpack(4); + EXPECT_EQUAL(sb->getDocId(), 4u); + EXPECT_EQUAL(md.getDocId(), 4u); + EXPECT_EQUAL(md.getWeight(), 1); + } +} + +void +SearchContextTest::testSearchIteratorUnpacking() +{ + std::vector<std::pair<vespalib::string, Config> > config; + + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + config.emplace_back("s-int32", cfg); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + config.emplace_back("s-uint4", cfg); + } + { + Config cfg(BasicType::INT32, 
CollectionType::ARRAY); + config.emplace_back("a-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + config.emplace_back("w-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + config.emplace_back("sfs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("afs-int32", cfg); + } + { + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + config.emplace_back("wfs-int32", cfg); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + config.emplace_back("flags", cfg); + } + + for (const auto & cfg : config) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(ptr.get()), false); + SearchContextPtr sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, true); + sc = getSearch(*ptr.get(), 10); + testSearchIteratorUnpacking(ptr, *sc, false, false); + if (cfg.second.fastSearch()) { + AttributePtr ptr2 = AttributeFactory::createAttribute(cfg.first + "-extra", cfg.second); + fillForSearchIteratorUnpackingTest(dynamic_cast<IntegerAttribute *>(ptr2.get()), true); + SearchContextPtr sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, true); + sc2 = getSearch(*ptr2.get(), 10); + testSearchIteratorUnpacking(ptr2, *sc2, true, false); + } + } +} + + + +//----------------------------------------------------------------------------- +// Test range search +//----------------------------------------------------------------------------- + +template <typename VectorType> +void +SearchContextTest::performRangeSearch(const VectorType & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +template <typename VectorType, typename ValueType> +void 
+SearchContextTest::testRangeSearch(const AttributePtr & ptr, uint32_t numDocs, std::vector<ValueType> values) +{ + LOG(info, "testRangeSearch: vector '%s'", ptr->getName().c_str()); + + VectorType & vec = dynamic_cast<VectorType &>(*ptr.get()); + + addDocs(vec, numDocs); + + std::map<ValueType, DocSet> postingList; + + uint32_t docCnt = 0; + for (uint32_t i = 0; i < values.size() && docCnt < numDocs; i+=2) { + //std::cout << "postingList[" << values[i] << "]: {"; + for (uint32_t j = 0; j < (i + 1) && docCnt < numDocs; ++j, ++docCnt) { + EXPECT_TRUE(vec.update(docCnt + 1u, values[i])); + postingList[values[i]].insert(docCnt + 1u); + //std::cout << docCnt << ", "; + } + //std::cout << "}" << std::endl; + } + ptr->commit(true); + uint32_t smallHits = 0; + ValueType zeroValue = 0; + bool smallUInt = isUnsignedSmallIntAttribute(vec); + if (smallUInt) { + for (uint32_t i = docCnt ; i < numDocs; ++i) { + postingList[zeroValue].insert(i + 1u); + ++smallHits; + } + } + + // test less than ("<a") + for (uint32_t i = 0; i < values.size(); ++i) { + vespalib::asciistream ss; + ss << "<" << values[i]; + DocSet expected; + if (smallUInt) { + expected.insert(postingList[zeroValue].begin(), + postingList[zeroValue].end()); + } + for (uint32_t j = 0; j < i; ++j) { + expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + + // test greater than (">a") + for (uint32_t i = 0; i < values.size(); ++i) { + vespalib::asciistream ss; + ss << ">" << values[i]; + DocSet expected; + for (uint32_t j = i + 1; j < values.size(); ++j) { + expected.insert(postingList[values[j]].begin(), postingList[values[j]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + + // test range ("[a;b]") + for (uint32_t i = 0; i < values.size(); ++i) { + for (uint32_t j = 0; j < values.size(); ++j) { // illegal range when j < i + vespalib::asciistream ss; + ss << "[" << values[i] << ";" << values[j] << "]"; + DocSet 
expected; + for (uint32_t k = i; k < j + 1; ++k) { + expected.insert(postingList[values[k]].begin(), postingList[values[k]].end()); + } + performRangeSearch(vec, ss.str(), expected); + } + } + + { // test large range + vespalib::asciistream ss; + ss << "[" << (values.front() - 1) << ";" << (values.back() + 1) << "]"; + DocSet expected; + for (uint32_t doc = 0; doc < numDocs; ++doc) { + expected.insert(doc + 1); + } + performRangeSearch(vec, ss.str(), expected); + } +} + +void +SearchContextTest::testRangeSearchLimited() +{ + largeint_t VALUES [] = {0,1,1,2,3,4,5,6,7,8,9,9,10 }; + std::vector<largeint_t> values(VALUES, VALUES+sizeof(VALUES)/sizeof(VALUES[0])); + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("limited-int32", cfg); + IntegerAttribute & vec = dynamic_cast<IntegerAttribute &>(*ptr); + addDocs(vec, values.size()); + for (size_t i(1); i < values.size(); i++) { + EXPECT_TRUE(vec.update(i, values[i])); + } + ptr->commit(true); + + DocSet expected; + for (size_t i(1); i < 12; i++) { + expected.put(i); + } + performRangeSearch(vec, "[1;9]", expected); + performRangeSearch(vec, "[1;9;100]", expected); + performRangeSearch(vec, "[1;9;-100]", expected); + expected.clear(); + expected.put(3); + performRangeSearch(vec, "<1;3>", expected); + expected.put(4); + performRangeSearch(vec, "<1;3]", expected); + expected.clear(); + expected.put(1).put(2).put(3); + performRangeSearch(vec, "[1;3>", expected); + expected.put(4); + performRangeSearch(vec, "[1;3]", expected); + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[1;9;1]", expected); + performRangeSearch(vec, "[1;9;2]", expected); + expected.put(3); + performRangeSearch(vec, "[1;9;3]", expected); + expected.clear(); + expected.put(10).put(11); + performRangeSearch(vec, "[1;9;-1]", expected); + performRangeSearch(vec, "[1;9;-2]", expected); + expected.put(9); + performRangeSearch(vec, "[1;9;-3]", 
expected); + performRangeSearch(vec, "[1;9;-3]", expected); + + expected.clear(); + for (size_t i(1); i < 13; i++) { + expected.put(i); + } + performRangeSearch(vec, "[;;100]", expected); + performRangeSearch(vec, "[;;-100]", expected); + + expected.clear(); + expected.put(1).put(2); + performRangeSearch(vec, "[;;1]", expected); + expected.clear(); + expected.put(12); + performRangeSearch(vec, "[;;-1]", expected); +} + +void +SearchContextTest::testRangeSearch() +{ + const uint32_t numDocs = 100; + const uint32_t numValues = 20; + const uint32_t numNibbleValues = 9; + + { // IntegerAttribute + std::vector<largeint_t> values; + std::vector<largeint_t> nibbleValues; + largeint_t start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + for (uint32_t i = 0; i < numNibbleValues; ++i) { + nibbleValues.push_back(start + i); + } + + for (const auto & cfg : _integerCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, values); + } + { // CollectionType::ARRAY Flags. 
+ Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + AttributePtr ptr = AttributeFactory::createAttribute("flags", cfg); + testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, values); + } + { + Config cfg(BasicType::UINT4, CollectionType::SINGLE); + AttributePtr ptr = AttributeFactory::createAttribute("s-uint4", cfg); + testRangeSearch<IntegerAttribute, largeint_t>(ptr, numDocs, nibbleValues); + } + } + + { // FloatingPointAttribute + std::vector<double> values; + double start = 1; + + for (uint32_t i = 0; i < numValues; ++i) { + values.push_back(start + i); + } + + for (const auto & cfg : _floatCfg) { + AttributePtr ptr = AttributeFactory::createAttribute(cfg.first, cfg.second); + testRangeSearch<FloatingPointAttribute, double>(ptr, numDocs, values); + } + } +} + + +//----------------------------------------------------------------------------- +// Test case insensitive search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performCaseInsensitiveSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected) +{ + performSearch(vec, term, expected, QueryTermSimple::WORD); +} + +void +SearchContextTest::testCaseInsensitiveSearch(const AttributePtr & ptr) +{ + LOG(info, "testCaseInsensitiveSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 5 * 5; + addDocs(*ptr.get(), numDocs); + + const char * terms[][5] = { + {"lower", "upper", "firstupper", "mixedcase", "intermixedcase"}, // lower + {"LOWER", "UPPER", "FIRSTUPPER", "MIXEDCASE", "INTERMIXEDCASE"}, // upper + {"Lower", "Upper", "Firstupper", "Mixedcase", "Intermixedcase"}, // firstUpper + {"Lower", "Upper", "FirstUpper", "MixedCase", "InterMixedCase"}, // mixedCase + {"lower", "upper", "firstUpper", "mixedCase", "interMixedCase"}, // interMixedCase + }; + + uint32_t doc = 1; + for (uint32_t j = 0; j 
< 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc++, terms[i][j])); + } + } + + ptr->commit(true); + + const char * buffer[1]; + doc = 1; + for (uint32_t j = 0; j < 5; ++j) { + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_EQUAL(ptr->get(doc++, buffer, 1), uint32_t(1)); + EXPECT_EQUAL(vespalib::string(buffer[0]), vespalib::string(terms[i][j])); + } + } + + DocSet empty; + for (uint32_t j = 0; j < 5; ++j) { + DocSet expected; + for (doc = j * 5 + 1; doc < (j + 1) * 5 + 1; ++doc) { + expected.insert(doc); + } + // for non-posting attributes only lower case search terms should give hits + performCaseInsensitiveSearch(vec, terms[0][j], expected); + + if (ptr->getConfig().fastSearch()) { + for (uint32_t i = 1; i < 5; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], expected); + } + } else { + for (uint32_t i = 1; i < 4; ++i) { + performCaseInsensitiveSearch(vec, terms[i][j], empty); + } + } + } + performCaseInsensitiveSearch(vec, "none", empty); + performCaseInsensitiveSearch(vec, "NONE", empty); + performCaseInsensitiveSearch(vec, "None", empty); +} + +void +SearchContextTest::testRegexSearch(const AttributePtr & ptr) +{ + LOG(info, "testRegexSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"abc1def", "abc2Def", "abc2def", "abc4def", "abc5def", "abc6def"}; + std::vector<const char *> terms = { "abc", "bc2de" }; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector<DocSet> expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.push_back(DocSet(docs, docs + 6)); // "abc" + } + { + uint32_t docs[] = {2, 3}; + expected.push_back(DocSet(docs, docs + 2)); // "bc2de" + } + + for (uint32_t i = 
0; i < terms.size(); ++i) { + performSearch(vec, terms[i], expected[i], QueryTermSimple::REGEXP); + performSearch(vec, terms[i], empty, QueryTermSimple::WORD); + } +} + + +void +SearchContextTest::testCaseInsensitiveSearch() +{ + for (const auto & cfg : _stringCfg) { + testCaseInsensitiveSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +void +SearchContextTest::testRegexSearch() +{ + for (const auto & cfg : _stringCfg) { + testRegexSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + + +//----------------------------------------------------------------------------- +// Test prefix search +//----------------------------------------------------------------------------- + +void +SearchContextTest::performPrefixSearch(const StringAttribute & vec, const vespalib::string & term, + const DocSet & expected, QueryTermSimple::SearchTerm termType) +{ + performSearch(vec, term, expected, termType); +} + +void +SearchContextTest::testPrefixSearch(const AttributePtr & ptr) +{ + LOG(info, "testPrefixSearch: vector '%s'", ptr->getName().c_str()); + + StringAttribute & vec = dynamic_cast<StringAttribute &>(*ptr.get()); + + uint32_t numDocs = 6; + addDocs(*ptr.get(), numDocs); + + const char * strings [] = {"prefixsearch", "PREFIXSEARCH", "PrefixSearch", "precommit", "PRECOMMIT", "PreCommit"}; + const char * terms[][3] = {{"pre", "PRE", "Pre"}, {"pref", "PREF", "Pref"}, + {"prec", "PREC", "PreC"}, {"prex", "PREX", "Prex"}}; + + for (uint32_t doc = 1; doc < numDocs + 1; ++doc) { + ASSERT_TRUE(doc < vec.getNumDocs()); + EXPECT_TRUE(vec.update(doc, strings[doc - 1])); + } + + ptr->commit(true); + + std::vector<DocSet> expected; + DocSet empty; + { + uint32_t docs[] = {1, 2, 3, 4, 5, 6}; + expected.push_back(DocSet(docs, docs + 6)); // "pre" + } + { + uint32_t docs[] = {1, 2, 3}; + expected.push_back(DocSet(docs, docs + 3)); // "pref" + } + { + uint32_t docs[] = {4, 5, 6}; + expected.push_back(DocSet(docs, docs + 3)); // "prec" + } + 
expected.push_back(DocSet()); // "prex" + + for (uint32_t i = 0; i < 4; ++i) { + for (uint32_t j = 0; j < 3; ++j) { + if (j == 0 || ptr->getConfig().fastSearch()) { + performPrefixSearch(vec, terms[i][j], expected[i], QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } else { + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::PREFIXTERM); + performPrefixSearch(vec, terms[i][j], empty, QueryTermSimple::WORD); + } + } + } +} + + +void +SearchContextTest::testPrefixSearch() +{ + for (const auto & cfg : _stringCfg) { + testPrefixSearch(AttributeFactory::createAttribute(cfg.first, cfg.second)); + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(4); + VectorType & v = dynamic_cast<VectorType &>(*a); + resetAttribute(v, startValue); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(4u, rs->getNumHits()); + ASSERT_TRUE(4u == rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(1u, array[0]._docId); + EXPECT_EQUAL(2u, array[1]._docId); + EXPECT_EQUAL(3u, array[2]._docId); + EXPECT_EQUAL(4u, array[3]._docId); + } + a->clearDoc(1); + a->clearDoc(3); + a->commit(true); + { + ResultSetPtr rs = performSearch(v, term); + EXPECT_EQUAL(2u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + EXPECT_EQUAL(2u, array[0]._docId); + EXPECT_EQUAL(4u, array[1]._docId); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterClearDoc() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(cfg.first, cfg.second, 10, 
"10"); + requireThatSearchIsWorkingAfterClearDoc<IntegerAttribute>(cfg.first, cfg.second, 10, "<11"); + } + + for (const auto & cfg : _floatCfg) { + requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(cfg.first, cfg.second, 10.5, "10.5"); + requireThatSearchIsWorkingAfterClearDoc<FloatingPointAttribute>(cfg.first, cfg.second, 10.5, "<10.6"); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterClearDoc<StringAttribute>(cfg.first, cfg.second, "start", "start"); + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc(const vespalib::string & name, + const Config & cfg, + ValueType startValue, + ValueType defaultValue, + const vespalib::string & term) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + LOG(info, "requireThatSearchIsWorkingAfterLoadAndClearDoc: vector '%s', term '%s'", + a->getName().c_str(), term.c_str()); + addReservedDoc(*a); + a->addDocs(15); + VectorType & va = dynamic_cast<VectorType &>(*a); + resetAttribute(va, startValue); // triggers vector vector in posting list (count 15) + AttributePtr b = AttributeFactory::createAttribute(name + "-save", cfg); + EXPECT_TRUE(a->saveAs(b->getBaseFileName())); + EXPECT_TRUE(b->load()); + b->clearDoc(6); // goes from vector vector to single vector with count 14 + b->commit(true); + { + ResultSetPtr rs = performSearch(dynamic_cast<VectorType &>(*b), term); + EXPECT_EQUAL(14u, rs->getNumHits()); + const RankedHit * array = rs->getArray(); + for (uint32_t i = 0; i < 14; ++i) { + if (i < 5) { + EXPECT_EQUAL(i + 1, array[i]._docId); + } else + EXPECT_EQUAL(i + 2, array[i]._docId); + } + } + ValueType buf; + if (cfg.collectionType().isMultiValue()) { + EXPECT_EQUAL(0u, b->get(6, &buf, 1)); + } else { + EXPECT_EQUAL(1u, b->get(6, &buf, 1)); + EXPECT_EQUAL(defaultValue, buf); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterLoadAndClearDoc() +{ + { + int64_t value = 10; 
+ int64_t defValue = search::attribute::getUndefined<int32_t>(); + requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>("s-fs-int32", _integerCfg["s-fs-int32"], + value, defValue, "10"); + requireThatSearchIsWorkingAfterLoadAndClearDoc<IntegerAttribute>("a-fs-int32", _integerCfg["a-fs-int32"], + value, defValue, "10"); + } + { + vespalib::string value = "foo"; + vespalib::string defValue = ""; + requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>("s-fs-str", _stringCfg["s-fs-str"], + value, defValue, value); + requireThatSearchIsWorkingAfterLoadAndClearDoc<StringAttribute>("a-fs-str", _stringCfg["a-fs-str"], + value, defValue, value); + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates(const vespalib::string & name, + const Config & cfg, + ValueType value1, + ValueType value2) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + VectorType & va = dynamic_cast<VectorType &>(*a); + LOG(info, "requireThatSearchIsWorkingAfterUpdates: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(2); + va.update(1, value1); + va.commit(true); + va.update(2, value1); + va.update(2, value2); + va.commit(true); + { + ResultSetPtr rs = performSearch(va, value1); + EXPECT_EQUAL(1u, rs->getNumHits()); // doc 1 should not have this value + } + { + ResultSetPtr rs = performSearch(va, value2); + EXPECT_EQUAL(1u, rs->getNumHits()); + } +} + +void +SearchContextTest::requireThatSearchIsWorkingAfterUpdates() +{ + for (const auto & cfg : _integerCfg) { + requireThatSearchIsWorkingAfterUpdates<IntegerAttribute>(cfg.first, cfg.second, 10, 20); + } + + for (const auto & cfg : _stringCfg) { + requireThatSearchIsWorkingAfterUpdates<StringAttribute>(cfg.first, cfg.second, "foo", "bar"); + } +} + +void +SearchContextTest::requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded() +{ + LOG(info, "requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()"); + Config 
cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + { + cfg.setGrowStrategy(GrowStrategy(1, 0, 1)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a); + addReservedDoc(fa); + fa.addDocs(1); + fa.append(1, 10, 1); + fa.append(1, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(2, 20, 1); + fa.append(2, 24, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(3, 30, 1); + fa.append(3, 26, 1); + fa.commit(true); + fa.addDocs(1); + fa.append(4, 40, 1); + fa.append(4, 24, 1); + fa.commit(true); + { + ResultSetPtr rs = performSearch(fa, "<24"); + EXPECT_EQUAL(2u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + } + { + ResultSetPtr rs = performSearch(fa, "24"); + EXPECT_EQUAL(3u, rs->getNumHits()); + EXPECT_EQUAL(1u, rs->getArray()[0]._docId); + EXPECT_EQUAL(2u, rs->getArray()[1]._docId); + EXPECT_EQUAL(4u, rs->getArray()[2]._docId); + } + } + { + cfg.setGrowStrategy(GrowStrategy(4, 0, 4)); + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a); + std::vector<uint32_t> exp50; + std::vector<uint32_t> exp60; + addReservedDoc(fa); + for (uint32_t i = 0; i < 200; ++i) { + uint32_t docId; + EXPECT_TRUE(fa.addDoc(docId)); + if (i % 2 == 0) { + fa.append(docId, 50, 1); + exp50.push_back(docId); + } else { + fa.append(docId, 60, 1); + exp60.push_back(docId); + } + fa.commit(true); + { + ResultSetPtr rs1 = performSearch(fa, "50"); + ResultSetPtr rs2 = performSearch(fa, "<51"); + EXPECT_EQUAL(exp50.size(), rs1->getNumHits()); + EXPECT_EQUAL(exp50.size(), rs2->getNumHits()); + for (size_t j = 0; j < exp50.size(); ++j) { + EXPECT_EQUAL(exp50[j], rs1->getArray()[j]._docId); + EXPECT_EQUAL(exp50[j], rs2->getArray()[j]._docId); + } + } + { + ResultSetPtr rs = performSearch(fa, "60"); + EXPECT_EQUAL(exp60.size(), rs->getNumHits()); + for (size_t j = 0; j 
< exp60.size(); ++j) { + EXPECT_EQUAL(exp60[j], rs->getArray()[j]._docId); + } + } + } + } +} + +template <typename VectorType, typename ValueType> +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits(const vespalib::string & name, + const Config & cfg, + ValueType value) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + VectorType & va = dynamic_cast<VectorType &>(*a); + LOG(info, "requireThatInvalidSearchTermGivesZeroHits: vector '%s'", a->getName().c_str()); + addReservedDoc(*a); + a->addDocs(1); + va.update(1, value); + va.commit(true); + ResultSetPtr rs = performSearch(va, "foo"); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatInvalidSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + requireThatInvalidSearchTermGivesZeroHits<IntegerAttribute>(cfg.first, cfg.second, 10); + } + for (const auto & cfg : _floatCfg) { + requireThatInvalidSearchTermGivesZeroHits<FloatingPointAttribute>(cfg.first, cfg.second, 10); + } +} + +void +SearchContextTest::requireThatFlagAttributeHandlesTheByteRange() +{ + LOG(info, "requireThatFlagAttributeHandlesTheByteRange()"); + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + + AttributePtr a = AttributeFactory::createAttribute("flags", cfg); + FlagAttribute & fa = dynamic_cast<FlagAttribute &>(*a); + addReservedDoc(fa); + fa.addDocs(5); + fa.append(1, -128, 1); + fa.append(2, -64, 1); + fa.append(2, -8, 1); + fa.append(3, 0, 1); + fa.append(3, 8, 1); + fa.append(4, 64, 1); + fa.append(4, 24, 1); + fa.append(5, 127, 1); + fa.commit(true); + + performSearch(fa, "-128", DocSet().put(1), QueryTermSimple::WORD); + performSearch(fa, "127", DocSet().put(5), QueryTermSimple::WORD); + performSearch(fa, ">-128", DocSet().put(2).put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "<127", DocSet().put(1).put(2).put(3).put(4), QueryTermSimple::WORD); + performSearch(fa, "[-128;-8]", DocSet().put(1).put(2), 
QueryTermSimple::WORD); + performSearch(fa, "[-8;8]", DocSet().put(2).put(3), QueryTermSimple::WORD); + performSearch(fa, "[8;127]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); + performSearch(fa, "[-129;-8]", DocSet().put(1).put(2), QueryTermSimple::WORD); + performSearch(fa, "[8;128]", DocSet().put(3).put(4).put(5), QueryTermSimple::WORD); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits(const vespalib::string &name, + const Config &cfg, + int64_t maxValue) +{ + AttributePtr a = AttributeFactory::createAttribute(name, cfg); + IntegerAttribute &ia = dynamic_cast<IntegerAttribute &>(*a); + addReservedDoc(*a); + a->addDocs(1); + ia.update(1, maxValue); + ia.commit(true); + vespalib::string term = vespalib::make_string("%" PRIu64 "", (int64_t) maxValue + 1); + LOG(info, "requireThatOutOfBoundsSearchTermGivesZeroHits: vector '%s', term '%s'", a->getName().c_str(), term.c_str()); + ResultSetPtr rs = performSearch(ia, term); + EXPECT_EQUAL(0u, rs->getNumHits()); +} + +void +SearchContextTest::requireThatOutOfBoundsSearchTermGivesZeroHits() +{ + for (const auto & cfg : _integerCfg) { + int32_t maxValue = std::numeric_limits<int32_t>::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits(cfg.first, cfg.second, maxValue); + } + { + Config cfg(BasicType::INT8, CollectionType::ARRAY); + cfg.setFastSearch(true); + int8_t maxValue = std::numeric_limits<int8_t>::max(); + requireThatOutOfBoundsSearchTermGivesZeroHits("flags", cfg, maxValue); + } +} + + +void +SearchContextTest::initIntegerConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::INT32, CollectionType::SINGLE); + _integerCfg["s-int32"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::INT32, CollectionType::SINGLE); + cfg.setFastSearch(true); + _integerCfg["s-fs-int32"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::INT32, CollectionType::ARRAY); + _integerCfg["a-int32"] = cfg; + } + { // CollectionType::ARRAY && 
fastSearch + Config cfg(BasicType::INT32, CollectionType::ARRAY); + cfg.setFastSearch(true); + _integerCfg["a-fs-int32"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::INT32, CollectionType::WSET); + _integerCfg["w-int32"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::INT32, CollectionType::WSET); + cfg.setFastSearch(true); + _integerCfg["w-fs-int32"] = cfg; + } +} + +void +SearchContextTest::initFloatConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + _floatCfg["s-float"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + cfg.setFastSearch(true); + _floatCfg["s-fs-float"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + _floatCfg["a-float"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::ARRAY); + cfg.setFastSearch(true); + _floatCfg["a-fs-float"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::FLOAT, CollectionType::WSET); + _floatCfg["w-float"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::FLOAT, CollectionType::WSET); + cfg.setFastSearch(true); + _floatCfg["w-fs-float"] = cfg; + } +} + +void +SearchContextTest::initStringConfig() +{ + { // CollectionType::SINGLE + Config cfg(BasicType::STRING, CollectionType::SINGLE); + _stringCfg["s-str"] = cfg; + } + { // CollectionType::ARRAY + Config cfg(BasicType::STRING, CollectionType::ARRAY); + _stringCfg["a-str"] = cfg; + } + { // CollectionType::WSET + Config cfg(BasicType::STRING, CollectionType::WSET); + _stringCfg["w-str"] = cfg; + } + { // CollectionType::SINGLE && fastSearch + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + _stringCfg["s-fs-str"] = cfg; + } + { // CollectionType::ARRAY && fastSearch + Config cfg(BasicType::STRING, CollectionType::ARRAY); + 
cfg.setFastSearch(true); + _stringCfg["a-fs-str"] = cfg; + } + { // CollectionType::WSET && fastSearch + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + _stringCfg["w-fs-str"] = cfg; + } +} + +SearchContextTest::SearchContextTest() : + _integerCfg(), + _floatCfg(), + _stringCfg() +{ + initIntegerConfig(); + initFloatConfig(); + initStringConfig(); +} + +int +SearchContextTest::Main() +{ + TEST_INIT("searchcontext_test"); + EXPECT_TRUE(true); + + testSearch(); + testInitRange(); + testRangeSearch(); + testRangeSearchLimited(); + testCaseInsensitiveSearch(); + testRegexSearch(); + testPrefixSearch(); + testSearchIterator(); + testSearchIteratorUnpacking(); + TEST_DO(requireThatSearchIsWorkingAfterClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc()); + TEST_DO(requireThatSearchIsWorkingAfterUpdates()); + TEST_DO(requireThatFlagAttributeIsWorkingWhenNewDocsAreAdded()); + TEST_DO(requireThatInvalidSearchTermGivesZeroHits()); + TEST_DO(requireThatFlagAttributeHandlesTheByteRange()); + TEST_DO(requireThatOutOfBoundsSearchTermGivesZeroHits()); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::SearchContextTest); diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh new file mode 100755 index 00000000000..3aae4bfe4d5 --- /dev/null +++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.sh @@ -0,0 +1,5 @@ +#!/bin/bash +$VALGRIND ./searchlib_searchcontext_test_app +rm -rf *.dat +rm -rf *.idx +rm -rf *.weight diff --git a/searchlib/src/tests/attribute/sourceselector/.gitignore b/searchlib/src/tests/attribute/sourceselector/.gitignore new file mode 100644 index 00000000000..265c856fd01 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +sourceselector_test +searchlib_sourceselector_test_app diff --git a/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt 
b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt new file mode 100644 index 00000000000..24b7a75dd07 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sourceselector_test_app + SOURCES + sourceselector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sourceselector_test_app COMMAND searchlib_sourceselector_test_app) diff --git a/searchlib/src/tests/attribute/sourceselector/DESC b/searchlib/src/tests/attribute/sourceselector/DESC new file mode 100644 index 00000000000..7568f5de080 --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/DESC @@ -0,0 +1 @@ +This is a test of the sourceselector interface. diff --git a/searchlib/src/tests/attribute/sourceselector/FILES b/searchlib/src/tests/attribute/sourceselector/FILES new file mode 100644 index 00000000000..0d2803e762d --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/FILES @@ -0,0 +1 @@ +sourceselector.cpp diff --git a/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp new file mode 100644 index 00000000000..a3595f8724d --- /dev/null +++ b/searchlib/src/tests/attribute/sourceselector/sourceselector_test.cpp @@ -0,0 +1,216 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for sourceselector. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("sourceselector_test"); + +#include <vespa/searchlib/attribute/fixedsourceselector.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/vespalib/testkit/testapp.h> + +using std::unique_ptr; +using std::string; +using namespace search; +using namespace search::queryeval; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; + +namespace { +template <typename T, size_t N> size_t arraysize(const T (&)[N]) { return N; } + +const uint32_t maxDocId = 4096; +struct DocSource { uint32_t docId; uint8_t source; }; +const DocSource docs[] = { {0,1}, {1, 0}, {2, 2}, {4, 3}, {8, 9}, {16, 178}, + {32, 1}, {64, 2}, {128, 3}, {256,4}, {512, 2}, + {1024, 1}, {2048,5}, {maxDocId,1} }; +const string index_dir = "test_data"; +const string base_file_name = "test_data/sourcelist"; +const string base_file_name2 = "test_data/sourcelist2"; +const uint32_t default_source = 7; +const uint32_t base_id = 42; + +class Test : public vespalib::TestApp +{ +public: + int Main(); +private: + void testSourceSelector(const DocSource *docSource, size_t sz, uint8_t defaultSource, ISourceSelector & selector); + void testFixed(const DocSource *docSource, size_t sz); + template <typename SelectorType> + void requireThatSelectorCanCloneAndSubtract(); + void requireThatSelectorCanCloneAndSubtract(); + template <typename SelectorType> + void requireThatSelectorCanSaveAndLoad(); + void requireThatSelectorCanSaveAndLoad(); + template <typename SelectorType> + void requireThatCompleteSourceRangeIsHandled(); + void requireThatCompleteSourceRangeIsHandled(); + template <typename SelectorType> + void requireThatSourcesAreCountedCorrectly(); + void requireThatSourcesAreCountedCorrectly(); +}; + +int +Test::Main() +{ + TEST_INIT("sourceselector_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + testFixed(docs, arraysize(docs)); + 
TEST_DO(requireThatSelectorCanCloneAndSubtract()); + TEST_DO(requireThatSelectorCanSaveAndLoad()); + TEST_DO(requireThatCompleteSourceRangeIsHandled()); + TEST_DO(requireThatSourcesAreCountedCorrectly()); + + TEST_DONE(); +} + +void setSources(ISourceSelector &selector) { + for (size_t i = 0; i < arraysize(docs); ++i) { + selector.setSource(docs[i].docId, docs[i].source); + } +} + +void Test::testFixed(const DocSource *docSource, size_t sz) +{ + FixedSourceSelector selector(default_source, base_file_name, 10); + EXPECT_EQUAL(default_source, selector.getDefaultSource()); + EXPECT_EQUAL(10u, selector.getDocIdLimit()); +// EXPECT_EQUAL(default_source, selector.createIterator()->getSource(maxDocId + 1)); + setSources(selector); + testSourceSelector(docSource, sz, selector.getDefaultSource(), selector); + EXPECT_EQUAL(maxDocId+1, selector.getDocIdLimit()); +} + +void Test::testSourceSelector(const DocSource *docSource, size_t sz, + uint8_t defaultSource, ISourceSelector &selector) +{ + { + ISourceSelector::Iterator::UP it(selector.createIterator()); + for (size_t i = 0; i < sz; ++i) { + EXPECT_EQUAL(docSource[i].source, it->getSource(docSource[i].docId)); + } + } + { + ISourceSelector::Iterator::UP it(selector.createIterator()); + for (size_t i = 0, j = 0; i <= docSource[sz - 1].docId; ++i) { + if (i != docSource[j].docId) { + EXPECT_EQUAL(defaultSource, it->getSource(i)); + } else { + EXPECT_EQUAL(docSource[j].source, it->getSource(i)); + ++j; + } + } + } +} + +template <typename SelectorType> +void +Test::requireThatSelectorCanCloneAndSubtract() +{ + SelectorType selector(default_source, base_file_name); + setSources(selector); + selector.setBaseId(base_id); + + const uint32_t diff = 3; + typename SelectorType::UP + new_selector(selector.cloneAndSubtract(base_file_name2, diff)); + EXPECT_EQUAL(default_source - diff, new_selector->getDefaultSource()); + EXPECT_EQUAL(base_id + diff, new_selector->getBaseId()); + EXPECT_EQUAL(maxDocId+1, new_selector->getDocIdLimit()); + 
+ ISourceSelector::Iterator::UP it(new_selector->createIterator()); + for(size_t i = 0; i < arraysize(docs); ++i) { + if (docs[i].source > diff) { + EXPECT_EQUAL(docs[i].source - diff, it->getSource(docs[i].docId)); + } else { + EXPECT_EQUAL(0, it->getSource(docs[i].docId)); + } + } +} + +void +Test::requireThatSelectorCanCloneAndSubtract() +{ + requireThatSelectorCanCloneAndSubtract<FixedSourceSelector>(); +} + +template <typename SelectorType> +void +Test::requireThatSelectorCanSaveAndLoad() +{ + SelectorType selector(default_source, base_file_name2); + setSources(selector); + selector.setBaseId(base_id); + selector.setSource(maxDocId + 1, default_source); + + FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); + FastOS_FileInterface::MakeDirIfNotPresentOrExit(index_dir.c_str()); + + SourceSelector::SaveInfo::UP save_info = + selector.extractSaveInfo(base_file_name); + save_info->save(TuneFileAttributes(), DummyFileHeaderContext()); + typename SelectorType::UP + selector2(SelectorType::load(base_file_name)); + testSourceSelector(docs, arraysize(docs), default_source, *selector2); + EXPECT_EQUAL(base_id, selector2->getBaseId()); + EXPECT_EQUAL(maxDocId + 2, selector2->getDocIdLimit()); + + FastOS_FileInterface::EmptyAndRemoveDirectory(index_dir.c_str()); +} + +void +Test::requireThatSelectorCanSaveAndLoad() +{ + requireThatSelectorCanSaveAndLoad<FixedSourceSelector>(); +} + +template <typename SelectorType> +void +Test::requireThatCompleteSourceRangeIsHandled() +{ + SelectorType selector(default_source, base_file_name); + for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) { + selector.setSource(i, i); + } + ISourceSelector::Iterator::UP itr = selector.createIterator(); + for (uint32_t i = 0; i < ISourceSelector::SOURCE_LIMIT; ++i) { + EXPECT_EQUAL((queryeval::Source)i, itr->getSource(i)); + } +} + +void +Test::requireThatCompleteSourceRangeIsHandled() +{ + requireThatCompleteSourceRangeIsHandled<FixedSourceSelector>(); +} + +template 
<typename SelectorType> +void +Test::requireThatSourcesAreCountedCorrectly() +{ + SelectorType selector(default_source, base_file_name); + for (uint32_t i = 0; i < 256; ++i) { + selector.setSource(i, i%16); + } + SourceSelector::Histogram hist = selector.getDistribution(); + for (uint32_t i = 0; i < 16; ++i) { + EXPECT_EQUAL(16u, hist[i]); + } + for (uint32_t i = 16; i < 256; ++i) { + EXPECT_EQUAL(0u, hist[i]); + } +} + +void +Test::requireThatSourcesAreCountedCorrectly() +{ + requireThatSourcesAreCountedCorrectly<FixedSourceSelector>(); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/attribute/stringattribute/.gitignore b/searchlib/src/tests/attribute/stringattribute/.gitignore new file mode 100644 index 00000000000..0e8a04bc19d --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +stringattribute_test +searchlib_stringattribute_test_app diff --git a/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt new file mode 100644 index 00000000000..032ce9cac4e --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_stringattribute_test_app + SOURCES + stringattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stringattribute_test_app COMMAND sh stringattribute_test.sh) diff --git a/searchlib/src/tests/attribute/stringattribute/DESC b/searchlib/src/tests/attribute/stringattribute/DESC new file mode 100644 index 00000000000..5d94ab94325 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/DESC @@ -0,0 +1 @@ +Unit tests for SingleValueStringAttribute and MultiValueStringAttribute. 
diff --git a/searchlib/src/tests/attribute/stringattribute/FILES b/searchlib/src/tests/attribute/stringattribute/FILES new file mode 100644 index 00000000000..e68ef57177d --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/FILES @@ -0,0 +1 @@ +stringattribute.cpp diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp new file mode 100644 index 00000000000..154340ba408 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -0,0 +1,453 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("stringattribute_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/enumstore.h> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/singlestringpostattribute.h> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/multistringpostattribute.h> + +#include <vespa/searchlib/attribute/enumstore.hpp> +#include <vespa/searchlib/attribute/singlestringattribute.h> +#include <vespa/searchlib/attribute/singlestringpostattribute.hpp> +#include <vespa/searchlib/attribute/multistringattribute.h> +#include <vespa/searchlib/attribute/multistringpostattribute.hpp> + +namespace search { + +using attribute::CollectionType; +using attribute::IAttributeVector; + +class StringAttributeTest : public vespalib::TestApp +{ +private: + typedef ArrayStringAttribute ArrayStr; + typedef WeightedSetStringAttribute WeightedSetStr; + typedef ArrayStringPostingAttribute ArrayStrPosting; + typedef WeightedSetStringPostingAttribute WeightedSetStrPosting; + typedef attribute::Config Config; + typedef attribute::BasicType BasicType; + + template <typename Attribute> + void addDocs(Attribute & vec, 
uint32_t numDocs); + template <typename Attribute> + void checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const vespalib::string & value); + void testMultiValue(); + template <typename Attribute> + void testMultiValue(Attribute & attr, uint32_t numDocs); + void testMultiValueMultipleClearDocBetweenCommit(); + void testMultiValueRemove(); + void testSingleValue(); + void testDefaultValueOnAddDoc(AttributeVector & v); + template <typename Attribute> + void testSingleValue(Attribute & svsa, Config &cfg); + +public: + int Main(); +}; + +template <typename Attribute> +void +StringAttributeTest::addDocs(Attribute & vec, uint32_t numDocs) +{ + for (uint32_t i = 0; i < numDocs; ++i) { + typename Attribute::DocId doc; + EXPECT_TRUE(vec.addDoc(doc)); + EXPECT_TRUE(doc == i); + EXPECT_TRUE(vec.getNumDocs() == i + 1); + EXPECT_TRUE(vec.getValueCount(doc) == 0); + } + EXPECT_TRUE(vec.getNumDocs() == numDocs); +} + +template <typename Attribute> +void +StringAttributeTest::checkCount(Attribute & vec, uint32_t doc, uint32_t valueCount, + uint32_t numValues, const vespalib::string & value) +{ + std::vector<vespalib::string> buffer(valueCount); + EXPECT_TRUE(static_cast<uint32_t>(vec.getValueCount(doc)) == valueCount); + EXPECT_TRUE(vec.get(doc, &buffer[0], buffer.size()) == valueCount); + EXPECT_TRUE(std::count(buffer.begin(), buffer.end(), value) == numValues); +} + + +void +StringAttributeTest::testMultiValue() +{ + uint32_t numDocs = ArrayStr::MultiValueMapping::maxValues() + 1; + + { // Array String Attribute + ASSERT_TRUE(ArrayStr::MultiValueMapping::maxValues() == numDocs - 1); + ArrayStr attr("a-string"); + testMultiValue(attr, numDocs); + } + { // Weighted Set String Attribute + ASSERT_TRUE(WeightedSetStr::MultiValueMapping::maxValues() == numDocs - 1); + WeightedSetStr attr("ws-string", + Config(BasicType::STRING, CollectionType::WSET)); + testMultiValue(attr, numDocs); + } + { // Array String Posting Attribute + 
ASSERT_TRUE(ArrayStrPosting::MultiValueMapping::maxValues() == numDocs - 1); + Config cfg(BasicType::STRING, CollectionType::ARRAY); + cfg.setFastSearch(true); + ArrayStrPosting attr("a-fs-string", cfg); + testMultiValue(attr, numDocs); + } + { // Weighted Set String Posting Attribute + ASSERT_TRUE(WeightedSetStrPosting::MultiValueMapping::maxValues() == numDocs - 1); + Config cfg(BasicType::STRING, CollectionType::WSET); + cfg.setFastSearch(true); + WeightedSetStrPosting attr("ws-fs-string", cfg); + testMultiValue(attr, numDocs); + } + +} + + +template <typename Attribute> +void +StringAttributeTest::testMultiValue(Attribute & attr, uint32_t numDocs) +{ + EXPECT_TRUE(attr.getNumDocs() == 0); + + // generate two sets of unique strings + std::vector<vespalib::string> uniqueStrings; + uniqueStrings.reserve(numDocs - 1); + for (uint32_t i = 0; i < numDocs - 1; ++i) { + char unique[16]; + sprintf(unique, i < 10 ? "enum0%u" : "enum%u", i); + uniqueStrings.push_back(vespalib::string(unique)); + } + std::vector<vespalib::string> newUniques; + newUniques.reserve(numDocs - 1); + for (uint32_t i = 0; i < numDocs - 1; ++i) { + char unique[16]; + sprintf(unique, i < 10 ? 
"unique0%u" : "unique%u", i); + newUniques.push_back(vespalib::string(unique)); + } + + // add docs + addDocs(attr, numDocs); + + // insert values + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(attr.append(doc, uniqueStrings[j], 1)); + } + attr.commit(); + } + + //attr.getEnumStore().printCurrentContent(); + + // check values and enums + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = attr.getValueCount(doc); + EXPECT_TRUE(valueCount == doc); + + // test get first + if (valueCount == 0) { + EXPECT_TRUE(attr.get(doc) == NULL); + EXPECT_TRUE(attr.getEnum(doc) == std::numeric_limits<uint32_t>::max()); + } else { + EXPECT_TRUE(strcmp(attr.get(doc), uniqueStrings[0].c_str()) == 0); + uint32_t e; + EXPECT_TRUE(attr.findEnum(uniqueStrings[0].c_str(), e)); + EXPECT_TRUE(attr.getEnum(doc) == e); + } + + // test get all + std::vector<vespalib::string> values(valueCount); + EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + + std::vector<uint32_t> enums(valueCount); + EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount); + + for (uint32_t j = 0; j < valueCount; ++j) { + //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str()); + EXPECT_TRUE(values[j] == uniqueStrings[j]); + uint32_t e = 100; + EXPECT_TRUE(attr.findEnum(values[j].c_str(), e)); + EXPECT_TRUE(enums[j] == e); + } + } + + // check for correct refcounts + for (uint32_t i = 0; i < uniqueStrings.size(); ++i) { + typename Attribute::EnumStore::Index idx; + EXPECT_TRUE(attr.getEnumStore().findIndex(uniqueStrings[i].c_str(), idx)); + uint32_t expectedUsers = numDocs - 1 - i; + EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx)); + } + + typename Attribute::Histogram remaining = attr.getMultiValueMapping().getRemaining(); + for (typename Attribute::Histogram::const_iterator it(remaining.begin()), 
mt(remaining.end()); it != mt; ++it) { + EXPECT_TRUE(it->second == 0); + } + + // clear and insert new unique strings + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t oldValueCount = doc; + uint32_t valueCount = numDocs - 1 - doc; + //LOG(info, "clear and insert: doc = %u, valueCount = %u", doc, valueCount); + EXPECT_TRUE(attr.clearDoc(doc) == oldValueCount); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(attr.append(doc, newUniques[j], 1)); + } + attr.commit(); + + //attr.getEnumStore().printCurrentContent(); + } + + // check values and enums + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = attr.getValueCount(doc); + uint32_t expectedValueCount = numDocs - 1 - doc; + EXPECT_TRUE(valueCount == expectedValueCount); + + // test get all + std::vector<vespalib::string> values(valueCount); + EXPECT_TRUE(attr.get(doc, &values[0], valueCount) == valueCount); + + std::vector<uint32_t> enums(valueCount); + EXPECT_TRUE((static_cast<search::attribute::IAttributeVector &>(attr)).get(doc, &enums[0], valueCount) == valueCount); + + for (uint32_t j = 0; j < valueCount; ++j) { + //LOG(info, "doc[%u][%u] = %s", doc, j, values[j].c_str()); + EXPECT_TRUE(values[j] == newUniques[j]); + uint32_t e = 100; + EXPECT_TRUE(attr.findEnum(values[j].c_str(), e)); + EXPECT_TRUE(enums[j] == e); + } + } + + // check that enumXX strings are removed + for (uint32_t i = 0; i < uniqueStrings.size(); ++i) { + uint32_t e; + EXPECT_TRUE(!attr.findEnum(uniqueStrings[i].c_str(), e)); + } + + // check for correct refcounts + for (uint32_t i = 0; i < newUniques.size(); ++i) { + typename Attribute::EnumStore::Index idx; + EXPECT_TRUE(attr.getEnumStore().findIndex(newUniques[i].c_str(), idx)); + uint32_t expectedUsers = numDocs - 1 - i; + EXPECT_EQUAL(expectedUsers, attr.getEnumStore().getRefCount(idx)); + } +} + +void +StringAttributeTest::testMultiValueMultipleClearDocBetweenCommit() +{ + // This is also tested for all array attributes in attribute unit test + 
ArrayStr mvsa("a-string"); + uint32_t numDocs = 50; + addDocs(mvsa, numDocs); + std::vector<vespalib::string> buffer(numDocs); + + for (uint32_t doc = 0; doc < numDocs; ++doc) { + uint32_t valueCount = doc; + EXPECT_TRUE(mvsa.clearDoc(doc) == 0); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(mvsa.append(doc, "first", 1)); + } + EXPECT_TRUE(mvsa.clearDoc(doc) == 0); + for (uint32_t j = 0; j < valueCount; ++j) { + EXPECT_TRUE(mvsa.append(doc, "second", 1)); + } + mvsa.commit(); + + // check for correct values + checkCount(mvsa, doc, valueCount, valueCount, "second"); + } +} + + +void +StringAttributeTest::testMultiValueRemove() +{ + // This is also tested for all array attributes in attribute unit test + ArrayStr mvsa("a-string"); + uint32_t numDocs = 50; + addDocs(mvsa, numDocs); + std::vector<vespalib::string> buffer(9); + + for (uint32_t doc = 0; doc < numDocs; ++doc) { + EXPECT_TRUE(mvsa.append(doc, "one", 1)); + for (uint32_t i = 0; i < 3; ++i) { + EXPECT_TRUE(mvsa.append(doc, "three", 1)); + } + for (uint32_t i = 0; i < 5; ++i) { + EXPECT_TRUE(mvsa.append(doc, "five", 1)); + } + + mvsa.commit(); + checkCount(mvsa, doc, 9, 1, "one"); + checkCount(mvsa, doc, 9, 3, "three"); + checkCount(mvsa, doc, 9, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "zero", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 9, 1, "one"); + checkCount(mvsa, doc, 9, 3, "three"); + checkCount(mvsa, doc, 9, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "one", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 8, 0, "one"); + checkCount(mvsa, doc, 8, 3, "three"); + checkCount(mvsa, doc, 8, 5, "five"); + + EXPECT_TRUE(mvsa.remove(doc, "five", 1)); + mvsa.commit(); + checkCount(mvsa, doc, 3, 0, "one"); + checkCount(mvsa, doc, 3, 3, "three"); + checkCount(mvsa, doc, 3, 0, "five"); + } +} + +void +StringAttributeTest::testSingleValue() +{ + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + SingleValueStringAttribute svsa("svsa", cfg); + const IAttributeVector * ia = &svsa; + 
EXPECT_TRUE(dynamic_cast<const SingleValueEnumAttributeBase *>(ia) != nullptr); + testSingleValue(svsa, cfg); + + SingleValueStringAttribute svsb("svsa", cfg); + testDefaultValueOnAddDoc(svsb); + } + { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + cfg.setFastSearch(true); + SingleValueStringPostingAttribute svsa("svspb", cfg); + testSingleValue(svsa, cfg); + + SingleValueStringPostingAttribute svsb("svspb", cfg); + testDefaultValueOnAddDoc(svsb); + } +} + +void StringAttributeTest::testDefaultValueOnAddDoc(AttributeVector & v) +{ + EXPECT_EQUAL(0u, v.getNumDocs()); + v.addReservedDoc(); + EXPECT_EQUAL(1u, v.getNumDocs()); + EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(0)).valid() ); + uint32_t doc(7); + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_EQUAL(1u, doc); + EXPECT_EQUAL(2u, v.getNumDocs()); + EXPECT_TRUE( EnumStoreBase::Index(v.getEnum(doc)).valid() ); + EXPECT_EQUAL(0u, strlen(v.getString(doc, NULL, 0))); +} + +template <typename Attribute> +void +StringAttributeTest::testSingleValue(Attribute & svsa, Config &cfg) +{ + StringAttribute & v = svsa; + const char * t = "not defined"; + uint32_t doc = 2000; + uint32_t e1 = 2000; + uint32_t e2 = 2000; + uint32_t numDocs = 1000; + char tmp[32]; + + // add docs + for (uint32_t i = 0; i < numDocs; ++i) { + EXPECT_TRUE( v.addDoc(doc) ); + EXPECT_TRUE( doc == i ); + EXPECT_TRUE( v.getNumDocs() == i + 1 ); + EXPECT_TRUE( v.getValueCount(doc) == 1 ); + EXPECT_TRUE( ! EnumStoreBase::Index(v.getEnum(doc)).valid() ); + } + + std::map<vespalib::string, uint32_t> enums; + // 10 unique strings + for (uint32_t i = 0; i < numDocs; ++i) { + sprintf(tmp, "enum%u", i % 10); + EXPECT_TRUE( v.update(i, tmp) ); + EXPECT_TRUE( v.getValueCount(i) == 1 ); + EXPECT_TRUE( ! 
EnumStoreBase::Index(v.getEnum(i)).valid() ); + if ((i % 10) == 9) { + v.commit(); + for (uint32_t j = i - 9; j <= i; ++j) { + sprintf(tmp, "enum%u", j % 10); + EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + e1 = v.getEnum(j); + EXPECT_TRUE( v.findEnum(t, e2) ); + EXPECT_TRUE( e1 == e2 ); + if (enums.count(vespalib::string(t)) == 0) { + enums[vespalib::string(t)] = e1; + } else { + EXPECT_TRUE( e1 == enums[vespalib::string(t)]); + EXPECT_TRUE( e2 == enums[vespalib::string(t)]); + } + } + } + } + + //svsa.printBuffers(); + + // 1000 unique strings + for (uint32_t i = 0; i < numDocs; ++i) { + sprintf(tmp, "unique%u", i); + EXPECT_TRUE( v.update(i, tmp) ); + sprintf(tmp, "enum%u", i % 10); + EXPECT_TRUE( strcmp(v.get(i), tmp) == 0 ); + if ((i % 10) == 9) { + //LOG(info, "commit: i = %u", i); + v.commit(); + for (uint32_t j = i - 9; j <= i; ++j) { + sprintf(tmp, "unique%u", j); + EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + e1 = v.getEnum(j); + EXPECT_TRUE( v.findEnum(t, e2) ); + EXPECT_TRUE( e1 == e2 ); + } + //svsa.printBuffers(); + } + } + //svsa.printBuffers(); + + // check that enumX strings are removed ( + for (uint32_t i = 0; i < 10; ++i) { + sprintf(tmp, "enum%u", i); + EXPECT_TRUE( !v.findEnum(tmp, e1) ); + } + + + Attribute load("load", cfg); + svsa.saveAs(load.getBaseFileName()); + load.load(); +} + + + +int +StringAttributeTest::Main() +{ + TEST_INIT("stringattribute_test"); + + testMultiValue(); + + testMultiValueMultipleClearDocBetweenCommit(); + + testMultiValueRemove(); + + testSingleValue(); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::StringAttributeTest); diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh new file mode 100755 index 00000000000..d7ac263c1c9 --- /dev/null +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_stringattribute_test_app +rm -rf *.dat diff 
--git a/searchlib/src/tests/attribute/tensorattribute/.gitignore b/searchlib/src/tests/attribute/tensorattribute/.gitignore new file mode 100644 index 00000000000..08519fe7ae8 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/.gitignore @@ -0,0 +1 @@ +searchlib_tensorattribute_test_app diff --git a/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt new file mode 100644 index 00000000000..ec16b4363eb --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensorattribute_test_app + SOURCES + tensorattribute_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensorattribute_test_app COMMAND sh tensorattribute_test.sh) diff --git a/searchlib/src/tests/attribute/tensorattribute/DESC b/searchlib/src/tests/attribute/tensorattribute/DESC new file mode 100644 index 00000000000..1cd9aa7cf14 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/DESC @@ -0,0 +1 @@ +Unit tests for TensorAttribute. diff --git a/searchlib/src/tests/attribute/tensorattribute/FILES b/searchlib/src/tests/attribute/tensorattribute/FILES new file mode 100644 index 00000000000..1c8480ffde7 --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/FILES @@ -0,0 +1 @@ +tensorattribute.cpp diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp new file mode 100644 index 00000000000..137f93bcffe --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.cpp @@ -0,0 +1,217 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("tensorattribute_test"); +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/attribute/tensorattribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/vespalib/tensor/tensor_factory.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/vespalib/tensor/simple/simple_tensor_builder.h> + +using search::attribute::TensorAttribute; +using search::AttributeGuard; +using search::AttributeVector; +using vespalib::tensor::Tensor; +using vespalib::tensor::TensorCells; +using vespalib::tensor::TensorDimensions; +using vespalib::tensor::TensorFactory; +using vespalib::tensor::TensorType; +using vespalib::tensor::SimpleTensorBuilder; + +namespace vespalib { +namespace tensor { + +static bool operator==(const Tensor &lhs, const Tensor &rhs) +{ + return lhs.equals(rhs); +} + +} +} + + +struct Fixture +{ + using BasicType = search::attribute::BasicType; + using CollectionType = search::attribute::CollectionType; + using Config = search::attribute::Config; + + Config _cfg; + vespalib::string _name; + std::shared_ptr<TensorAttribute> _tensorAttr; + std::shared_ptr<AttributeVector> _attr; + vespalib::tensor::DefaultTensor::builder _builder; + + Fixture(const vespalib::string &typeSpec) + : _cfg(BasicType::TENSOR, CollectionType::SINGLE), + _name("test"), + _tensorAttr(), + _attr() + { + _cfg.setTensorType(TensorType::fromSpec(typeSpec)); + _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg); + _attr = _tensorAttr; + _attr->addReservedDoc(); + } + + Tensor::UP createTensor(const TensorCells &cells) { + return TensorFactory::create(cells, _builder); + } + Tensor::UP createTensor(const TensorCells &cells, + const TensorDimensions &dimensions) { + return TensorFactory::create(cells, dimensions, _builder); + } + + void ensureSpace(uint32_t docId) { + while (_attr->getNumDocs() <= docId) { + uint32_t newDocId = 0u; + _attr->addDoc(newDocId); + 
_attr->commit(); + } + } + + void clearTensor(uint32_t docId) { + ensureSpace(docId); + _tensorAttr->clearDoc(docId); + _attr->commit(); + } + + void setTensor(uint32_t docId, const Tensor &tensor) { + ensureSpace(docId); + _tensorAttr->setTensor(docId, tensor); + _attr->commit(); + } + + search::attribute::Status getStatus() { + _attr->commit(true); + return _attr->getStatus(); + } + + void + assertGetNoTensor(uint32_t docId) { + AttributeGuard guard(_attr); + Tensor::UP actTensor = _tensorAttr->getTensor(docId); + EXPECT_FALSE(actTensor); + } + + void + assertGetTensor(const Tensor &expTensor, uint32_t docId) + { + AttributeGuard guard(_attr); + Tensor::UP actTensor = _tensorAttr->getTensor(docId); + EXPECT_TRUE(static_cast<bool>(actTensor)); + EXPECT_EQUAL(expTensor, *actTensor); + } + + void + assertGetTensor(const TensorCells &expCells, + const TensorDimensions &expDimensions, + uint32_t docId) + { + Tensor::UP expTensor = createTensor(expCells, expDimensions); + assertGetTensor(*expTensor, docId); + } + + void save() { + bool saveok = _attr->save(); + EXPECT_TRUE(saveok); + } + + void load() { + _tensorAttr = std::make_shared<TensorAttribute>(_name, _cfg); + _attr = _tensorAttr; + bool loadok = _attr->load(); + EXPECT_TRUE(loadok); + } +}; + + +TEST_F("Test empty tensor attribute", Fixture("tensor()")) +{ + EXPECT_EQUAL(1u, f._attr->getNumDocs()); + EXPECT_EQUAL(1u, f._attr->getCommittedDocIdLimit()); +} + + +TEST_F("Test setting tensor value", Fixture("tensor(x{}, y{})")) +{ + f.ensureSpace(4); + EXPECT_EQUAL(5u, f._attr->getNumDocs()); + EXPECT_EQUAL(5u, f._attr->getCommittedDocIdLimit()); + TEST_DO(f.assertGetNoTensor(4)); + f.setTensor(4, *f.createTensor({}, {})); + TEST_DO(f.assertGetTensor({}, {"x", "y"}, 4)); + f.setTensor(3, *f.createTensor({ {{}, 3} }, { "x", "y"})); + TEST_DO(f.assertGetTensor({ {{}, 3} }, { "x", "y"}, 3)); + TEST_DO(f.assertGetNoTensor(2)); + TEST_DO(f.clearTensor(3)); + TEST_DO(f.assertGetNoTensor(3)); +} + + +TEST_F("Test saving 
// Rewrites the tensor of document 2 over and over until the used memory
// reported by getStatus() drops compared to the previous iteration
// (presumably the point where the tensor store was compacted - confirm
// against TensorAttribute). Afterwards all stored tensors must still be
// readable and unchanged.
TEST_F("Test compaction of tensor attribute", Fixture("tensor(x{}, y{})"))
{
    f.ensureSpace(4);
    Tensor::UP emptytensor = f.createTensor({}, {});
    Tensor::UP emptyxytensor = f.createTensor({}, {"x", "y"});
    Tensor::UP simpletensor = f.createTensor({ {{}, 3} }, { "x", "y"});
    Tensor::UP filltensor = f.createTensor({ {{}, 5} }, { "x", "y"});
    f.setTensor(4, *emptytensor);
    f.setTensor(3, *simpletensor);
    f.setTensor(2, *filltensor);
    f.clearTensor(2);
    f.setTensor(2, *filltensor);
    search::attribute::Status oldStatus = f.getStatus();
    search::attribute::Status newStatus = oldStatus;
    uint64_t iter = 0;
    uint64_t iterLimit = 100000;
    // Stop at the first iteration where used memory shrinks; oldStatus then
    // holds the usage just before the drop and newStatus the usage after it.
    for (; iter < iterLimit; ++iter) {
        f.clearTensor(2);
        f.setTensor(2, *filltensor);
        newStatus = f.getStatus();
        if (newStatus.getUsed() < oldStatus.getUsed()) {
            break;
        }
        oldStatus = newStatus;
    }
    // The drop must have been observed before the iteration limit was hit.
    EXPECT_GREATER(iterLimit, iter);
    LOG(info,
        "iter = %" PRIu64 ", memory usage %" PRIu64 ", -> %" PRIu64,
        iter, oldStatus.getUsed(), newStatus.getUsed());
    TEST_DO(f.assertGetNoTensor(1));
    TEST_DO(f.assertGetTensor(*filltensor, 2));
    TEST_DO(f.assertGetTensor(*simpletensor, 3));
    // Document 4 was set from a tensor built without dimensions but is
    // expected to read back with the x and y dimensions.
    TEST_DO(f.assertGetTensor(*emptyxytensor, 4));
}
EXPECT_EQUAL("tensor(x[10])", header.getTag("tensortype").asString()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh new file mode 100644 index 00000000000..2e940d5d99a --- /dev/null +++ b/searchlib/src/tests/attribute/tensorattribute/tensorattribute_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +$VALGRIND ./searchlib_tensorattribute_test_app +rm -rf *.dat diff --git a/searchlib/src/tests/bitcompression/expgolomb/.gitignore b/searchlib/src/tests/bitcompression/expgolomb/.gitignore new file mode 100644 index 00000000000..5ba0f36a2f0 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/.gitignore @@ -0,0 +1 @@ +searchlib_expgolomb_test_app diff --git a/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt new file mode 100644 index 00000000000..f724773dfd6 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_expgolomb_test_app + SOURCES + expgolomb_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_expgolomb_test_app NO_VALGRIND COMMAND searchlib_expgolomb_test_app) diff --git a/searchlib/src/tests/bitcompression/expgolomb/DESC b/searchlib/src/tests/bitcompression/expgolomb/DESC new file mode 100644 index 00000000000..4abef0ecf24 --- /dev/null +++ b/searchlib/src/tests/bitcompression/expgolomb/DESC @@ -0,0 +1 @@ +Exp golomb encoding / decoding test. Take a look at expgolomb_test.cpp for details. 
/**
 * Interface implemented by the exp golomb decoders under test.
 *
 * Offers full and "small" variants of decoding a value and of skipping
 * over an encoded value, plus a "small apply" decode variant.
 */
class IDecodeFunc
{
public:
    virtual ~IDecodeFunc() = default;

    // full range operations
    virtual uint64_t decode() = 0;
    virtual void skip() = 0;

    // "small" operations
    virtual uint64_t decodeSmall() = 0;
    virtual uint64_t decodeSmallApply() = 0;
    virtual void skipSmall() = 0;
};
+ */ +template <bool bigEndian> +class DecodeExpGolombVarK : public IDecodeFunc +{ +public: + using DCB = DecodeContext64Base; + using DC = DecodeContext<bigEndian>; + using EC = typename DC::EC; + + DCB &_dc; + int _kValue; + + DecodeExpGolombVarK(DCB &dc, int kValue) + : _dc(dc), + _kValue(kValue) + { + } + + virtual uint64_t decode() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + return val64; + } + + virtual void skip() + { + unsigned int length; + UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + } + + virtual uint64_t decodeSmall() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + return val64; + } + + virtual uint64_t decodeSmallApply() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC, val64 =); + return val64; + } + + virtual void skipSmall() + { + unsigned int length; + UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, _kValue, EC); + } + + static std::unique_ptr<IDecodeFunc> + make(DCB &dc, int kValue) + { + return std::unique_ptr<IDecodeFunc> + (new DecodeExpGolombVarK<bigEndian>(dc, kValue)); + } +}; + + +/* + * Exp golomb decode functions getting kValue from a template argument + * i.e. compiler is allowed to generate shift instructions with + * immediate values and fold constant expressions involving kValue. 
+ */ +template <bool bigEndian, int kValue> +class DecodeExpGolombConstK : public IDecodeFunc +{ +public: + using DCB = DecodeContext64Base; + using DC = DecodeContext<bigEndian>; + using EC = typename DC::EC; + + DCB &_dc; + + DecodeExpGolombConstK(DCB &dc) + : _dc(dc) + { + } + + virtual uint64_t decode() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + return val64; + } + + virtual void skip() + { + unsigned int length; + UC64_SKIPEXPGOLOMB(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + } + + virtual uint64_t decodeSmall() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + return val64; + } + + virtual uint64_t decodeSmallApply() + { + unsigned int length; + uint64_t val64; + UC64_DECODEEXPGOLOMB_SMALL_APPLY(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC, val64 =); + return val64; + } + + virtual void skipSmall() + { + unsigned int length; + UC64_SKIPEXPGOLOMB_SMALL(_dc._val, _dc._valI, _dc._preRead, + _dc._cacheInt, kValue, EC); + } + + static std::unique_ptr<IDecodeFunc> + make(DCB &dc, int) + { + return std::unique_ptr<IDecodeFunc> + (new DecodeExpGolombConstK<bigEndian, kValue>(dc)); + } +}; + + +using IDecodeFuncFactory = + std::unique_ptr<IDecodeFunc> (*)(DecodeContext64Base &dc, int kValue); + + +template <bool bigEndian> +class DecodeFuncFactories +{ +public: + using IDF = IDecodeFuncFactory; + std::vector<IDF> _constK; + IDF _varK; + +public: + DecodeFuncFactories(); + + void + addConstKFactory(int kValue, IDecodeFuncFactory factory) + { + assert(static_cast<unsigned int>(kValue) == _constK.size()); + _constK.push_back(factory); + } + + IDecodeFuncFactory + getConstKFactory(int kValue) const + { + assert(kValue >= 0 && + static_cast<unsigned int>(kValue) < _constK.size()); + return _constK[kValue]; + } + + IDecodeFuncFactory + getVarKFactory() 
const + { + return _varK; + } +}; + + +template <bool bigEndian> +struct RegisterFactoryPtr; + + +template <bool bigEndian> +using RegisterFactory = void (*)(DecodeFuncFactories<bigEndian> &factories, + RegisterFactoryPtr<bigEndian> &ptr); + + +template <bool bigEndian> +struct RegisterFactoryPtr +{ + RegisterFactory<bigEndian> _ptr; + + RegisterFactoryPtr(RegisterFactory<bigEndian> ptr) + : _ptr(ptr) + { + } +}; + + +template <bool bigEndian, int kValue> +class RegisterFactories +{ +public: + static void registerFactory(DecodeFuncFactories<bigEndian> &factories, + RegisterFactoryPtr<bigEndian> &ptr) + { + factories.addConstKFactory(kValue, + &DecodeExpGolombConstK<bigEndian, kValue>:: + make); + ptr._ptr = &RegisterFactories<bigEndian, kValue+1>::registerFactory; + } +}; + + +template <bool bigEndian> +class RegisterFactories<bigEndian, 64> +{ +public: + static void registerFactory(DecodeFuncFactories<bigEndian> &factories, + RegisterFactoryPtr<bigEndian> &ptr) + { + (void) factories; + ptr._ptr = nullptr; + } +}; + + +template <bool bigEndian> +DecodeFuncFactories<bigEndian>::DecodeFuncFactories() + : _constK(), + _varK(&DecodeExpGolombVarK<bigEndian>::make) +{ + RegisterFactoryPtr<bigEndian> f( + &RegisterFactories<bigEndian, 0>::registerFactory); + while (f._ptr) { + (*f._ptr)(*this, f); + } +} + + +class TestFixtureBase +{ +public: + std::vector<uint64_t> _randNums; + using EC = EncodeContext64Base; + + void fillRandNums(); + + void + calcBoundaries(int kValue, bool small, std::vector<uint64_t> &v); + + void + testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + DecodeContext64Base &dcApply, + IDecodeFunc &df, + IDecodeFunc &dfSkip, + IDecodeFunc &dfApply); + + void + testRandNums(DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + IDecodeFunc &df, + IDecodeFunc &dfSkip); +}; + + +void +TestFixtureBase::fillRandNums() +{ + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); 
+ rval <<= 30; + rval |= rand(); + _randNums.push_back(rval); + } + for (int i = 0; i < 10000; ++i) { + uint64_t rval = rand(); + rval <<= 30; + rval |= rand(); + uint32_t bits = (rand() & 63); + rval &= ((UINT64_C(1) << bits) - 1); + _randNums.push_back(rval); + } +} + + +namespace +{ + +/* + * Add values around a calculated boundary, to catch off by one errors. + */ +void +addBoundary(uint64_t boundary, uint64_t maxVal, std::vector<uint64_t> &v) +{ + uint64_t low = boundary > 2u ? boundary - 2 : 0; + uint64_t high = maxVal - 2u < boundary ? maxVal : boundary + 2; + assert(low <= high); + LOG(info, "low=0x%lx, high=0x%lx", low, high); + uint64_t i = low; + for (;;) { + v.push_back(i); + if (i == high) + break; + ++i; + } +} + +} + +void +TestFixtureBase::calcBoundaries(int kValue, bool small, + std::vector<uint64_t> &v) +{ + const char *smallStr = small ? "small" : "not small"; + v.push_back(0); + uint64_t maxVal = EC::maxExpGolombVal(kValue); // encode method limit + if (small) { + maxVal = EC::maxExpGolombVal(kValue, 64); + } + LOG(debug, "kValue=%u, %s, maxVal is 0x%lx", kValue, smallStr, maxVal); + for (int bits = kValue + 1; + bits + kValue <= 128 && (bits <= 64 || !small); + ++bits) { + uint64_t boundary = EC::maxExpGolombVal(kValue, bits); + if (bits + kValue == 128) { + LOG(debug, + "boundary for kValue=%d, %s, bits=%d: 0x%lx", + kValue, smallStr, bits, boundary); + } + addBoundary(boundary, maxVal, v); + } + std::sort(v.begin(), v.end()); + auto ve = std::unique(v.begin(), v.end()); + uint32_t oldSize = v.size(); + v.resize(ve - v.begin()); + uint32_t newSize = v.size(); + LOG(debug, + "kValues=%u, %s, boundaries %u -> %u, maxVal=0x%lx, highest=0x%lx", + kValue, smallStr, oldSize, newSize, maxVal, v.back()); +} + + +void +TestFixtureBase::testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + DecodeContext64Base &dcApply, + IDecodeFunc &df, + IDecodeFunc &dfSkip, + IDecodeFunc 
&dfApply) +{ + uint32_t bits = 0; + uint64_t maxSame = 0; + + for (auto num : v) { + uint64_t prevPos = dc.getReadOffset(); + uint64_t val64 = small ? df.decodeSmall() : df.decode(); + EXPECT_EQUAL(num, val64); + uint64_t currPos = dc.getReadOffset(); + if (small) { + dfSkip.skipSmall(); + } else { + dfSkip.skip(); + } + EXPECT_EQUAL(currPos, dcSkip.getReadOffset()); + if (small) { + uint64_t sval64 = dfApply.decodeSmallApply(); + EXPECT_EQUAL(num, sval64); + EXPECT_EQUAL(currPos, dcApply.getReadOffset()); + } + if (num == 0) { + bits = currPos - prevPos; + maxSame = EC::maxExpGolombVal(kValue, bits); + } else { + assert(bits <= currPos - prevPos); + if (bits < currPos - prevPos) { + ASSERT_EQUAL(bits + 2, currPos - prevPos); + bits += 2; + ASSERT_EQUAL(maxSame + 1, num); + maxSame = EC::maxExpGolombVal(kValue, bits); + } + } + } +} + + +void +TestFixtureBase::testRandNums(DecodeContext64Base &dc, + DecodeContext64Base &dcSkip, + IDecodeFunc &df, + IDecodeFunc &dfSkip) +{ + for (auto num : _randNums) { + uint64_t val64 = df.decode(); + EXPECT_EQUAL(num, val64); + uint64_t currPos = dc.getReadOffset(); + dfSkip.skip(); + EXPECT_EQUAL(currPos, dcSkip.getReadOffset()); + } +} + + + +template <bool bigEndian> +class TestFixture : public TestFixtureBase +{ +public: + DecodeFuncFactories<bigEndian> _factories; + using DC = DecodeContext<bigEndian>; + using EC = typename DC::EC; + using Parent = TestFixtureBase; + using Parent::testBoundaries; + using Parent::testRandNums; + + TestFixture() + : TestFixtureBase(), + _factories() + { + fillRandNums(); + } + + void + testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc); + void + testBoundaries(int kValue, bool small, std::vector<uint64_t> &v); + + void + testBoundaries(); + + void + testRandNums(int kValue, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc); + + void + testRandNums(int kValue); + + void + testRandNums(); +}; + + +template 
<bool bigEndian> +void +TestFixture<bigEndian>::testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc) +{ + DC dc(static_cast<const uint64_t *>(wc._comprBuf), 0); + DC dcSkip(static_cast<const uint64_t *>(wc._comprBuf), 0); + DC dcApply(static_cast<const uint64_t *>(wc._comprBuf), 0); + std::unique_ptr<IDecodeFunc> df((*f)(dc, kValue)); + std::unique_ptr<IDecodeFunc> dfSkip((*f)(dcSkip, kValue)); + std::unique_ptr<IDecodeFunc> dfApply((*f)(dcApply, kValue)); + testBoundaries(kValue, small, v, dc, dcSkip, dcApply, + *df, *dfSkip, *dfApply); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testBoundaries(int kValue, bool small, + std::vector<uint64_t> &v) +{ + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + for (auto num : v) { + e.encodeExpGolomb(num, kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + IDecodeFuncFactory f = _factories.getConstKFactory(kValue); + testBoundaries(kValue, small, v, f, wc); + f = _factories.getVarKFactory(); + testBoundaries(kValue, small, v, f, wc); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testBoundaries() +{ + for (int kValue = 0; kValue < 64; ++kValue) { + std::vector<uint64_t> v; + calcBoundaries(kValue, false, v); + testBoundaries(kValue, false, v); + /* + * Note: We don't support kValue being 63 for when decoding + * "small" numbers (limited to 64 bits in encoded form) since + * performance penalty is not worth the extra flexibility. 
+ */ + if (kValue < 63) { + v.clear(); + calcBoundaries(kValue, true, v); + testBoundaries(kValue, true, v); + } + } +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testRandNums(int kValue, + IDecodeFuncFactory f, + search::ComprFileWriteContext &wc) +{ + DC dc(static_cast<const uint64_t *>(wc._comprBuf), 0); + DC dcSkip(static_cast<const uint64_t *>(wc._comprBuf), 0); + std::unique_ptr<IDecodeFunc> df((*f)(dc, kValue)); + std::unique_ptr<IDecodeFunc> dfSkip((*f)(dcSkip, kValue)); + testRandNums(dc, dcSkip, *df, *dfSkip); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testRandNums(int kValue) +{ + EC e; + search::ComprFileWriteContext wc(e); + wc.allocComprBuf(32768, 32768); + e.setupWrite(wc); + for (auto num : _randNums) { + e.encodeExpGolomb(num, kValue); + if (e._valI >= e._valE) + wc.writeComprBuffer(false); + } + e.flush(); + + IDecodeFuncFactory f = _factories.getConstKFactory(kValue); + testRandNums(kValue, f, wc); + f = _factories.getVarKFactory(); + testRandNums(kValue, f, wc); +} + + +template <bool bigEndian> +void +TestFixture<bigEndian>::testRandNums() +{ + for (int k = 0; k < 64; ++k) { + testRandNums(k); + } +} + + +TEST_F("Test bigendian expgolomb encoding/decoding", TestFixture<true>) +{ + f.testRandNums(); + f.testBoundaries(); +} + + +TEST_F("Test little expgolomb encoding/decoding", TestFixture<false>) +{ + f.testRandNums(); + f.testBoundaries(); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/bitvector/.gitignore b/searchlib/src/tests/bitvector/.gitignore new file mode 100644 index 00000000000..21aed8ce6b2 --- /dev/null +++ b/searchlib/src/tests/bitvector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +bitvectorbenchmark_test +searchlib_bitvectorbenchmark_test_app diff --git a/searchlib/src/tests/bitvector/CMakeLists.txt b/searchlib/src/tests/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..7edae6f7cc4 --- /dev/null +++ b/searchlib/src/tests/bitvector/CMakeLists.txt @@ 
-0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bitvectorbenchmark_test_app + SOURCES + bitvectorbenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvectorbenchmark_test_app COMMAND searchlib_bitvectorbenchmark_test_app BENCHMARK) diff --git a/searchlib/src/tests/bitvector/DESC b/searchlib/src/tests/bitvector/DESC new file mode 100644 index 00000000000..1a6c0fc2959 --- /dev/null +++ b/searchlib/src/tests/bitvector/DESC @@ -0,0 +1 @@ +This is a test for the BitVector class. diff --git a/searchlib/src/tests/bitvector/FILES b/searchlib/src/tests/bitvector/FILES new file mode 100644 index 00000000000..0688c3933eb --- /dev/null +++ b/searchlib/src/tests/bitvector/FILES @@ -0,0 +1 @@ +bitvectorbenchmark.cpp diff --git a/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp new file mode 100644 index 00000000000..c9b962495f4 --- /dev/null +++ b/searchlib/src/tests/bitvector/bitvectorbenchmark.cpp @@ -0,0 +1,225 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/searchlib/common/bitvector.h> +#include <iostream> +#include <string> +#include <vector> + +LOG_SETUP("bitvectorbenchmark"); + +namespace search { + +class BitVectorBenchmark : public FastOS_Application +{ +private: + std::vector<BitVector *> _bv; + std::vector<unsigned int> _bvc; + void testCountSpeed1(); + void testCountSpeed2(); + void testCountSpeed3(); + void testOrSpeed1(); + void testOrSpeed2(); + static void usage(); + void init(size_t n); +public: + BitVectorBenchmark(); + ~BitVectorBenchmark(); + int Main(); +}; + +BitVectorBenchmark::BitVectorBenchmark() : + _bv() +{ +} + +BitVectorBenchmark::~BitVectorBenchmark() +{ + for(size_t i(0); i < _bv.size(); i++) { + delete _bv[i]; + } +} + +void BitVectorBenchmark::usage() +{ + std::cout << "usage: bitvectorbenchmark [-n numBits] [-t operation]" << std::endl; +} + +void BitVectorBenchmark::init(size_t n) +{ + BitVector *a(BitVector::create(n).release()); + BitVector *b(BitVector::create(n).release()); + srand(1); + for(size_t i(0), j(0); i < n; i += rand()%10, j++) { + a->flip(i); + } + for(size_t i(0), j(0); i < n; i += rand()%10, j++) { + b->flip(i); + } + a->invalidateCachedCount(); + b->invalidateCachedCount(); + _bv.push_back(a); + _bvc.push_back(a->countTrueBits()); + _bv.push_back(b); + _bvc.push_back(b->countTrueBits()); +} + +void BitVectorBenchmark::testOrSpeed1() +{ + _bv[0]->orWith(*_bv[1]); +} + +void BitVectorBenchmark::testCountSpeed1() +{ + _bv[0]->invalidateCachedCount(); + unsigned int cnt = _bv[0]->countTrueBits(); + assert(cnt = _bvc[0]); + (void) cnt; +} + +static int bitTab[256] = { + 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, + 
2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, + 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 +}; + +void BitVectorBenchmark::testCountSpeed2() +{ + const unsigned char * p = reinterpret_cast<const unsigned char *>(_bv[0]->getStart()); + size_t sz = _bv[0]->size()/8; + size_t sum0(0); + size_t sum1(0); + size_t sum2(0); + size_t sum3(0); + for (size_t i(0); i < sz; i+=4) { + sum0 += bitTab[p[i+0]]; + sum1 += bitTab[p[i+1]]; + sum2 += bitTab[p[i+2]]; + sum3 += bitTab[p[i+3]]; + } + assert(sum0 + sum1 + sum2 + sum3 == _bvc[0]); +} + + +static int +popCount(unsigned int bits) +{ + unsigned int odd = bits & 0x55555555; + unsigned int even = bits & 0xaaaaaaaa; + bits = odd + (even >> 1); + odd = bits & 0x33333333; + even = bits & 0xcccccccc; + bits = odd + (even >> 2); + odd = bits & 0x0f0f0f0f; + even = bits & 0xf0f0f0f0; + bits = odd + (even >> 4); + odd = bits & 0x00ff00ff; + even = bits & 0xff00ff00; + bits = odd + (even >> 8); + odd = bits & 0x0000ffff; + even = bits & 0xffff0000; + bits = odd + (even >> 16); + return bits; +} + + +void +BitVectorBenchmark::testCountSpeed3() +{ + const unsigned int * p = static_cast<const unsigned int *>(_bv[0]->getStart()); + const unsigned int * pe = p + (_bv[0]->size()/(sizeof(uint32_t)*8)); + size_t sum(0); + for (; p < pe; ++p) { + sum += popCount(*p); + } + assert(sum == _bvc[0]); +} + +void BitVectorBenchmark::testOrSpeed2() +{ + typedef uint64_t T; + T * a = reinterpret_cast<T *>(_bv[0]->getStart()); + const T * b = reinterpret_cast<const T *>(_bv[1]->getStart()); + size_t sz = _bv[0]->size()/(8*sizeof(*a)); + for (size_t i(0); i < sz; i+=2) { + a[i] |= b[i]; + a[i+1] |= b[i+1]; + // a[i+2] |= b[i+2]; + // a[i+3] |= b[i+3]; + } +} + +int BitVectorBenchmark::Main() +{ + int idx = 1; + std::string operation; + size_t numBits(8*1000000); + char opt; + const char * arg; + bool 
optError = false; + while ((opt = GetOpt("n:t:", arg, idx)) != -1) { + switch (opt) { + case 'n': + numBits = strtoll(arg, NULL, 10); + break; + case 't': + operation = arg; + break; + default: + optError = true; + break; + } + } + + if ((_argc != idx ) || optError) { + usage(); + return -1; + } + + init(numBits); + for (size_t i(0); i < operation.size(); i++) { + char op(operation[i]); + size_t splitBits1 = rand() % numBits; + size_t splitBits2 = rand() % numBits; + if (splitBits1 > splitBits2) + std::swap(splitBits1, splitBits2); + for (size_t j(0); j < 1000; j++) { + if (op == 'c') { + testCountSpeed1(); + } else if (op == 'd') { + testCountSpeed2(); + } else if (op == 'e') { + testCountSpeed3(); + } else if (op == 'o') { + testOrSpeed1(); + } else if (op == 'p') { + testOrSpeed2(); + } else { + std::cerr << "Unknown operation " << op << std::endl; + } + } + } + + return 0; +} +} + +int main(int argc, char ** argv) +{ + search::BitVectorBenchmark myapp; + return myapp.Entry(argc, argv); +} + diff --git a/searchlib/src/tests/btree/.gitignore b/searchlib/src/tests/btree/.gitignore new file mode 100644 index 00000000000..a6bdd572c7d --- /dev/null +++ b/searchlib/src/tests/btree/.gitignore @@ -0,0 +1,3 @@ +iteratespeed +searchlib_btreeaggregation_test_app +searchlib_iteratespeed_app diff --git a/searchlib/src/tests/btree/CMakeLists.txt b/searchlib/src/tests/btree/CMakeLists.txt new file mode 100644 index 00000000000..d88953d43fd --- /dev/null +++ b/searchlib/src/tests/btree/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_btreeaggregation_test_app + SOURCES + btreeaggregation_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_btreeaggregation_test_app COMMAND searchlib_btreeaggregation_test_app) +vespa_add_executable(searchlib_iteratespeed_app + SOURCES + iteratespeed.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_iteratespeed_app COMMAND searchlib_iteratespeed_app BENCHMARK) diff --git a/searchlib/src/tests/btree/DESC b/searchlib/src/tests/btree/DESC new file mode 100644 index 00000000000..da074ca2c45 --- /dev/null +++ b/searchlib/src/tests/btree/DESC @@ -0,0 +1 @@ +btree aggregation test. Take a look at btreeaggregation_test.cpp for details. diff --git a/searchlib/src/tests/btree/FILES b/searchlib/src/tests/btree/FILES new file mode 100644 index 00000000000..45756255961 --- /dev/null +++ b/searchlib/src/tests/btree/FILES @@ -0,0 +1 @@ +btreeaggregation_test.cpp diff --git a/searchlib/src/tests/btree/btreeaggregation_test.cpp b/searchlib/src/tests/btree/btreeaggregation_test.cpp new file mode 100644 index 00000000000..bb8e86ef49d --- /dev/null +++ b/searchlib/src/tests/btree/btreeaggregation_test.cpp @@ -0,0 +1,1146 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("btreeaggregation_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <string> +#include <set> +#include <iostream> +#include <vespa/searchlib/btree/btreeroot.h> +#include <vespa/searchlib/btree/btreebuilder.h> +#include <vespa/searchlib/btree/btreenodeallocator.h> +#include <vespa/searchlib/btree/btree.h> +#include <vespa/searchlib/btree/btreestore.h> +#include <vespa/searchlib/util/rand48.h> + +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreebuilder.hpp> +#include <vespa/searchlib/btree/btree.hpp> +#include <vespa/searchlib/btree/btreestore.hpp> +#include <vespa/searchlib/btree/btreeaggregator.hpp> + +using vespalib::GenerationHandler; + +namespace search { +namespace btree { + +namespace { + +int32_t +toVal(uint32_t key) +{ + return key + 1000; +} + +int32_t +toHighVal(uint32_t key) +{ + return toVal(key) + 1000; +} + +int32_t +toLowVal(uint32_t key) +{ + return toVal(key) - 1000000; +} + +int32_t +toNotVal(uint32_t key) +{ + return key + 2000; +} + +template <typename AggrT> +void +aggrToStr(std::stringstream &ss, const AggrT &aggr) +{ + (void) aggr; + ss << "[noaggr]"; +} + +template <> +void +aggrToStr<MinMaxAggregated>(std::stringstream &ss, + const MinMaxAggregated &aggr) +{ + ss << "[min=" << aggr.getMin() << ",max=" << aggr.getMax() << "]"; +} + + +template <typename LeafNode> +void +leafNodeToStr(std::stringstream &ss, const LeafNode &n) +{ + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":" << n.getData(i); + } + aggrToStr(ss, n.getAggregated()); + ss << "]"; +} + +template <typename InternalNode, typename LeafNode, typename NodeAllocator> +void 
+nodeToStr(std::stringstream &ss, const BTreeNode::Ref &node, + const NodeAllocator &allocator) +{ + if (!node.valid()) { + ss << "[]"; + return; + } + if (allocator.isLeafRef(node)) { + leafNodeToStr(ss, *allocator.mapLeafRef(node)); + return; + } + const InternalNode &n(*allocator.mapInternalRef(node)); + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":"; + nodeToStr<InternalNode, + LeafNode, + NodeAllocator>(ss, n.getChild(i), allocator); + } + aggrToStr(ss, n.getAggregated()); + ss << "]"; +} + + +template <typename Tree> +void +treeToStr(std::stringstream &ss, const Tree &t) +{ + nodeToStr<typename Tree::InternalNodeType, + typename Tree::LeafNodeType, + typename Tree::NodeAllocatorType>(ss, t.getRoot(), t.getAllocator()); +} + + +} + +typedef BTreeTraits<4, 4, 31, false> MyTraits; + +#define KEYWRAP + +#ifdef KEYWRAP + +// Force use of functor to compare keys. +class WrapInt +{ +public: + int _val; + WrapInt(int val) : _val(val) {} + WrapInt(void) : _val(0) {} + bool operator==(const WrapInt & rhs) const { return _val == rhs._val; } +}; + +std::ostream & +operator<<(std::ostream &s, const WrapInt &i) +{ + s << i._val; + return s; +} + +typedef WrapInt MyKey; +class MyComp +{ +public: + bool + operator()(const WrapInt &a, const WrapInt &b) const + { + return a._val < b._val; + } +}; + +#define UNWRAP(key) (key._val) +#else +typedef int MyKey; +typedef std::less<int> MyComp; +#define UNWRAP(key) (key) +#endif + +typedef BTree<MyKey, int32_t, + btree::MinMaxAggregated, + MyComp, MyTraits, + MinMaxAggrCalc> MyTree; +typedef BTreeStore<MyKey, int32_t, + btree::MinMaxAggregated, + MyComp, + BTreeDefaultTraits, + MinMaxAggrCalc> MyTreeStore; +typedef MyTree::Builder MyTreeBuilder; +typedef MyTree::LeafNodeType MyLeafNode; +typedef MyTree::InternalNodeType MyInternalNode; +typedef MyTree::NodeAllocatorType MyNodeAllocator; +typedef MyTree::Builder::Aggregator MyAggregator; +typedef MyTree::AggrCalcType 
MyAggrCalc; +typedef std::pair<MyKey, int32_t> LeafPair; +typedef MyTreeStore::KeyDataType MyKeyData; +typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair; + +typedef BTree<int, BTreeNoLeafData, btree::NoAggregated> SetTreeB; + +typedef BTreeTraits<16, 16, 10, false> LSeekTraits; +typedef BTree<int, BTreeNoLeafData, btree::NoAggregated, + std::less<int>, LSeekTraits> SetTreeL; + +struct LeafPairLess { + bool operator()(const LeafPair & lhs, const LeafPair & rhs) const { + return UNWRAP(lhs.first) < UNWRAP(rhs.first); + } +}; + + +class MockTree +{ +public: + typedef std::map<uint32_t, int32_t> MTree; + typedef std::map<int32_t, std::set<uint32_t> > MRTree; + MTree _tree; + MRTree _rtree; + + MockTree() + : _tree(), + _rtree() + { + } + + + void + erase(uint32_t key) + { + MTree::iterator it(_tree.find(key)); + if (it == _tree.end()) + return; + int32_t oval = it->second; + MRTree::iterator rit(_rtree.find(oval)); + assert(rit != _rtree.end()); + size_t ecount = rit->second.erase(key); + assert(ecount == 1); + (void) ecount; + if (rit->second.empty()) { + _rtree.erase(oval); + } + _tree.erase(key); + } + + void + insert(uint32_t key, int32_t val) + { + erase(key); + _tree[key] = val; + _rtree[val].insert(key); + } +}; + + +class MyTreeForceApplyStore : public MyTreeStore +{ +public: + typedef MyComp CompareT; + + bool + insert(EntryRef &ref, const KeyType &key, const DataType &data, + CompareT comp = CompareT()); + + bool + remove(EntryRef &ref, const KeyType &key, CompareT comp = CompareT()); +}; + + +bool +MyTreeForceApplyStore::insert(EntryRef &ref, + const KeyType &key, const DataType &data, + CompareT comp) +{ + bool retVal = true; + if (ref.valid()) { + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + const NodeAllocatorType &allocator = getAllocator(); + Iterator itr = tree->find(key, allocator, comp); + if (itr.valid()) + retVal = false; + } else { + const 
KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi < olde && !comp(key, oldi->_key)) + retVal = false; // key already present + } + } + KeyDataType addition(key, data); + if (retVal) { + apply(ref, &addition, &addition+1, NULL, NULL, comp); + } + return retVal; +} + + +bool +MyTreeForceApplyStore::remove(EntryRef &ref, const KeyType &key, + CompareT comp) +{ + bool retVal = true; + if (!ref.valid()) + retVal = false; // not found + else { + RefType iRef(ref); + uint32_t clusterSize = getClusterSize(iRef); + if (clusterSize == 0) { + const BTreeType *tree = getTreeEntry(iRef); + const NodeAllocatorType &allocator = getAllocator(); + Iterator itr = tree->find(key, allocator, comp); + if (!itr.valid()) + retVal = false; + } else { + const KeyDataType *old = getKeyDataEntry(iRef, clusterSize); + const KeyDataType *olde = old + clusterSize; + const KeyDataType *oldi = lower_bound(old, olde, key, comp); + if (oldi == olde || comp(key, oldi->_key)) + retVal = false; // not found + } + } + std::vector<KeyDataType> additions; + std::vector<KeyType> removals; + removals.push_back(key); + apply(ref, + &additions[0], &additions[additions.size()], + &removals[0], &removals[removals.size()], + comp); + return retVal; +} + + +template <typename ManagerType> +void +freezeTree(GenerationHandler &g, ManagerType &m) +{ + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +template <typename ManagerType> +void +cleanup(GenerationHandler &g, ManagerType &m) +{ + freezeTree(g, m); +} + +template <typename ManagerType, typename NodeType> +void +cleanup(GenerationHandler & g, + ManagerType & m, + BTreeNode::Ref n1Ref, NodeType * n1, + BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL) +{ + assert(ManagerType::isValidRef(n1Ref)); + m.holdNode(n1Ref, n1); + if (n2 != NULL) { 
+ assert(ManagerType::isValidRef(n2Ref)); + m.holdNode(n2Ref, n2); + } else { + assert(!ManagerType::isValidRef(n2Ref)); + } + cleanup(g, m); +} + +class Test : public vespalib::TestApp { +private: + template <typename Tree> + bool + assertTree(const std::string & exp, const Tree &t); + + template <typename Tree> + bool + assertAggregated(const MockTree &m, const Tree &t); + + template <typename TreeStore> + bool + assertAggregated(const MockTree &m, const TreeStore &s, EntryRef ref); + + void + buildSubTree(const std::vector<LeafPair> &sub, + size_t numEntries); + + void requireThatNodeInsertWorks(); + void requireThatNodeSplitInsertWorks(); + void requireThatNodeStealWorks(); + void requireThatNodeRemoveWorks(); + void requireThatWeCanInsertAndRemoveFromTree(); + void requireThatSortedTreeInsertWorks(); + void requireThatCornerCaseTreeFindWorks(); + void requireThatBasicTreeIteratorWorks(); + void requireThatTreeIteratorAssignWorks(); + void requireThatUpdateOfKeyWorks(); + void requireThatUpdateOfDataWorks(); + + template <typename TreeStore> + void + requireThatSmallNodesWorks(); +public: + int Main(); +}; + + +template<typename Tree> +bool +Test::assertTree(const std::string &exp, const Tree &t) +{ + std::stringstream ss; + treeToStr(ss, t); + if (!EXPECT_EQUAL(exp, ss.str())) return false; + return true; +} + + +template <typename Tree> +bool +Test::assertAggregated(const MockTree &m, const Tree &t) +{ + const MinMaxAggregated &ta(t.getAggregated()); + if (t.getRoot().valid()) { + return + EXPECT_FALSE(m._rtree.empty()) && + EXPECT_EQUAL(m._rtree.rbegin()->first, + ta.getMax()) && + EXPECT_EQUAL(m._rtree.begin()->first, + ta.getMin()); + } else { + return EXPECT_TRUE(m._rtree.empty()) && + EXPECT_EQUAL(std::numeric_limits<int32_t>::min(), + ta.getMax()) && + EXPECT_EQUAL(std::numeric_limits<int32_t>::max(), + ta.getMin()); + } +} + +template <typename TreeStore> +bool +Test::assertAggregated(const MockTree &m, const TreeStore &s, EntryRef ref) +{ + typename 
TreeStore::Iterator i(s.begin(ref)); + MinMaxAggregated sa(s.getAggregated(ref)); + const MinMaxAggregated &ia(i.getAggregated()); + if (ref.valid()) { + return + EXPECT_FALSE(m._rtree.empty()) && + EXPECT_EQUAL(m._rtree.rbegin()->first, + ia.getMax()) && + EXPECT_EQUAL(m._rtree.begin()->first, + ia.getMin()) && + EXPECT_EQUAL(m._rtree.rbegin()->first, + sa.getMax()) && + EXPECT_EQUAL(m._rtree.begin()->first, + sa.getMin()); + } else { + return EXPECT_TRUE(m._rtree.empty()) && + EXPECT_EQUAL(std::numeric_limits<int32_t>::min(), + ia.getMax()) && + EXPECT_EQUAL(std::numeric_limits<int32_t>::max(), + ia.getMin()) && + EXPECT_EQUAL(std::numeric_limits<int32_t>::min(), + sa.getMax()) && + EXPECT_EQUAL(std::numeric_limits<int32_t>::max(), + sa.getMin()); + } +} + + +void +Test::requireThatNodeInsertWorks() +{ + MyTree t; + t.insert(20, 102); + EXPECT_TRUE(assertTree("[20:102[min=102,max=102]]", t)); + t.insert(10, 101); + EXPECT_TRUE(assertTree("[10:101,20:102[min=101,max=102]]", t)); + t.insert(30, 103); + t.insert(40, 104); + EXPECT_TRUE(assertTree("[10:101,20:102,30:103,40:104" + "[min=101,max=104]]", t)); +} + +void +getLeafNode(MyTree &t) +{ + t.insert(1, 101); + t.insert(3, 103); + t.insert(5, 105); + t.insert(7, 107); +// EXPECT_TRUE(assertTree("[1:101,3:103,5:105,7:107[min=101,max=107]]", t)); +} + +void +Test::requireThatNodeSplitInsertWorks() +{ + { // new entry in current node + MyTree t; + getLeafNode(t); + t.insert(4, 104); + EXPECT_TRUE(assertTree("[4:" + "[1:101,3:103,4:104[min=101,max=104]]" + ",7:" + "[5:105,7:107[min=105,max=107]]" + "[min=101,max=107]]", t)); + } + { // new entry in split node + MyTree t; + getLeafNode(t); + t.insert(6, 106); + EXPECT_TRUE(assertTree("[5:" + "[1:101,3:103,5:105[min=101,max=105]]" + ",7:" + "[6:106,7:107[min=106,max=107]]" + "[min=101,max=107]]", t)); + } + { // new entry at end + MyTree t; + getLeafNode(t); + t.insert(8, 108); + EXPECT_TRUE(assertTree("[5:" + "[1:101,3:103,5:105[min=101,max=105]]" + ",8:" + 
"[7:107,8:108[min=107,max=108]]" + "[min=101,max=108]]", t)); + } +} + +struct BTreeStealTraits +{ + static const size_t LEAF_SLOTS = 6; + static const size_t INTERNAL_SLOTS = 6; + static const size_t PATH_SIZE = 20; + static const bool BINARY_SEEK = true; +}; + +void +Test::requireThatNodeStealWorks() +{ + typedef BTree<MyKey, int32_t, + btree::MinMaxAggregated, + MyComp, BTreeStealTraits, + MinMaxAggrCalc> MyStealTree; + { // steal all from left + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(40, 140); + t.insert(50, 150); + t.insert(60, 160); + t.insert(35, 135); + t.remove(35); + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130[min=110,max=130]]" + ",60:" + "[40:140,50:150,60:160[min=140,max=160]]" + "[min=110,max=160]]", t)); + t.remove(50); + EXPECT_TRUE(assertTree("[10:110,20:120,30:130,40:140,60:160" + "[min=110,max=160]]", t)); + } + { // steal all from right + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(40, 140); + t.insert(50, 150); + t.insert(60, 160); + t.insert(35, 135); + t.remove(35); + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130[min=110,max=130]]" + ",60:" + "[40:140,50:150,60:160[min=140,max=160]]" + "[min=110,max=160]]", t)); + t.remove(20); + EXPECT_TRUE(assertTree("[10:110,30:130,40:140,50:150,60:160" + "[min=110,max=160]]", t)); + } + { // steal some from left + MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(60, 160); + t.insert(70, 170); + t.insert(80, 180); + t.insert(50, 150); + t.insert(40, 140); + EXPECT_TRUE(assertTree("[50:" + "[10:110,20:120,30:130,40:140,50:150" + "[min=110,max=150]]" + ",80:" + "[60:160,70:170,80:180[min=160,max=180]]" + "[min=110,max=180]]", t)); + t.remove(60); + EXPECT_TRUE(assertTree("[40:" + "[10:110,20:120,30:130,40:140" + "[min=110,max=140]]" + ",80:" + "[50:150,70:170,80:180[min=150,max=180]]" + "[min=110,max=180]]", t)); + } + { // steal some from right + 
MyStealTree t; + t.insert(10, 110); + t.insert(20, 120); + t.insert(30, 130); + t.insert(40, 140); + t.insert(50, 150); + t.insert(60, 160); + t.insert(70, 170); + t.insert(80, 180); + t.insert(90, 190); + t.remove(40); + EXPECT_TRUE(assertTree("[30:" + "[10:110,20:120,30:130" + "[min=110,max=130]]" + ",90:" + "[50:150,60:160,70:170,80:180,90:190" + "[min=150,max=190]]" + "[min=110,max=190]]", t)); + t.remove(20); + EXPECT_TRUE(assertTree("[50:" + "[10:110,30:130,50:150" + "[min=110,max=150]]" + ",90:" + "[60:160,70:170,80:180,90:190" + "[min=160,max=190]]" + "[min=110,max=190]]", t)); + } +} + +void +Test::requireThatNodeRemoveWorks() +{ + MyTree t; + getLeafNode(t); + t.remove(3); + EXPECT_TRUE(assertTree("[1:101,5:105,7:107[min=101,max=107]]", t)); + t.remove(1); + EXPECT_TRUE(assertTree("[5:105,7:107[min=105,max=107]]", t)); + t.remove(7); + EXPECT_TRUE(assertTree("[5:105[min=105,max=105]]", t)); +} + +void +generateData(std::vector<LeafPair> & data, size_t numEntries) +{ + data.reserve(numEntries); + Rand48 rnd; + rnd.srand48(10); + for (size_t i = 0; i < numEntries; ++i) { + int num = rnd.lrand48() % 10000000; + uint32_t val = toVal(num); + data.push_back(std::make_pair(num, val)); + } +} + +void +Test::buildSubTree(const std::vector<LeafPair> &sub, + size_t numEntries) +{ + GenerationHandler g; + MyTree tree; + MyTreeBuilder builder(tree.getAllocator()); + MockTree mock; + + std::vector<LeafPair> sorted(sub.begin(), sub.begin() + numEntries); + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(sorted[i].first); + const uint32_t & val = sorted[i].second; + builder.insert(num, val); + mock.insert(num, val); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + EXPECT_EQUAL(numEntries, tree.size()); + EXPECT_TRUE(tree.isValid()); + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr = itr; 
+ if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + ++itr; + } + EXPECT_TRUE(!itr.valid()); + ritr = itr; + EXPECT_TRUE(!ritr.valid()); + --ritr; + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData()); + --ritr; + } + EXPECT_TRUE(!ritr.valid()); +} + +void +Test::requireThatWeCanInsertAndRemoveFromTree() +{ + GenerationHandler g; + MyTree tree; + MockTree mock; + std::vector<LeafPair> exp; + std::vector<LeafPair> sorted; + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + size_t numEntries = 1000; + generateData(exp, numEntries); + sorted = exp; + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + // insert entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + const uint32_t & val = exp[i].second; + EXPECT_TRUE(!tree.find(num).valid()); + //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str()); + EXPECT_TRUE(tree.insert(num, val)); + EXPECT_TRUE(!tree.insert(num, val)); + mock.insert(num, val); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (size_t j = 0; j <= i; ++j) { + //LOG(info, "find[%zu](%d)", j, exp[j].first._val); + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(i + 1u, tree.size()); + 
EXPECT_TRUE(tree.isValid()); + buildSubTree(exp, i + 1); + } + //std::cout << "tree: " << tree.toString() << std::endl; + + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itre = itr; + MyTree::Iterator itre2; + MyTree::Iterator ritr = itr; + while (itre.valid()) + ++itre; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + MyTree::Iterator pitr = itr; + for (size_t i = 0; i < numEntries; ++i) { + ssize_t si = i; + ssize_t sileft = numEntries - i; + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(i, itr.position()); + EXPECT_EQUAL(sileft, itre - itr); + EXPECT_EQUAL(-sileft, itr - itre); + EXPECT_EQUAL(sileft, itre2 - itr); + EXPECT_EQUAL(-sileft, itr - itre2); + EXPECT_EQUAL(si, itr - tree.begin()); + EXPECT_EQUAL(-si, tree.begin() - itr); + EXPECT_EQUAL(i != 0, itr - pitr); + EXPECT_EQUAL(-(i != 0), pitr - itr); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + pitr = itr; + ++itr; + ritr = itr; + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_TRUE(ritr == pitr); + } + EXPECT_TRUE(!itr.valid()); + EXPECT_EQUAL(numEntries, itr.position()); + ssize_t sNumEntries = numEntries; + EXPECT_EQUAL(sNumEntries, itr - tree.begin()); + EXPECT_EQUAL(-sNumEntries, tree.begin() - itr); + EXPECT_EQUAL(1, itr - pitr); + EXPECT_EQUAL(-1, pitr - itr); + } + // compact full tree by calling incremental compaction methods in a loop + { + MyTree::NodeAllocatorType &manager = tree.getAllocator(); + std::vector<uint32_t> toHold = manager.startCompact(); + MyTree::Iterator itr = tree.begin(); + tree.setRoot(itr.moveFirstLeafNode(tree.getRoot())); + while (itr.valid()) { + 
// LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey())); + itr.moveNextLeafNode(); + } + manager.finishCompact(toHold); + manager.freeze(); + manager.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + manager.trimHoldLists(g.getFirstUsedGeneration()); + } + // remove entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + //LOG(info, "remove[%zu](%d)", i, num); + //std::cout << "tree: " << tree.toString() << std::endl; + EXPECT_TRUE(tree.remove(num)); + EXPECT_TRUE(!tree.find(num).valid()); + EXPECT_TRUE(!tree.remove(num)); + EXPECT_TRUE(tree.isValid()); + mock.erase(num); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (size_t j = i + 1; j < numEntries; ++j) { + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(numEntries - 1 - i, tree.size()); + } +} + +void +Test::requireThatSortedTreeInsertWorks() +{ + { + MyTree tree; + MockTree mock; + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (int i = 0; i < 1000; ++i) { + EXPECT_TRUE(tree.insert(i, toVal(i))); + mock.insert(i, toVal(i)); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toVal(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + } + } + { + MyTree tree; + MockTree mock; + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + for (int i = 1000; i > 0; --i) { + EXPECT_TRUE(tree.insert(i, toVal(i))); + mock.insert(i, toVal(i)); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toVal(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, tree))); + } + } +} + +void +Test::requireThatCornerCaseTreeFindWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 1; i < 100; ++i) { + tree.insert(i, toVal(i)); + } + 
EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest + EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest +} + +void +Test::requireThatBasicTreeIteratorWorks() +{ + GenerationHandler g; + MyTree tree; + EXPECT_TRUE(!tree.begin().valid()); + std::vector<LeafPair> exp; + size_t numEntries = 1000; + generateData(exp, numEntries); + for (size_t i = 0; i < numEntries; ++i) { + tree.insert(exp[i].first, exp[i].second); + } + std::sort(exp.begin(), exp.end(), LeafPairLess()); + size_t ei = 0; + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr; + EXPECT_EQUAL(1000u, itr.size()); + for (; itr.valid(); ++itr) { + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey())); + EXPECT_EQUAL(exp[ei].second, itr.getData()); + ei++; + ritr = itr; + } + EXPECT_EQUAL(numEntries, ei); + for (; ritr.valid(); --ritr) { + --ei; + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(ritr.getKey())); + EXPECT_EQUAL(exp[ei].second, ritr.getData()); + } +} + + + +void +Test::requireThatTreeIteratorAssignWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + tree.insert(i, toVal(i)); + } + for (int i = 0; i < 1000; ++i) { + MyTree::Iterator itr = tree.find(i); + MyTree::Iterator itr2 = itr; + EXPECT_TRUE(itr == itr2); + int expNum = i; + for (; itr2.valid(); ++itr2) { + EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey())); + } + EXPECT_EQUAL(1000, expNum); + } +} + +struct UpdKeyComp { + int _remainder; + mutable size_t _numErrors; + UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {} + bool operator() (const int & lhs, const int & rhs) const { + if (lhs % 2 != _remainder) ++_numErrors; + if (rhs % 2 != _remainder) ++_numErrors; + return lhs < rhs; + } +}; + +void +Test::requireThatUpdateOfKeyWorks() +{ + typedef BTree<int, BTreeNoLeafData, + btree::NoAggregated, + UpdKeyComp &> UpdKeyTree; + typedef 
UpdKeyTree::Iterator UpdKeyTreeIterator; + GenerationHandler g; + UpdKeyTree t; + UpdKeyComp cmp1(0); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1)); + } + EXPECT_EQUAL(0u, cmp1._numErrors); + for (int i = 0; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp1); + itr.writeKey(i + 1); + } + UpdKeyComp cmp2(1); + for (int i = 1; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp2); + EXPECT_TRUE(itr.valid()); + } + EXPECT_EQUAL(0u, cmp2._numErrors); +} + + +void +Test::requireThatUpdateOfDataWorks() +{ + // typedef MyTree::Iterator Iterator; + GenerationHandler g; + MyTree t; + MockTree mock; + MyAggrCalc ac; + MyTree::NodeAllocatorType &manager = t.getAllocator(); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, toVal(i))); + mock.insert(i, toVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + } + freezeTree(g, manager); + for (int i = 0; i < 1000; i+=2) { + MyTree::Iterator itr = t.find(i); + MyTree::Iterator itr2 = itr; + t.thaw(itr); + itr.updateData(toHighVal(i), ac); + EXPECT_EQUAL(toHighVal(i), itr.getData()); + EXPECT_EQUAL(toVal(i), itr2.getData()); + mock.erase(i); + mock.insert(i, toHighVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + freezeTree(g, manager); + itr = t.find(i); + itr2 = itr; + t.thaw(itr); + itr.updateData(toLowVal(i), ac); + EXPECT_EQUAL(toLowVal(i), itr.getData()); + EXPECT_EQUAL(toHighVal(i), itr2.getData()); + mock.erase(i); + mock.insert(i, toLowVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + freezeTree(g, manager); + itr = t.find(i); + itr2 = itr; + t.thaw(itr); + itr.updateData(toVal(i), ac); + EXPECT_EQUAL(toVal(i), itr.getData()); + EXPECT_EQUAL(toLowVal(i), itr2.getData()); + mock.erase(i); + mock.insert(i, toVal(i)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, t))); + freezeTree(g, manager); + } +} + + +template <typename TreeStore> +void 
+Test::requireThatSmallNodesWorks(void) +{ + GenerationHandler g; + TreeStore s; + MockTree mock; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 40, toVal(40))); + mock.insert(40, toVal(40)); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 20, toVal(20))); + mock.insert(20, toVal(20)); + EXPECT_TRUE(!s.insert(root, 20, toNotVal(20))); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 60, toVal(60))); + mock.insert(60, toVal(60)); + EXPECT_TRUE(!s.insert(root, 60, toNotVal(60))); + EXPECT_TRUE(!s.insert(root, 20, toNotVal(20))); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.insert(root, 50, toVal(50))); + mock.insert(50, toVal(50)); + EXPECT_TRUE(!s.insert(root, 50, toNotVal(50))); + EXPECT_TRUE(!s.insert(root, 60, toNotVal(60))); + EXPECT_TRUE(!s.insert(root, 20, toNotVal(20))); + EXPECT_TRUE(!s.insert(root, 40, toNotVal(40))); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.insert(root, 1000 + i, 42)); + mock.insert(1000 + i, 42); + if (i > 0) { + EXPECT_TRUE(!s.insert(root, 1000 + i - 1, 42)); + } + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + } + EXPECT_TRUE(s.remove(root, 40)); + mock.erase(40); + EXPECT_TRUE(!s.remove(root, 40)); + 
EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.remove(root, 20)); + mock.erase(20); + EXPECT_TRUE(!s.remove(root, 20)); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + EXPECT_TRUE(s.remove(root, 50)); + mock.erase(50); + EXPECT_TRUE(!s.remove(root, 50)); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.remove(root, 1000 + i)); + mock.erase(1000 + i); + if (i > 0) { + EXPECT_TRUE(!s.remove(root, 1000 + i - 1)); + } + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + TEST_DO(EXPECT_TRUE(assertAggregated(mock, s, root))); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + + +int +Test::Main() +{ + TEST_INIT("btreeaggregation_test"); + + requireThatNodeInsertWorks(); + requireThatNodeSplitInsertWorks(); + requireThatNodeStealWorks(); + requireThatNodeRemoveWorks(); + requireThatWeCanInsertAndRemoveFromTree(); + requireThatSortedTreeInsertWorks(); + requireThatCornerCaseTreeFindWorks(); + requireThatBasicTreeIteratorWorks(); + requireThatTreeIteratorAssignWorks(); + requireThatUpdateOfKeyWorks(); + requireThatUpdateOfDataWorks(); + TEST_DO(requireThatSmallNodesWorks<MyTreeStore>()); + TEST_DO(requireThatSmallNodesWorks<MyTreeForceApplyStore>()); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); diff --git a/searchlib/src/tests/btree/iteratespeed.cpp b/searchlib/src/tests/btree/iteratespeed.cpp new file mode 100644 index 00000000000..719dc28c036 --- /dev/null +++ 
b/searchlib/src/tests/btree/iteratespeed.cpp @@ -0,0 +1,213 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("iteratespeed"); +#include <string> +#include <vespa/searchlib/btree/btreeroot.h> +#include <vespa/searchlib/btree/btreebuilder.h> +#include <vespa/searchlib/btree/btreenodeallocator.h> +#include <vespa/searchlib/btree/btree.h> +#include <vespa/searchlib/btree/btreestore.h> +#include <vespa/searchlib/util/rand48.h> + +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreebuilder.hpp> +#include <vespa/searchlib/btree/btree.hpp> +#include <vespa/searchlib/btree/btreestore.hpp> + +namespace search { +namespace btree { + +enum class IterateMethod +{ + FORWARD, + BACKWARDS, + LAMBDA +}; + +class IterateSpeed : public FastOS_Application +{ + template <typename Traits, IterateMethod iterateMethod> + void + workLoop(int loops, bool enableForward, bool enableBackwards, + bool enableLambda, int leafSlots); + + void usage(); + + int + Main(void); +}; + + +namespace { + +const char *iterateMethodName(IterateMethod iterateMethod) +{ + switch (iterateMethod) { + case IterateMethod::FORWARD: + return "forward"; + case IterateMethod::BACKWARDS: + return "backwards"; + default: + return "lambda"; + } +} + +} + +template <typename Traits, IterateMethod iterateMethod> +void +IterateSpeed::workLoop(int loops, bool enableForward, bool enableBackwards, + bool enableLambda, int leafSlots) +{ + if ((iterateMethod == IterateMethod::FORWARD && !enableForward) || + (iterateMethod == IterateMethod::BACKWARDS && !enableBackwards) || + (iterateMethod == IterateMethod::LAMBDA && !enableLambda) || + (leafSlots != 
0 && + leafSlots != static_cast<int>(Traits::LEAF_SLOTS))) + return; + vespalib::GenerationHandler g; + using Tree = BTree<int, int, btree::NoAggregated, std::less<int>, Traits>; + using Builder = typename Tree::Builder; + using ConstIterator = typename Tree::ConstIterator; + Tree tree; + Builder builder(tree.getAllocator()); + size_t numEntries = 1000000; + size_t numInnerLoops = 1000; + for (size_t i = 0; i < numEntries; ++i) { + builder.insert(i, 0); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + for (int l = 0; l < loops; ++l) { + fastos::TimeStamp before = fastos::ClockSystem::now(); + uint64_t sum = 0; + for (size_t innerl = 0; innerl < numInnerLoops; ++innerl) { + if (iterateMethod == IterateMethod::FORWARD) { + ConstIterator itr(BTreeNode::Ref(), tree.getAllocator()); + itr.begin(tree.getRoot()); + while (itr.valid()) { + sum += itr.getKey(); + ++itr; + } + } else if (iterateMethod == IterateMethod::BACKWARDS) { + ConstIterator itr(BTreeNode::Ref(), tree.getAllocator()); + itr.end(tree.getRoot()); + --itr; + while (itr.valid()) { + sum += itr.getKey(); + --itr; + } + } else { + tree.getAllocator().foreach_key(tree.getRoot(), + [&](int key) { sum += key; } ); + } + } + fastos::TimeStamp after = fastos::ClockSystem::now(); + double used = after.sec() - before.sec(); + printf("Elapsed time for iterating %ld steps is %8.5f, " + "direction=%s, fanout=%u,%u, sum=%" PRIu64 "\n", + numEntries * numInnerLoops, + used, + iterateMethodName(iterateMethod), + static_cast<int>(Traits::LEAF_SLOTS), + static_cast<int>(Traits::INTERNAL_SLOTS), + sum); + fflush(stdout); + } +} + + +void +IterateSpeed::usage() +{ + printf("iteratspeed " + "[-F <leafSlots>] " + "[-b] " + "[-c <numLoops>] " + "[-f] " + "[-l]\n"); +} + +int +IterateSpeed::Main() +{ + int argi; + char c; + const char *optArg; + argi = 1; + int loops = 1; + bool backwards = false; + bool forwards = false; + bool lambda = false; + int leafSlots = 0; + while ((c = 
GetOpt("F:bc:fl", optArg, argi)) != -1) { + switch (c) { + case 'F': + leafSlots = atoi(optArg); + break; + case 'b': + backwards = true; + break; + case 'c': + loops = atoi(optArg); + break; + case 'f': + forwards = true; + break; + case 'l': + lambda = true; + break; + default: + usage(); + return 1; + } + } + if (!backwards && !forwards && !lambda) { + backwards = true; + forwards = true; + lambda = true; + } + + using SmallTraits = BTreeTraits<4, 4, 31, false>; + using DefTraits = BTreeDefaultTraits; + using LargeTraits = BTreeTraits<32, 16, 10, true>; + using HugeTraits = BTreeTraits<64, 16, 10, true>; + workLoop<SmallTraits, IterateMethod::FORWARD>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<DefTraits, IterateMethod::FORWARD>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<LargeTraits, IterateMethod::FORWARD>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<HugeTraits, IterateMethod::FORWARD>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<SmallTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<DefTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<LargeTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<HugeTraits, IterateMethod::BACKWARDS>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<SmallTraits, IterateMethod::LAMBDA>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<DefTraits, IterateMethod::LAMBDA>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<LargeTraits, IterateMethod::LAMBDA>(loops, forwards, backwards, + lambda, leafSlots); + workLoop<HugeTraits, IterateMethod::LAMBDA>(loops, forwards, backwards, + lambda, leafSlots); + return 0; +} + +} +} + +FASTOS_MAIN(search::btree::IterateSpeed); + + diff --git a/searchlib/src/tests/bytecomplens/.gitignore b/searchlib/src/tests/bytecomplens/.gitignore new file mode 100644 index 
00000000000..afe9bff02f6 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/.gitignore @@ -0,0 +1,5 @@ +*.So +.depend* +Makefile +bytecomp_test +searchlib_bytecomp_test_app diff --git a/searchlib/src/tests/bytecomplens/CMakeLists.txt b/searchlib/src/tests/bytecomplens/CMakeLists.txt new file mode 100644 index 00000000000..188c3fccbdf --- /dev/null +++ b/searchlib/src/tests/bytecomplens/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bytecomp_test_app + SOURCES + bytecomp.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bytecomp_test_app NO_VALGRIND COMMAND searchlib_bytecomp_test_app) diff --git a/searchlib/src/tests/bytecomplens/DESC b/searchlib/src/tests/bytecomplens/DESC new file mode 100644 index 00000000000..e40e528ddea --- /dev/null +++ b/searchlib/src/tests/bytecomplens/DESC @@ -0,0 +1 @@ +Test of search::ByteCompressedLengths class. Look at bytecomp.cpp for details. diff --git a/searchlib/src/tests/bytecomplens/FILES b/searchlib/src/tests/bytecomplens/FILES new file mode 100644 index 00000000000..c44e7f254f8 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/FILES @@ -0,0 +1 @@ +bytecomplens.cpp diff --git a/searchlib/src/tests/bytecomplens/bytecomp.cpp b/searchlib/src/tests/bytecomplens/bytecomp.cpp new file mode 100644 index 00000000000..63aa2da15f6 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/bytecomp.cpp @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <memory> +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bytecomplens_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/random.h> +#include <vespa/searchlib/docstore/bytecomplens.h> + + +class Test : public vespalib::TestApp { +private: + void testRandomLengths(); + +public: + int Main() { + TEST_INIT("bytecomplens_test"); + testRandomLengths(); TEST_FLUSH(); + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + + +void +Test::testRandomLengths() +{ + vespalib::RandomGen rndgen(0x07031969); + +#define TBLSIZ 0xc00000 + + auto lentable = std::unique_ptr<uint32_t[]>(new uint32_t[TBLSIZ]); + auto offtable = std::unique_ptr<uint64_t[]>(new uint64_t[TBLSIZ]); + + uint64_t offset = 16; + + for (int i = 0; i < TBLSIZ; i++) { + int sel = rndgen.nextInt32(); + int val = rndgen.nextInt32(); + switch (sel & 0x7) { + case 0: + val &= 0x7F; + break; + case 1: + val &= 0xFF; + break; + case 3: + val &= 0x1FFF; + break; + case 4: + val &= 0x3FFF; + break; + case 5: + val &= 0x7FFF; + break; + case 6: + val &= 0xFFFF; + break; + case 7: + default: + val &= 0xFFFFF; + break; + } + offtable[i] = offset; + lentable[i] = val; + offset += val; + } + + LOG(info, "made %d random offsets", TBLSIZ); + + search::ByteCompressedLengths foo; + + LOG(info, "empty BCL using %9ld bytes memory", foo.memoryUsed()); + + foo.addOffsetTable(TBLSIZ/4, offtable.get()); + foo.addOffsetTable(TBLSIZ/4, offtable.get() + 1*(TBLSIZ/4)); + + LOG(info, "half BCL using %9ld bytes memory", foo.memoryUsed()); + + search::ByteCompressedLengths bar; + foo.swap(bar); + bar.addOffsetTable(TBLSIZ/4, offtable.get() + 2*(TBLSIZ/4)); + bar.addOffsetTable(TBLSIZ/4, offtable.get() + 3*(TBLSIZ/4)); + foo.swap(bar); + + LOG(info, "full BCL using %9ld bytes memory", foo.memoryUsed()); + + LOG(info, "constructed %d byte compressed lengths", TBLSIZ-1); + + for (int i = 0; i < TBLSIZ-1; i++) { + search::ByteCompressedLengths::OffLen offlen; + offlen = 
foo.getOffLen(i); + + if ((i % 1000000) == 0) { + LOG(info, "data blob [%d] length %ld offset %ld", i, offlen.length, offlen.offset); + } + EXPECT_EQUAL(lentable[i], offlen.length); + EXPECT_EQUAL(offtable[i], offlen.offset); + } +} + diff --git a/searchlib/src/tests/bytecomplens/example.txt b/searchlib/src/tests/bytecomplens/example.txt new file mode 100644 index 00000000000..6dc3df0118a --- /dev/null +++ b/searchlib/src/tests/bytecomplens/example.txt @@ -0,0 +1,122 @@ +offset length BCN val L0 len/off skipL1 skipL2 skipL3 + +976 18707 [ 93 92 01 ] 3/0 976/0/0/0 +19683 11527 [ 87 5A ] 2/3 +31210 3926 [ D6 1E ] 2/5 +35136 2 [ 02 ] 1/7 +35138 6060 [ AC 2F ] 2/8 34162/8 +41198 649445 [ E5 D1 27 ] 3/10 +690643 2866 [ B2 16 ] 2/13 +693509 824767 [ BF AB 32 ] 3/15 +1518276 499173 [ E5 BB 1E ] 3/18 1483138/10 +2017449 20455 [ E7 9F 01 ] 3/21 +2037904 11 [ 0B ] 1/24 +2037915 19207 [ 87 96 01 ] 3/25 +2057122 6355 [ D3 31 ] 2/28 538846/10 +2063477 3422 [ DE 1A ] 2/30 +2066899 10683 [ BB 53 ] 2/32 +2077582 7360 [ C0 39 ] 2/34 +2084942 17969 [ B1 8C 01 ] 3/36 2083966/36/12 +2102911 6114 [ E2 2F ] 2/39 +2109025 31741 [ FD F7 01 ] 3/41 +2140766 581588 [ D4 BF 23 ] 3/44 +2722354 5341 [ DD 29 ] 2/47 637412/11 +2727695 13774 [ CE 6B ] 2/49 +2741469 717809 [ F1 E7 2B ] 3/51 +3459278 815406 [ AE E2 31 ] 3/54 +4274684 89 [ 59 ] 1/57 1552330/10 +4274773 4545 [ C1 23 ] 2/58 +4279318 803868 [ 9C 88 31 ] 3/60 +5083186 12865 [ C1 64 ] 2/63 +5096051 75 [ 4B ] 1/65 821367/8 +5096126 40734 [ 9E BE 02 ] 3/66 +5136860 101 [ 65 ] 1/69 +5136961 128 [ 80 01 ] 2/70 +5137089 253 [ FD 01 ] 2/72 3052147/36/12 +5137342 13 [ 0D ] 1/74 +5137355 24986 [ 9A C3 01 ] 3/75 +5162341 231 [ E7 01 ] 2/78 +5162572 997853 [ DD F3 3C ] 3/80 25483/8 +6160425 4728 [ F8 24 ] 2/83 +6165153 2025 [ E9 0F ] 2/85 +6167178 7281 [ F1 38 ] 2/87 +6174459 1026302 [ FE D1 3E ] 3/89 1011887/9 +7200761 848783 [ 8F E7 33 ] 3/92 +8049544 145767 [ E7 F2 08 ] 3/95 +8195311 19103 [ 9F 95 01 ] 3/98 +8214414 22166 [ 96 AD 01 ] 3/101 
2039955/12 +8236580 30020 [ C4 EA 01 ] 3/104 +8266600 13 [ 0D ] 1/107 +8266613 120 [ 78 ] 1/108 +8266733 22398 [ FE AE 01 ] 3/109 3129644/37/12 +8289131 10832 [ D0 54 ] 2/112 +8299963 3765 [ B5 1D ] 2/114 +8303728 432771 [ 83 B5 1A ] 3/116 +8736499 30133 [ B5 EB 01 ] 3/119 469766/10 +8766632 6444 [ AC 32 ] 2/122 +8773076 16033 [ A1 7D ] 2/124 +8789109 78 [ 4E ] 1/126 +8789187 12510 [ DE 61 ] 2/127 52688/8 +8801697 12441 [ 99 61 ] 2/129 +8814138 117 [ 75 ] 1/131 +8814255 7147 [ EB 37 ] 2/132 +8821402 189 [ BD 01 ] 2/134 32215/7 +8821591 199704 [ 98 98 0C ] 3/136 +9021295 13240 [ B8 67 ] 2/139 +9034535 110 [ 6E ] 1/141 +9034645 31677 [ BD F7 01 ] 3/142 9034645/142/48/17 +9066322 18547 [ F3 90 01 ] 3/145 +9084869 734679 [ D7 EB 2C ] 3/148 +9819548 112 [ 70 ] 1/151 +9819660 883565 [ ED F6 35 ] 3/152 785015/10 +10703225 10290 [ B2 50 ] 2/155 +10713515 21410 [ A2 A7 01 ] 3/157 +10734925 15 [ 0F ] 1/160 +10734940 747774 [ FE D1 2D ] 3/161 915280/9 +11482714 39 [ 27 ] 1/164 +11482753 77 [ 4D ] 1/165 +11482830 235 [ EB 01 ] 2/166 +11483065 1991 [ C7 0F ] 2/168 748125/7 +11485056 9187 [ E3 47 ] 2/170 +11494243 18800 [ F0 92 01 ] 3/172 +11513043 1042219 [ AB CE 3F ] 3/175 +12555262 9154 [ C2 47 ] 2/178 3520617/36/12 +12564416 43582 [ BE D4 02 ] 3/180 +12607998 847240 [ 88 DB 33 ] 3/183 +13455238 4726 [ F6 24 ] 2/186 +13459964 590348 [ 8C 84 24 ] 3/188 904702/10 +14050312 8659 [ D3 43 ] 2/191 +14058971 116 [ 74 ] 1/193 +14059087 13563 [ FB 69 ] 2/194 +14072650 713064 [ E8 C2 2B ] 3/196 612686/8 +14785714 40321 [ 81 BB 02 ] 3/199 +14826035 2296 [ F8 11 ] 2/202 +14828331 7273 [ E9 38 ] 2/204 +14835604 68285 [ BD 95 04 ] 3/206 762954/10 +14903889 235 [ EB 01 ] 2/209 +14904124 4669 [ BD 24 ] 2/211 +14908793 28535 [ F7 DE 01 ] 3/213 +14937328 19 [ 13 ] 1/216 2382066/38/12 +14937347 5369 [ F9 29 ] 2/217 +14942716 602191 [ CF E0 24 ] 3/219 +15544907 2653 [ DD 14 ] 2/222 +15547560 25755 [ 9B C9 01 ] 3/224 610232/8 +15573315 11349 [ D5 58 ] 2/227 +15584664 15006 [ 9E 75 ] 2/229 
+15599670 89 [ 59 ] 1/231 +15599759 52772 [ A4 9C 03 ] 3/232 52199/8 +15652531 776175 [ EF AF 2F ] 3/235 +16428706 126 [ 7E ] 1/238 +16428832 3884 [ AC 1E ] 2/239 +16432716 33958 [ A6 89 02 ] 3/241 832957/9 +16466674 122 [ 7A ] 1/244 +16466796 41895 [ A7 C7 02 ] 3/245 +16508691 105882 [ 9A BB 06 ] 3/248 +16614573 11067 [ BB 56 ] 2/251 1677245/35/12 +16625640 4588 [ EC 23 ] 2/253 +16630228 7349 [ B5 39 ] 2/255 +16637577 902638 [ EE 8B 37 ] 3/257 +17540215 8737 [ A1 44 ] 2/260 925642/9 +17548952 29186 [ 82 E4 01 ] 3/262 +17578138 41 [ 29 ] 1/265 +17578179 diff --git a/searchlib/src/tests/bytecomplens/tblprint.cpp b/searchlib/src/tests/bytecomplens/tblprint.cpp new file mode 100644 index 00000000000..93657d82178 --- /dev/null +++ b/searchlib/src/tests/bytecomplens/tblprint.cpp @@ -0,0 +1,357 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("tblprint"); +#include <vespa/vespalib/util/random.h> + +#include <vector> +#include <vespa/vespalib/data/databuffer.h> + + +/** + * Class compressing a table of offsets in memory. + * After adding (n) offsets you can access + * (n-1) pairs of (length, offset). + * All offsets must be increasing, but they + * may be added in several chunks. + **/ +class ByteCompressedLengths +{ +public: + /** + * Construct an empty instance + **/ + ByteCompressedLengths(); + + /** + * add the given offset table. + * @param entries number of offsets to store. + * @param offsets table that contains (entries) offsets. + **/ + void addOffsetTable(uint64_t entries, uint64_t *offsets); + + /** + * free resources + **/ + ~ByteCompressedLengths(); + + /** + * Fetch a length and offset from compressed data. 
+ * Note invariant: id < size(); size() == (entries-1) + * + * @param id The index into the offset table + * @param offset Will be incremented by offset[id] + * @return The delta (offset[id+1] - offset[id]) + **/ + uint64_t getLength(uint64_t id, uint64_t &offset) const; + + /** + * The number of (length, offset) pairs stored + **/ + uint64_t size() const { return _entries; } + + struct L3Entry { + uint64_t offset; + uint64_t l0toff; + uint64_t l1toff; + uint64_t l2toff; + }; + vespalib::DataBuffer _l0space; + vespalib::DataBuffer _l1space; + vespalib::DataBuffer _l2space; + const uint8_t *_l0table; + const uint8_t *_l1table; + const uint8_t *_l2table; + + std::vector<L3Entry> _l3table; + + uint64_t _lenSum1; + uint64_t _lenSum2; + uint64_t _l0oSum1; + uint64_t _l0oSum2; + uint64_t _l1oSum2; + uint64_t _last_offset; + uint64_t _entries; + + void addOffset(uint64_t offset); +}; + +/** + * get "Byte Compressed Number" from buffer, incrementing pointer + **/ +static inline uint64_t getBCN(const uint8_t *&buffer) +{ + uint8_t b = *buffer++; + uint64_t len = (b & 127); + unsigned shiftLen = 0; + while (b & 128) { + shiftLen += 7; + b = *buffer++; + len |= ((b & 127) << shiftLen); + } + return len; +} + +static size_t writeLen(vespalib::DataBuffer &buf, uint64_t len) +{ + size_t bytes = 0; + do { + uint8_t b = len & 127; + len >>= 7; + if (len > 0) { + b |= 128; + } + buf.ensureFree(1); + buf.writeInt8(b); + ++bytes; + } while (len > 0); + return bytes; +} + + +ByteCompressedLengths::ByteCompressedLengths() + : _l0space(), + _l1space(), + _l2space(), + _l3table(), + _lenSum1(0), + _lenSum2(0), + _l0oSum1(0), + _l0oSum2(0), + _l1oSum2(0), + _last_offset(0), + _entries(0) +{ +} + + +void +ByteCompressedLengths::addOffset(uint64_t offset) +{ + assert(offset >= _last_offset); + + uint64_t len = offset - _last_offset; + uint64_t i = _entries++; + + if ((i & 3) == 0) { + _lenSum2 += _lenSum1; + _l0oSum2 += _l0oSum1; + + uint64_t t1n = i >> 2; + if ((t1n & 3) == 0) { + uint64_t 
t2n = t1n >> 2; + + if ((t2n & 3) == 0) { + L3Entry e; + e.offset = _last_offset; + e.l0toff = _l0space.getDataLen(); + e.l1toff = _l1space.getDataLen(); + e.l2toff = _l2space.getDataLen(); + + _l3table.push_back(e); + } else { + writeLen(_l2space, _lenSum2); + writeLen(_l2space, _l0oSum2); + writeLen(_l2space, _l1oSum2); + } + _lenSum2 = 0; + _l0oSum2 = 0; + _l1oSum2 = 0; + } else { + _l1oSum2 += writeLen(_l1space, _lenSum1); + _l1oSum2 += writeLen(_l1space, _l0oSum1); + } + _lenSum1 = 0; + _l0oSum1 = 0; + } + _l0oSum1 += writeLen(_l0space, len); + _lenSum1 += len; + _last_offset = offset; +} + + +void +ByteCompressedLengths::addOffsetTable(uint64_t entries, uint64_t *offsets) +{ + if (entries == 0) return; + // Do we have some offsets already? + if (_entries > 0) { + // yes, add first offset normally + addOffset(offsets[0]); + } else { + // no, special treatment for very first offset + _last_offset = offsets[0]; + } + for (uint64_t cnt = 1; cnt < entries; ++cnt) { + addOffset(offsets[cnt]); + } + _l0table = (uint8_t *)_l0space.getData(); + _l1table = (uint8_t *)_l1space.getData(); + _l2table = (uint8_t *)_l2space.getData(); + + LOG(debug, "compressed %ld offsets", (_entries+1)); + LOG(debug, "(%ld bytes)", (_entries+1)*sizeof(uint64_t)); + LOG(debug, "to (%ld + %ld + %ld) bytes + %ld l3entries", + _l0space.getDataLen(), + _l1space.getDataLen(), + _l2space.getDataLen(), + _l3table.size()); + LOG(debug, "(%ld bytes)", + (_l0space.getDataLen() + _l1space.getDataLen() + _l2space.getDataLen() + + _l3table.size()*sizeof(L3Entry))); +} + + +ByteCompressedLengths::~ByteCompressedLengths() +{ +} + +uint64_t +ByteCompressedLengths::getLength(uint64_t numSkip, uint64_t &offset) const +{ + assert(numSkip < _entries); + + unsigned skipL0 = numSkip & 3; + unsigned skipL1 = (numSkip >> 2) & 3; + unsigned skipL2 = (numSkip >> 4) & 3; + uint64_t skipL3 = (numSkip >> 6); + + offset += _l3table[skipL3].offset; + uint64_t l0toff = _l3table[skipL3].l0toff; + uint64_t l1toff = 
_l3table[skipL3].l1toff; + uint64_t l2toff = _l3table[skipL3].l2toff; + + // printf("start off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + + const uint8_t *l2pos = _l2table + l2toff; + + while (skipL2 > 0) { + --skipL2; + offset += getBCN(l2pos); + l0toff += getBCN(l2pos); + l1toff += getBCN(l2pos); + } + + const uint8_t *l1pos = _l1table + l1toff; + + while (skipL1 > 0) { + --skipL1; + offset += getBCN(l1pos); + l0toff += getBCN(l1pos); + + } + const uint8_t *l0pos = _l0table + l0toff; + + while (skipL0 > 0) { + --skipL0; + offset += getBCN(l0pos); + } + // printf("end off %ld l0off %ld l1off %ld l2off %ld\n", offset, l0toff, l1toff, l2toff); + return getBCN(l0pos); +} + + + +class Test { +public: + static void printTable(); +}; + + + +int main(int /*argc*/, char ** /*argv*/) +{ + Test::printTable(); + return 0; +} + +void +Test::printTable() +{ + vespalib::RandomGen rndgen(0x07031969); +#define TBLSIZ 120 + uint32_t *lentable = new uint32_t[TBLSIZ]; + uint64_t *offtable = new uint64_t[TBLSIZ]; + + uint64_t offset = 16 + TBLSIZ*8; + + for (int i = 0; i < TBLSIZ; i++) { + int sel = rndgen.nextInt32(); + int val = rndgen.nextInt32(); + switch (sel & 0x7) { + case 0: + val &= 0x7F; + break; + case 1: + val &= 0xFF; + break; + case 3: + val &= 0x1FFF; + break; + case 4: + val &= 0x3FFF; + break; + case 5: + val &= 0x7FFF; + break; + case 6: + val &= 0xFFFF; + break; + case 7: + default: + val &= 0xFFFFF; + break; + } + offtable[i] = offset; + lentable[i] = val; + offset += val; + } + + ByteCompressedLengths foo; + foo.addOffsetTable(TBLSIZ, offtable); + + const uint8_t *l1pos = foo._l1table; + const uint8_t *l2pos = foo._l2table; + + printf("%s\t%s\t%s\t%s\t%s\t%s\t%s\n", + "offset", "length", "BCN val", "L0 len/off", "skipL1", "skipL2", "skipL3"); + + int slb = 0; + for (int i = 0; i+1 < TBLSIZ; i++) { + printf("%ld\t%d\t[", offtable[i], lentable[i]); + int bytes=0; + uint64_t len = lentable[i]; + do { + uint8_t b = len & 127; + len >>= 
7; + if (len > 0) { + b |= 128; + } + printf(" %02X", b); + ++bytes; + } while (len > 0); + printf(" ]\t%d", bytes); + printf("/%d", slb); + slb += bytes; + + if ((i & 63) == 0) { + printf("\t\t\t%ld/%ld/%ld/%ld", + foo._l3table[i >> 6].offset, + foo._l3table[i >> 6].l0toff, + foo._l3table[i >> 6].l1toff, + foo._l3table[i >> 6].l2toff); + } else + if ((i & 15) == 0) { + printf("\t\t%ld", getBCN(l2pos)); + printf("/%ld", getBCN(l2pos)); + printf("/%ld", getBCN(l2pos)); + } else + if ((i & 3) == 0) { + printf("\t%ld", getBCN(l1pos)); + printf("/%ld", getBCN(l1pos)); + } + printf("\n"); + } + printf("%ld\n", offtable[TBLSIZ-1]); + fflush(stdout); +} diff --git a/searchlib/src/tests/common/bitvector/.gitignore b/searchlib/src/tests/common/bitvector/.gitignore new file mode 100644 index 00000000000..bdc2879ea74 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +*_test +*_benchmark +/bitvector_test-common +searchlib_condensedbitvector_test_app +searchlib_bitvector_benchmark_app +searchlib_bitvector_test-common_app diff --git a/searchlib/src/tests/common/bitvector/CMakeLists.txt b/searchlib/src/tests/common/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..ce49872319a --- /dev/null +++ b/searchlib/src/tests/common/bitvector/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_bitvector_test-common_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvector_test-common_app COMMAND searchlib_bitvector_test-common_app) +vespa_add_executable(searchlib_bitvector_benchmark_app + SOURCES + bitvector_benchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvector_benchmark_app COMMAND searchlib_bitvector_benchmark_app BENCHMARK) +vespa_add_executable(searchlib_condensedbitvector_test_app + SOURCES + condensedbitvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_condensedbitvector_test_app COMMAND searchlib_condensedbitvector_test_app) diff --git a/searchlib/src/tests/common/bitvector/DESC b/searchlib/src/tests/common/bitvector/DESC new file mode 100644 index 00000000000..313f0f89f2a --- /dev/null +++ b/searchlib/src/tests/common/bitvector/DESC @@ -0,0 +1 @@ +bitvector test. Take a look at bitvector_test.cpp for details. diff --git a/searchlib/src/tests/common/bitvector/FILES b/searchlib/src/tests/common/bitvector/FILES new file mode 100644 index 00000000000..a2583d74519 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/FILES @@ -0,0 +1 @@ +bitvector_test.cpp diff --git a/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp b/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp new file mode 100644 index 00000000000..cc0ef78c193 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/bitvector_benchmark.cpp @@ -0,0 +1,37 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bitvector_benchmark"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/bitvector.h> + +using namespace search; + +namespace { + +size_t scan(BitVector & bv) __attribute__((noinline)); + +size_t scan(BitVector & bv) +{ + size_t count(0); + for (BitVector::Index i(bv.getFirstTrueBit()), m(bv.size()); i < m; i = bv.getNextTrueBit(i+1)) { + count++; + } + return count; +} + +} + +// This test is 10% faster with table lookup than with runtime shifting. +TEST("speed of getNextTrueBit") +{ + BitVector::UP bv(BitVector::create(100000000)); + bv->setInterval(0, bv->size() - 1); + + for (size_t i(0); i < 10; i++) { + EXPECT_EQUAL(bv->size(), scan(*bv)); + } + EXPECT_EQUAL(bv->size(), bv->countTrueBits()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/bitvector/bitvector_test.cpp b/searchlib/src/tests/common/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..11c43166ef5 --- /dev/null +++ b/searchlib/src/tests/common/bitvector/bitvector_test.cpp @@ -0,0 +1,541 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("bitvector_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/searchlib/common/growablebitvector.h>
+#include <vespa/searchlib/common/partialbitvector.h>
+#include <vespa/searchlib/common/rankedhit.h>
+#include <vespa/searchlib/common/bitvectoriterator.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/searchlib/util/rand48.h>
+
+using namespace search;
+
+namespace {
+
+/**
+ * Renders the true bits of bv as "[a,b,c]", scanning with getNextTrueBit()
+ * from getStartIndex() until a position at or past size() is returned.
+ */
+std::string
+toString(const BitVector & bv)
+{
+    std::stringstream ss;
+    ss << "[";
+    bool first = true;
+    uint32_t nextBit = bv.getStartIndex();
+    for (;;) {
+        nextBit = bv.getNextTrueBit(nextBit);
+        if (nextBit >= bv.size()) {
+            break;
+        }
+        if (!first) {
+            ss << ",";
+        }
+        // Print the hit, then step past it so the next lookup moves forward.
+        ss << nextBit++;
+        first = false;
+    }
+    ss << "]";
+    return ss.str();
+}
+
+
+/**
+ * Renders the hits produced by the iterator as "[a,b,c]".  The iterator is
+ * reset with initFullRange() and probed with seek() starting at docId 1.
+ */
+std::string
+toString(BitVectorIterator &b)
+{
+    std::stringstream ss;
+    ss << "[";
+    bool first = true;
+    b.initFullRange();
+    for (uint32_t docId = 1; ! b.isAtEnd(docId); ) {
+        if (!b.seek(docId)) {
+            // Miss: continue from the iterator's own position, but always
+            // advance by at least one so the loop makes progress.
+            docId = std::max(docId + 1, b.getDocId());
+            if (b.isAtEnd(docId))
+                break;
+            continue;
+        }
+        if (!first) {
+            ss << ",";
+        }
+        b.unpack(docId);
+        ss << docId++;
+        first = false;
+    }
+    ss << "]";
+    return ss.str();
+}
+
+
+
+/**
+ * Reference bit count used to cross-check BitVector::countInterval():
+ * tests every position from low through high (high included, clamped to
+ * size() - 1) with testBit().  Returns 0 for an empty vector.
+ */
+uint32_t
+myCountInterval(const BitVector &bv, uint32_t low, uint32_t high)
+{
+    uint32_t res = 0u;
+    if (bv.size() == 0u)
+        return 0u;
+    if (high >= bv.size())
+        high = bv.size() - 1;
+    for (; low <= high; ++low) {
+        if (bv.testBit(low))
+            ++res;
+    }
+    return res;
+}
+
+/**
+ * Sets up to `count` pseudo-random bits in a vector covering [offset, offset + size)
+ * and verifies that getNextTrueBit()/getPrevTrueBit() step through exactly those bits.
+ *
+ * @param count  number of random lids drawn (duplicates are removed afterwards)
+ * @param offset start index of the bit vector
+ * @param size   number of positions in the bit vector
+ * @param rnd    random generator supplying the lids
+ */
+void
+scan(uint32_t count, uint32_t offset, uint32_t size, Rand48 &rnd)
+{
+    std::vector<uint32_t> lids;
+    lids.reserve(count);
+    uint32_t end = size + offset;
+    for (uint32_t i = 0; i < count; ++i) {
+        // Drawn from offset + 1 .. offset + size - 1; the start index itself is never set.
+        uint32_t lid = offset + (rnd.lrand48() % (size - 1)) + 1;
+        lids.push_back(lid);
+    }
+    // Sort and drop duplicates so the list matches the order bits are found in.
+    std::sort(lids.begin(), lids.end());
+    lids.resize(std::unique(lids.begin(), lids.end()) - lids.begin());
+    BitVector::UP bv(BitVector::create(offset, end));
+    for (auto lid : lids) {
+        bv->setBit(lid);
+    }
+    EXPECT_EQUAL(bv->getFirstTrueBit(), bv->getNextTrueBit(bv->getStartIndex()));
+    uint32_t prevLid = bv->getStartIndex();
+    for (auto lid : lids) {
+        EXPECT_EQUAL(lid, bv->getNextTrueBit(prevLid + 1));
+        EXPECT_EQUAL(prevLid, bv->getPrevTrueBit(lid - 1));
+        prevLid = lid;
+    }
+    EXPECT_TRUE(bv->getNextTrueBit(prevLid + 1) >= end);
+    EXPECT_EQUAL(prevLid, bv->getPrevTrueBit(end - 1));
+}
+
+/**
+ * Runs scan() with increasing bit densities on a 1000000-position vector
+ * starting at `offset`, using a generator seeded with the fixed value 32.
+ */
+void
+scanWithOffset(uint32_t offset)
+{
+    Rand48 rnd;
+
+    rnd.srand48(32);
+    scan(10, offset, 1000000, rnd);
+    scan(100, offset, 1000000, rnd);
+    scan(1000, offset, 1000000, rnd);
+    scan(10000, offset, 1000000, rnd);
+    scan(100000, offset, 1000000, rnd);
+    scan(500000, offset, 1000000, rnd);
+    scan(1000000, offset, 1000000, rnd);
+}
+
+}
+
+/**
+ * Checks that act renders as exp both when read directly and when read
+ * through a BitVectorIterator created on top of it.
+ *
+ * @return true when both comparisons passed
+ */
+bool
+assertBV(const std::string & exp, const BitVector & act)
+{
+    bool res1 = EXPECT_EQUAL(exp, toString(act));
+    search::fef::TermFieldMatchData f;
+    search::fef::TermFieldMatchDataArray a;
+    a.add(&f);
+    queryeval::SearchIterator::UP it(BitVectorIterator::create(&act, a, true));
+    BitVectorIterator & b(dynamic_cast<BitVectorIterator &>(*it));
+    bool res2 = EXPECT_EQUAL(exp, toString(b));
+    return res1 && res2;
+}
+
+/** Sets bit (bit + offset) in bv for every entry of bits. */
+void
+fill(BitVector & bv, const std::vector<uint32_t> & bits, uint32_t offset)
+{
+    for (uint32_t bit : bits) {
+        bv.setBit(bit + offset);
+    }
+}
+
+/**
+ * Builds the "[a,b,c]" string expected from toString() for a vector that
+ * has (bit + offset) set for every entry of bits.
+ */
+vespalib::string
+fill(const std::vector<uint32_t> & bits, uint32_t offset)
+{
+    vespalib::asciistream os;
+    os << "[";
+    size_t count(0);
+    for (uint32_t bit : bits) {
+        count++;
+        os << bit + offset;
+        // Separator after every element except the last.
+        if (count != bits.size()) { os << ","; }
+    }
+    os << "]";
+    return os.str();
+}
+
+// Bit patterns shared by the and/or/andNot helpers; they overlap in 39 and 71.
+std::vector<uint32_t> A = {7, 39, 71, 103};
+std::vector<uint32_t> B = {15, 39, 71, 100};
+
+/**
+ * Verifies andWith() on 128-position vectors starting at `offset`:
+ * A and B leaves {39,71}, and the operands v1/v2 are left unchanged.
+ */
+void
+testAnd(uint32_t offset)
+{
+    uint32_t end = offset + 128;
+    BitVector::UP v1(BitVector::create(offset, end));
+    BitVector::UP v2(BitVector::create(offset, end));
+    BitVector::UP v3(BitVector::create(offset, end));
+
+    fill(*v1, A, offset);
+    fill(*v3, A, offset);
+    fill(*v2, B, offset);
+    EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+    EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+    EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+    v3->andWith(*v2);
+    EXPECT_TRUE(assertBV(fill({39,71}, offset), *v3));
+
+    EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+    EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+}
+
+/**
+ * Verifies orWith() on 128-position vectors starting at `offset`:
+ * A or B gives the union of both patterns, and v1/v2 are left unchanged.
+ */
+void
+testOr(uint32_t offset)
+{
+    uint32_t end = offset + 128;
+    BitVector::UP v1(BitVector::create(offset, end));
+    BitVector::UP v2(BitVector::create(offset, end));
+    BitVector::UP v3(BitVector::create(offset, end));
+
+    fill(*v1, A, offset);
+    fill(*v3, A, offset);
+    fill(*v2, B, offset);
+    EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+    EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+    EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+    v3->orWith(*v2);
+    EXPECT_TRUE(assertBV(fill({7,15,39,71,100,103}, offset), *v3));
+
+    EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+    EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+}
+
+/**
+ * Verifies andNotWith() against another bit vector and andNotWithT() against
+ * a RankedHit array holding the same positions as B: both leave {7,103}.
+ */
+void
+testAndNot(uint32_t offset)
+{
+    uint32_t end = offset + 128;
+    BitVector::UP v1(BitVector::create(offset, end));
+    BitVector::UP v2(BitVector::create(offset, end));
+    BitVector::UP v3(BitVector::create(offset, end));
+
+    fill(*v1, A, offset);
+    fill(*v3, A, offset);
+    fill(*v2, B, offset);
+    EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+    EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+    EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+    v3->andNotWith(*v2);
+    EXPECT_TRUE(assertBV(fill({7,103}, offset), *v3));
+
+    EXPECT_TRUE(assertBV(fill(A, offset), *v1));
+    EXPECT_TRUE(assertBV(fill(B, offset), *v2));
+
+    // Restore v3 to pattern A before repeating the operation with ranked hits.
+    v3->clear();
+    fill(*v3, A, offset);
+    EXPECT_TRUE(assertBV(fill(A, offset), *v3));
+
+
+    std::vector<RankedHit> rh;
+    rh.emplace_back(15u+offset, 0.0);
+    rh.emplace_back(39u+offset, 0.0);
+    rh.emplace_back(71u+offset, 0.0);
+    rh.emplace_back(100u+offset, 0.0);
+
+    v3->andNotWithT(RankedHitIterator(&rh[0], 4));
+    EXPECT_TRUE(assertBV(fill({7,103}, offset), *v3));
+}
+
+// Exercises a PartialBitVector covering 717..919: set/count, equality and
+// or-ing with a fully set 1000-position vector.
+TEST("requireThatSequentialOperationsOnPartialWorks")
+{
+    PartialBitVector p1(717,919);
+
+    EXPECT_FALSE(p1.hasTrueBits());
+    EXPECT_EQUAL(0u, p1.countTrueBits());
+    p1.setBit(719);
+    // The expectations show setBit() leaving the cached count untouched
+    // until invalidateCachedCount() is called.
+    EXPECT_EQUAL(0u, p1.countTrueBits());
+    p1.invalidateCachedCount();
+    EXPECT_TRUE(p1.hasTrueBits());
+    EXPECT_EQUAL(1u, p1.countTrueBits());
+    p1.slowSetBit(718);
+    p1.slowSetBit(739);
+    p1.slowSetBit(871);
+    p1.slowSetBit(903);
+    // slowSetBit() is expected to be reflected in the count without invalidation.
+    EXPECT_EQUAL(5u, p1.countTrueBits());
+    EXPECT_TRUE(assertBV("[718,719,739,871,903]", p1));
+
+    PartialBitVector p2(717,919);
+    EXPECT_FALSE(p1 == p2);
+    p2.slowSetBit(719);
+    p2.slowSetBit(718);
+    p2.slowSetBit(739);
+    p2.slowSetBit(871);
+    EXPECT_FALSE(p1 == p2);
+    p2.slowSetBit(903);
+    EXPECT_TRUE(p1 == p2);
+
+    AllocatedBitVector full(1000);
+    full.setInterval(0, 1000);
+    EXPECT_EQUAL(5u, p2.countTrueBits());
+    p2.orWith(full);
+    // 919 - 717 = 202 positions, all set after or-ing with the full vector.
+    EXPECT_EQUAL(202u, p2.countTrueBits());
+}
+
+// initRange() with a range entirely past the 128-position vector must leave
+// the iterator at end.
+TEST("requireThatInitRangeStaysWithinBounds") {
+    AllocatedBitVector v1(128);
+    search::fef::TermFieldMatchData f;
+    search::fef::TermFieldMatchDataArray a;
+    a.add(&f);
+    queryeval::SearchIterator::UP it(BitVectorIterator::create(&v1, a, true));
+    it->initRange(700, 800);
+    EXPECT_TRUE(it->isAtEnd());
+}
+
+// The three tests below repeat the helper for start offsets 0..99.
+TEST("requireThatAndWorks") {
+    for (uint32_t offset(0); offset < 100; offset++) {
+        testAnd(offset);
+    }
+}
+
+TEST("requireThatOrWorks") {
+    for (uint32_t offset(0); offset < 100; offset++) {
+        testOr(offset);
+    }
+}
+
+
+TEST("requireThatAndNotWorks") {
+    for (uint32_t offset(0); offset < 100; offset++) {
+        testAndNot(offset);
+    }
+}
+
+// clear() removes every previously set bit.
+TEST("requireThatClearWorks")
+{
+    AllocatedBitVector v1(128);
+
+    v1.setBit(7);
+    v1.setBit(39);
+    v1.setBit(71);
+    v1.setBit(103);
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v1));
+
+    v1.clear();
+    EXPECT_TRUE(assertBV("[]", v1));
+}
+
+// foreach_truebit()/foreach_falsebit() are checked by summing the visited
+// positions; the optional arguments bound the visited range, and the
+// expectations show the upper bound being excluded.
+TEST("requireThatForEachWorks") {
+    AllocatedBitVector v1(128);
+
+    v1.setBit(7);
+    v1.setBit(39);
+    v1.setBit(71);
+    v1.setBit(103);
+    EXPECT_EQUAL(128u, v1.size());
+
+    size_t sum(0);
+    v1.foreach_truebit([&](uint32_t key) { sum += key; });
+    // 7 + 39 + 71 + 103
+    EXPECT_EQUAL(220u, sum);
+
+    sum = 0;
+    v1.foreach_truebit([&](uint32_t key) { sum += key; }, 7);
+    EXPECT_EQUAL(220u, sum);
+
+    sum = 0;
+    v1.foreach_truebit([&](uint32_t key) { sum += key; }, 6, 7);
+    EXPECT_EQUAL(0u, sum);
+    sum = 0;
+    v1.foreach_truebit([&](uint32_t key) { sum += key; }, 7, 8);
+    EXPECT_EQUAL(7u, sum);
+    sum = 0;
+    v1.foreach_truebit([&](uint32_t key) { sum += key; }, 8, 9);
+    EXPECT_EQUAL(0u, sum);
+
+    sum = 0;
+    v1.foreach_truebit([&](uint32_t key) { sum += key; }, 8);
+    // 220 minus the skipped bit 7
+    EXPECT_EQUAL(213u, sum);
+
+    sum = 0;
+    v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 6);
+    EXPECT_EQUAL(5u, sum);
+
+    sum = 0;
+    v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 7);
+    EXPECT_EQUAL(11u, sum);
+
+    sum = 0;
+    v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 8);
+    // Still 5 + 6: position 7 is a true bit and is skipped.
+    EXPECT_EQUAL(11u, sum);
+
+    sum = 0;
+    v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 5, 9);
+    EXPECT_EQUAL(19u, sum);
+
+    sum = 0;
+    v1.foreach_falsebit([&](uint32_t key) { sum += key; }, 6);
+    // Sum of 6..127 (arithmetic series) minus the four true bits.
+    EXPECT_EQUAL(size_t((((6+127)*(127-6 + 1)) >> 1) - 220), sum);
+}
+
+
+// setBit()/clearBit() versus the cached true-bit count: the expectations
+// show the count staying stale until invalidateCachedCount(), while
+// slowSetBit() is reflected immediately.
+TEST("requireThatSetWorks")
+{
+    AllocatedBitVector v1(128);
+
+    v1.setBit(7);
+    v1.setBit(39);
+    v1.setBit(71);
+    v1.setBit(103);
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v1));
+    v1.invalidateCachedCount();
+    EXPECT_EQUAL(4u, v1.countTrueBits());
+
+    v1.setBit(80);
+    EXPECT_EQUAL(4u, v1.countTrueBits());
+    v1.invalidateCachedCount();
+    EXPECT_EQUAL(5u, v1.countTrueBits());
+    EXPECT_TRUE(assertBV("[7,39,71,80,103]", v1));
+
+    // Clearing a bit that was never set changes nothing.
+    v1.clearBit(35);
+    EXPECT_EQUAL(5u, v1.countTrueBits());
+    v1.invalidateCachedCount();
+    EXPECT_EQUAL(5u, v1.countTrueBits());
+    EXPECT_TRUE(assertBV("[7,39,71,80,103]", v1));
+    v1.clearBit(71);
+    EXPECT_EQUAL(5u, v1.countTrueBits());
+    v1.invalidateCachedCount();
+    EXPECT_EQUAL(4u, v1.countTrueBits());
+    EXPECT_TRUE(assertBV("[7,39,80,103]", v1));
+
+    // Setting an already set bit must not bump the count.
+    v1.slowSetBit(39);
+    EXPECT_EQUAL(4u, v1.countTrueBits());
+    EXPECT_TRUE(assertBV("[7,39,80,103]", v1));
+    v1.slowSetBit(57);
+    EXPECT_EQUAL(5u, v1.countTrueBits());
+    EXPECT_TRUE(assertBV("[7,39,57,80,103]", v1));
+}
+
+
+// clearInterval(start, end) clears start..end-1; the bit at `end` survives.
+TEST("requireThatClearIntervalWorks")
+{
+    AllocatedBitVector v1(1200);
+
+    v1.setBit(7);
+    v1.setBit(39);
+    v1.setBit(71);
+    v1.setBit(103);
+    v1.setBit(200);
+    v1.setBit(500);
+    EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1));
+
+    v1.clearInterval(40, 70);
+    EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1));
+    v1.clearInterval(39, 71);
+    EXPECT_TRUE(assertBV("[7,71,103,200,500]", v1));
+    v1.clearInterval(39, 72);
+    EXPECT_TRUE(assertBV("[7,103,200,500]", v1));
+    v1.clearInterval(20, 501);
+    EXPECT_TRUE(assertBV("[7]", v1));
+}
+
+
+// setInterval(start, end) sets start..end-1.  countInterval() is checked
+// against the brute-force myCountInterval() and against hand-computed values.
+TEST("requireThatSetIntervalWorks")
+{
+    AllocatedBitVector v1(1200);
+
+    EXPECT_FALSE(v1.hasTrueBits());
+    v1.setBit(7);
+    v1.setBit(39);
+    v1.setBit(71);
+    v1.setBit(103);
+    v1.setBit(200);
+    v1.setBit(500);
+    EXPECT_TRUE(assertBV("[7,39,71,103,200,500]", v1));
+
+    v1.setInterval(40, 46);
+    EXPECT_TRUE(assertBV("[7,39,40,41,42,43,44,45,71,103,200,500]", v1));
+    EXPECT_TRUE(v1.hasTrueBits());
+    v1.invalidateCachedCount();
+    EXPECT_EQUAL(12u, v1.countTrueBits());
+    EXPECT_EQUAL(12u, v1.countInterval(1, 1199));
+    EXPECT_EQUAL(12u, myCountInterval(v1, 1, 1199));
+
+    v1.setInterval(40, 200);
+    EXPECT_EQUAL(164u, v1.countInterval(1, 1199));
+    EXPECT_EQUAL(164u, myCountInterval(v1, 1, 1199));
+    EXPECT_EQUAL(163u, v1.countInterval(1, 201));
+    EXPECT_EQUAL(162u, v1.countInterval(1, 200));
+    EXPECT_EQUAL(163u, v1.countInterval(7, 201));
+    EXPECT_EQUAL(162u, v1.countInterval(8, 201));
+    EXPECT_EQUAL(161u, v1.countInterval(8, 200));
+    v1.clearInterval(72, 174);
+    EXPECT_EQUAL(62u, v1.countInterval(1, 1199));
+    EXPECT_EQUAL(62u, myCountInterval(v1, 1, 1199));
+    EXPECT_EQUAL(61u, v1.countInterval(1, 201));
+    EXPECT_EQUAL(60u, v1.countInterval(1, 200));
+    EXPECT_EQUAL(61u, v1.countInterval(7, 201));
+    EXPECT_EQUAL(60u, v1.countInterval(8, 201));
+    EXPECT_EQUAL(59u, v1.countInterval(8, 200));
+    EXPECT_EQUAL(51u, v1.countInterval(8, 192));
+    EXPECT_EQUAL(50u, v1.countInterval(8, 191));
+
+    EXPECT_EQUAL(1u, v1.countInterval(1, 20));
+    EXPECT_EQUAL(1u, v1.countInterval(7, 20));
+    EXPECT_EQUAL(0u, v1.countInterval(8, 20));
+    EXPECT_EQUAL(1u, v1.countInterval(1, 8));
+    EXPECT_EQUAL(0u, v1.countInterval(1, 7));
+}
+
+// Next/previous true-bit scanning, with and without a non-zero start index.
+TEST("requireThatScanWorks")
+{
+    scanWithOffset(0);
+    scanWithOffset(19876);
+}
+
+// reserve()/extend()/shrink() on a GrowableBitVector: size and capacity are
+// tracked separately, and shrinking below a set bit (103) drops it.
+TEST("requireThatGrowWorks")
+{
+    vespalib::GenerationHolder g;
+    GrowableBitVector v(200, 200, g);
+
+    v.setBit(7);
+    v.setBit(39);
+    v.setBit(71);
+    v.setBit(103);
+
+    EXPECT_EQUAL(200u, v.size());
+    v.invalidateCachedCount();
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+    EXPECT_EQUAL(4u, v.countTrueBits());
+    v.reserve(204);
+    EXPECT_EQUAL(200u, v.size());
+    EXPECT_EQUAL(204u, v.capacity());
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+    EXPECT_EQUAL(4u, v.countTrueBits());
+    v.extend(202);
+    EXPECT_EQUAL(202u, v.size());
+    EXPECT_EQUAL(204u, v.capacity());
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+    EXPECT_EQUAL(4u, v.countTrueBits());
+    v.shrink(200);
+    EXPECT_EQUAL(200u, v.size());
+    EXPECT_EQUAL(204u, v.capacity());
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+    EXPECT_EQUAL(4u, v.countTrueBits());
+    v.reserve(204);
+    EXPECT_EQUAL(200u, v.size());
+    EXPECT_EQUAL(204u, v.capacity());
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+    EXPECT_EQUAL(4u, v.countTrueBits());
+    // NOTE(review): shrink() is called here with a value above the current
+    // size (200) and the test expects the size to become 202 - confirm this
+    // is intended rather than a second extend().
+    v.shrink(202);
+    EXPECT_EQUAL(202u, v.size());
+    EXPECT_EQUAL(204u, v.capacity());
+    EXPECT_TRUE(assertBV("[7,39,71,103]", v));
+    EXPECT_EQUAL(4u, v.countTrueBits());
+
+    v.shrink(100);
+    EXPECT_EQUAL(100u, v.size());
+    EXPECT_EQUAL(204u, v.capacity());
+    EXPECT_TRUE(assertBV("[7,39,71]", v));
+    EXPECT_EQUAL(3u, v.countTrueBits());
+    g.transferHoldLists(1);
+    g.trimHoldLists(2);
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp b/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp
new file mode 100644
index 00000000000..eddd3941c35
--- /dev/null
+++ b/searchlib/src/tests/common/bitvector/condensedbitvector_test.cpp
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/condensedbitvectors.h> +#include <vespa/log/log.h> + +LOG_SETUP("condensedbitvector_test"); + +using search::CondensedBitVector; +using vespalib::GenerationHolder; + +TEST("Verify state after init") +{ + GenerationHolder genHolder; + CondensedBitVector::UP cbv(CondensedBitVector::create(8, genHolder)); + EXPECT_EQUAL(32u, cbv->getKeyCapacity()); + EXPECT_EQUAL(8u, cbv->getCapacity()); + EXPECT_EQUAL(8u, cbv->getSize()); +} + + +TEST("Verify set/get") +{ + GenerationHolder genHolder; + CondensedBitVector::UP cbv(CondensedBitVector::create(8, genHolder)); + for (size_t i(0); i < 32; i++) { + for (size_t j(0); j < 8; j++) { + EXPECT_FALSE(cbv->get(i,j)); + } + } + cbv->set(23,5, false); + EXPECT_FALSE(cbv->get(23, 5)); + for (size_t i(0); i < 32; i++) { + for (size_t j(0); j < 8; j++) { + EXPECT_FALSE(cbv->get(i,j)); + } + } + cbv->set(23,5, true); + EXPECT_TRUE(cbv->get(23, 5)); + size_t sum(0); + for (size_t i(0); i < 32; i++) { + for (size_t j(0); j < 8; j++) { + sum += cbv->get(i,j) ? 1 : 0; + } + } + EXPECT_EQUAL(1u, sum); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore b/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore new file mode 100644 index 00000000000..0bd7759156b --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/.gitignore @@ -0,0 +1 @@ +searchlib_foregroundtaskexecutor_test_app diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt b/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt new file mode 100644 index 00000000000..dd0e5c0b039 --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_foregroundtaskexecutor_test_app + SOURCES + foregroundtaskexecutor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_foregroundtaskexecutor_test_app COMMAND searchlib_foregroundtaskexecutor_test_app) diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/DESC b/searchlib/src/tests/common/foregroundtaskexecutor/DESC new file mode 100644 index 00000000000..bfa0dfa3e6a --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/DESC @@ -0,0 +1 @@ +foregroundtaskexecutor test. Take a look at foregroundtaskexecutor_test.cpp for details. diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/FILES b/searchlib/src/tests/common/foregroundtaskexecutor/FILES new file mode 100644 index 00000000000..5c0c9178abd --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/FILES @@ -0,0 +1 @@ +foregroundtaskexecutor_test.cpp diff --git a/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp b/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp new file mode 100644 index 00000000000..49ebbf12bc0 --- /dev/null +++ b/searchlib/src/tests/common/foregroundtaskexecutor/foregroundtaskexecutor_test.cpp @@ -0,0 +1,124 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("foregroundtaskexecutor_test");
+#include <vespa/searchlib/common/foregroundtaskexecutor.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <mutex>
+#include <condition_variable>
+
+namespace search
+{
+
+namespace common
+{
+
+
+// Test fixture owning the executor under test (default constructed).
+class Fixture
+{
+public:
+    ForegroundTaskExecutor _threads;
+
+    Fixture()
+        : _threads()
+    {
+    }
+};
+
+
+// Shared state mutated by the submitted tasks.  _val is the value being
+// updated, _fail counts updates whose precondition did not hold and _done
+// counts finished updates; all three are guarded by _m.
+class TestObj
+{
+public:
+    std::mutex _m;
+    std::condition_variable _cv;
+    int _done;
+    int _fail;
+    int _val;
+
+    TestObj()
+        : _m(),
+          _cv(),
+          _done(0),
+          _fail(0),
+          _val(0)
+    {
+    }
+
+    // Compare-and-set: stores newValue only if _val currently equals
+    // oldValue, otherwise records a failure.  Always counts as done and
+    // wakes every waiter.
+    void
+    modify(int oldValue, int newValue)
+    {
+        {
+            std::lock_guard<std::mutex> guard(_m);
+            if (_val == oldValue) {
+                _val = newValue;
+            } else {
+                ++_fail;
+            }
+            ++_done;
+        }
+        // Notify after the lock has been released.
+        _cv.notify_all();
+    }
+
+    // Blocks until at least wantDone calls to modify() have completed.
+    void
+    wait(int wantDone)
+    {
+        std::unique_lock<std::mutex> guard(_m);
+        _cv.wait(guard, [=] { return this->_done >= wantDone; });
+    }
+};
+
+// A single task is executed, and sync() afterwards changes nothing.
+TEST_F("testExecute", Fixture) {
+    std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+    EXPECT_EQUAL(0, tv->_val);
+    f._threads.execute(1, [=]() { tv->modify(0, 42); });
+    tv->wait(1);
+    EXPECT_EQUAL(0, tv->_fail);
+    EXPECT_EQUAL(42, tv->_val);
+    f._threads.sync();
+    EXPECT_EQUAL(0, tv->_fail);
+    EXPECT_EQUAL(42, tv->_val);
+}
+
+
+// Two tasks with the same id: the second (14 -> 42) must observe the result
+// of the first (0 -> 14) even though the first one sleeps before updating.
+TEST_F("require that task with same id are serialized", Fixture)
+{
+    std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+    EXPECT_EQUAL(0, tv->_val);
+    f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); });
+    f._threads.execute(0, [=]() { tv->modify(14, 42); });
+    tv->wait(2);
+    EXPECT_EQUAL(0, tv->_fail);
+    EXPECT_EQUAL(42, tv->_val);
+    f._threads.sync();
+    EXPECT_EQUAL(0, tv->_fail);
+    EXPECT_EQUAL(42, tv->_val);
+}
+
+// Same scenario with different ids.  The loop only reaches its `break` if the
+// second task ran before the first (_fail == 1); the final expectation
+// requires that this never happened in 100 attempts, i.e. that the tasks
+// were serialized here as well.
+TEST_F("require that task with different ids are serialized", Fixture)
+{
+    int tryCnt = 0;
+    for (tryCnt = 0; tryCnt < 100; ++tryCnt) {
+        std::shared_ptr<TestObj> tv(std::make_shared<TestObj>());
+        EXPECT_EQUAL(0, tv->_val);
+        f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); });
+        f._threads.execute(1, [=]() { tv->modify(14, 42); });
+        tv->wait(2);
+        if (tv->_fail != 1) {
+            continue;
+        }
+        EXPECT_EQUAL(1, tv->_fail);
+        EXPECT_EQUAL(14, tv->_val);
+        f._threads.sync();
+        EXPECT_EQUAL(1, tv->_fail);
+        EXPECT_EQUAL(14, tv->_val);
+        break;
+    }
+    EXPECT_TRUE(tryCnt >= 100);
+}
+
+
+} // namespace common
+} // namespace search
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/location/.gitignore b/searchlib/src/tests/common/location/.gitignore
new file mode 100644
index 00000000000..ec9acbe771e
--- /dev/null
+++ b/searchlib/src/tests/common/location/.gitignore
@@ -0,0 +1 @@
+searchlib_location_test_app
diff --git a/searchlib/src/tests/common/location/CMakeLists.txt b/searchlib/src/tests/common/location/CMakeLists.txt
new file mode 100644
index 00000000000..3617657cdf9
--- /dev/null
+++ b/searchlib/src/tests/common/location/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_location_test_app
+    SOURCES
+    location_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_location_test_app COMMAND searchlib_location_test_app)
diff --git a/searchlib/src/tests/common/location/FILES b/searchlib/src/tests/common/location/FILES
new file mode 100644
index 00000000000..7bd6fa8b581
--- /dev/null
+++ b/searchlib/src/tests/common/location/FILES
@@ -0,0 +1 @@
+location_test.cpp
diff --git a/searchlib/src/tests/common/location/location_test.cpp b/searchlib/src/tests/common/location/location_test.cpp
new file mode 100644
index 00000000000..1cbe24ec225
--- /dev/null
+++ b/searchlib/src/tests/common/location/location_test.cpp
@@ -0,0 +1,119 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/common/location.h>
+
+using search::common::Location;
+
+// Returns whether a fresh Location accepts str.
+bool is_parseable(const char *str) {
+    Location loc;
+    return loc.parse(str);
+}
+
+// Parses str into a Location.  A parse failure is recorded as a test failure
+// and the parser's error text is printed to stderr; the (possibly only
+// partially filled) Location is returned either way.
+Location parse(const char *str) {
+    Location loc;
+    if (!EXPECT_TRUE(loc.parse(str))) {
+        fprintf(stderr, " parse error: %s\n", loc.getParseError());
+    }
+    return loc;
+}
+
+// Bounding box syntax as exercised below: "[2,minx,miny,maxx,maxy]".
+// Rejected: a repeated box, a leading number other than 2, embedded spaces,
+// a missing ']' and a wrong number of fields.
+TEST("require that malformed bounding boxes are not parseable") {
+    EXPECT_TRUE(is_parseable("[2,10,20,30,40]"));
+    EXPECT_FALSE(is_parseable("[2,10,20,30,40][2,10,20,30,40]"));
+    EXPECT_FALSE(is_parseable("[1,10,20,30,40]"));
+    EXPECT_FALSE(is_parseable("[3,10,20,30,40]"));
+    EXPECT_FALSE(is_parseable("[2, 10, 20, 30, 40]"));
+    EXPECT_FALSE(is_parseable("[2,10,20,30,40"));
+    EXPECT_FALSE(is_parseable("[2,10,20,30]"));
+    EXPECT_FALSE(is_parseable("[10,20,30,40]"));
+}
+
+// Circle syntax as exercised below: "(2,x,y,radius,a,b,c)" with an optional
+// eighth field (the x aspect, see the aspect ratio test).
+// NOTE(review): the meaning of fields a, b and c is not visible in this file.
+TEST("require that malformed circles are not parseable") {
+    EXPECT_TRUE(is_parseable("(2,10,20,5,0,0,0)"));
+    EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0)(2,10,20,5,0,0,0)"));
+    EXPECT_FALSE(is_parseable("(1,10,20,5,0,0,0)"));
+    EXPECT_FALSE(is_parseable("(3,10,20,5,0,0,0)"));
+    EXPECT_FALSE(is_parseable("(2, 10, 20, 5, 0, 0, 0)"));
+    EXPECT_FALSE(is_parseable("(2,10,20,5)"));
+    EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0"));
+    EXPECT_FALSE(is_parseable("(2,10,20,5,0,0,0,1000"));
+    EXPECT_FALSE(is_parseable("(10,20,5)"));
+}
+
+// A box alone: no ranking on distance, centre at (0,0), maximal radius and
+// min/max taken directly from the box.
+TEST("require that bounding boxes can be parsed") {
+    Location loc = parse("[2,10,20,30,40]");
+    EXPECT_EQUAL(false, loc.getRankOnDistance());
+    EXPECT_EQUAL(true, loc.getPruneOnDistance());
+    EXPECT_EQUAL(0u, loc.getXAspect());
+    EXPECT_EQUAL(0, loc.getX());
+    EXPECT_EQUAL(0, loc.getY());
+    EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), loc.getRadius());
+    EXPECT_EQUAL(10, loc.getMinX());
+    EXPECT_EQUAL(20, loc.getMinY());
+    EXPECT_EQUAL(30, loc.getMaxX());
+    EXPECT_EQUAL(40, loc.getMaxY());
+}
+
+// A circle alone: min/max are the centre plus/minus the radius.
+TEST("require that circles can be parsed") {
+    Location loc = parse("(2,10,20,5,0,0,0)");
+    EXPECT_EQUAL(true, loc.getRankOnDistance());
+    EXPECT_EQUAL(true, loc.getPruneOnDistance());
+    EXPECT_EQUAL(0u, loc.getXAspect());
+    EXPECT_EQUAL(10, loc.getX());
+    EXPECT_EQUAL(20, loc.getY());
+    EXPECT_EQUAL(5u, loc.getRadius());
+    EXPECT_EQUAL(5, loc.getMinX());
+    EXPECT_EQUAL(15, loc.getMinY());
+    EXPECT_EQUAL(15, loc.getMaxX());
+    EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+// With x aspect 2147483648 the x extent widens to -1..21 while the y extent
+// stays at centre +/- radius.
+TEST("require that circles can have aspect ratio") {
+    Location loc = parse("(2,10,20,5,0,0,0,2147483648)");
+    EXPECT_EQUAL(true, loc.getRankOnDistance());
+    EXPECT_EQUAL(true, loc.getPruneOnDistance());
+    EXPECT_EQUAL(2147483648u, loc.getXAspect());
+    EXPECT_EQUAL(10, loc.getX());
+    EXPECT_EQUAL(20, loc.getY());
+    EXPECT_EQUAL(5u, loc.getRadius());
+    EXPECT_EQUAL(-1, loc.getMinX());
+    EXPECT_EQUAL(15, loc.getMinY());
+    EXPECT_EQUAL(21, loc.getMaxX());
+    EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+// Circle followed by box: the resulting min/max is the overlap of the
+// circle's extent (5..15, 15..25) and the box (10..30, 20..40).
+TEST("require that bounding box can be specified after circle") {
+    Location loc = parse("(2,10,20,5,0,0,0)[2,10,20,30,40]");
+    EXPECT_EQUAL(true, loc.getRankOnDistance());
+    EXPECT_EQUAL(true, loc.getPruneOnDistance());
+    EXPECT_EQUAL(0u, loc.getXAspect());
+    EXPECT_EQUAL(10, loc.getX());
+    EXPECT_EQUAL(20, loc.getY());
+    EXPECT_EQUAL(5u, loc.getRadius());
+    EXPECT_EQUAL(10, loc.getMinX());
+    EXPECT_EQUAL(20, loc.getMinY());
+    EXPECT_EQUAL(15, loc.getMaxX());
+    EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+// Box followed by circle gives the same result as the opposite order.
+TEST("require that circles can be specified after bounding box") {
+    Location loc = parse("[2,10,20,30,40](2,10,20,5,0,0,0)");
+    EXPECT_EQUAL(true, loc.getRankOnDistance());
+    EXPECT_EQUAL(true, loc.getPruneOnDistance());
+    EXPECT_EQUAL(0u, loc.getXAspect());
+    EXPECT_EQUAL(10, loc.getX());
+    EXPECT_EQUAL(20, loc.getY());
+    EXPECT_EQUAL(5u, loc.getRadius());
+    EXPECT_EQUAL(10, loc.getMinX());
+    EXPECT_EQUAL(20, loc.getMinY());
+    EXPECT_EQUAL(15, loc.getMaxX());
+    EXPECT_EQUAL(25, loc.getMaxY());
+}
+
+// Regression-style check with large coordinates: the computed box must not
+// wrap around (max stays >= min on both axes).
+TEST("require that santa search gives non-wrapped bounding box") {
+    Location loc = parse("(2,122163600,89998536,290112,4,2000,0,109704)");
+    EXPECT_GREATER_EQUAL(loc.getMaxX(), loc.getMinX());
+    EXPECT_GREATER_EQUAL(loc.getMaxY(), loc.getMinY());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/common/packets/.gitignore b/searchlib/src/tests/common/packets/.gitignore
new file mode 100644
index 00000000000..e3dcf5376d5
--- /dev/null
+++ b/searchlib/src/tests/common/packets/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+packets_test
+searchlib_packets_test_app
diff --git a/searchlib/src/tests/common/packets/CMakeLists.txt b/searchlib/src/tests/common/packets/CMakeLists.txt
new file mode 100644
index 00000000000..e35883b1d8c
--- /dev/null
+++ b/searchlib/src/tests/common/packets/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_packets_test_app
+    SOURCES
+    packets_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_packets_test_app COMMAND searchlib_packets_test_app)
diff --git a/searchlib/src/tests/common/packets/DESC b/searchlib/src/tests/common/packets/DESC
new file mode 100644
index 00000000000..0808703b5fb
--- /dev/null
+++ b/searchlib/src/tests/common/packets/DESC
@@ -0,0 +1 @@
+packets test. Take a look at packets.cpp for details.
diff --git a/searchlib/src/tests/common/packets/FILES b/searchlib/src/tests/common/packets/FILES
new file mode 100644
index 00000000000..35191f9a36d
--- /dev/null
+++ b/searchlib/src/tests/common/packets/FILES
@@ -0,0 +1 @@
+packets.cpp
diff --git a/searchlib/src/tests/common/packets/packets_test.cpp b/searchlib/src/tests/common/packets/packets_test.cpp
new file mode 100644
index 00000000000..443436537e1
--- /dev/null
+++ b/searchlib/src/tests/common/packets/packets_test.cpp
@@ -0,0 +1,705 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("packets_test"); + +#include <vespa/searchlib/common/mapnames.h> +#include <vespa/searchlib/common/packets.h> +#include <vector> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/stringfmt.h> + +using namespace search::fs4transport; + +// ---------------------------------------------------------------------------- +// +// Utilities +// +// ---------------------------------------------------------------------------- + +#define QRF_RANKTYPE QRF_RANKTYPE_DOUBLE + +#define PCODE_BEGIN PCODE_EOL +#define PCODE_END PCODE_LastCode + +class MyPersistentPacketStreamer : public FS4PersistentPacketStreamer { +public: + MyPersistentPacketStreamer() : + FS4PersistentPacketStreamer(FS4PacketFactory::CreateFS4Packet) { + // empty + } + + uint32_t getChannelId(uint32_t pcode, uint32_t chid) { + return HasChannelID(pcode) ? chid : -1u; + } +}; + +FNET_Packet * +testEncodeDecode(FS4PersistentPacketStreamer &streamer, FNET_Packet &packet) +{ + FNET_Context ctx; + FNET_DataBuffer buf; + buf.WriteInt32(0xdeadbeef); // buffers can have extra data at the front. + streamer.Encode(&packet, 1u, &buf); + buf.DataToDead(sizeof(uint32_t)); + + FNET_DataBuffer lhs; + lhs.WriteBytes(buf.GetData(), buf.GetDataLen()); + + buf.WriteInt32(0xdeadbeef); // buffers can have extra data at the end. 
+ + bool broken; + uint32_t plen, pcode, chid; + MyPersistentPacketStreamer myStreamer; + EXPECT_TRUE(streamer.GetPacketInfo(&buf, &plen, &pcode, &chid, &broken)); + if ((pcode & ~PCODE_MASK) == 0) { + EXPECT_EQUAL(packet.GetLength(), plen); + } + EXPECT_EQUAL(packet.GetPCODE() & PCODE_MASK, pcode & PCODE_MASK); + EXPECT_EQUAL(myStreamer.getChannelId(pcode, 1u), chid); + + FNET_Packet *ret = streamer.Decode(&buf, plen, pcode, ctx); + ASSERT_TRUE(ret); + if (ret->GetPCODE() == (pcode & PCODE_MASK)) { + FNET_DataBuffer rhs; + streamer.Encode(ret, 1u, &rhs); + if (!EXPECT_TRUE(lhs.Equals(&rhs))) { + lhs.HexDump(); + rhs.HexDump(); + } + } else { + // Packet was transcoded. + } + return ret; +} + +FNET_Packet * +testEncodeDecode(FNET_Packet &packet) +{ + return testEncodeDecode(FS4PersistentPacketStreamer::Instance, packet); +} + +void fillProperties(FS4Properties &props, const std::string &name, + uint32_t len) { + props.setName(name); + props.allocEntries(len); + for (uint32_t i = 0; i < len; ++i) { + std::string key = vespalib::make_string("key%d", i); + props.setKey(i, key); + + std::string val = vespalib::make_string("val%d", i); + props.setValue(i, val); + } +} + +void testProperties(FS4Properties &props, const std::string &name, + uint32_t len) { + EXPECT_EQUAL(name, props.getName()); + EXPECT_EQUAL(name.size(), props.getNameLen()); + for (uint32_t i = 0; i < len; ++i) { + std::string key = vespalib::make_string("key%d", i); + EXPECT_EQUAL(key, std::string(props.getKey(i), props.getKeyLen(i))); + + std::string val = vespalib::make_string("val%d", i); + EXPECT_EQUAL(val, + std::string(props.getValue(i), props.getValueLen(i))); + } +} + + +// ---------------------------------------------------------------------------- +// +// Tests +// +// ---------------------------------------------------------------------------- + +document::GlobalId gid0("aaaaaaaaaaaa"); +document::GlobalId gid1("bbbbbbbbbbbb"); + +TEST("testPacketArray") { + PacketArray arr; + for (uint32_t i 
= 0; i < 32; ++i) { + EXPECT_EQUAL(i, arr.Length()); + arr.Add(new FNET_ControlPacket(i)); + EXPECT_EQUAL(i, static_cast<FNET_ControlPacket&>(*arr.Array()[i]).GetCommand()); + } + for (uint32_t i = 0; i < arr.Length(); ++i) { + delete static_cast<FNET_ControlPacket *>(arr.Array()[i]); + } +} + +TEST("testPacketFactory") { + ASSERT_TRUE(FS4PacketFactory::CreateFS4Packet(PCODE_BEGIN - 1) == NULL); + + ASSERT_TRUE(FS4PacketFactory::CreateFS4Packet(PCODE_END) == NULL); + + for (uint32_t pcode = PCODE_BEGIN; pcode < PCODE_END; ++pcode) { + if ((pcode != PCODE_MLD_QUERYRESULT2_NOTUSED) && + (pcode != PCODE_QUERY_NOTUSED) && + (pcode != PCODE_QUERY2_NOTUSED) && + (pcode != PCODE_MLD_GETDOCSUMS2_NOTUSED)) + { + std::unique_ptr<FNET_Packet> aptr(FS4PacketFactory::CreateFS4Packet(pcode)); + ASSERT_TRUE(aptr.get() != NULL); + EXPECT_EQUAL(pcode, aptr->GetPCODE()); + } + } +} + +TEST("testPersistentPacketStreamer") { + for (uint32_t pcode = PCODE_BEGIN; pcode < PCODE_END; ++pcode) { + if ((pcode == PCODE_QUERYX) || + (pcode != PCODE_MLD_QUERYRESULT2_NOTUSED) || + (pcode != PCODE_MLD_GETDOCSUMS2_NOTUSED)) + { + continue; + } + std::unique_ptr<FNET_Packet> arg(FS4PacketFactory::CreateFS4Packet(pcode)); + std::unique_ptr<FNET_Packet> ret(testEncodeDecode(FS4PersistentPacketStreamer::Instance, *arg)); + EXPECT_TRUE(ret.get() != NULL); + + FNET_Packet *raw = testEncodeDecode(FS4PersistentPacketStreamer::Instance, + *FS4PacketFactory::CreateFS4Packet(pcode)); + EXPECT_TRUE(raw != NULL); + } +} + +TEST("testProperties") { + FS4Properties src; + fillProperties(src, "foo", 32u); + testProperties(src, "foo", 32u); + + FNET_DataBuffer buf; + src.encode(buf); + FNET_DataBuffer lhs; + lhs.WriteBytes(buf.GetData(), buf.GetDataLen()); + + uint32_t len = buf.GetDataLen(); + FS4Properties dst; + dst.decode(buf, len); + EXPECT_EQUAL(src.getLength(), dst.getLength()); + + testProperties(dst, "foo", 32u); + + FNET_DataBuffer rhs; + dst.encode(rhs); + EXPECT_TRUE(lhs.Equals(&rhs)); +} + 
+TEST("testEol") { + FS4Packet_EOL *src = dynamic_cast<FS4Packet_EOL*>(FS4PacketFactory::CreateFS4Packet(PCODE_EOL)); + ASSERT_TRUE(src != NULL); + + std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_EOL *ptr = dynamic_cast<FS4Packet_EOL*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_EOL, ptr->GetPCODE()); + EXPECT_EQUAL(0u, ptr->GetLength()); + + delete ptr; + } +} + +TEST("testError") { + FS4Packet_ERROR *src = dynamic_cast<FS4Packet_ERROR*>(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR)); + ASSERT_TRUE(src != NULL); + src->_errorCode = 1u; + src->setErrorMessage("foo"); + + std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_ERROR *ptr = dynamic_cast<FS4Packet_ERROR*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_ERROR, ptr->GetPCODE()); + EXPECT_EQUAL(11u, ptr->GetLength()); + EXPECT_EQUAL(1u, ptr->_errorCode); + EXPECT_EQUAL("foo", ptr->_message); + + delete ptr; + } +} + +TEST("testDocsum") { + FS4Packet_DOCSUM *src = dynamic_cast<FS4Packet_DOCSUM*>(FS4PacketFactory::CreateFS4Packet(PCODE_DOCSUM)); + ASSERT_TRUE(src != NULL); + src->setGid(gid0); + src->SetBuf("foo", 3u); + + std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_DOCSUM *ptr = dynamic_cast<FS4Packet_DOCSUM*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_DOCSUM, ptr->GetPCODE()); + EXPECT_EQUAL(3u + 12u, ptr->GetLength()); + EXPECT_EQUAL(gid0, ptr->getGid()); + EXPECT_EQUAL("foo", std::string(ptr->getBuf().c_str(), ptr->getBuf().size())); + + delete ptr; + } +} + +TEST("testMonitorQueryX") { + FS4Packet_MONITORQUERYX *src = dynamic_cast<FS4Packet_MONITORQUERYX*>(FS4PacketFactory::CreateFS4Packet(PCODE_MONITORQUERYX)); + ASSERT_TRUE(src != NULL); + src->_qflags = 1u; + + std::vector<FNET_Packet*> lst; + for (uint32_t i = MQF_QFLAGS, len = 
(uint32_t)(MQF_QFLAGS << 1); i < len; ++i) { + if (i & ~FNET_MQF_SUPPORTED_MASK) { + continue; // not supported; + } + src->_features = i; + lst.push_back(testEncodeDecode(*src)); + } + src->_features = (uint32_t)-1; + lst.push_back(src); + + for (FNET_Packet * packet : lst) { + FS4Packet_MONITORQUERYX *ptr = dynamic_cast<FS4Packet_MONITORQUERYX*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_MONITORQUERYX, ptr->GetPCODE()); + EXPECT_EQUAL(ptr->_features & MQF_QFLAGS ? 1u : 0u, ptr->_qflags); + + delete ptr; + } +} + +TEST("testMonitorResultX") { + FS4Packet_MONITORRESULTX *src = dynamic_cast<FS4Packet_MONITORRESULTX*>(FS4PacketFactory::CreateFS4Packet(PCODE_MONITORRESULTX)); + ASSERT_TRUE(src != NULL); + src->_partid = 1u; + src->_timestamp = 2u; + src->_totalNodes = 3u; + src->_activeNodes = 4u; + src->_totalParts = 5u; + src->_activeParts = 6u; + src->_rflags = 7u; + + std::vector<FNET_Packet*> lst; + for (uint32_t i = MRF_MLD, len = (uint32_t)(MRF_RFLAGS << 1); i < len; ++i) { + if (i & ~FNET_MRF_SUPPORTED_MASK) { + continue; // not supported; + } + src->_features = i; + lst.push_back(testEncodeDecode(*src)); + } + src->_features = (uint32_t)-1; + lst.push_back(src); + + for (FNET_Packet * packet : lst) { + FS4Packet_MONITORRESULTX *ptr = dynamic_cast<FS4Packet_MONITORRESULTX*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_MONITORRESULTX, ptr->GetPCODE()); + EXPECT_EQUAL(1u, ptr->_partid); + EXPECT_EQUAL(2u, ptr->_timestamp); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 3u : 0u, ptr->_totalNodes); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 4u : 0u, ptr->_activeNodes); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 5u : 0u, ptr->_totalParts); + EXPECT_EQUAL(ptr->_features & MRF_MLD ? 6u : 0u, ptr->_activeParts); + EXPECT_EQUAL(ptr->_features & MRF_RFLAGS ? 
7u : 0u, ptr->_rflags); + + delete ptr; + } +} + +TEST("testClearCaches") { + FS4Packet_CLEARCACHES *src = dynamic_cast<FS4Packet_CLEARCACHES*>(FS4PacketFactory::CreateFS4Packet(PCODE_CLEARCACHES)); + ASSERT_TRUE(src != NULL); + + std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_CLEARCACHES *ptr = dynamic_cast<FS4Packet_CLEARCACHES*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_CLEARCACHES, ptr->GetPCODE()); + EXPECT_EQUAL(0u, ptr->GetLength()); + + delete ptr; + } +} + +TEST("testQueueLen") { + FS4Packet_QUEUELEN *src = dynamic_cast<FS4Packet_QUEUELEN*>(FS4PacketFactory::CreateFS4Packet(PCODE_QUEUELEN)); + ASSERT_TRUE(src != NULL); + src->_queueLen = 1u; + src->_dispatchers = 2u; + + std::vector<FNET_Packet*> lst { src, testEncodeDecode(*src) }; + + for (FNET_Packet * packet : lst) { + FS4Packet_QUEUELEN *ptr = dynamic_cast<FS4Packet_QUEUELEN*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_QUEUELEN, ptr->GetPCODE()); + EXPECT_EQUAL(8u, ptr->GetLength()); + EXPECT_EQUAL(1u, ptr->_queueLen); + EXPECT_EQUAL(2u, ptr->_dispatchers); + + delete ptr; + } +} + +TEST("testQueryResultX") { + FS4Packet_QUERYRESULTX *src = dynamic_cast<FS4Packet_QUERYRESULTX*>(FS4PacketFactory::CreateFS4Packet(PCODE_QUERYRESULTX)); + ASSERT_TRUE(src != NULL); + src->_offset = 1u; + src->_totNumDocs = 2u; + src->_maxRank = (search::HitRank)3; + src->setDistributionKey(4u); + src->_coverageDocs = 6u; + src->_activeDocs = 7u; + uint32_t sortIndex[3] = { 0u, 1u, 3u /* size of data */}; // numDocs + 1 + src->SetSortDataRef(2, sortIndex, "foo"); + src->SetAggrDataRef("bar", 3u); + src->SetGroupDataRef("baz", 3u); + src->AllocateHits(2); + src->_hits[0]._gid = gid0; + src->_hits[0]._metric = (search::HitRank)2; + src->_hits[0]._partid = 3u; + src->_hits[0].setDistributionKey(4u); + src->_hits[1]._gid = gid1; + src->_hits[1]._metric = (search::HitRank)3; + src->_hits[1]._partid = 4u; + 
src->_hits[1].setDistributionKey(5u); + + std::vector<FNET_Packet*> lst; + for (uint32_t i = QRF_MLD, len = (uint32_t)(QRF_GROUPDATA << 1); i < len; ++i) { + if (i & ~FNET_QRF_SUPPORTED_MASK) { + continue; // not supported; + } + src->_features = i; + lst.push_back(testEncodeDecode(*src)); + } + src->_features = (uint32_t)-1; + lst.push_back(src); + + for (FNET_Packet * packet : lst) { + FS4Packet_QUERYRESULTX *ptr = dynamic_cast<FS4Packet_QUERYRESULTX*>(packet); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_QUERYRESULTX, ptr->GetPCODE()); + + EXPECT_EQUAL(1u, ptr->_offset); + EXPECT_EQUAL(2u, ptr->_totNumDocs); + EXPECT_EQUAL((search::HitRank)3, ptr->_maxRank); + EXPECT_EQUAL(4u, ptr->getDistributionKey()); + EXPECT_EQUAL(ptr->_features & QRF_COVERAGE ? 6u : 0u, ptr->_coverageDocs); + EXPECT_EQUAL(ptr->_features & QRF_COVERAGE ? 7u : 0u, ptr->_activeDocs); + if (ptr->_features & QRF_SORTDATA) { + EXPECT_EQUAL(0u, ptr->_sortIndex[0]); + EXPECT_EQUAL(1u, ptr->_sortIndex[1]); + EXPECT_EQUAL(3u, ptr->_sortIndex[2]); + EXPECT_EQUAL("foo", std::string(ptr->_sortData, ptr->_sortIndex[2])); + } else { + EXPECT_EQUAL((void*)NULL, ptr->_sortIndex); + EXPECT_EQUAL((void*)NULL, ptr->_sortData); + } + if (ptr->_features & QRF_AGGRDATA) { + EXPECT_EQUAL("bar", std::string(ptr->_aggrData, ptr->_aggrDataLen)); + } else { + EXPECT_EQUAL(0u, ptr->_aggrDataLen); + EXPECT_EQUAL((void*)NULL, ptr->_aggrData); + } + if (ptr->_features & QRF_GROUPDATA) { + EXPECT_EQUAL("baz", std::string(ptr->_groupData, ptr->_groupDataLen)); + } else { + EXPECT_EQUAL(0u, ptr->_groupDataLen); + EXPECT_EQUAL((void*)NULL, ptr->_groupData); + } + EXPECT_EQUAL(2u, ptr->_numDocs); + for (uint32_t i = 0; i < ptr->_numDocs; ++i) { + EXPECT_EQUAL(i == 0 ? gid0 : gid1, ptr->_hits[i]._gid); + EXPECT_EQUAL((search::HitRank)2 + i, ptr->_hits[i]._metric); + EXPECT_EQUAL(ptr->_features & QRF_MLD ? 3u + i : 0u, ptr->_hits[i]._partid); + EXPECT_EQUAL(ptr->_features & QRF_MLD ? 
4u + i : ptr->getDistributionKey(), ptr->_hits[i].getDistributionKey()); + } + + delete ptr; + } +} + +FS4Packet_QUERYX * +createAndFill_QUERYX() +{ + FS4Packet_QUERYX *src = dynamic_cast<FS4Packet_QUERYX*>(FS4PacketFactory::CreateFS4Packet(PCODE_QUERYX)); + ASSERT_TRUE(src != NULL); + src->_offset = 2u; + src->_maxhits = 3u; + src->setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS)); + EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), src->getTimeout()); + src->setTimeout(fastos::TimeStamp(-4*fastos::TimeStamp::MS)); + EXPECT_EQUAL(0l, src->getTimeout()); + src->setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS)); + EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), src->getTimeout()); + src->_qflags = 5u; + src->setRanking("seven"); + src->_numStackItems = 14u; + src->_propsVector.resize(2); + fillProperties(src->_propsVector[0], "foo", 8); + fillProperties(src->_propsVector[1], "bar", 16); + src->setSortSpec("sortspec"); + src->setAggrSpec("aggrspec"); + src->setGroupSpec("groupspec"); + src->setLocation("location"); + src->setStackDump("stackdump"); + return src; +} + +void +verifyQueryX(FS4Packet_QUERYX & queryX, uint32_t features) +{ + EXPECT_EQUAL((uint32_t)PCODE_QUERYX, queryX.GetPCODE()); + EXPECT_EQUAL(features, queryX._features); + EXPECT_EQUAL(2u, queryX._offset); + EXPECT_EQUAL(3u, queryX._maxhits); + EXPECT_EQUAL(fastos::TimeStamp(4*fastos::TimeStamp::MS), queryX.getTimeout()); + EXPECT_EQUAL(0x5u, queryX._qflags); + if (queryX._features & QF_RANKP) { + EXPECT_EQUAL("seven", queryX._ranking); + } else { + EXPECT_EQUAL("", queryX._ranking); + } + EXPECT_EQUAL(queryX._features & QF_PARSEDQUERY ? 
14u : 0u, queryX._numStackItems); + if (queryX._features & QF_PROPERTIES) { + EXPECT_EQUAL(2u, queryX._propsVector.size()); + testProperties(queryX._propsVector[0], "foo", 8); + testProperties(queryX._propsVector[1], "bar", 16); + } else { + EXPECT_EQUAL(0u, queryX._propsVector.size()); + } + if (queryX._features & QF_SORTSPEC) { + EXPECT_EQUAL("sortspec", queryX._sortSpec); + } else { + EXPECT_EQUAL(0u, queryX._sortSpec.size()); + } + if (queryX._features & QF_AGGRSPEC) { + EXPECT_EQUAL("aggrspec", queryX._aggrSpec); + } else { + EXPECT_EQUAL(0u, queryX._aggrSpec.size()); + } + if (queryX._features & QF_GROUPSPEC) { + EXPECT_EQUAL("groupspec", queryX._groupSpec); + } else { + EXPECT_EQUAL(0u, queryX._groupSpec.size()); + } + if (queryX._features & QF_LOCATION) { + EXPECT_EQUAL("location", queryX._location); + } else { + EXPECT_EQUAL(0u, queryX._location.size()); + } + if (queryX._features & QF_PARSEDQUERY) { + EXPECT_EQUAL("stackdump", queryX._stackDump); + } else { + EXPECT_EQUAL(0u, queryX._stackDump.size()); + } +} + +TEST("testQueryX") { + FS4Packet_QUERYX *src = createAndFill_QUERYX(); + std::vector<std::pair<FNET_Packet*, uint32_t>> lst; + for (uint32_t i = QF_PARSEDQUERY, len = (uint32_t)(QF_GROUPSPEC << 1), skip = 0; i < len; ++i) { + if (!(i & QF_PARSEDQUERY)) { + continue; // skip most + } + if (i & ~FNET_QF_SUPPORTED_MASK) { + continue; // not supported + } + if (++skip % 10) { + continue; // skip most + } + src->_features = i; + lst.emplace_back(testEncodeDecode(*src), i); + } + src->_features = uint32_t(-1); + lst.emplace_back(src, -1); + + for (const auto & pfPair : lst) { + FS4Packet_QUERYX *ptr = dynamic_cast<FS4Packet_QUERYX*>(pfPair.first); + ASSERT_TRUE(ptr != NULL); + verifyQueryX(*ptr, pfPair.second); + + delete ptr; + } +} + +TEST("testSharedPacket") { + FNET_Packet::SP src(createAndFill_QUERYX()); + static_cast<FS4Packet_QUERYX *>(src.get())->_features=FNET_QF_SUPPORTED_MASK; + FNET_Packet::SP decoded(testEncodeDecode(*src)); + 
verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded.get()), FNET_QF_SUPPORTED_MASK); + EXPECT_TRUE(decoded.get() != nullptr); + FS4Packet_Shared shared(decoded); + FNET_Packet::UP decoded2(testEncodeDecode(shared)); + EXPECT_TRUE(decoded2.get() != nullptr); + EXPECT_TRUE(nullptr == dynamic_cast<const FS4Packet_Shared *>(decoded2.get())); + EXPECT_TRUE(nullptr != dynamic_cast<const FS4Packet_QUERYX *>(decoded2.get())); + EXPECT_EQUAL(src->GetLength(), decoded2->GetLength()); + verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded2.get()), FNET_QF_SUPPORTED_MASK); +} + +TEST("test pre serializing packets no compression") { + FNET_Packet::UP src(createAndFill_QUERYX()); + FS4Packet_QUERYX * queryX = static_cast<FS4Packet_QUERYX *>(src.get()); + queryX->_features=FNET_QF_SUPPORTED_MASK; + FNET_Packet::UP decoded(testEncodeDecode(*src)); + verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded.get()), FNET_QF_SUPPORTED_MASK); + EXPECT_EQUAL(512u, src->GetLength()); + EXPECT_EQUAL(src->GetLength(), decoded->GetLength()); + FS4Packet_PreSerialized serialized(*src); + EXPECT_EQUAL(218u, serialized.GetPCODE()); + EXPECT_EQUAL(512u, serialized.GetLength()); + FNET_Packet::UP decoded2(testEncodeDecode(serialized)); + EXPECT_EQUAL(512u, decoded2->GetLength()); + verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded2.get()), FNET_QF_SUPPORTED_MASK); +} + +TEST("test pre serializing packets with compression") { + FNET_Packet::UP src(createAndFill_QUERYX()); + FS4Packet_QUERYX * queryX = static_cast<FS4Packet_QUERYX *>(src.get()); + queryX->_features=FNET_QF_SUPPORTED_MASK; + FNET_Packet::UP decoded(testEncodeDecode(*src)); + verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded.get()), FNET_QF_SUPPORTED_MASK); + EXPECT_EQUAL(512u, src->GetLength()); + EXPECT_EQUAL(src->GetLength(), decoded->GetLength()); + FS4PersistentPacketStreamer::Instance.SetCompressionLimit(100); + FS4Packet_PreSerialized serialized(*src); + EXPECT_EQUAL(218u | (document::CompressionConfig::LZ4 << 24), 
serialized.GetPCODE()); + EXPECT_GREATER_EQUAL(321u, serialized.GetLength()); + FNET_Packet::UP decoded2(testEncodeDecode(serialized)); + EXPECT_EQUAL(512u, decoded2->GetLength()); + verifyQueryX(*static_cast<FS4Packet_QUERYX *>(decoded2.get()), FNET_QF_SUPPORTED_MASK); +} + + +TEST("testGetDocsumsX") { + FS4Packet_GETDOCSUMSX *src = dynamic_cast<FS4Packet_GETDOCSUMSX*>(FS4PacketFactory::CreateFS4Packet(PCODE_GETDOCSUMSX)); + ASSERT_TRUE(src != NULL); + src->setTimeout(fastos::TimeStamp(2*fastos::TimeStamp::MS)); + src->setRanking("four"); + src->_qflags = 5u; + src->_stackItems = 7u; + src->_propsVector.resize(2); + fillProperties(src->_propsVector[0], "foo", 8); + fillProperties(src->_propsVector[1], "bar", 16); + src->setResultClassName("resultclassname"); + src->setStackDump("stackdump"); + src->setLocation("location"); + src->_flags = GDFLAG_IGNORE_ROW; + src->AllocateDocIDs(2); + src->_docid[0]._gid = gid0; + src->_docid[0]._partid = 2u; + src->_docid[1]._gid = gid1; + src->_docid[1]._partid = 3u; + + std::vector<std::pair<FNET_Packet*, uint32_t>> lst; + for (uint32_t i = GDF_MLD, len = (uint32_t)(GDF_FLAGS << 1); i < len; ++i) { + if (i & ~FNET_GDF_SUPPORTED_MASK) { + continue; // not supported + } + src->_features = i; + lst.emplace_back(testEncodeDecode(*src), i); + } + src->_features = uint32_t(-1); + lst.emplace_back(src, uint32_t(-1)); + + for (const auto & pfPair : lst) { + FS4Packet_GETDOCSUMSX *ptr = dynamic_cast<FS4Packet_GETDOCSUMSX*>(pfPair.first); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_GETDOCSUMSX, ptr->GetPCODE()); + EXPECT_EQUAL(pfPair.second, ptr->_features); + EXPECT_EQUAL(fastos::TimeStamp(2*fastos::TimeStamp::MS), ptr->getTimeout()); + if (ptr->_features & GDF_RANKP_QFLAGS) { + EXPECT_EQUAL("four", ptr->_ranking); + } else { + EXPECT_EQUAL("", ptr->_ranking); + } + EXPECT_EQUAL(ptr->_features & GDF_RANKP_QFLAGS ? 5u : 0u, ptr->_qflags); + EXPECT_EQUAL(ptr->_features & GDF_QUERYSTACK ? 
7u : 0u, ptr->_stackItems); + if (ptr->_features & GDF_PROPERTIES) { + EXPECT_EQUAL(2u, ptr->_propsVector.size()); + testProperties(ptr->_propsVector[0], "foo", 8); + testProperties(ptr->_propsVector[1], "bar", 16); + } else { + EXPECT_EQUAL(0u, ptr->_propsVector.size()); + } + if (ptr->_features & GDF_RESCLASSNAME) { + EXPECT_EQUAL("resultclassname", ptr->_resultClassName); + } else { + EXPECT_EQUAL(0u, ptr->_resultClassName.size()); + } + if (ptr->_features & GDF_QUERYSTACK) { + EXPECT_EQUAL("stackdump", ptr->_stackDump); + } else { + EXPECT_EQUAL(0u, ptr->_stackDump.size()); + } + if (ptr->_features & GDF_LOCATION) { + EXPECT_EQUAL("location", ptr->_location); + } else { + EXPECT_EQUAL(0u, ptr->_location.size()); + } + if (ptr->_features & GDF_FLAGS) { + EXPECT_EQUAL(static_cast<uint32_t>(GDFLAG_IGNORE_ROW), + ptr->_flags); + } else { + EXPECT_EQUAL(0u, ptr->_flags); + } + EXPECT_EQUAL(2u, ptr->_docidCnt); + ASSERT_TRUE(ptr->_docid != NULL); + for (uint32_t i = 0; i < ptr->_docidCnt; ++i) { + EXPECT_EQUAL(i == 0u ? gid0 : gid1, ptr->_docid[i]._gid); + EXPECT_EQUAL(ptr->_features & GDF_MLD ? 
2u + i : 0u, ptr->_docid[i]._partid); + } + + delete ptr; + } +} + +TEST("require that FS4PersistentPacketStreamer can compress packets") { + FS4Packet_ERROR *packet = static_cast<FS4Packet_ERROR*>(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR)); + packet->_errorCode = 1u; + packet->setErrorMessage(string(1000, 'a')); + + FS4PersistentPacketStreamer streamer(FS4PacketFactory::CreateFS4Packet); + + FNET_DataBuffer buf1; + streamer.Encode(packet, 1u, &buf1); + EXPECT_EQUAL(1020u, buf1.GetDataLen()); + + streamer.SetCompressionLimit(100); + FNET_DataBuffer buf2; + streamer.Encode(packet, 1u, &buf2); + EXPECT_EQUAL(38u, buf2.GetDataLen()); + + std::vector<FNET_Packet*> lst{ packet, testEncodeDecode(streamer, *packet) }; + + for (FNET_Packet * fnetPacket : lst) { + FS4Packet_ERROR *ptr = dynamic_cast<FS4Packet_ERROR*>(fnetPacket); + ASSERT_TRUE(ptr != NULL); + EXPECT_EQUAL((uint32_t)PCODE_ERROR, ptr->GetPCODE()); + EXPECT_EQUAL(1008u, ptr->GetLength()); + delete ptr; + } +} + +TEST("require that FS4PersistentPacketStreamer can avoid compressing small packets") { + FS4Packet_ERROR *packet = static_cast<FS4Packet_ERROR*>(FS4PacketFactory::CreateFS4Packet(PCODE_ERROR)); + packet->_errorCode = 1u; + packet->setErrorMessage("a"); + + FS4PersistentPacketStreamer streamer(FS4PacketFactory::CreateFS4Packet); + + FNET_DataBuffer buf1; + streamer.Encode(packet, 1u, &buf1); + EXPECT_EQUAL(21u, buf1.GetDataLen()); + + streamer.SetCompressionLimit(10); + FNET_DataBuffer buf2; + streamer.Encode(packet, 1u, &buf2); + EXPECT_EQUAL(21u, buf2.GetDataLen()); + + delete packet; +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/rcuvector/.gitignore b/searchlib/src/tests/common/rcuvector/.gitignore new file mode 100644 index 00000000000..d88533ed6af --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +rcuvector_test +searchlib_rcuvector_test_app diff --git a/searchlib/src/tests/common/rcuvector/CMakeLists.txt 
b/searchlib/src/tests/common/rcuvector/CMakeLists.txt new file mode 100644 index 00000000000..362dbf68dca --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_rcuvector_test_app + SOURCES + rcuvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_rcuvector_test_app COMMAND searchlib_rcuvector_test_app) diff --git a/searchlib/src/tests/common/rcuvector/DESC b/searchlib/src/tests/common/rcuvector/DESC new file mode 100644 index 00000000000..38af6317f80 --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/DESC @@ -0,0 +1 @@ +rcuvector test. Take a look at rcuvector.h for details. diff --git a/searchlib/src/tests/common/rcuvector/FILES b/searchlib/src/tests/common/rcuvector/FILES new file mode 100644 index 00000000000..a8bae8dbd5c --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/FILES @@ -0,0 +1 @@ +rcuvector.h diff --git a/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp b/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp new file mode 100644 index 00000000000..dd50de79f17 --- /dev/null +++ b/searchlib/src/tests/common/rcuvector/rcuvector_test.cpp @@ -0,0 +1,284 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("rcuvector_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/rcuvector.h> + +namespace search { +namespace attribute { + +using vespalib::GenerationHandler; +using vespalib::GenerationHolder; +using vespalib::GenerationHeldBase; + +class Test : public vespalib::TestApp { +private: + bool assertUsage(const MemoryUsage & exp, const MemoryUsage & act); + void testGenerationHolder(); + void testBasic(); + void testResize(); + void testGenerationHandling(); + void testMemoryUsage(); + + void + testShrink(); + void testSmallExpand(); +public: + int Main(); +}; + +bool +Test::assertUsage(const MemoryUsage & exp, const MemoryUsage & act) +{ + bool retval = true; + if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) retval = false; + if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) retval = false; + if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) retval = false; + if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) retval = false; + return retval; +} + +void +Test::testGenerationHolder() +{ + typedef std::unique_ptr<int32_t> IntPtr; + GenerationHolder gh; + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t), + IntPtr(new int32_t(0))))); + gh.transferHoldLists(0); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t), + IntPtr(new int32_t(1))))); + gh.transferHoldLists(1); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t), + IntPtr(new int32_t(2))))); + gh.transferHoldLists(2); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t), + IntPtr(new int32_t(4))))); + gh.transferHoldLists(4); + EXPECT_EQUAL(4u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(0); + EXPECT_EQUAL(4u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(1); + EXPECT_EQUAL(3u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(2); + 
EXPECT_EQUAL(2u * sizeof(int32_t), gh.getHeldBytes()); + gh.hold(GenerationHeldBase::UP(new RcuVectorHeld<int32_t>(sizeof(int32_t), + IntPtr(new int32_t(6))))); + gh.transferHoldLists(6); + EXPECT_EQUAL(3u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(6); + EXPECT_EQUAL(1u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(7); + EXPECT_EQUAL(0u * sizeof(int32_t), gh.getHeldBytes()); + gh.trimHoldLists(7); + EXPECT_EQUAL(0u * sizeof(int32_t), gh.getHeldBytes()); +} + +void +Test::testBasic() +{ + { // insert + RcuVector<int32_t> v(4, 0, 4); + for (int32_t i = 0; i < 100; ++i) { + v.push_back(i); + EXPECT_EQUAL(i, v[i]); + EXPECT_EQUAL((size_t)i + 1, v.size()); + } + for (int32_t i = 0; i < 100; ++i) { + v[i] = i + 1; + EXPECT_EQUAL(i + 1, v[i]); + EXPECT_EQUAL(100u, v.size()); + } + } +} + +void +Test::testResize() +{ + { // resize percent + RcuVector<int32_t> v(2, 50, 0); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_TRUE(v.isFull()); + v.push_back(0); + EXPECT_EQUAL(3u, v.capacity()); + EXPECT_TRUE(v.isFull()); + } + { // resize delta + RcuVector<int32_t> v(1, 0, 3); + EXPECT_EQUAL(1u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(1u, v.capacity()); + EXPECT_TRUE(v.isFull()); + v.push_back(0); + EXPECT_EQUAL(4u, v.capacity()); + EXPECT_TRUE(!v.isFull()); + } + { // resize both + RcuVector<int32_t> v(2, 200, 3); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + v.push_back(0); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_TRUE(v.isFull()); + v.push_back(0); + EXPECT_EQUAL(9u, v.capacity()); + EXPECT_TRUE(!v.isFull()); + } + { // reserve + RcuVector<int32_t> v(2, 0, 0); + EXPECT_EQUAL(2u, v.capacity()); + v.unsafe_reserve(8); + EXPECT_EQUAL(8u, v.capacity()); + } + { // explicit resize + GenerationHolder g; + RcuVectorBase<int8_t> v(g); + v.push_back(1); + v.push_back(2); + g.transferHoldLists(0); + 
g.trimHoldLists(1); + const int8_t *old = &v[0]; + EXPECT_EQUAL(16u, v.capacity()); + EXPECT_EQUAL(2u, v.size()); + v.ensure_size(32, 3); + v[0] = 3; + v[1] = 3; + g.transferHoldLists(1); + EXPECT_EQUAL(1, old[0]); + EXPECT_EQUAL(2, old[1]); + EXPECT_EQUAL(3, v[0]); + EXPECT_EQUAL(3, v[1]); + EXPECT_EQUAL(3, v[2]); + EXPECT_EQUAL(3, v[31]); + EXPECT_EQUAL(64u, v.capacity()); + EXPECT_EQUAL(32u, v.size()); + g.trimHoldLists(2); + } +} + +void +Test::testGenerationHandling() +{ + RcuVector<int32_t> v(2, 0, 2); + v.push_back(0); + v.push_back(10); + EXPECT_EQUAL(0u, v.getMemoryUsage().allocatedBytesOnHold()); + v.push_back(20); // new array + EXPECT_EQUAL(8u, v.getMemoryUsage().allocatedBytesOnHold()); + + v.setGeneration(1); + v.push_back(30); + EXPECT_EQUAL(8u, v.getMemoryUsage().allocatedBytesOnHold()); + v.push_back(40); // new array + EXPECT_EQUAL(24u, v.getMemoryUsage().allocatedBytesOnHold()); + + v.setGeneration(2); + v.push_back(50); + v.removeOldGenerations(3); + EXPECT_EQUAL(0u, v.getMemoryUsage().allocatedBytesOnHold()); + v.push_back(60); // new array + EXPECT_EQUAL(24u, v.getMemoryUsage().allocatedBytesOnHold()); +} + +void +Test::testMemoryUsage() +{ + RcuVector<int8_t> v(2, 0, 2); + EXPECT_TRUE(assertUsage(MemoryUsage(2,0,0,0), v.getMemoryUsage())); + v.push_back(0); + EXPECT_TRUE(assertUsage(MemoryUsage(2,1,0,0), v.getMemoryUsage())); + v.push_back(1); + EXPECT_TRUE(assertUsage(MemoryUsage(2,2,0,0), v.getMemoryUsage())); + v.push_back(2); + EXPECT_TRUE(assertUsage(MemoryUsage(4,3,0,2), v.getMemoryUsage())); + v.push_back(3); + EXPECT_TRUE(assertUsage(MemoryUsage(4,4,0,2), v.getMemoryUsage())); + v.push_back(4); + EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,6), v.getMemoryUsage())); + v.removeOldGenerations(1); + EXPECT_TRUE(assertUsage(MemoryUsage(6,5,0,0), v.getMemoryUsage())); +} + + +void +Test::testShrink() +{ + GenerationHolder g; + RcuVectorBase<int8_t> v(g); + v.push_back(1); + v.push_back(2); + v.push_back(3); + v.push_back(4); + 
g.transferHoldLists(0); + g.trimHoldLists(1); + MemoryUsage mu; + mu = v.getMemoryUsage(); + mu.incAllocatedBytesOnHold(g.getHeldBytes()); + EXPECT_TRUE(assertUsage(MemoryUsage(16, 4, 0, 0), mu)); + EXPECT_EQUAL(4u, v.size()); + EXPECT_TRUE(v.capacity() >= 4u); + EXPECT_EQUAL(1, v[0]); + EXPECT_EQUAL(2, v[1]); + EXPECT_EQUAL(3, v[2]); + EXPECT_EQUAL(4, v[3]); + const int8_t *old = &v[0]; + v.shrink(2); + g.transferHoldLists(1); + EXPECT_EQUAL(2u, v.size()); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_EQUAL(1, v[0]); + EXPECT_EQUAL(2, v[1]); + EXPECT_EQUAL(1, old[0]); + EXPECT_EQUAL(2, old[1]); + g.trimHoldLists(2); + EXPECT_EQUAL(1, v[0]); + EXPECT_EQUAL(2, v[1]); + mu = v.getMemoryUsage(); + mu.incAllocatedBytesOnHold(g.getHeldBytes()); + EXPECT_TRUE(assertUsage(MemoryUsage(2, 2, 0, 0), mu)); +} + +void +Test::testSmallExpand() +{ + GenerationHolder g; + RcuVectorBase<int8_t> v(1, 50, 0, g); + EXPECT_EQUAL(1u, v.capacity()); + EXPECT_EQUAL(0u, v.size()); + v.push_back(1); + EXPECT_EQUAL(1u, v.capacity()); + EXPECT_EQUAL(1u, v.size()); + v.push_back(2); + EXPECT_EQUAL(2u, v.capacity()); + EXPECT_EQUAL(2u, v.size()); + g.transferHoldLists(1); + g.trimHoldLists(2); +} + + +int +Test::Main() +{ + TEST_INIT("rcuvector_test"); + + testGenerationHolder(); + testBasic(); + testResize(); + testGenerationHandling(); + testMemoryUsage(); + testShrink(); + testSmallExpand(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::attribute::Test); diff --git a/searchlib/src/tests/common/resultset/.gitignore b/searchlib/src/tests/common/resultset/.gitignore new file mode 100644 index 00000000000..41242fde289 --- /dev/null +++ b/searchlib/src/tests/common/resultset/.gitignore @@ -0,0 +1 @@ +searchlib_resultset_test_app diff --git a/searchlib/src/tests/common/resultset/CMakeLists.txt b/searchlib/src/tests/common/resultset/CMakeLists.txt new file mode 100644 index 00000000000..0aed46f6e89 --- /dev/null +++ b/searchlib/src/tests/common/resultset/CMakeLists.txt @@ -0,0 +1,8 @@ +# 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_resultset_test_app + SOURCES + resultset_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_resultset_test_app COMMAND searchlib_resultset_test_app) diff --git a/searchlib/src/tests/common/resultset/resultset_test.cpp b/searchlib/src/tests/common/resultset/resultset_test.cpp new file mode 100644 index 00000000000..983dc10b914 --- /dev/null +++ b/searchlib/src/tests/common/resultset/resultset_test.cpp @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for resultset. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("resultset_test"); + +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/common/resultset.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/arraysize.h> + +using namespace search; +using vespalib::arraysize; + +namespace { + +void concatenate(const ResultSet *input_array[], size_t array_size, + ResultSet &output) +{ + size_t hit_count = 0; + for (size_t i = 0; i < array_size; ++i) { + hit_count += input_array[i]->getArrayUsed(); + } + output.allocArray(hit_count); + RankedHit *p = output.getArray(); + for (size_t i = 0; i < array_size; ++i) { + const ResultSet &set = *input_array[i]; + memcpy(p, set.getArray(), set.getArrayUsed() * sizeof(RankedHit)); + p += set.getArrayUsed(); + if (set.getBitOverflow()) { + if (output.getBitOverflow()) { + output.getBitOverflow()->orWith(*set.getBitOverflow()); + } else { + output.setBitOverflow(BitVector::create(*set.getBitOverflow())); + } + } + } + output.setArrayUsed(hit_count); +} + + +void addHit(ResultSet &set, unsigned int doc_id, double rank) { + if (set.getArrayAllocated() == 0) { + set.allocArray(10); + } + ASSERT_LESS(set.getArrayUsed(), set.getArrayAllocated()); + RankedHit 
*hit_array = set.getArray(); + hit_array[set.getArrayUsed()]._docId = doc_id; + hit_array[set.getArrayUsed()]._rankValue = rank; + set.setArrayUsed(set.getArrayUsed() + 1); +} + +TEST("require that mergeWithOverflow works") { + ResultSet set1; + addHit(set1, 2, 4.2); + addHit(set1, 4, 3.2); + BitVector::UP bit_vector = BitVector::create(20); + bit_vector->setBit(2); + bit_vector->setBit(4); + bit_vector->setBit(7); + bit_vector->invalidateCachedCount(); + set1.setBitOverflow(std::move(bit_vector)); + EXPECT_EQUAL(3u, set1.getNumHits()); + set1.mergeWithBitOverflow(); + EXPECT_EQUAL(3u, set1.getNumHits()); +} + +TEST("require that resultsets can be concatenated") { + ResultSet set1; + addHit(set1, 2, 4.2); + addHit(set1, 4, 3.2); + BitVector::UP bit_vector = BitVector::create(20); + bit_vector->setBit(7); + set1.setBitOverflow(std::move(bit_vector)); + + ResultSet set2; + addHit(set2, 12, 4.2); + addHit(set2, 14, 3.2); + bit_vector = BitVector::create(20); + bit_vector->setBit(17); + set2.setBitOverflow(std::move(bit_vector)); + + const ResultSet *sets[] = { &set1, &set2 }; + ResultSet target; + concatenate(sets, arraysize(sets), target); + + EXPECT_EQUAL(4u, target.getArrayAllocated()); + ASSERT_EQUAL(4u, target.getArrayUsed()); + EXPECT_EQUAL(2u, target.getArray()[0]._docId); + EXPECT_EQUAL(4.2, target.getArray()[0]._rankValue); + EXPECT_EQUAL(4u, target.getArray()[1]._docId); + EXPECT_EQUAL(3.2, target.getArray()[1]._rankValue); + EXPECT_EQUAL(12u, target.getArray()[2]._docId); + EXPECT_EQUAL(4.2, target.getArray()[2]._rankValue); + EXPECT_EQUAL(14u, target.getArray()[3]._docId); + EXPECT_EQUAL(3.2, target.getArray()[3]._rankValue); + + BitVector * bv = target.getBitOverflow(); + ASSERT_TRUE(bv); + EXPECT_EQUAL(20u, bv->size()); + EXPECT_EQUAL(7u, bv->getNextTrueBit(0)); + EXPECT_EQUAL(17u, bv->getNextTrueBit(8)); + EXPECT_EQUAL(20u, bv->getNextTrueBit(18)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore b/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore new file mode 100644 index 00000000000..35d038b0b7c --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/.gitignore @@ -0,0 +1 @@ +searchlib_sequencedtaskexecutor_test_app diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt b/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt new file mode 100644 index 00000000000..501fd3b07f1 --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sequencedtaskexecutor_test_app + SOURCES + sequencedtaskexecutor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sequencedtaskexecutor_test_app COMMAND searchlib_sequencedtaskexecutor_test_app) diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/DESC b/searchlib/src/tests/common/sequencedtaskexecutor/DESC new file mode 100644 index 00000000000..29ac00d3453 --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/DESC @@ -0,0 +1 @@ +sequencedtaskexecutor test. Take a look at sequencedtaskexecutor_test.cpp for details. diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/FILES b/searchlib/src/tests/common/sequencedtaskexecutor/FILES new file mode 100644 index 00000000000..a8ebec0ebca --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/FILES @@ -0,0 +1 @@ +sequencedtaskexecutor_test.cpp diff --git a/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp new file mode 100644 index 00000000000..98436364ea0 --- /dev/null +++ b/searchlib/src/tests/common/sequencedtaskexecutor/sequencedtaskexecutor_test.cpp @@ -0,0 +1,194 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("sequencedtaskexecutor_test"); +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/test/insertion_operators.h> + +#include <mutex> +#include <condition_variable> + +namespace search +{ + +namespace common +{ + + +class Fixture +{ +public: + SequencedTaskExecutor _threads; + + Fixture() + : _threads(2) + { + } +}; + + +class TestObj +{ +public: + std::mutex _m; + std::condition_variable _cv; + int _done; + int _fail; + int _val; + + TestObj() + : _m(), + _cv(), + _done(0), + _fail(0), + _val(0) + { + } + + void + modify(int oldValue, int newValue) + { + { + std::lock_guard<std::mutex> guard(_m); + if (_val == oldValue) { + _val = newValue; + } else { + ++_fail; + } + ++_done; + } + _cv.notify_all(); + } + + void + wait(int wantDone) + { + std::unique_lock<std::mutex> guard(_m); + _cv.wait(guard, [=] { return this->_done >= wantDone; }); + } +}; + +TEST_F("testExecute", Fixture) { + std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(1, [=]() { tv->modify(0, 42); }); + tv->wait(1); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + + +TEST_F("require that task with same id are serialized", Fixture) +{ + std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(0, [=]() { tv->modify(14, 42); }); + tv->wait(2); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + +TEST_F("require that task with different ids are not serialized", Fixture) +{ + int tryCnt = 0; + for (tryCnt = 0; 
tryCnt < 100; ++tryCnt) { + std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute(0, [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute(2, [=]() { tv->modify(14, 42); }); + tv->wait(2); + if (tv->_fail != 1) { + continue; + } + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + break; + } + EXPECT_TRUE(tryCnt < 100); +} + + +TEST_F("require that task with same string id are serialized", Fixture) +{ + std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); + EXPECT_EQUAL(0, tv->_val); + auto test2 = [=]() { tv->modify(14, 42); }; + f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute("0", test2); + tv->wait(2); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(0, tv->_fail); + EXPECT_EQUAL(42, tv->_val); +} + +TEST_F("require that task with different string ids are not serialized", + Fixture) +{ + int tryCnt = 0; + for (tryCnt = 0; tryCnt < 100; ++tryCnt) { + std::shared_ptr<TestObj> tv(std::make_shared<TestObj>()); + EXPECT_EQUAL(0, tv->_val); + f._threads.execute("0", [=]() { usleep(2000); tv->modify(0, 14); }); + f._threads.execute("2", [=]() { tv->modify(14, 42); }); + tv->wait(2); + if (tv->_fail != 1) { + continue; + } + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + f._threads.sync(); + EXPECT_EQUAL(1, tv->_fail); + EXPECT_EQUAL(14, tv->_val); + break; + } + EXPECT_TRUE(tryCnt < 100); +} + + +TEST_F("require that execute works with const lambda", Fixture) +{ + int i = 5; + std::vector<int> res; + const auto lambda = [i, &res]() mutable + { res.push_back(i--); res.push_back(i--); }; + f._threads.execute(0, lambda); + f._threads.execute(0, lambda); + f._threads.sync(); + std::vector<int> exp({5, 4, 5, 4}); + EXPECT_EQUAL(exp, res); + EXPECT_EQUAL(5, i); +} + +TEST_F("require that execute works with reference 
to lambda", Fixture) +{ + int i = 5; + std::vector<int> res; + auto lambda = [i, &res]() mutable + { res.push_back(i--); res.push_back(i--); }; + auto &lambdaref = lambda; + f._threads.execute(0, lambdaref); + f._threads.execute(0, lambdaref); + f._threads.sync(); + std::vector<int> exp({5, 4, 5, 4}); + EXPECT_EQUAL(exp, res); + EXPECT_EQUAL(5, i); +} + + +} // namespace common +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/common/summaryfeatures/.gitignore b/searchlib/src/tests/common/summaryfeatures/.gitignore new file mode 100644 index 00000000000..543319fb8dd --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +summaryfeatures_test +searchlib_summaryfeatures_test_app diff --git a/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt b/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt new file mode 100644 index 00000000000..3b6cb392615 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_summaryfeatures_test_app + SOURCES + summaryfeatures.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_summaryfeatures_test_app COMMAND searchlib_summaryfeatures_test_app) diff --git a/searchlib/src/tests/common/summaryfeatures/DESC b/searchlib/src/tests/common/summaryfeatures/DESC new file mode 100644 index 00000000000..9cc24928a82 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/DESC @@ -0,0 +1 @@ +summaryfeatures test. Take a look at summaryfeatures.cpp for details. 
diff --git a/searchlib/src/tests/common/summaryfeatures/FILES b/searchlib/src/tests/common/summaryfeatures/FILES new file mode 100644 index 00000000000..19692b59229 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/FILES @@ -0,0 +1 @@ +summaryfeatures.cpp diff --git a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp new file mode 100644 index 00000000000..6d4e8bc49c8 --- /dev/null +++ b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("summaryfeatures_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/featureset.h> + +using namespace search; + +TEST_SETUP(Test); + +int +Test::Main() +{ + TEST_INIT("summaryfeatures_test"); + { + FeatureSet sf; + EXPECT_EQUAL(sf.getNames().size(), 0u); + EXPECT_EQUAL(sf.numFeatures(), 0u); + EXPECT_EQUAL(sf.numDocs(), 0u); + EXPECT_TRUE(sf.getFeaturesByIndex(0) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(0) == 0); + std::vector<uint32_t> docs; + EXPECT_TRUE(sf.contains(docs)); + docs.push_back(1); + EXPECT_TRUE(!sf.contains(docs)); + } + { + FeatureSet::StringVector n; + n.push_back("f1"); + n.push_back("f2"); + n.push_back("f3"); + + FeatureSet sf(n, 5); + EXPECT_EQUAL(sf.getNames().size(), 3u); + EXPECT_EQUAL(sf.getNames()[0], "f1"); + EXPECT_EQUAL(sf.getNames()[1], "f2"); + EXPECT_EQUAL(sf.getNames()[2], "f3"); + EXPECT_EQUAL(sf.numFeatures(), 3u); + EXPECT_EQUAL(sf.numDocs(), 0u); + EXPECT_EQUAL(sf.addDocId(10), 0u); + EXPECT_EQUAL(sf.addDocId(20), 1u); + EXPECT_EQUAL(sf.addDocId(30), 2u); + EXPECT_EQUAL(sf.addDocId(40), 3u); + EXPECT_EQUAL(sf.addDocId(50), 4u); + EXPECT_EQUAL(sf.numDocs(), 5u); + feature_t *f; + const feature_t *cf; + f = sf.getFeaturesByIndex(0); + ASSERT_TRUE(f != 0); + 
f[0] = 11.0; + f[1] = 12.0; + f[2] = 13.0; + f = sf.getFeaturesByIndex(1); + ASSERT_TRUE(f != 0); + f[0] = 21.0; + f[1] = 22.0; + f[2] = 23.0; + f = sf.getFeaturesByIndex(2); + ASSERT_TRUE(f != 0); + f[0] = 31.0; + f[1] = 32.0; + f[2] = 33.0; + f = sf.getFeaturesByIndex(3); + ASSERT_TRUE(f != 0); + f[0] = 41.0; + f[1] = 42.0; + f[2] = 43.0; + f = sf.getFeaturesByIndex(4); + ASSERT_TRUE(f != 0); + f[0] = 51.0; + f[1] = 52.0; + f[2] = 53.0; + EXPECT_TRUE(sf.getFeaturesByIndex(5) == 0); + { + std::vector<uint32_t> docs; + EXPECT_TRUE(sf.contains(docs)); + } + { + std::vector<uint32_t> docs; + docs.push_back(1); + EXPECT_TRUE(!sf.contains(docs)); + } + { + std::vector<uint32_t> docs; + docs.push_back(31); + EXPECT_TRUE(!sf.contains(docs)); + } + { + std::vector<uint32_t> docs; + docs.push_back(51); + EXPECT_TRUE(!sf.contains(docs)); + } + { + std::vector<uint32_t> docs; + docs.push_back(20); + docs.push_back(40); + EXPECT_TRUE(sf.contains(docs)); + } + { + std::vector<uint32_t> docs; + docs.push_back(10); + docs.push_back(20); + docs.push_back(30); + docs.push_back(40); + docs.push_back(50); + EXPECT_TRUE(sf.contains(docs)); + } + { + cf = sf.getFeaturesByDocId(10); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 11.0, 10e-6); + EXPECT_APPROX(cf[1], 12.0, 10e-6); + EXPECT_APPROX(cf[2], 13.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(20); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 21.0, 10e-6); + EXPECT_APPROX(cf[1], 22.0, 10e-6); + EXPECT_APPROX(cf[2], 23.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(30); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 31.0, 10e-6); + EXPECT_APPROX(cf[1], 32.0, 10e-6); + EXPECT_APPROX(cf[2], 33.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(40); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 41.0, 10e-6); + EXPECT_APPROX(cf[1], 42.0, 10e-6); + EXPECT_APPROX(cf[2], 43.0, 10e-6); + } + { + cf = sf.getFeaturesByDocId(50); + ASSERT_TRUE(cf != 0); + EXPECT_APPROX(cf[0], 51.0, 10e-6); + EXPECT_APPROX(cf[1], 52.0, 10e-6); + 
EXPECT_APPROX(cf[2], 53.0, 10e-6); + } + EXPECT_TRUE(sf.getFeaturesByDocId(5) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(15) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(25) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(35) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(45) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(55) == 0); + } + TEST_DONE(); +} diff --git a/searchlib/src/tests/create-test.sh b/searchlib/src/tests/create-test.sh new file mode 100755 index 00000000000..d2bc3ded67b --- /dev/null +++ b/searchlib/src/tests/create-test.sh @@ -0,0 +1,52 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +gen_project_file() { + echo "generating '$1' ..." + echo "APPLICATION ${test}_test" > $1 + echo "OBJS ${test}_test" >> $1 + echo "LIBS searchlib/searchlib" >> $1 + echo "EXTERNALLIBS searchcommon persistencetypes metrics" >> $1 + echo "" >> $1 + echo "CUSTOMMAKE" >> $1 + echo "test: all" >> $1 + echo -e "\t\$(HIDE) \$(LDL) \$(VALGRIND) ./${test}_test" >> $1 +} + +gen_source() { + echo "generating '$1' ..." + echo "#include <vespa/vespalib/testkit/test_kit.h>" >> $1 + echo "" >> $1 + echo "// using namespace search;" >> $1 + echo "" >> $1 + echo "TEST(\"require something\") {" >> $1 + echo "}" >> $1 + echo "" >> $1 + echo "TEST_MAIN() { TEST_RUN_ALL(); }" >> $1 +} + +gen_file_list() { + echo "generating '$1' ..." + echo "${test}_test.cpp" > $1 +} + +if [ $# -ne 1 ]; then + echo "usage: $0 <name>" + echo " name: name of the test to create" + exit 1 +fi + +test=$1 +if [ -e $test ]; then + echo "$test already present, don't want to mess it up..." + exit 1 +fi + +echo "creating directory '$test' ..." 
+mkdir -p $test || exit 1 +cd $test || exit 1 +test=`basename $test` + +gen_project_file fastos.project +gen_source ${test}_test.cpp +gen_file_list FILES diff --git a/searchlib/src/tests/datastore/.gitignore b/searchlib/src/tests/datastore/.gitignore new file mode 100644 index 00000000000..0f6b605a280 --- /dev/null +++ b/searchlib/src/tests/datastore/.gitignore @@ -0,0 +1,8 @@ +*.So +*_test +.depend* +Makefile +vlog1.txt +vlog2.txt +vlog3.txt +searchlib_logdatastore_test_app diff --git a/searchlib/src/tests/datastore/CMakeLists.txt b/searchlib/src/tests/datastore/CMakeLists.txt new file mode 100644 index 00000000000..b10bc4d4e09 --- /dev/null +++ b/searchlib/src/tests/datastore/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_logdatastore_test_app + SOURCES + logdatastore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_logdatastore_test_app COMMAND sh logdatastore_test.sh) diff --git a/searchlib/src/tests/datastore/DESC b/searchlib/src/tests/datastore/DESC new file mode 100644 index 00000000000..f035e6aecfb --- /dev/null +++ b/searchlib/src/tests/datastore/DESC @@ -0,0 +1 @@ +Tests behavior of class search::DataStore from <searchlib/docstore/datastore.h> diff --git a/searchlib/src/tests/datastore/FILES b/searchlib/src/tests/datastore/FILES new file mode 100644 index 00000000000..6bfee2917f4 --- /dev/null +++ b/searchlib/src/tests/datastore/FILES @@ -0,0 +1 @@ +datastore.cpp diff --git a/searchlib/src/tests/datastore/bad.dat b/searchlib/src/tests/datastore/bad.dat Binary files differnew file mode 100644 index 00000000000..1bf7a93a2f8 --- /dev/null +++ b/searchlib/src/tests/datastore/bad.dat diff --git a/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat Binary files differnew file mode 100644 index 00000000000..dfeedf08029 --- /dev/null +++ 
b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.dat diff --git a/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx Binary files differnew file mode 100644 index 00000000000..883a5265afe --- /dev/null +++ b/searchlib/src/tests/datastore/bug-7257706/1422358701368384000.idx diff --git a/searchlib/src/tests/datastore/dangling/1425506005745465000.dat b/searchlib/src/tests/datastore/dangling/1425506005745465000.dat Binary files differnew file mode 100644 index 00000000000..cb202f8d72a --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/1425506005745465000.dat diff --git a/searchlib/src/tests/datastore/dangling/1425506005745465000.idx b/searchlib/src/tests/datastore/dangling/1425506005745465000.idx Binary files differnew file mode 100644 index 00000000000..0fc41cdf9e0 --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/1425506005745465000.idx diff --git a/searchlib/src/tests/datastore/dangling/2425506005745465000.dat b/searchlib/src/tests/datastore/dangling/2425506005745465000.dat Binary files differnew file mode 100644 index 00000000000..cb202f8d72a --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/2425506005745465000.dat diff --git a/searchlib/src/tests/datastore/dangling/2425506005745465000.idx b/searchlib/src/tests/datastore/dangling/2425506005745465000.idx Binary files differnew file mode 100644 index 00000000000..0fc41cdf9e0 --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/2425506005745465000.idx diff --git a/searchlib/src/tests/datastore/dangling/3425506005745465000.dat b/searchlib/src/tests/datastore/dangling/3425506005745465000.dat Binary files differnew file mode 100644 index 00000000000..cb202f8d72a --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/3425506005745465000.dat diff --git a/searchlib/src/tests/datastore/dangling/4425506005745465000.dat b/searchlib/src/tests/datastore/dangling/4425506005745465000.dat Binary 
files differnew file mode 100644 index 00000000000..cb202f8d72a --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/4425506005745465000.dat diff --git a/searchlib/src/tests/datastore/dangling/4425506005745465000.idx b/searchlib/src/tests/datastore/dangling/4425506005745465000.idx new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchlib/src/tests/datastore/dangling/4425506005745465000.idx diff --git a/searchlib/src/tests/datastore/datastore.dat b/searchlib/src/tests/datastore/datastore.dat Binary files differnew file mode 100644 index 00000000000..34d6ed1392f --- /dev/null +++ b/searchlib/src/tests/datastore/datastore.dat diff --git a/searchlib/src/tests/datastore/logdatastore_test.cpp b/searchlib/src/tests/datastore/logdatastore_test.cpp new file mode 100644 index 00000000000..776e6b25533 --- /dev/null +++ b/searchlib/src/tests/datastore/logdatastore_test.cpp @@ -0,0 +1,468 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("datastore_test"); + +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/docstore/logdatastore.h> +#include <vespa/searchlib/docstore/chunkformats.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <iostream> + +#include <vespa/vespalib/util/exceptions.h> + +class MyTlSyncer : public search::transactionlog::SyncProxy { + search::SerialNum _syncedTo; +public: + MyTlSyncer(void) : _syncedTo(0) { } + + void sync(search::SerialNum syncTo) { + _syncedTo = syncTo; + } +}; + +using namespace search; +using search::index::DummyFileHeaderContext; + +namespace { + +void +showStats(const DataStoreStorageStats &stats) +{ + fprintf(stdout, + "Storage stats usage=%9lu bloat=%9lu" + " lastSerial=%9lu lastFlushedSerial=%9lu" + " maxBucketSpread=%6.2f\n", + stats.diskUsage(), stats.diskBloat(), + stats.lastSerialNum(), stats.lastFlushedSerialNum(), + stats.maxBucketSpread()); + fflush(stdout); +} + +void +showChunks(const std::vector<DataStoreFileChunkStats> &chunkStats) +{ + fprintf(stdout, "Number of chunks is %zu\n", chunkStats.size()); + for (const auto &chunk : chunkStats) { + fprintf(stdout, + "Chunk %019lu usage=%9lu bloat=%9lu" + " lastSerial=%9lu lastFlushedSerial=%9lu" + " bucketSpread=%6.2f\n", + chunk.nameId(), chunk.diskUsage(), chunk.diskBloat(), + chunk.lastSerialNum(), chunk.lastFlushedSerialNum(), + chunk.maxBucketSpread()); + } + fflush(stdout); +} + +SerialNum +calcLastSerialNum(const std::vector<DataStoreFileChunkStats> &chunkStats) +{ + SerialNum lastSerialNum = 0u; + for (const auto &chunk : chunkStats) { + lastSerialNum = std::max(lastSerialNum, chunk.lastSerialNum()); + } + return lastSerialNum; +} + +SerialNum +calcLastFlushedSerialNum(const std::vector<DataStoreFileChunkStats> &chunkStats) +{ + SerialNum lastFlushedSerialNum = 0u; + for (const auto &chunk : chunkStats) { + lastFlushedSerialNum = std::max(lastFlushedSerialNum, + 
chunk.lastFlushedSerialNum()); + } + return lastFlushedSerialNum; +} + +uint64_t +calcDiskUsage(const std::vector<DataStoreFileChunkStats> &chunkStats) +{ + uint64_t diskUsage = 0u; + for (const auto &chunk : chunkStats) { + diskUsage += chunk.diskUsage(); + } + return diskUsage; +} + +uint64_t +calcDiskBloat(const std::vector<DataStoreFileChunkStats> &chunkStats) +{ + uint64_t diskBloat = 0u; + for (const auto &chunk : chunkStats) { + diskBloat += chunk.diskBloat(); + } + return diskBloat; +} + +void +checkStats(IDataStore &store, + SerialNum expLastSerial, SerialNum expLastFlushedSerial) +{ + DataStoreStorageStats storageStats(store.getStorageStats()); + std::vector<DataStoreFileChunkStats> chunkStats; + chunkStats = store.getFileChunkStats(); + showStats(storageStats); + showChunks(chunkStats); + EXPECT_EQUAL(expLastSerial, storageStats.lastSerialNum()); + EXPECT_EQUAL(expLastFlushedSerial, storageStats.lastFlushedSerialNum()); + EXPECT_EQUAL(storageStats.lastSerialNum(), calcLastSerialNum(chunkStats)); + EXPECT_EQUAL(storageStats.lastFlushedSerialNum(), + calcLastFlushedSerialNum(chunkStats)); + EXPECT_EQUAL(storageStats.diskUsage(), + calcDiskUsage(chunkStats)); + EXPECT_EQUAL(storageStats.diskBloat(), calcDiskBloat(chunkStats)); +} + + +} + +TEST("testThatLidInfoOrdersFileChunkSize") { + EXPECT_TRUE(LidInfo(1, 1, 1) == LidInfo(1, 1, 1)); + EXPECT_FALSE(LidInfo(1, 1, 1) < LidInfo(1, 1, 1)); + + EXPECT_FALSE(LidInfo(1, 1, 1) == LidInfo(2, 1, 1)); + EXPECT_TRUE(LidInfo(1, 1, 1) < LidInfo(2, 1, 1)); + EXPECT_TRUE(LidInfo(1, 2, 1) < LidInfo(2, 1, 1)); + EXPECT_TRUE(LidInfo(1, 1, 2) < LidInfo(2, 1, 1)); +} + +TEST("testGrowing") { + FastOS_File::EmptyAndRemoveDirectory("growing"); + EXPECT_TRUE(FastOS_File::MakeDirectory("growing")); + LogDataStore::Config config(100000, 0.1, 3.0, 0.2, 8, true, + WriteableFileChunk::Config( + document::CompressionConfig( + document::CompressionConfig:: + LZ4, 9, 60), + 1000, + 20)); + vespalib::ThreadStackExecutor 
executor(config.getNumThreads(), 128*1024); + DummyFileHeaderContext fileHeaderContext; + MyTlSyncer tlSyncer; + { + LogDataStore datastore(executor, + "growing", + config, + GrowStrategy(), + TuneFileSummary(), + fileHeaderContext, + tlSyncer, + NULL); + srand(7); + char buffer[12000]; + SerialNum lastSyncToken(0); + for (size_t i(0); i < sizeof(buffer); i++) { + buffer[i] = rand() & 0xff; + } + for (size_t i(1); i < 10000; i++) { + long r = rand()%10000; + assert(i > lastSyncToken); + lastSyncToken = i; + datastore.write(i, i, &buffer[r], uint8_t(buffer[r])*4); + } + datastore.flush(datastore.initFlush(lastSyncToken)); + for (size_t i(1); i < 200; i++) { + assert(i + 20000 > lastSyncToken); + lastSyncToken = i + 20000; + datastore.remove(i + 20000, i); + } + for (size_t i(201); i < 2000; i+= 2) { + assert(i + 20000 > lastSyncToken); + lastSyncToken = i + 20000; + datastore.remove(i + 20000, i); + } + datastore.flush(datastore.initFlush(lastSyncToken)); + datastore.compact(30000); + datastore.remove(31000, 0); + checkStats(datastore, 31000, 30000); + } + { + LogDataStore datastore(executor, + "growing", + config, + GrowStrategy(), + TuneFileSummary(), + fileHeaderContext, + tlSyncer, + NULL); + checkStats(datastore, 30000, 30000); + } + + FastOS_File::EmptyAndRemoveDirectory("growing"); +} + +void fetchAndTest(IDataStore & datastore, uint32_t lid, const void *a, size_t sz) +{ + vespalib::DataBuffer buf; + EXPECT_EQUAL(static_cast<ssize_t>(sz), datastore.read(lid, buf)); + EXPECT_EQUAL(buf.getDataLen(), sz); + EXPECT_TRUE(memcmp(a, buf.getData(), sz) == 0); +} + +TEST("testTruncatedIdxFile"){ + LogDataStore::Config config; + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + { + // Files comes from the 'growing test'. 
+ LogDataStore datastore(executor, "bug-7257706", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(354ul, datastore.lastSyncToken()); + } + { + LogDataStore datastore(executor, "bug-7257706-truncated", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(331ul, datastore.lastSyncToken()); + } + { + LogDataStore datastore(executor, "bug-7257706-truncated", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(331ul, datastore.lastSyncToken()); + } +} + +TEST("testThatEmptyIdxFilesAndDanglingDatFilesAreRemoved") { + LogDataStore::Config config; + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "dangling-test", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + EXPECT_EQUAL(354ul, datastore.lastSyncToken()); + EXPECT_EQUAL(4096u + 480u, datastore.getDiskHeaderFootprint()); + EXPECT_EQUAL(datastore.getDiskHeaderFootprint() + 94016u, datastore.getDiskFootprint()); +} + +TEST("testWriteRead") { + FastOS_File::RemoveDirectory("empty"); + const char * bufA = "aaaaaaaaaaaaaaaaaaaaa"; + const char * bufB = "bbbbbbbbbbbbbbbb"; + const vespalib::ConstBufferRef a[2] = { vespalib::ConstBufferRef(bufA, strlen(bufA)), vespalib::ConstBufferRef(bufB, strlen(bufB))}; + LogDataStore::Config config; + { + EXPECT_TRUE(FastOS_File::MakeDirectory("empty")); + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "empty", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + ASSERT_TRUE(datastore.lastSyncToken() == 0); + size_t headerFootprint = datastore.getDiskHeaderFootprint(); + EXPECT_LESS(0u, headerFootprint); + 
EXPECT_EQUAL(datastore.getDiskFootprint(), headerFootprint); + EXPECT_EQUAL(datastore.getDiskBloat(), 0ul); + EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul); + datastore.write(1, 0, a[0].c_str(), a[0].size()); + fetchAndTest(datastore, 0, a[0].c_str(), a[0].size()); + datastore.write(2, 0, a[1].c_str(), a[1].size()); + fetchAndTest(datastore, 0, a[1].c_str(), a[1].size()); + fetchAndTest(datastore, 1, NULL, 0); + datastore.remove(3, 0); + fetchAndTest(datastore, 0, "", 0); + + SerialNum lastSyncToken(0); + for(size_t i=0; i < 100; i++) { + datastore.write(i+4, i, a[i%2].c_str(), a[i%2].size()); + assert(i +4 > lastSyncToken); + lastSyncToken = i + 4; + fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); + } + for(size_t i=0; i < 100; i++) { + fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); + } + EXPECT_EQUAL(datastore.getDiskFootprint(), + 2711ul + headerFootprint); + EXPECT_EQUAL(datastore.getDiskBloat(), 0ul); + EXPECT_EQUAL(datastore.getMaxCompactGain(), 0ul); + datastore.flush(datastore.initFlush(lastSyncToken)); + } + { + DummyFileHeaderContext fileHeaderContext; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore datastore(executor, "empty", config, + GrowStrategy(), TuneFileSummary(), + fileHeaderContext, tlSyncer, NULL); + size_t headerFootprint = datastore.getDiskHeaderFootprint(); + EXPECT_LESS(0u, headerFootprint); + EXPECT_EQUAL(4944ul + headerFootprint, datastore.getDiskFootprint()); + EXPECT_EQUAL(0ul, datastore.getDiskBloat()); + EXPECT_EQUAL(0ul, datastore.getMaxCompactGain()); + + for(size_t i=0; i < 100; i++) { + fetchAndTest(datastore, i, a[i%2].c_str(), a[i%2].size()); + } + for(size_t i=0; i < 100; i++) { + datastore.write(i+3+100, i, a[(i+1)%2].c_str(), a[(i+1)%2].size()); + fetchAndTest(datastore, i, a[(i+1)%2].c_str(), a[(i+1)%2].size()); + } + for(size_t i=0; i < 100; i++) { + fetchAndTest(datastore, i, a[(i+1)%2].c_str(), a[(i+1)%2].size()); + } + + 
EXPECT_EQUAL(7594ul + headerFootprint, datastore.getDiskFootprint()); + EXPECT_EQUAL(0ul, datastore.getDiskBloat()); + EXPECT_EQUAL(0ul, datastore.getMaxCompactGain()); + } + FastOS_File::EmptyAndRemoveDirectory("empty"); +} + +TEST("requireThatSyncTokenIsUpdatedAfterFlush") { +#if 0 + std::string file = "sync.dat"; + FastOS_File::Delete(file.c_str()); + { + vespalib::DataBuffer buf; + SimpleDataStore store(file); + EXPECT_EQUAL(0u, store.lastSyncToken()); + makeData(buf, 10); + store.write(0, buf, 10); + store.flush(4); + EXPECT_EQUAL(4u, store.lastSyncToken()); + } + FastOS_File::Delete(file.c_str()); +#endif +} + +class GuardDirectory { +public: + GuardDirectory(const vespalib::string & dir) : _dir(dir) + { + FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); + EXPECT_TRUE(FastOS_File::MakeDirectory(_dir.c_str())); + } + ~GuardDirectory() { + FastOS_File::EmptyAndRemoveDirectory(_dir.c_str()); + } + const vespalib::string & getDir() const { return _dir; } +private: + vespalib::string _dir; +}; + +TEST("requireThatFlushTimeIsAvailableAfterFlush") { + GuardDirectory testDir("flushtime"); + fastos::TimeStamp before(fastos::ClockSystem::now()); + DummyFileHeaderContext fileHeaderContext; + LogDataStore::Config config; + vespalib::ThreadStackExecutor executor(config.getNumThreads(), 128*1024); + MyTlSyncer tlSyncer; + LogDataStore store(executor, + testDir.getDir(), + config, + GrowStrategy(), + TuneFileSummary(), + fileHeaderContext, + tlSyncer, + NULL); + EXPECT_EQUAL(0, store.getLastFlushTime().time()); + uint64_t flushToken = store.initFlush(5); + EXPECT_EQUAL(5u, flushToken); + store.flush(flushToken); + fastos::TimeStamp after(fastos::ClockSystem::now()); + // the file name of the dat file is 'magic', using the clock instead of stating the file + EXPECT_LESS_EQUAL(before.time(), store.getLastFlushTime().time()); + EXPECT_GREATER_EQUAL(after.time(), store.getLastFlushTime().time()); +} + +TEST("requireThatChunksObeyLimits") { + Chunk c(0, Chunk::Config(256, 
2)); + EXPECT_TRUE(c.hasRoom(1000)); // At least 1 is allowed no matter what the size is. + c.append(1, "abc", 3); + EXPECT_TRUE(c.hasRoom(229)); + EXPECT_FALSE(c.hasRoom(230)); + c.append(2, "abc", 3); + EXPECT_FALSE(c.hasRoom(20)); +} + +TEST("requireThatChunkCanProduceUniqueList") { + const char *d = "ABCDEF"; + Chunk c(0, Chunk::Config(100, 20)); + c.append(1, d, 1); + c.append(2, d, 2); + c.append(3, d, 3); + c.append(2, d, 4); + c.append(1, d, 5); + EXPECT_EQUAL(5u, c.count()); + const Chunk::LidList & all = c.getLids(); + EXPECT_EQUAL(5u, all.size()); + Chunk::LidList unique = c.getUniqueLids(); + EXPECT_EQUAL(3u, unique.size()); + EXPECT_EQUAL(1u, unique[0].getLid()); + EXPECT_EQUAL(5u, unique[0].netSize()); + EXPECT_EQUAL(2u, unique[1].getLid()); + EXPECT_EQUAL(4u, unique[1].netSize()); + EXPECT_EQUAL(3u, unique[2].getLid()); + EXPECT_EQUAL(3u, unique[2].netSize()); +} + +void testChunkFormat(ChunkFormat & cf, size_t expectedLen, const vespalib::string & expectedContent) +{ + document::CompressionConfig cfg; + uint64_t MAGIC_CONTENT(0xabcdef9876543210); + cf.getBuffer() << MAGIC_CONTENT; + vespalib::DataBuffer buffer; + cf.pack(7, buffer, cfg); + EXPECT_EQUAL(expectedLen, buffer.getDataLen()); + std::ostringstream os; + os << vespalib::HexDump(buffer.getData(), buffer.getDataLen()); + EXPECT_EQUAL(expectedContent, os.str()); +} + +TEST("requireThatChunkFormatsDoesNotChangeBetweenReleases") { + ChunkFormatV1 v1(10); + testChunkFormat(v1, 26, "26 000000000010ABCDEF987654321000000000000000079CF5E79B"); + ChunkFormatV2 v2(10); + testChunkFormat(v2, 34, "34 015BA32DE7000000220000000010ABCDEF987654321000000000000000074D000694"); +} + +class DummyBucketizer : public IBucketizer +{ +public: + DummyBucketizer(uint32_t mod) : _mod(mod) { } + uint64_t getBucketOf(const vespalib::GenerationHandler::Guard &, uint32_t lid) const override { + return lid%_mod; + } + vespalib::GenerationHandler::Guard getGuard() const override { + return 
vespalib::GenerationHandler::Guard(); + } +private: + uint32_t _mod; +}; + +TEST("testBucketDensityComputer") { + DummyBucketizer bucketizer(100); + BucketDensityComputer bdc(&bucketizer); + vespalib::GenerationHandler::Guard guard = bdc.getGuard(); + EXPECT_EQUAL(0u, bdc.getNumBuckets()); + bdc.recordLid(guard, 1, 1); + EXPECT_EQUAL(1u, bdc.getNumBuckets()); + bdc.recordLid(guard, 2, 1); + EXPECT_EQUAL(2u, bdc.getNumBuckets()); + bdc.recordLid(guard, 3, 1); + EXPECT_EQUAL(3u, bdc.getNumBuckets()); + bdc.recordLid(guard, 2, 1); + EXPECT_EQUAL(3u, bdc.getNumBuckets()); + bdc.recordLid(guard, 4, 0); + EXPECT_EQUAL(3u, bdc.getNumBuckets()); + bdc.recordLid(guard, 4, 1); + EXPECT_EQUAL(4u, bdc.getNumBuckets()); + + BucketDensityComputer nonRecording(nullptr); + guard = nonRecording.getGuard(); + EXPECT_EQUAL(0u, nonRecording.getNumBuckets()); + nonRecording.recordLid(guard, 1, 1); + EXPECT_EQUAL(0u, nonRecording.getNumBuckets()); +} + +TEST_MAIN() { + DummyFileHeaderContext::setCreator("logdatastore_test"); + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/datastore/logdatastore_test.sh b/searchlib/src/tests/datastore/logdatastore_test.sh new file mode 100755 index 00000000000..46455e1fae9 --- /dev/null +++ b/searchlib/src/tests/datastore/logdatastore_test.sh @@ -0,0 +1,10 @@ +#!/bin/bash +cp -r bug-7257706 bug-7257706-truncated +mkdir dangling-test +cp bug-7257706/*.dat dangling-test/ +cp bug-7257706/*.idx dangling-test/ +cp dangling/*.dat dangling-test/ +cp dangling/*.idx dangling-test/ +truncate --size 3830 bug-7257706-truncated/1422358701368384000.idx +VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_logdatastore_test_app +rm -rf bug-7257706-truncated dangling-test diff --git a/searchlib/src/tests/diskindex/bitvector/.gitignore b/searchlib/src/tests/diskindex/bitvector/.gitignore new file mode 100644 index 00000000000..32b1b86e1e5 --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +bitvector_test +dump 
+/bitvector_test-diskindex +searchlib_bitvector_test-diskindex_app diff --git a/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt b/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt new file mode 100644 index 00000000000..27c03b483ab --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bitvector_test-diskindex_app + SOURCES + bitvector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bitvector_test-diskindex_app COMMAND searchlib_bitvector_test-diskindex_app) diff --git a/searchlib/src/tests/diskindex/bitvector/DESC b/searchlib/src/tests/diskindex/bitvector/DESC new file mode 100644 index 00000000000..313f0f89f2a --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/DESC @@ -0,0 +1 @@ +bitvector test. Take a look at bitvector_test.cpp for details. diff --git a/searchlib/src/tests/diskindex/bitvector/FILES b/searchlib/src/tests/diskindex/bitvector/FILES new file mode 100644 index 00000000000..a2583d74519 --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/FILES @@ -0,0 +1 @@ +bitvector_test.cpp diff --git a/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp new file mode 100644 index 00000000000..bf95e3d56a6 --- /dev/null +++ b/searchlib/src/tests/diskindex/bitvector/bitvector_test.cpp @@ -0,0 +1,221 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bitvector_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/diskindex/bitvectordictionary.h> +#include <vespa/searchlib/diskindex/fieldwriter.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/vespalib/io/fileutil.h> + +using namespace search::index; + +namespace search { +namespace diskindex { + +struct FieldWriterWrapper +{ + FieldWriter _writer; + + FieldWriterWrapper(uint32_t docIdLimit, uint64_t numWordIds); + + FieldWriterWrapper & + newWord(const vespalib::stringref &word); + + FieldWriterWrapper & + add(uint32_t docId); + + bool + open(const std::string &path, + const Schema &schema, + const uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext); +}; + + +FieldWriterWrapper::FieldWriterWrapper(uint32_t docIdLimit, uint64_t numWordIds) + : _writer(docIdLimit, numWordIds) +{ +} + +bool +FieldWriterWrapper::open(const std::string &path, + const Schema &schema, + const uint32_t indexId, + const TuneFileSeqWrite &tuneFileWrite, + const common::FileHeaderContext &fileHeaderContext) +{ + vespalib::mkdir(path, false); + _writer.earlyOpen(path, 64, 10000, false, schema, indexId, tuneFileWrite); + return _writer.lateOpen(tuneFileWrite, fileHeaderContext); +} + +FieldWriterWrapper & +FieldWriterWrapper::newWord(const vespalib::stringref &word) +{ + _writer.newWord(word); + return *this; +} + + +FieldWriterWrapper & +FieldWriterWrapper::add(uint32_t docId) +{ + DocIdAndFeatures daf; + daf._docId = docId; + daf._elements.push_back(WordDocElementFeatures(0)); + daf._elements.back().setNumOccs(1); + daf._wordPositions.push_back(WordDocElementWordPosFeatures(0)); + //LOG(info, "add(%" PRIu64 ", %u)", wordNum, docId); + _writer.add(daf); + return *this; +} + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + uint32_t _indexId; +public: + void + 
requireThatDictionaryHandlesNoEntries(bool directio, bool readmmap); + + void + requireThatDictionaryHandlesMultipleEntries(bool directio, bool readmmap); + + Test(); + int Main(); +}; + +void +Test::requireThatDictionaryHandlesNoEntries(bool directio, bool readmmap) +{ + TuneFileSeqWrite tuneFileWrite; + TuneFileRandRead tuneFileRead; + DummyFileHeaderContext fileHeaderContext; + + if (directio) { + tuneFileWrite.setWantDirectIO(); + tuneFileRead.setWantDirectIO(); + } + if (readmmap) + tuneFileRead.setWantMemoryMap(); + FieldWriterWrapper fww(5, 2); + vespalib::mkdir("dump", false); + EXPECT_TRUE(fww.open("dump/1/", _schema, _indexId, tuneFileWrite, + fileHeaderContext)); + fww.newWord("1").add(1); + fww.newWord("2").add(2).add(3); + EXPECT_TRUE(fww._writer.close()); + + BitVectorDictionary dict; + BitVectorKeyScope bvScope(BitVectorKeyScope::PERFIELD_WORDS); + EXPECT_TRUE(dict.open("dump/1/", tuneFileRead, bvScope)); + EXPECT_EQUAL(5u, dict.getDocIdLimit()); + EXPECT_EQUAL(0u, dict.getEntries().size()); + EXPECT_TRUE(dict.lookup(1).get() == NULL); + EXPECT_TRUE(dict.lookup(2).get() == NULL); +} + +void +Test::requireThatDictionaryHandlesMultipleEntries(bool directio, bool readmmap) +{ + TuneFileSeqWrite tuneFileWrite; + TuneFileRandRead tuneFileRead; + DummyFileHeaderContext fileHeaderContext; + + if (directio) { + tuneFileWrite.setWantDirectIO(); + tuneFileRead.setWantDirectIO(); + } + if (readmmap) + tuneFileRead.setWantMemoryMap(); + FieldWriterWrapper fww(64, 6); + EXPECT_TRUE(fww.open("dump/2/", _schema, _indexId, tuneFileWrite, + fileHeaderContext)); + // must have >16 docs in order to create bitvector for a word + // 17 docs for word 1 + BitVector::UP bv1exp(BitVector::create(64)); + fww.newWord("1"); + for (uint32_t docId = 1; docId < 18; ++docId) { + fww.add(docId); + bv1exp->setBit(docId); + } + fww.newWord("2").add(1); + // 16 docs for word 3 + fww.newWord("3"); + for (uint32_t docId = 1; docId < 17; ++docId) { + fww.add(docId); + } + 
fww.newWord("4").add(1); + // 23 docs for word 5 + BitVector::UP bv5exp(BitVector::create(64)); + fww.newWord("5"); + for (uint32_t docId = 1; docId < 24; ++docId) { + fww.add(docId * 2); + bv5exp->setBit(docId * 2); + } + fww.newWord("6").add(1); + EXPECT_TRUE(fww._writer.close()); + + BitVectorDictionary dict; + BitVectorKeyScope bvScope(BitVectorKeyScope::PERFIELD_WORDS); + EXPECT_TRUE(dict.open("dump/2/", tuneFileRead, bvScope)); + EXPECT_EQUAL(64u, dict.getDocIdLimit()); + EXPECT_EQUAL(2u, dict.getEntries().size()); + + BitVectorWordSingleKey e; + e = dict.getEntries()[0]; + EXPECT_EQUAL(1u, e._wordNum); + EXPECT_EQUAL(17u, e._numDocs); + e = dict.getEntries()[1]; + EXPECT_EQUAL(5u, e._wordNum); + EXPECT_EQUAL(23u, e._numDocs); + + EXPECT_TRUE(dict.lookup(2).get() == NULL); + EXPECT_TRUE(dict.lookup(3).get() == NULL); + EXPECT_TRUE(dict.lookup(4).get() == NULL); + EXPECT_TRUE(dict.lookup(6).get() == NULL); + + BitVector::UP bv1act = dict.lookup(1); + EXPECT_TRUE(bv1act.get() != NULL); + EXPECT_TRUE(*bv1exp == *bv1act); + + BitVector::UP bv5act = dict.lookup(5); + EXPECT_TRUE(bv5act.get() != NULL); + EXPECT_TRUE(*bv5exp == *bv5act); +} + +Test::Test() + : _schema(), + _indexId(0) +{ + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); +} + +int +Test::Main() +{ + TEST_INIT("bitvector_test"); + + TuneFileSeqWrite tuneFileWrite; + TuneFileRandRead tuneFileRead; + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + TEST_DO(requireThatDictionaryHandlesNoEntries(false, false)); + TEST_DO(requireThatDictionaryHandlesMultipleEntries(false, false)); + TEST_DO(requireThatDictionaryHandlesNoEntries(true, false)); + TEST_DO(requireThatDictionaryHandlesMultipleEntries(true, false)); + TEST_DO(requireThatDictionaryHandlesNoEntries(false, true)); + TEST_DO(requireThatDictionaryHandlesMultipleEntries(false, true)); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::diskindex::Test); diff --git 
a/searchlib/src/tests/diskindex/diskindex/.gitignore b/searchlib/src/tests/diskindex/diskindex/.gitignore new file mode 100644 index 00000000000..58819f1c4bb --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +diskindex_test +index +searchlib_diskindex_test_app diff --git a/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt new file mode 100644 index 00000000000..7cee100f534 --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_diskindex_test_app + SOURCES + diskindex_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_diskindex_test_app COMMAND searchlib_diskindex_test_app) diff --git a/searchlib/src/tests/diskindex/diskindex/DESC b/searchlib/src/tests/diskindex/diskindex/DESC new file mode 100644 index 00000000000..fc14faaca7a --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/DESC @@ -0,0 +1 @@ +diskindex test. Take a look at diskindex_test.cpp for details. diff --git a/searchlib/src/tests/diskindex/diskindex/FILES b/searchlib/src/tests/diskindex/diskindex/FILES new file mode 100644 index 00000000000..54eef52f856 --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/FILES @@ -0,0 +1 @@ +diskindex_test.cpp diff --git a/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp new file mode 100644 index 00000000000..a8972d2a289 --- /dev/null +++ b/searchlib/src/tests/diskindex/diskindex/diskindex_test.cpp @@ -0,0 +1,330 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/diskindex/disktermblueprint.h> +#include <vespa/searchlib/test/diskindex/testdiskindex.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/test/fakedata/fakeword.h> +#include <vespa/searchlib/diskindex/zcposocciterators.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/vespalib/io/fileutil.h> +#include <iostream> +#include <set> +#include <vespa/searchlib/test/fakedata/fpfactory.h> + +LOG_SETUP("diskindex_test"); + +using search::BitVectorIterator; +using namespace search::fef; +using namespace search::index; +using namespace search::query; +using namespace search::queryeval; +using namespace search::queryeval::blueprint; +using search::test::InitRangeVerifier; +using namespace search::fakedata; + +namespace search { +namespace diskindex { + +typedef DiskIndex::LookupResult LookupResult; + +std::string +toString(SearchIterator & sb) +{ + std::ostringstream oss; + bool first = true; + for (sb.seek(1u); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + if (!first) oss << ","; + oss << sb.getDocId(); + first = false; + } + return oss.str(); +} + +SimpleStringTerm +makeTerm(const std::string & term) +{ + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +class Test : public vespalib::TestApp, public TestDiskIndex { +private: + FakeRequestContext _requestContext; + + void requireThatLookupIsWorking(bool fieldEmpty, bool docEmpty, bool wordEmpty); + void requireThatWeCanReadPostingList(); + void requireThatWeCanReadBitVector(); + void requireThatBlueprintIsCreated(); + void requireThatBlueprintCanCreateSearchIterators(); + void requireThatInitRangeConforms(); +public: + Test(); + int Main(); +}; + +void +Test::requireThatInitRangeConforms() +{ + InitRangeVerifier ir; + Schema schema; + schema.addIndexField(Schema::IndexField("a", Schema::DataType::STRING)); + bitcompression::PosOccFieldsParams params; + params.setSchemaParams(schema, 0); + search::fakedata::FakeWord fw(ir.getDocIdLimit(), ir.getExpectedDocIds(), "a", params, 0); + TermFieldMatchData md; + TermFieldMatchDataArray tfmda; + tfmda.add(&md); + std::vector<const FakeWord *> v; + v.push_back(&fw); + std::set<std::string> ignored = { "MemTreeOcc", "MemTreeOcc2", + "FilterOcc", "ZcFilterOcc", + "ZcNoSkipFilterOcc", "ZcSkipFilterOcc", + "ZcbFilterOcc", + "EGCompr64FilterOcc", "EGCompr64LEFilterOcc", + "EGCompr64NoSkipFilterOcc", "EGCompr64SkipFilterOcc" }; + for (auto postingType : search::fakedata::getPostingTypes()) { + if (ignored.find(postingType) == ignored.end()) { + std::cerr << "Verifying " << postingType << std::endl; + std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema)); + ff->setup(v); + FakePosting::SP f(ff->make(fw)); + TEST_DO(ir.verify(f->createIterator(tfmda))); + } + } +} + +void +Test::requireThatLookupIsWorking(bool fieldEmpty, + bool docEmpty, + bool wordEmpty) +{ + uint32_t f1(_schema.getIndexFieldId("f1")); + uint32_t f2(_schema.getIndexFieldId("f2")); + 
uint32_t f3(_schema.getIndexFieldId("f3")); + LookupResult::UP r; + r = _index->lookup(f1, "not"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + r = _index->lookup(f1, "w1not"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + r = _index->lookup(f1, "wnot"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + { // field 'f1' + r = _index->lookup(f1, "w1"); + if (wordEmpty || fieldEmpty || docEmpty) { + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } else { + EXPECT_EQUAL(1u, r->wordNum); + EXPECT_EQUAL(2u, r->counts._numDocs); + } + r = _index->lookup(f1, "w2"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } + { // field 'f2' + r = _index->lookup(f2, "w1"); + if (wordEmpty || fieldEmpty || docEmpty) { + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } else { + EXPECT_EQUAL(1u, r->wordNum); + EXPECT_EQUAL(3u, r->counts._numDocs); + } + r = _index->lookup(f2, "w2"); + if (wordEmpty || fieldEmpty || docEmpty) { + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } else { + EXPECT_EQUAL(2u, r->wordNum); + EXPECT_EQUAL(17u, r->counts._numDocs); + } + } + { // field 'f3' doesn't exist + r = _index->lookup(f3, "w1"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + r = _index->lookup(f3, "w2"); + EXPECT_TRUE(!r || r->counts._numDocs == 0); + } +} + +void +Test::requireThatWeCanReadPostingList() +{ + TermFieldMatchDataArray mda; + { // field 'f1' + LookupResult::UP r = _index->lookup(0, "w1"); + PostingListHandle::UP h = _index->readPostingList(*r); + SearchIterator * sb = h->createIterator(r->counts, mda); + sb->initFullRange(); + EXPECT_EQUAL("1,3", toString(*sb)); + delete sb; + } +} + +void +Test::requireThatWeCanReadBitVector() +{ + { // word 'w1' + LookupResult::UP r = _index->lookup(1, "w1"); + // not bit vector for 'w1' + EXPECT_TRUE(_index->readBitVector(*r).get() == NULL); + } + { // word 'w2' + BitVector::UP exp(BitVector::create(32)); + for (uint32_t docId = 1; docId < 18; ++docId) exp->setBit(docId); + { // field 'f2' + LookupResult::UP r = + _index->lookup(1, 
"w2"); + BitVector::UP bv = _index->readBitVector(*r); + EXPECT_TRUE(bv.get() != NULL); + EXPECT_TRUE(*bv == *exp); + } + } +} + +void +Test::requireThatBlueprintIsCreated() +{ + { // unknown field + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("none", 0, 0), makeTerm("w1")); + EXPECT_TRUE(dynamic_cast<EmptyBlueprint *>(b.get()) != NULL); + } + { // unknown word + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("none")); + EXPECT_TRUE(dynamic_cast<EmptyBlueprint *>(b.get()) != NULL); + } + { // known field & word with hits + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); + EXPECT_TRUE(dynamic_cast<DiskTermBlueprint *>(b.get()) != NULL); + EXPECT_EQUAL(2u, b->getState().estimate().estHits); + EXPECT_TRUE(!b->getState().estimate().empty); + } + { // known field & word without hits + Blueprint::UP b = + _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w2")); +// std::cerr << "BP = " << typeid(*b).name() << std::endl; + EXPECT_TRUE((dynamic_cast<DiskTermBlueprint *>(b.get()) != NULL) || + (dynamic_cast<EmptyBlueprint *>(b.get()) != NULL)); + EXPECT_EQUAL(0u, b->getState().estimate().estHits); + EXPECT_TRUE(b->getState().estimate().empty); + } +} + +void +Test::requireThatBlueprintCanCreateSearchIterators() +{ + TermFieldMatchData md; + TermFieldMatchDataArray mda; + mda.add(&md); + Blueprint::UP b; + SearchIterator::UP s; + { // bit vector due to isFilter + b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, true), makeTerm("w2")); + b->fetchPostings(true); + s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); + EXPECT_TRUE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL); + } + { // bit vector due to no ranking needed + b = _index->createBlueprint(_requestContext, FieldSpec("f2", 0, 0, false), makeTerm("w2")); + b->fetchPostings(true); + s = (dynamic_cast<LeafBlueprint 
*>(b.get()))->createLeafSearch(mda, true); + EXPECT_FALSE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL); + TermFieldMatchData md2; + md2.tagAsNotNeeded(); + TermFieldMatchDataArray mda2; + mda2.add(&md2); + EXPECT_TRUE(mda2[0]->isNotNeeded()); + s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda2, false); + EXPECT_TRUE(dynamic_cast<BitVectorIterator *>(s.get()) != NULL); + } + { // fake bit vector + b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0, true), makeTerm("w2")); +// std::cerr << "BP = " << typeid(*b).name() << std::endl; + b->fetchPostings(true); + s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); +// std::cerr << "SI = " << typeid(*s).name() << std::endl; + EXPECT_TRUE((dynamic_cast<BooleanMatchIteratorWrapper *>(s.get()) != NULL) || + dynamic_cast<EmptySearch *>(s.get())); + } + { // posting list iterator + b = _index->createBlueprint(_requestContext, FieldSpec("f1", 0, 0), makeTerm("w1")); + b->fetchPostings(true); + s = (dynamic_cast<LeafBlueprint *>(b.get()))->createLeafSearch(mda, true); + ASSERT_TRUE(dynamic_cast<Zc4RareWordPosOccIterator<true> *>(s.get()) != NULL); + } +} + +Test::Test() : + TestDiskIndex() +{ +} + +int +Test::Main() +{ + TEST_INIT("diskindex_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + + vespalib::mkdir("index", false); + TEST_DO(openIndex("index/1fedewe", false, false, true, true, true)); + TEST_DO(requireThatLookupIsWorking(true, true, true)); + TEST_DO(openIndex("index/1fede", false, false, true, true, false)); + TEST_DO(requireThatLookupIsWorking(true, true, false)); + TEST_DO(openIndex("index/1fewe", false, false, true, false, true)); + TEST_DO(requireThatLookupIsWorking(true, false, true)); + TEST_DO(openIndex("index/1fe", false, false, true, false, false)); + TEST_DO(requireThatLookupIsWorking(true, false, false)); + buildSchema(); + TEST_DO(openIndex("index/1dewe", false, false, false, true, true)); + 
TEST_DO(requireThatLookupIsWorking(false, true, true)); + TEST_DO(openIndex("index/1de", false, false, false, true, false)); + TEST_DO(requireThatLookupIsWorking(false, true, false)); + TEST_DO(openIndex("index/1we", false, false, false, false, true)); + TEST_DO(requireThatLookupIsWorking(false, false, true)); + TEST_DO(openIndex("index/1", false, false, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + + TEST_DO(openIndex("index/2", true, false, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + + TEST_DO(openIndex("index/3", false, true, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + + TEST_DO(openIndex("index/4", true, true, false, false, false)); + TEST_DO(requireThatLookupIsWorking(false, false, false)); + TEST_DO(requireThatWeCanReadPostingList()); + TEST_DO(requireThatWeCanReadBitVector()); + TEST_DO(requireThatBlueprintIsCreated()); + TEST_DO(requireThatBlueprintCanCreateSearchIterators()); + TEST_DO(requireThatInitRangeConforms()); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::diskindex::Test); diff --git a/searchlib/src/tests/diskindex/fieldwriter/.gitignore b/searchlib/src/tests/diskindex/fieldwriter/.gitignore new file mode 100644 index 00000000000..bdb91bca5eb --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/.gitignore @@ -0,0 +1,3 @@ +/field1.f +/index 
+searchlib_fieldwriter_test_app diff --git a/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt new file mode 100644 index 00000000000..a03313fac35 --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fieldwriter_test_app + SOURCES + fieldwriter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_fieldwriter_test_app COMMAND sh runtests.sh) diff --git a/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp new file mode 100644 index 00000000000..ab6be2e0801 --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/fieldwriter_test.cpp @@ -0,0 +1,972 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fieldwriter_test"); +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/common/resultset.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/test/fakedata/fakeword.h> +#include <vespa/searchlib/test/fakedata/fakewordset.h> +#include <vespa/searchlib/index/docidandfeatures.h> +#include <vespa/searchlib/index/postinglisthandle.h> +#include <vespa/searchlib/diskindex/zcposocc.h> +#include <vespa/searchlib/diskindex/zcposoccrandread.h> +#include <vespa/searchlib/diskindex/checkpointfile.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/schemautil.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/diskindex/fieldwriter.h> +#include <vespa/searchlib/diskindex/fieldreader.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/searchlib/util/dirtraverse.h> +#include <vespa/searchlib/diskindex/pagedict4file.h> +#include <vespa/searchlib/diskindex/pagedict4randread.h> + + +using search::ResultSet; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; +using search::fakedata::FakeWord; +using search::fakedata::FakeWordSet; +using search::index::PostingListParams; +using search::index::PostingListCounts; +using search::index::PostingListOffsetAndCounts; +using search::index::Schema; +using search::index::SchemaUtil; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; +using search::diskindex::CheckPointFile; +using search::TuneFileSeqRead; +using search::TuneFileSeqWrite; +using search::TuneFileRandRead; +using vespalib::nbostream; +using search::diskindex::FieldWriter; +using search::diskindex::FieldReader; +using search::diskindex::DocIdMapping; +using search::diskindex::WordNumMapping; +using search::diskindex::PageDict4RandRead; + +// needed to resolve external symbol from 
httpd.h on AIX +void FastS_block_usr2() {} + +namespace fieldwriter +{ + +uint32_t minSkipDocs = 64; +uint32_t minChunkDocs = 262144; + +vespalib::string dirprefix = "index/"; + +void +disableSkip(void) +{ + minSkipDocs = 10000000; + minChunkDocs = 1 << 30; +} + +void +enableSkip(void) +{ + minSkipDocs = 64; + minChunkDocs = 1 << 30; +} + +void +enableSkipChunks(void) +{ + minSkipDocs = 64; + minChunkDocs = 9000; // Unrealistic low for testing +} + + +vespalib::string +makeWordString(uint64_t wordNum) +{ + using AS = vespalib::asciistream; + AS ws; + ws << AS::Width(4) << AS::Fill('0') << wordNum; + return ws.str(); +} + + +typedef std::shared_ptr<FieldReader> FieldReaderSP; +typedef std::shared_ptr<FieldWriter> FieldWriterSP; + +class FieldWriterTest : public FastOS_Application +{ +private: + bool _verbose; + uint32_t _numDocs; + uint32_t _commonDocFreq; + uint32_t _numWordsPerClass; + FakeWordSet _wordSet; + FakeWordSet _wordSet2; +public: + search::Rand48 _rnd; + +private: + void Usage(void); + void testFake(const std::string &postingType, FakeWord &fw); +public: + FieldWriterTest(void); + ~FieldWriterTest(void); + int Main(void); +}; + + +void +FieldWriterTest::Usage(void) +{ + printf("fieldwriter_test " + "[-c <commonDocFreq>] " + "[-d <numDocs>] " + "[-v] " + "[-w <numWordPerClass>]\n"); +} + + +FieldWriterTest::FieldWriterTest(void) + : _verbose(false), + _numDocs(3000000), + _commonDocFreq(50000), + _numWordsPerClass(6), + _wordSet(), + _wordSet2(), + _rnd() +{ +} + + +FieldWriterTest::~FieldWriterTest(void) +{ +} + + +class WrappedFieldWriter : public search::fakedata::CheckPointCallback +{ +public: + FieldWriterSP _fieldWriter; +private: + bool _dynamicK; + uint32_t _numWordIds; + uint32_t _docIdLimit; + vespalib::string _namepref; + Schema _schema; + uint32_t _indexId; + +public: + + WrappedFieldWriter(const vespalib::string &namepref, + bool dynamicK, + uint32_t numWordIds, + uint32_t docIdLimit); + + virtual void + checkPoint(void) override; + + void + 
earlyOpen(void); + + void + lateOpen(void); + + void + open(void); + + void + close(void); + + void + writeCheckPoint(void); + + void + readCheckPoint(bool first); +}; + + +WrappedFieldWriter::WrappedFieldWriter(const vespalib::string &namepref, + bool dynamicK, + uint32_t numWordIds, + uint32_t docIdLimit) + : _fieldWriter(), + _dynamicK(dynamicK), + _numWordIds(numWordIds), + _docIdLimit(docIdLimit), + _namepref(dirprefix + namepref), + _schema(), + _indexId() +{ + Schema::CollectionType ct(Schema::SINGLE); + _schema.addIndexField(Schema::IndexField("field1", Schema::STRING, ct)); + _indexId = _schema.getIndexFieldId("field1"); +} + + +void +WrappedFieldWriter::earlyOpen(void) +{ + TuneFileSeqWrite tuneFileWrite; + _fieldWriter.reset(new FieldWriter(_docIdLimit, _numWordIds)); + _fieldWriter->earlyOpen(_namepref, + minSkipDocs, minChunkDocs, _dynamicK, _schema, + _indexId, + tuneFileWrite); +} + + +void +WrappedFieldWriter::lateOpen(void) +{ + TuneFileSeqWrite tuneFileWrite; + DummyFileHeaderContext fileHeaderContext; + fileHeaderContext.disableFileName(); + _fieldWriter->lateOpen(tuneFileWrite, fileHeaderContext); +} + + +void +WrappedFieldWriter::open(void) +{ + earlyOpen(); + lateOpen(); +} + + +void +WrappedFieldWriter::close(void) +{ + _fieldWriter->close(); + _fieldWriter.reset(); +} + + +void +WrappedFieldWriter::writeCheckPoint(void) +{ + CheckPointFile chkptfile("chkpt"); + nbostream out; + _fieldWriter->checkPointWrite(out); + chkptfile.write(out, DummyFileHeaderContext()); +} + + +void +WrappedFieldWriter::readCheckPoint(bool first) +{ + CheckPointFile chkptfile("chkpt"); + nbostream in; + bool openRes = chkptfile.read(in); + assert(first || openRes); + (void) first; + if (!openRes) + return; + _fieldWriter->checkPointRead(in); + assert(in.empty()); +} + + +void +WrappedFieldWriter::checkPoint(void) +{ + writeCheckPoint(); + _fieldWriter.reset(); + earlyOpen(); + readCheckPoint(false); + lateOpen(); +} + + +class WrappedFieldReader : public 
search::fakedata::CheckPointCallback +{ +public: + FieldReaderSP _fieldReader; +private: + std::string _namepref; + uint32_t _numWordIds; + uint32_t _docIdLimit; + WordNumMapping _wmap; + DocIdMapping _dmap; + Schema _oldSchema; + Schema _schema; + +public: + WrappedFieldReader(const vespalib::string &namepref, + uint32_t numWordIds, + uint32_t docIdLimit); + + ~WrappedFieldReader(void); + + void + earlyOpen(void); + + void + lateOpen(void); + + void + open(void); + + void + close(void); + + void + writeCheckPoint(void); + + void + readCheckPoint(bool first); + + virtual void + checkPoint(void) override; +}; + + +WrappedFieldReader::WrappedFieldReader(const vespalib::string &namepref, + uint32_t numWordIds, + uint32_t docIdLimit) + : search::fakedata::CheckPointCallback(), + _fieldReader(), + _namepref(dirprefix + namepref), + _numWordIds(numWordIds), + _docIdLimit(docIdLimit), + _wmap(), + _dmap(), + _oldSchema(), + _schema() +{ + Schema::CollectionType ct(Schema::SINGLE); + _oldSchema.addIndexField(Schema::IndexField("field1", + Schema::STRING, + ct)); + _schema.addIndexField(Schema::IndexField("field1", + Schema::STRING, + ct)); +} + + +WrappedFieldReader::~WrappedFieldReader(void) +{ +} + + +void +WrappedFieldReader::earlyOpen(void) +{ + TuneFileSeqRead tuneFileRead; + _fieldReader.reset(new FieldReader()); + _fieldReader->earlyOpen(_namepref, tuneFileRead); +} + + +void +WrappedFieldReader::lateOpen(void) +{ + TuneFileSeqRead tuneFileRead; + _wmap.setup(_numWordIds); + _dmap.setup(_docIdLimit); + _fieldReader->setup(_wmap, _dmap); + _fieldReader->lateOpen(_namepref, tuneFileRead); +} + + +void +WrappedFieldReader::open(void) +{ + earlyOpen(); + lateOpen(); +} + + +void +WrappedFieldReader::close(void) +{ + _fieldReader->close(); + _fieldReader.reset(); +} + + +void +WrappedFieldReader::writeCheckPoint(void) +{ + CheckPointFile chkptfile("chkpt"); + nbostream out; + _fieldReader->checkPointWrite(out); + chkptfile.write(out, DummyFileHeaderContext()); +} + + 
+void +WrappedFieldReader::readCheckPoint(bool first) +{ + CheckPointFile chkptfile("chkpt"); + nbostream in; + bool openRes = chkptfile.read(in); + assert(first || openRes); + (void) first; + if (!openRes) + return; + _fieldReader->checkPointRead(in); + assert(in.empty()); +} + + +void +WrappedFieldReader::checkPoint(void) +{ + writeCheckPoint(); + _fieldReader.reset(); + earlyOpen(); + readCheckPoint(false); + lateOpen(); +} + + +void +writeField(FakeWordSet &wordSet, + uint32_t docIdLimit, + const std::string &namepref, + bool dynamicK) +{ + const char *dynamicKStr = dynamicK ? "true" : "false"; + + FastOS_Time tv; + double before; + double after; + + LOG(info, + "enter writeField, " + "namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + WrappedFieldWriter ostate(namepref, + dynamicK, + wordSet.getNumWords(), docIdLimit); + FieldWriter::remove(namepref); + ostate.open(); + + unsigned int wordNum = 1; + uint32_t checkPointCheck = 0; + uint32_t checkPointInterval = 12227; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + ostate._fieldWriter->newWord(makeWordString(wordNum)); + fw.dump(ostate._fieldWriter, false, + checkPointCheck, + checkPointInterval, + NULL); + ++wordNum; + } + } + ostate.close(); + + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave writeField, " + "namepref=%s, dynamicK=%s" + " elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +writeFieldCheckPointed(FakeWordSet &wordSet, + uint32_t docIdLimit, + const std::string &namepref, + bool dynamicK) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + bool first = true; + + LOG(info, + "enter writeFieldCheckPointed, " + "namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + + unsigned int wordNum = 1; + uint32_t checkPointCheck = 0; + uint32_t checkPointInterval = 12227; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + + WrappedFieldWriter ostate(namepref, + dynamicK, + wordSet.getNumWords(), docIdLimit); + ostate.earlyOpen(); + ostate.readCheckPoint(first); + first = false; + ostate.lateOpen(); + ostate._fieldWriter->newWord(makeWordString(wordNum)); + fw.dump(ostate._fieldWriter, false, + checkPointCheck, + checkPointInterval, + &ostate); + ostate.writeCheckPoint(); + ++wordNum; + } + } + do { + WrappedFieldWriter ostate(namepref, + dynamicK, + wordSet.getNumWords(), docIdLimit); + ostate.earlyOpen(); + ostate.readCheckPoint(first); + ostate.lateOpen(); + ostate.close(); + } while (0); + CheckPointFile dropper("chkpt"); + dropper.remove(); + + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave writeFieldCheckPointed, " + "namepref=%s, dynamicK=%s" + " elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +readField(FakeWordSet &wordSet, + uint32_t docIdLimit, + const std::string &namepref, + bool dynamicK, + bool verbose) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + WrappedFieldReader istate(namepref, wordSet.getNumWords(), + docIdLimit); + LOG(info, + "enter readField, " + "namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + istate.open(); + if (istate._fieldReader->isValid()) + istate._fieldReader->read(); + + TermFieldMatchData mdfield1; + + unsigned int wordNum = 1; + uint32_t checkPointCheck = 0; + uint32_t checkPointInterval = 12227; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + + TermFieldMatchDataArray tfmda; + tfmda.add(&mdfield1); + + fw.validate(istate._fieldReader, wordNum, + tfmda, verbose, + checkPointCheck, checkPointInterval, &istate); + ++wordNum; + } + } + + istate.close(); + tv.SetNow(); + after = tv.Secs(); + CheckPointFile dropper("chkpt"); + dropper.remove(); + LOG(info, + "leave readField, " + "namepref=%s, dynamicK=%s" + " elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +randReadField(FakeWordSet &wordSet, + const std::string &namepref, + bool dynamicK, + bool verbose) +{ + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + FastOS_Time tv; + double before; + double after; + PostingListCounts counts; + + LOG(info, + "enter randReadField," + " namepref=%s, dynamicK=%s", + namepref.c_str(), + dynamicKStr); + tv.SetNow(); + before = tv.Secs(); + + std::string cname = dirprefix + namepref; + cname += "dictionary"; + + std::unique_ptr<search::index::DictionaryFileRandRead> dictFile; + dictFile.reset(new PageDict4RandRead); + + search::index::PostingListFileRandRead *postingFile = NULL; + if (dynamicK) + postingFile = + new search::diskindex::ZcPosOccRandRead; + else + postingFile = + new search::diskindex::Zc4PosOccRandRead; + + TuneFileSeqRead tuneFileRead; + TuneFileRandRead tuneFileRandRead; + bool openCntRes = dictFile->open(cname, tuneFileRandRead); + assert(openCntRes); + (void) openCntRes; + vespalib::string cWord; + + std::string pname = dirprefix + namepref + "posocc.dat"; + pname += ".compressed"; + bool openPostingRes = postingFile->open(pname, tuneFileRandRead); + assert(openPostingRes); + (void) openPostingRes; + + for (int loop = 0; loop < 1; ++loop) { + unsigned int wordNum = 1; + for (unsigned int wc = 0; wc < wordSet._words.size(); ++wc) { + for (unsigned int wi = 0; wi < wordSet._words[wc].size(); ++wi) { + FakeWord &fw = *wordSet._words[wc][wi]; + + PostingListOffsetAndCounts offsetAndCounts; + uint64_t checkWordNum; + dictFile->lookup(makeWordString(wordNum), + checkWordNum, + offsetAndCounts); + assert(wordNum == checkWordNum); + + counts = offsetAndCounts._counts; + search::index::PostingListHandle handle; + + handle._bitLength = counts._bitLength; + handle._file = postingFile; + handle._bitOffset = offsetAndCounts._offset; + + postingFile->readPostingList(counts, + 0, + counts._segments.empty() ? 
1 : counts._segments.size(), + handle); + + TermFieldMatchData mdfield1; + TermFieldMatchDataArray tfmda; + tfmda.add(&mdfield1); + + std::unique_ptr<SearchIterator> + sb(handle.createIterator(counts, tfmda)); + + // LOG(info, "loop=%d, wordNum=%u", loop, wordNum); + fw.validate(sb.get(), tfmda, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 19, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 99, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 799, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 6399, verbose); + + sb.reset(handle.createIterator(counts, tfmda)); + fw.validate(sb.get(), tfmda, 11999, verbose); + ++wordNum; + } + } + } + + postingFile->close(); + dictFile->close(); + delete postingFile; + dictFile.reset(); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave randReadField, namepref=%s," + " dynamicK=%s, " + "elapsed=%10.6f", + namepref.c_str(), + dynamicKStr, + after - before); +} + + +void +fusionField(uint32_t numWordIds, + uint32_t docIdLimit, + const vespalib::string &ipref, + const vespalib::string &opref, + bool doRaw, + bool dynamicK) +{ + const char *rawStr = doRaw ? "true" : "false"; + const char *dynamicKStr = dynamicK ? 
"true" : "false"; + + + LOG(info, + "enter fusionField, ipref=%s, opref=%s," + " raw=%s," + " dynamicK=%s", + ipref.c_str(), + opref.c_str(), + rawStr, + dynamicKStr); + + FastOS_Time tv; + double before; + double after; + WrappedFieldWriter ostate(opref, + dynamicK, + numWordIds, docIdLimit); + WrappedFieldReader istate(ipref, numWordIds, docIdLimit); + + tv.SetNow(); + before = tv.Secs(); + + ostate.open(); + istate.open(); + + if (doRaw) { + PostingListParams featureParams; + featureParams.clear(); + featureParams.set("cooked", false); + istate._fieldReader->setFeatureParams(featureParams); + } + if (istate._fieldReader->isValid()) + istate._fieldReader->read(); + + while (istate._fieldReader->isValid()) { + istate._fieldReader->write(*ostate._fieldWriter); + istate._fieldReader->read(); + } + istate.close(); + ostate.close(); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "leave fusionField, ipref=%s, opref=%s," + " raw=%s dynamicK=%s, " + " elapsed=%10.6f", + ipref.c_str(), + opref.c_str(), + rawStr, + dynamicKStr, + after - before); +} + + +void +testFieldWriterVariants(FakeWordSet &wordSet, + uint32_t docIdLimit, bool verbose) +{ + CheckPointFile dropper("chkpt"); + dropper.remove(); + disableSkip(); + writeField(wordSet, docIdLimit, "new4", true); + readField(wordSet, docIdLimit, "new4", true, verbose); + readField(wordSet, docIdLimit, "new4", true, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "new6", true); + writeField(wordSet, docIdLimit, "new5", false); + readField(wordSet, docIdLimit, "new5", false, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "new7", false); + enableSkip(); + writeField(wordSet, docIdLimit, "newskip4", true); + readField(wordSet, docIdLimit, "newskip4", true, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newskip6", + true); + writeField(wordSet, docIdLimit, "newskip5", false); + readField(wordSet, docIdLimit, "newskip5", false, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newskip7", + 
false); + enableSkipChunks(); + writeField(wordSet, docIdLimit, "newchunk4", true); + readField(wordSet, docIdLimit, "newchunk4", true, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newchunk6", + true); + writeField(wordSet, docIdLimit, "newchunk5", false); + readField(wordSet, docIdLimit, + "newchunk5",false, verbose); + writeFieldCheckPointed(wordSet, docIdLimit, "newchunk7", + false); + disableSkip(); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new4", "new4x", + false, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new4", "new4xx", + true, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new5", "new5x", + false, false); + fusionField(wordSet.getNumWords(), + docIdLimit, + "new5", "new5xx", + true, false); + randReadField(wordSet, "new4", true, verbose); + randReadField(wordSet, "new5", false, verbose); + enableSkip(); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip4", "newskip4x", + false, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip4", "newskip4xx", + true, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip5", "newskip5x", + false, false); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newskip5", "newskip5xx", + true, false); + randReadField(wordSet, "newskip4", true, verbose); + randReadField(wordSet, "newskip5", false, verbose); + enableSkipChunks(); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk4", "newchunk4x", + false, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk4", "newchunk4xx", + true, true); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk5", "newchunk5x", + false, false); + fusionField(wordSet.getNumWords(), + docIdLimit, + "newchunk5", "newchunk5xx", + true, false); + randReadField(wordSet, "newchunk4", true, verbose); + randReadField(wordSet, "newchunk5", false, verbose); +} + + +void +testFieldWriterVariantsWithHighLids(FakeWordSet &wordSet, uint32_t docIdLimit, + bool verbose) +{ + 
CheckPointFile dropper("chkpt"); + dropper.remove(); + disableSkip(); + writeField(wordSet, docIdLimit, "hlid4", true); + readField(wordSet, docIdLimit, "hlid4", true, verbose); + writeField(wordSet, docIdLimit, "hlid5", false); + readField(wordSet, docIdLimit, "hlid5", false, verbose); + randReadField(wordSet, "hlid4", true, verbose); + randReadField(wordSet, "hlid5", false, verbose); + enableSkip(); + writeField(wordSet, docIdLimit, "hlidskip4", true); + readField(wordSet, docIdLimit, "hlidskip4", true, verbose); + writeField(wordSet, docIdLimit, "hlidskip5", false); + readField(wordSet, docIdLimit, "hlidskip5", false, verbose); + randReadField(wordSet, "hlidskip4", true, verbose); + randReadField(wordSet, "hlidskip5", false, verbose); + enableSkipChunks(); + writeField(wordSet, docIdLimit, "hlidchunk4", true); + readField(wordSet, docIdLimit, "hlidchunk4", true, verbose); + writeField(wordSet, docIdLimit, "hlidchunk5", false); + readField(wordSet, docIdLimit, "hlidchunk5", false, verbose); + randReadField(wordSet, "hlidchunk4", true, verbose); + randReadField(wordSet, "hlidchunk5", false, verbose); +} + +int +FieldWriterTest::Main(void) +{ + int argi; + char c; + const char *optArg; + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + argi = 1; + + while ((c = GetOpt("c:d:vw:", optArg, argi)) != -1) { + switch(c) { + case 'c': + _commonDocFreq = atoi(optArg); + if (_commonDocFreq == 0) + _commonDocFreq = 1; + break; + case 'd': + _numDocs = atoi(optArg); + break; + case 'v': + _verbose = true; + break; + case 'w': + _numWordsPerClass = atoi(optArg); + break; + default: + Usage(); + return 1; + } + } + + if (_commonDocFreq > _numDocs) { + Usage(); + return 1; + } + + _wordSet.setupParams(false, false); + _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass); + + vespalib::mkdir("index", false); + testFieldWriterVariants(_wordSet, _numDocs, _verbose); + + _wordSet2.setupParams(false, false); + _wordSet2.setupWords(_rnd, 
_numDocs, _commonDocFreq, 3);
    uint32_t docIdBias = 700000000;
    _wordSet2.addDocIdBias(docIdBias);  // Large skip numbers
    testFieldWriterVariantsWithHighLids(_wordSet2, _numDocs + docIdBias,
            _verbose);
    return 0;
}

} // namespace fieldwriter

int
main(int argc, char **argv)
{
    fieldwriter::FieldWriterTest app;

    setvbuf(stdout, NULL, _IOLBF, 32768);
    app._rnd.srand48(32);
    return app.Entry(argc, argv);
}
diff --git a/searchlib/src/tests/diskindex/fieldwriter/runtests.sh b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh new file mode 100755 index 00000000000..1f2b6d6076f --- /dev/null +++ b/searchlib/src/tests/diskindex/fieldwriter/runtests.sh @@ -0,0 +1,66 @@
#!/bin/sh
# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

# Runs the field writer test program, then checks that the files written by
# the different writer variants of each group are byte-identical.

rm -f new* chkpt*
sync
sleep 2

if ${VALGRIND} ./searchlib_fieldwriter_test_app "$@"
then
    :
else
    echo FAILURE: ./searchlib_fieldwriter_test_app program failed.
    exit 1
fi

# checksame FILE1 [FILE...]
# Compares every following file against FILE1 with cmp; prints a FAILURE
# line for each mismatch and returns non-zero if any file differs.
checksame()
{
    file1=$1
    rval=0
    shift
    for file in "$@"
    do
	if cmp -s "$file1" "$file"
	then
	    :
	else
	    echo "FAILURE: $file1 != $file"
	    rval=1
	fi
    done
    return $rval
}

# Dictionary files, one variable per file suffix:
#   N  -> .pdat, Nb -> .spdat, Nc -> .ssdat
# Groups 1-3 are the [46] variants (plain, skip, chunk), groups 4-6 the
# [57] variants.  The globs are expanded where the variables are used.
newpcntfiles1=index/new[46]*dictionary.pdat
newpcntfiles1b=index/new[46]*dictionary.spdat
newpcntfiles1c=index/new[46]*dictionary.ssdat
newpcntfiles2=index/newskip[46]*dictionary.pdat
newpcntfiles2b=index/newskip[46]*dictionary.spdat
newpcntfiles2c=index/newskip[46]*dictionary.ssdat
newpcntfiles3=index/newchunk[46]*dictionary.pdat
newpcntfiles3b=index/newchunk[46]*dictionary.spdat
newpcntfiles3c=index/newchunk[46]*dictionary.ssdat
newpcntfiles4=index/new[57]*dictionary.pdat
newpcntfiles4b=index/new[57]*dictionary.spdat
newpcntfiles4c=index/new[57]*dictionary.ssdat
newpcntfiles5=index/newskip[57]*dictionary.pdat
newpcntfiles5b=index/newskip[57]*dictionary.spdat
newpcntfiles5c=index/newskip[57]*dictionary.ssdat
newpcntfiles6=index/newchunk[57]*dictionary.pdat
newpcntfiles6b=index/newchunk[57]*dictionary.spdat
newpcntfiles6c=index/newchunk[57]*dictionary.ssdat
# Posting list files for the same six groups.
newpfiles1=index/new[46]*posocc.dat.compressed
newpfiles2=index/newskip[46]*posocc.dat.compressed
newpfiles3=index/newchunk[46]*posocc.dat.compressed
newpfiles4=index/new[57]*posocc.dat.compressed
newpfiles5=index/newskip[57]*posocc.dat.compressed
newpfiles6=index/newchunk[57]*posocc.dat.compressed

if checksame $newpcntfiles1 && checksame $newpcntfiles1b && checksame $newpcntfiles1c && checksame $newpfiles1 && checksame $newpcntfiles2 && checksame $newpcntfiles2b && checksame $newpcntfiles2c && checksame $newpfiles2 && checksame $newpcntfiles3 && checksame $newpcntfiles3b && checksame $newpcntfiles3c && checksame $newpfiles3 && checksame $newpcntfiles4 && checksame $newpcntfiles4b && checksame $newpcntfiles4c && checksame $newpfiles4 && checksame $newpcntfiles5 && checksame $newpcntfiles5b && checksame $newpcntfiles5c && checksame $newpfiles5 && checksame $newpcntfiles6 && checksame $newpcntfiles6b && checksame $newpcntfiles6c && checksame $newpfiles6
then
    echo SUCCESS: Files match up
    exit 0
else
    echo FAILURE: Files do not match up
    exit 1
fi
diff --git a/searchlib/src/tests/diskindex/fusion/.gitignore b/searchlib/src/tests/diskindex/fusion/.gitignore new file mode 100644 index 00000000000..8526d6faa38 --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/.gitignore @@ -0,0 +1,37 @@
.depend
Makefile
[dms]dump[1-5]
chkpt
ddump2
ddump3
ddump4
ddump5
dmdump2
dmdump3
dmdump4
dmdump5
dmdump[1-5]
dump2
dump3
dump4
dump5
dump[1-5]
fusion_test
mdump2
mdump3
mdump4
mdump5
sdump2
sdump3
sdump4
sdump5
/ddump6
/dmdump6
/dump6
/dumpwords.out
/mdump6
/transpose.out
/usage.out
/zwordc0coll.out
/zwordf0field.out
searchlib_fusion_test_app
diff --git a/searchlib/src/tests/diskindex/fusion/CMakeLists.txt b/searchlib/src/tests/diskindex/fusion/CMakeLists.txt new file mode 100644 index 00000000000..9c079b09c90 --- /dev/null +++
b/searchlib/src/tests/diskindex/fusion/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fusion_test_app + SOURCES + fusion_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fusion_test_app COMMAND sh fusion_test.sh) diff --git a/searchlib/src/tests/diskindex/fusion/DESC b/searchlib/src/tests/diskindex/fusion/DESC new file mode 100644 index 00000000000..b0db86422b9 --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/DESC @@ -0,0 +1 @@ +fusion test. Performs basic fusion operations and validates results. diff --git a/searchlib/src/tests/diskindex/fusion/FILES b/searchlib/src/tests/diskindex/fusion/FILES new file mode 100644 index 00000000000..fb22ce21a9d --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/FILES @@ -0,0 +1 @@ +fusion_test.cpp diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.cpp b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp new file mode 100644 index 00000000000..4191a8f8d2b --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.cpp @@ -0,0 +1,506 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fusion_test"); +#include <vespa/searchlib/diskindex/checkpointfile.h> +#include <vespa/searchlib/diskindex/fusion.h> +#include <vespa/searchlib/diskindex/indexbuilder.h> +#include <vespa/searchlib/diskindex/zcposoccrandread.h> +#include <vespa/searchlib/fef/fieldpositionsiterator.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/indexbuilder.h> +#include <vespa/searchlib/index/schemautil.h> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/memoryindex/dictionary.h> +#include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/featurestore.h> +#include <vespa/searchlib/memoryindex/postingiterator.h> +#include <vespa/searchlib/memoryindex/i_document_insert_listener.h> +#include <vespa/searchlib/diskindex/diskindex.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/util/filekit.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> + +namespace search +{ + + +using document::Document; +using fef::FieldPositionsIterator; +using fef::TermFieldMatchData; +using fef::TermFieldMatchDataArray; +using index::DocBuilder; +using index::DocIdAndFeatures; +using index::Schema; +using index::SchemaUtil; +using search::common::FileHeaderContext; +using search::index::DummyFileHeaderContext; +using memoryindex::Dictionary; +using memoryindex::DocumentInverter; +using queryeval::SearchIterator; + +namespace diskindex +{ + + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + const Schema & getSchema() const { return _schema; } + + void + 
requireThatFusionIsWorking(const vespalib::string &prefix, + bool directio, + bool readmmap); + +public: + Test(); + int Main(); +}; + + +namespace +{ + +void +myPushDocument(DocumentInverter &inv, Dictionary &d) +{ + inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>()); +} + + +} + +vespalib::string +toString(FieldPositionsIterator posItr, + bool hasElements = false, bool hasWeights = false) +{ + vespalib::asciistream ss; + ss << "{"; + ss << posItr.getFieldLength() << ":"; + bool first = true; + for (; posItr.valid(); posItr.next()) { + if (!first) ss << ","; + ss << posItr.getPosition(); + first = false; + if (hasElements) { + ss << "[e=" << posItr.getElementId(); + if (hasWeights) + ss << ",w=" << posItr.getElementWeight(); + ss << ",l=" << posItr.getElementLen() << "]"; + } + } + ss << "}"; + return ss.str(); +} + + +#if 0 +vespalib::string +toString(DocIdAndFeatures &features) +{ + vespalib::asciistream ss; + ss << "{"; + std::vector<search::index::WordDocFieldElementFeatures>::const_iterator + element = features._elements.begin(); + std::vector<search::index::WordDocFieldElementWordPosFeatures>:: + const_iterator position = features._wordPositions.begin(); + for (; field != fielde; ++field) { + ss << "f=" << field->getFieldId() << "{"; + uint32_t numElements = field->getNumElements(); + while (numElements--) { + ss << "e=" << element->getElementId() << "," + << "ew=" << element->getWeight() << "," + << "el=" << element->getElementLen() << "{"; + uint32_t numOccs = element->getNumOccs(); + while (numOccs--) { + ss << position->getWordPos(); + if (numOccs != 0) + ss << ","; + } + ss << "}"; + if (numElements != 0) + ss << ","; + } + ss << "}"; + } + ss << "}"; + return ss.str(); +} +#endif + + +void +validateDiskIndex(DiskIndex &dw, + bool f2HasElements, + bool f3HasWeights) +{ + typedef DiskIndex::LookupResult LR; + typedef index::PostingListHandle PH; + typedef search::queryeval::SearchIterator SB; + + const Schema &schema(dw.getSchema()); + + { + 
uint32_t id1(schema.getIndexFieldId("f0")); + LR::UP lr1(dw.lookup(id1, "c")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f0; + TermFieldMatchDataArray a; + a.add(&f0); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f0.getIterator())); + EXPECT_TRUE(sbap->seek(10)); + sbap->unpack(10); + EXPECT_EQUAL("{7:2}", toString(f0.getIterator())); + } + { + uint32_t id1(schema.getIndexFieldId("f2")); + LR::UP lr1(dw.lookup(id1, "ax")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f2; + TermFieldMatchDataArray a; + a.add(&f2); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f2.getIterator())); + EXPECT_TRUE(sbap->seek(10)); + sbap->unpack(10); + if (f2HasElements) { + EXPECT_EQUAL("{3:0[e=0,l=3],0[e=1,l=1]}", + toString(f2.getIterator(), true)); + } else { + EXPECT_EQUAL("{3:0[e=0,l=3]}", + toString(f2.getIterator(), true)); + } + } + { + uint32_t id1(schema.getIndexFieldId("f3"));; + LR::UP lr1(dw.lookup(id1, "wx")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f3; + TermFieldMatchDataArray a; + a.add(&f3); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f3.getIterator())); + EXPECT_TRUE(sbap->seek(10)); + sbap->unpack(10); + if (f3HasWeights) { + EXPECT_EQUAL("{2:0[e=0,w=4,l=2]}", + toString(f3.getIterator(), true, true)); + } else { + EXPECT_EQUAL("{2:0[e=0,w=1,l=2]}", + toString(f3.getIterator(), true, true)); + } + } + { + uint32_t id1(schema.getIndexFieldId("f3"));; + LR::UP lr1(dw.lookup(id1, "zz")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + 
TermFieldMatchData f3; + TermFieldMatchDataArray a; + a.add(&f3); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f3.getIterator())); + EXPECT_TRUE(sbap->seek(11)); + sbap->unpack(11); + if (f3HasWeights) { + EXPECT_EQUAL("{1:0[e=0,w=-27,l=1]}", + toString(f3.getIterator(), true, true)); + } else { + EXPECT_EQUAL("{1:0[e=0,w=1,l=1]}", + toString(f3.getIterator(), true, true)); + } + } + { + uint32_t id1(schema.getIndexFieldId("f3"));; + LR::UP lr1(dw.lookup(id1, "zz0")); + EXPECT_TRUE(lr1.get() != NULL); + PH::UP wh1(dw.readPostingList(*lr1)); + EXPECT_TRUE(wh1.get() != NULL); + TermFieldMatchData f3; + TermFieldMatchDataArray a; + a.add(&f3); + SB::UP sbap(wh1->createIterator(lr1->counts, a)); + sbap->initFullRange(); + EXPECT_EQUAL("{1000000:}", toString(f3.getIterator())); + EXPECT_TRUE(sbap->seek(12)); + sbap->unpack(12); + if (f3HasWeights) { + EXPECT_EQUAL("{1:0[e=0,w=0,l=1]}", + toString(f3.getIterator(), true, true)); + } else { + EXPECT_EQUAL("{1:0[e=0,w=1,l=1]}", + toString(f3.getIterator(), true, true)); + } + } +} + + +void +Test::requireThatFusionIsWorking(const vespalib::string &prefix, + bool directio, + bool readmmap) +{ + Schema schema; + Schema schema2; + Schema schema3; + for (SchemaUtil::IndexIterator it(getSchema()); it.isValid(); ++it) { + const Schema::IndexField &iField = + _schema.getIndexField(it.getIndex()); + schema.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + iField.getCollectionType())); + if (iField.getCollectionType() == Schema::WEIGHTEDSET) + schema2.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + Schema::ARRAY)); + else + schema2.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + iField.getCollectionType())); + schema3.addIndexField(Schema::IndexField(iField.getName(), + iField.getDataType(), + Schema::SINGLE)); + } + schema3.addIndexField(Schema::IndexField("f4", + 
Schema::STRING)); + schema.addFieldSet(Schema::FieldSet("nc0"). + addField("f0").addField("f1")); + schema2.addFieldSet(Schema::FieldSet("nc0"). + addField("f1").addField("f0")); + schema3.addFieldSet(Schema::FieldSet("nc2"). + addField("f0").addField("f1"). + addField("f2").addField("f3"). + addField("f4")); + Dictionary d(schema); + DocBuilder b(schema); + SequencedTaskExecutor invertThreads(2); + SequencedTaskExecutor pushThreads(2); + DocumentInverter inv(schema, invertThreads, pushThreads); + Document::UP doc; + + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + addStr("e").addStr("f").addStr("z"). + endField(); + b.startIndexField("f1"). + addStr("w").addStr("x"). + addStr("y").addStr("z"). + endField(); + b.startIndexField("f2"). + startElement(4).addStr("ax").addStr("ay").addStr("z").endElement(). + startElement(5).addStr("ax").endElement(). + endField(); + b.startIndexField("f3"). + startElement(4).addStr("wx").addStr("z").endElement(). + endField(); + + doc = b.endDocument(); + inv.invertDocument(10, *doc); + invertThreads.sync(); + myPushDocument(inv, d); + pushThreads.sync(); + + b.startDocument("doc::11"). + startIndexField("f3"). + startElement(-27).addStr("zz").endElement(). + endField(); + doc = b.endDocument(); + inv.invertDocument(11, *doc); + invertThreads.sync(); + myPushDocument(inv, d); + pushThreads.sync(); + + b.startDocument("doc::12"). + startIndexField("f3"). + startElement(0).addStr("zz0").endElement(). 
+ endField(); + doc = b.endDocument(); + inv.invertDocument(12, *doc); + invertThreads.sync(); + myPushDocument(inv, d); + pushThreads.sync(); + + IndexBuilder ib(schema); + vespalib::string dump2dir = prefix + "dump2"; + ib.setPrefix(dump2dir); + uint32_t numDocs = 12 + 1; + uint32_t numWords = d.getNumUniqueWords(); + bool dynamicKPosOcc = false; + TuneFileIndexing tuneFileIndexing; + TuneFileSearch tuneFileSearch; + DummyFileHeaderContext fileHeaderContext; + if (directio) { + tuneFileIndexing._read.setWantDirectIO(); + tuneFileIndexing._write.setWantDirectIO(); + tuneFileSearch._read.setWantDirectIO(); + } + if (readmmap) + tuneFileSearch._read.setWantMemoryMap(); + ib.open(numDocs, numWords, tuneFileIndexing, fileHeaderContext); + d.dump(ib); + ib.close(); + + vespalib::string tsName = dump2dir + "/.teststamp"; + typedef search::FileKit FileKit; + EXPECT_TRUE(FileKit::createStamp(tsName)); + EXPECT_TRUE(FileKit::hasStamp(tsName)); + EXPECT_TRUE(FileKit::removeStamp(tsName)); + EXPECT_FALSE(FileKit::hasStamp(tsName)); + + do { + DiskIndex dw2(prefix + "dump2"); + if (!EXPECT_TRUE(dw2.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw2, true, true)); + } while (0); + + do { + std::vector<vespalib::string> sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump2"); + if (!EXPECT_TRUE(Fusion::merge(schema, + prefix + "dump3", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw3(prefix + "dump3"); + if (!EXPECT_TRUE(dw3.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw3, true, true)); + } while (0); + do { + std::vector<vespalib::string> sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump3"); + if (!EXPECT_TRUE(Fusion::merge(schema2, + prefix + "dump4", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw4(prefix + "dump4"); + if 
(!EXPECT_TRUE(dw4.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw4, true, false)); + } while (0); + do { + std::vector<vespalib::string> sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump3"); + if (!EXPECT_TRUE(Fusion::merge(schema3, + prefix + "dump5", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw5(prefix + "dump5"); + if (!EXPECT_TRUE(dw5.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw5, false, false)); + } while (0); + do { + std::vector<vespalib::string> sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump3"); + if (!EXPECT_TRUE(Fusion::merge(schema, + prefix + "dump6", + sources, selector, + !dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw6(prefix + "dump6"); + if (!EXPECT_TRUE(dw6.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw6, true, true)); + } while (0); + do { + std::vector<vespalib::string> sources; + SelectorArray selector(numDocs, 0); + sources.push_back(prefix + "dump2"); + if (!EXPECT_TRUE(Fusion::merge(schema, + prefix + "dump3", + sources, selector, + dynamicKPosOcc, + tuneFileIndexing, + fileHeaderContext))) + return; + } while (0); + do { + DiskIndex dw3(prefix + "dump3"); + if (!EXPECT_TRUE(dw3.setup(tuneFileSearch))) + break; + TEST_DO(validateDiskIndex(dw3, true, true)); + } while (0); +} + + +Test::Test() + : _schema() +{ + _schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + _schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); +} + + +int +Test::Main() +{ + TEST_INIT("fusion_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + + TEST_DO(requireThatFusionIsWorking("", 
false, false)); + TEST_DO(requireThatFusionIsWorking("d", true, false)); + TEST_DO(requireThatFusionIsWorking("m", false, true)); + TEST_DO(requireThatFusionIsWorking("dm", true, true)); + + TEST_DONE(); +} + +} + + +} + + +TEST_APPHOOK(search::diskindex::Test); diff --git a/searchlib/src/tests/diskindex/fusion/fusion_test.sh b/searchlib/src/tests/diskindex/fusion/fusion_test.sh new file mode 100755 index 00000000000..127453fae07 --- /dev/null +++ b/searchlib/src/tests/diskindex/fusion/fusion_test.sh @@ -0,0 +1,15 @@ +#!/bin/bash +IINSPECT=../../../apps/vespa-index-inspect/searchlib_vespa-index-inspect_app +ECHO_CMD=echo + +$VALGRIND ./searchlib_fusion_test_app +$ECHO_CMD showing usage +$IINSPECT --help > usage.out 2>&1 || true +$ECHO_CMD dumping dictionary words for field f0 +$IINSPECT dumpwords --indexdir dump3 --field f0 > dumpwords.out +$ECHO_CMD transposing index back for inspection +$IINSPECT showpostings --transpose --indexdir dump3 > transpose.out +$ECHO_CMD dumping posting list for word z in field f0 +$IINSPECT showpostings --indexdir dump3 --field f0 z > zwordf0field.out +$ECHO_CMD inspection done. + diff --git a/searchlib/src/tests/diskindex/pagedict4/.gitignore b/searchlib/src/tests/diskindex/pagedict4/.gitignore new file mode 100644 index 00000000000..2381ed57229 --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +pagedict4_test +fakedict.* +searchlib_pagedict4_test_app diff --git a/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt new file mode 100644 index 00000000000..f8aef573c9a --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_pagedict4_test_app + SOURCES + pagedict4test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_pagedict4_test_app COMMAND searchlib_pagedict4_test_app) diff --git a/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp new file mode 100644 index 00000000000..03d73e84b42 --- /dev/null +++ b/searchlib/src/tests/diskindex/pagedict4/pagedict4test.cpp @@ -0,0 +1,876 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("pagedict4test"); +#include <vespa/searchlib/bitcompression/compression.h> +#include <vector> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/index/schemautil.h> +#include <vespa/searchlib/bitcompression/countcompression.h> +#include <vespa/searchlib/bitcompression/pagedict4.h> +#include <vespa/searchlib/test/diskindex/threelevelcountbuffers.h> +#include <vespa/searchlib/index/postinglistcounts.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/diskindex/pagedict4file.h> +#include <vespa/searchlib/diskindex/pagedict4randread.h> +#include <vespa/searchlib/common/tunefileinfo.h> + +using search::bitcompression::PostingListCountFileEncodeContext; +using search::bitcompression::PostingListCountFileDecodeContext; +using search::index::PostingListCounts; +using search::index::PostingListOffsetAndCounts; +using search::index::PostingListParams; +using search::bitcompression::PageDict4SSWriter; +using search::bitcompression::PageDict4SPWriter; +using search::bitcompression::PageDict4PWriter; +using search::bitcompression::PageDict4Reader; +using search::bitcompression::PageDict4SSReader; +using search::bitcompression::PageDict4SSLookupRes; +using search::bitcompression::PageDict4SPLookupRes; +using search::bitcompression::PageDict4PLookupRes; 
+using search::index::Schema; +using search::index::DictionaryFileSeqRead; +using search::index::DictionaryFileSeqWrite; +using search::index::DictionaryFileRandRead; +using search::diskindex::PageDict4FileSeqRead; +using search::diskindex::PageDict4FileSeqWrite; +using search::diskindex::PageDict4RandRead; +using search::index::DummyFileHeaderContext; + +typedef search::bitcompression::PageDict4StartOffset StartOffset; + +namespace +{ + + +class Writer : public search::diskindex::ThreeLevelCountWriteBuffers +{ +public: + PageDict4SSWriter *_ssw; + PageDict4SPWriter *_spw; + PageDict4PWriter *_pw; + + Writer(EC &sse, + EC &spe, + EC &pe) + : ThreeLevelCountWriteBuffers(sse, spe, pe), + _ssw(NULL), + _spw(NULL), + _pw(NULL) + { + } + + ~Writer(void) + { + delete _ssw; + delete _spw; + delete _pw; + } + + void + allocWriters() + { + _ssw = new PageDict4SSWriter(_sse); + _spw = new PageDict4SPWriter(*_ssw, _spe); + _pw = new PageDict4PWriter(*_spw, _pe); + _spw->setup(); + _pw->setup(); + } + + void + flush(void) + { + _pw->flush(); + ThreeLevelCountWriteBuffers::flush(); + } + + void + addCounts(const std::string &word, + const PostingListCounts &counts) + { + _pw->addCounts(word, counts); + } +}; + + +class SeqReader : public search::diskindex::ThreeLevelCountReadBuffers +{ +public: + PageDict4SSReader _ssr; + PageDict4Reader _pr; + + SeqReader(DC &ssd, + DC &spd, + DC &pd, + search::diskindex::ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _pr(_ssr, spd, pd) + { + _ssr.setup(ssd); + _pr.setup(); + } + + void + readCounts(vespalib::string &word, + uint64_t &wordNum, + PostingListCounts &counts) + { + _pr.readCounts(word, wordNum, counts); + } +}; + +class RandReader : public search::diskindex::ThreeLevelCountReadBuffers +{ +public: + PageDict4SSReader _ssr; + const char *_spData; + const char *_pData; + 
size_t _pageSize; + + RandReader(DC &ssd, + DC &spd, + DC &pd, + search::diskindex::ThreeLevelCountWriteBuffers &wb) + : ThreeLevelCountReadBuffers(ssd, spd, pd, wb), + _ssr(_rcssd, + wb._ssHeaderLen, wb._ssFileBitSize, + wb._spHeaderLen, wb._spFileBitSize, + wb._pHeaderLen, wb._pFileBitSize), + _spData(static_cast<const char *>(_rcspd._comprBuf)), + _pData(static_cast<const char *>(_rcpd._comprBuf)), + _pageSize(search::bitcompression::PageDict4PageParams::getPageByteSize()) + { + _ssr.setup(ssd); + } + + bool + lookup(const std::string &key, + uint64_t &wordNum, + PostingListCounts &counts, + StartOffset &offsets) + { + PageDict4SSLookupRes sslr; + + sslr = _ssr.lookup(key); + if (!sslr._res) { + counts.clear(); + offsets = sslr._l6StartOffset; + wordNum = sslr._l6WordNum; + return false; + } + + if (sslr._overflow) { + wordNum = sslr._l6WordNum; + counts = sslr._counts; + offsets = sslr._startOffset; + return true; + } + PageDict4SPLookupRes splr; + splr.lookup(_ssr, + _spData + + _pageSize * sslr._sparsePageNum, + key, + sslr._l6Word, + sslr._lastWord, + sslr._l6StartOffset, + sslr._l6WordNum, + sslr._pageNum); + + PageDict4PLookupRes plr; + plr.lookup(_ssr, + _pData + _pageSize * splr._pageNum, + key, + splr._l3Word, + splr._lastWord, + splr._l3StartOffset, + splr._l3WordNum); + wordNum = plr._wordNum; + offsets = plr._startOffset; + if (plr._res) { + counts = plr._counts; + return true; + } + counts.clear(); + return false; + } +}; + +} + +class PageDict4TestApp : public FastOS_Application +{ +public: + search::Rand48 _rnd; + bool _stress; + bool _emptyWord; + bool _firstWordForcedCommon; + bool _lastWordForcedCommon; + + void + usage(void); + + int + Main(void); + + void + testWords(void); + + PageDict4TestApp(void) + : _rnd(), + _stress(false), + _emptyWord(false), + _firstWordForcedCommon(false), + _lastWordForcedCommon(false) + { + } +}; + + +void +PageDict4TestApp::usage(void) +{ + printf("Usage: wordnumbers\n"); + fflush(stdout); +} + + +int 
+PageDict4TestApp::Main(void) +{ + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + _rnd.srand48(32); + for (int32_t i = 1; i < _argc; ++i) { + if (strcmp(_argv[i], "stress") == 0) + _stress = true; + if (strcmp(_argv[i], "emptyword") == 0) + _emptyWord = true; + if (strcmp(_argv[i], "firstwordforcedcommon") == 0) + _firstWordForcedCommon = true; + if (strcmp(_argv[i], "lastwordforcedcommon") == 0) + _lastWordForcedCommon = true; + } + testWords(); + + LOG(info, + "_stress is %s", + _stress ? "true" : "false"); + LOG(info, + "_emptyWord is %s", + _emptyWord ? "true" : "false"); + LOG(info, + "_firstWordForcedCommon is %s", + _firstWordForcedCommon ? "true" : "false"); + LOG(info, + "_lastWordForcedCommon is %s", + _lastWordForcedCommon ? "true" : "false"); + + LOG(info, "SUCCESS"); + return 0; +} + + +class WordIndexCounts +{ +public: + uint32_t _numDocs; + uint64_t _fileOffset; + uint64_t _bitLength; + uint64_t _accNumDocs; + + WordIndexCounts(uint64_t bitLength, + uint32_t numDocs) + : _numDocs(numDocs), + _fileOffset(0), + _bitLength(bitLength), + _accNumDocs(0) + { + } + + WordIndexCounts() + : _numDocs(0), + _fileOffset(0), + _bitLength(0), + _accNumDocs(0) + { + } +}; + +class WordCounts +{ +public: + std::string _word; + WordIndexCounts _counts; + + bool + operator!=(const WordCounts &rhs) const + { + return _word != rhs._word; + } + + WordCounts(const std::string &word) + : _word(word), + _counts() + { + } + + bool + operator<(const WordCounts &rhs) const + { + return _word < rhs._word; + } +}; + + +void +deDup(std::vector<WordCounts> &v) +{ + std::vector<WordCounts> v2; + std::sort(v.begin(), v.end()); + for (std::vector<WordCounts>::const_iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + if (v2.empty() || v2.back() != *i) + v2.push_back(*i); + } + std::swap(v, v2); +} + + +void +deDup(std::vector<uint32_t> &v) +{ + std::vector<uint32_t> v2; + std::sort(v.begin(), v.end()); + for (std::vector<uint32_t>::const_iterator + 
i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + if (v2.empty() || v2.back() != *i) + v2.push_back(*i); + } + std::swap(v, v2); +} + + +static WordIndexCounts +makeIndex(search::Rand48 &rnd, bool forceCommon) +{ + uint64_t bitLength = 10; + uint32_t numDocs = 1; + if ((rnd.lrand48() % 150) == 0 || forceCommon) { + bitLength = 1000000000; + numDocs = 500000; + } + return WordIndexCounts(bitLength, numDocs); +} + + +void +makeIndexes(search::Rand48 &rnd, + WordIndexCounts &counts, + bool forceCommon) +{ + counts = makeIndex(rnd, forceCommon); +} + + +static void +makeWords(std::vector<WordCounts> &v, + search::Rand48 &rnd, + uint32_t numWordIds, + uint32_t tupleCount, + bool emptyWord, + bool firstWordForcedCommon, + bool lastWordForcedCommon) +{ + v.clear(); + for (unsigned int i = 0; i < tupleCount; ++i) { + uint64_t word = rnd.lrand48() % numWordIds; + uint64_t wordCount = (rnd.lrand48() % 10) + 1; + for (unsigned int j = 0; j < wordCount; ++j) { + uint64_t nextWord = rnd.lrand48() % numWordIds; + uint64_t nextWordCount = 0; + bool incomplete = true; + nextWordCount = rnd.lrand48() % 10; + incomplete = (rnd.lrand48() % 3) == 0 || nextWordCount == 0; + for (unsigned int k = 0; k < nextWordCount; ++k) { + uint64_t nextNextWord = rnd.lrand48() % numWordIds; + std::ostringstream w; + w << word; + w << "-"; + w << nextWord; + w << "-"; + w << nextNextWord; + v.push_back(WordCounts(w.str())); + } + if (incomplete) { + std::ostringstream w; + w << word; + w << "-"; + w << nextWord; + w << "-"; + w << "9999999999999999"; + v.push_back(WordCounts(w.str())); + } + } + } + deDup(v); + if (!v.empty() && emptyWord) + v.front()._word = ""; + for (std::vector<WordCounts>::iterator + i = v.begin(), ib = v.begin(), ie = v.end(); + i != ie; ++i) { + std::vector<WordIndexCounts> indexes; + makeIndexes(rnd, i->_counts, + (i == ib && firstWordForcedCommon) || + (i + 1 == ie && lastWordForcedCommon)); + } + uint64_t fileOffset = 0; + uint64_t accNumDocs = 0; + for 
(std::vector<WordCounts>::iterator + i = v.begin(), + ie = v.end(); + i != ie; + ++i) { + WordIndexCounts *f = &i->_counts; + assert(f->_numDocs > 0); + assert(f->_bitLength > 0); + f->_fileOffset = fileOffset; + f->_accNumDocs = accNumDocs; + fileOffset += f->_bitLength; + accNumDocs += f->_numDocs; + } +} + + +void +makeCounts(PostingListCounts &counts, + const WordCounts &i, + uint32_t chunkSize) +{ + PostingListCounts c; + const WordIndexCounts *j = &i._counts; + c._bitLength = j->_bitLength; + c._numDocs = j->_numDocs; + c._segments.clear(); + assert(j->_numDocs > 0); + uint32_t numChunks = (j->_numDocs + chunkSize - 1) / chunkSize; + if (numChunks > 1) { + uint32_t chunkBits = j->_bitLength / numChunks; + for (uint32_t chunkNo = 0; chunkNo < numChunks; ++chunkNo) { + PostingListCounts::Segment seg; + seg._bitLength = chunkBits; + seg._numDocs = chunkSize; + seg._lastDoc = (chunkNo + 1) * chunkSize - 1; + if (chunkNo + 1 == numChunks) { + seg._bitLength = c._bitLength - + (numChunks - 1) * chunkBits; + seg._lastDoc = c._numDocs - 1; + seg._numDocs = c._numDocs - (numChunks - 1) * chunkSize; + } + c._segments.push_back(seg); + } + } + counts = c; +} + + +void +checkCounts(const std::string &word, + const PostingListCounts &counts, + const StartOffset &fileOffset, + const WordCounts &i, + uint32_t chunkSize) +{ + PostingListCounts answer; + + makeCounts(answer, i, chunkSize); + assert(word == i._word); + (void) word; + (void) fileOffset; + const WordIndexCounts *j = &i._counts; + assert(counts._bitLength == j->_bitLength); + assert(counts._numDocs == j->_numDocs); + assert(fileOffset._fileOffset == j->_fileOffset); + assert(fileOffset._accNumDocs == j->_accNumDocs); + assert(counts._segments == answer._segments); + assert(counts == answer); + (void) counts; +} + + +void +testWords(const std::string &logname, + search::Rand48 &rnd, + uint64_t numWordIds, + uint32_t tupleCount, + uint32_t chunkSize, + uint32_t ssPad, + uint32_t spPad, + uint32_t pPad, + bool 
emptyWord, + bool firstWordForcedCommon, + bool lastWordForcedCommon) +{ + typedef search::bitcompression::PostingListCountFileEncodeContext EC; + typedef search::bitcompression::PostingListCountFileDecodeContext DC; + + LOG(info, "%s: word test start", logname.c_str()); + std::vector<WordCounts> myrand; + makeWords(myrand, rnd, numWordIds, tupleCount, + emptyWord, firstWordForcedCommon, lastWordForcedCommon); + + PostingListCounts xcounts; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(xcounts, *i, chunkSize); + } + LOG(info, "%s: word counts generated", logname.c_str()); + + EC pe; + EC spe; + EC sse; + + sse._minChunkDocs = chunkSize; + sse._numWordIds = numWordIds; + spe.copyParams(sse); + pe.copyParams(sse); + Writer w(sse, spe, pe); + w.startPad(ssPad, spPad, pPad); + w.allocWriters(); + + PostingListCounts counts; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(counts, *i, chunkSize); + w.addCounts(i->_word, counts); + } + w.flush(); + + LOG(info, + "%s: Used %" PRIu64 "+%" PRIu64 "+%" PRIu64 + " bits for %d words", + logname.c_str(), + w._pFileBitSize, + w._spFileBitSize, + w._ssFileBitSize, + (int) myrand.size()); + + StartOffset checkOffset; + + { + DC ssd; + ssd._minChunkDocs = chunkSize; + ssd._numWordIds = numWordIds; + DC spd; + spd.copyParams(ssd); + DC pd; + pd.copyParams(ssd); + + SeqReader r(ssd, spd, pd, w); + + uint64_t wordNum = 1; + uint64_t checkWordNum = 0; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + vespalib::string word; + counts.clear(); + r.readCounts(word, checkWordNum, counts); + checkCounts(word, counts, checkOffset, *i, chunkSize); + assert(checkWordNum == wordNum); + checkOffset._fileOffset += counts._bitLength; + checkOffset._accNumDocs += counts._numDocs; + } + assert(pd.getReadOffset() == 
w._pFileBitSize); + LOG(info, "%s: words seqRead test OK", logname.c_str()); + } + + { + DC ssd; + ssd._minChunkDocs = chunkSize; + ssd._numWordIds = numWordIds; + DC spd; + spd.copyParams(ssd); + DC pd; + pd.copyParams(ssd); + + RandReader rr(ssd, spd, pd, w); + + uint64_t wordNum = 1; + uint64_t checkWordNum = 0; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + checkWordNum = 0; + bool res = rr.lookup(i->_word, + checkWordNum, + counts, + checkOffset); + assert(res); + (void) res; + checkCounts(i->_word, counts, checkOffset, + *i, chunkSize); + assert(checkWordNum == wordNum); + } + LOG(info, "%s: word randRead test OK", logname.c_str()); + } + + Schema schema; + std::vector<uint32_t> indexes; + { + std::ostringstream fn; + fn << "f0"; + schema.addIndexField(Schema:: + IndexField(fn.str(), + Schema::STRING, + Schema::SINGLE)); + indexes.push_back(0); + } + { + std::unique_ptr<DictionaryFileSeqWrite> + dw(new PageDict4FileSeqWrite); + std::vector<uint32_t> wIndexes; + std::vector<PostingListCounts> wCounts; + search::TuneFileSeqWrite tuneFileWrite; + DummyFileHeaderContext fileHeaderContext; + PostingListParams params; + params.set("numWordIds", numWordIds); + params.set("minChunkDocs", chunkSize); + dw->setParams(params); + bool openres = dw->open("fakedict", + tuneFileWrite, + fileHeaderContext); + assert(openres); + + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i) { + makeCounts(counts, *i, chunkSize); + dw->writeWord(i->_word, counts); + } + bool closeres = dw->close(); + assert(closeres); + (void) closeres; + + LOG(info, "%s: pagedict4 written", logname.c_str()); + } + { + std::unique_ptr<DictionaryFileSeqRead> dr(new PageDict4FileSeqRead); + search::TuneFileSeqRead tuneFileRead; + + bool openres = dr->open("fakedict", + tuneFileRead); + assert(openres); + (void) openres; + std::string lastWord; + vespalib::string checkWord; + 
PostingListCounts wCounts; + PostingListCounts rCounts; + uint64_t wordNum = 1; + uint64_t checkWordNum = 5; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + makeCounts(counts, *i, chunkSize); + wCounts = counts; + checkWord.clear(); + checkWordNum = 0; + dr->readWord(checkWord, checkWordNum, rCounts); + assert(rCounts == wCounts); + assert(wordNum == checkWordNum); + assert(checkWord == i->_word); + } + + checkWord = "bad"; + checkWordNum = 5; + dr->readWord(checkWord, checkWordNum, rCounts); + assert(checkWord.empty()); + assert(checkWordNum == DictionaryFileSeqRead::noWordNumHigh()); + bool closeres = dr->close(); + assert(closeres); + (void) closeres; + + LOG(info, "%s: pagedict4 seqverify OK", logname.c_str()); + } + { + std::unique_ptr<DictionaryFileRandRead> drr(new PageDict4RandRead); + search::TuneFileRandRead tuneFileRead; + bool openres = drr->open("fakedict", + tuneFileRead); + assert(openres); + (void) openres; + std::string lastWord; + vespalib::string checkWord; + PostingListCounts wCounts; + PostingListCounts rCounts; + uint64_t wOffset; + uint64_t rOffset; + PostingListOffsetAndCounts rOffsetAndCounts; + uint64_t wordNum = 1; + uint64_t checkWordNum = 5; + std::string missWord; + wOffset = 0; + for (std::vector<WordCounts>::const_iterator + i = myrand.begin(), + ie = myrand.end(); + i != ie; + ++i, ++wordNum) { + makeCounts(counts, *i, chunkSize); + wCounts = counts; + + checkWordNum = 0; + rCounts.clear(); + rOffset = 0; + bool lres = drr->lookup(i->_word, checkWordNum, + rOffsetAndCounts); + assert(lres); + (void) lres; + assert((rOffsetAndCounts._counts._bitLength == 0) == + (rOffsetAndCounts._counts._numDocs == 0)); + rOffset = rOffsetAndCounts._offset; + rCounts = rOffsetAndCounts._counts; + assert(rCounts == wCounts); + assert(wordNum == checkWordNum); + assert(rOffset == wOffset); + + wOffset += wCounts._bitLength; + lastWord = i->_word; + + missWord = i->_word; + 
missWord.append(1, '\1'); + checkWordNum = 0; + lres = drr->lookup(missWord, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == wordNum + 1); + } + + checkWordNum = 0; + std::string notfoundword = "Thiswordhasbetternotbeindictionary"; + bool lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + checkWordNum = 0; + notfoundword = lastWord + "somethingmore"; + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + (void) lres; + LOG(info, "Lookup beyond dict EOF gave wordnum %d", (int) checkWordNum); + + if (firstWordForcedCommon) { + if (!emptyWord) { + checkWordNum = 0; + notfoundword = ""; + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == 1); + } + if (!myrand.empty()) { + checkWordNum = 0; + notfoundword = myrand.front()._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == 2); + } + } + if (lastWordForcedCommon && !myrand.empty()) { + if (myrand.size() > 1) { + checkWordNum = 0; + notfoundword = myrand[myrand.size() - 2]._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == myrand.size()); + } + checkWordNum = 0; + notfoundword = myrand[myrand.size() - 1]._word; + notfoundword.append(1, '\1'); + lres = drr->lookup(notfoundword, checkWordNum, + rOffsetAndCounts); + assert(!lres); + assert(checkWordNum == myrand.size() + 1); + } + bool closeres = drr->close(); + assert(closeres); + (void) closeres; + LOG(info, "%s: pagedict4 randverify OK", logname.c_str()); + } +} + + +void +PageDict4TestApp::testWords(void) +{ + ::testWords("smallchunkwordsempty", _rnd, + 1000000, 0, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwordsempty2", _rnd, + 0, 0, + 64, 80, 72, 64, + false, false, false); + 
::testWords("smallchunkwords", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, false, false); + ::testWords("smallchunkwordswithemptyword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + true, false, false); + ::testWords("smallchunkwordswithcommonfirstword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, true, false); + ::testWords("smallchunkwordswithcommonemptyfirstword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + true, true, false); + ::testWords("smallchunkwordswithcommonlastword", _rnd, + 1000000, 100, + 64, 80, 72, 64, + false, false, true); +#if 1 + ::testWords("smallchunkwords2", _rnd, + 1000000, _stress ? 10000 : 100, + 64, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); +#endif +#if 1 + ::testWords("stdwords", _rnd, + 1000000, _stress ? 10000 : 100, + 262144, 80, 72, 64, + _emptyWord, _firstWordForcedCommon, _lastWordForcedCommon); +#endif +} + +FASTOS_MAIN(PageDict4TestApp); diff --git a/searchlib/src/tests/document_store/.gitignore b/searchlib/src/tests/document_store/.gitignore new file mode 100644 index 00000000000..bc9b97decab --- /dev/null +++ b/searchlib/src/tests/document_store/.gitignore @@ -0,0 +1 @@ +searchlib_document_store_test_app diff --git a/searchlib/src/tests/document_store/CMakeLists.txt b/searchlib/src/tests/document_store/CMakeLists.txt new file mode 100644 index 00000000000..18b9e408fae --- /dev/null +++ b/searchlib/src/tests/document_store/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_store_test_app + SOURCES + document_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_store_test_app COMMAND searchlib_document_store_test_app) diff --git a/searchlib/src/tests/document_store/FILES b/searchlib/src/tests/document_store/FILES new file mode 100644 index 00000000000..b1dd2b610d0 --- /dev/null +++ b/searchlib/src/tests/document_store/FILES @@ -0,0 +1 @@ +document_store_test.cpp diff --git a/searchlib/src/tests/document_store/document_store_test.cpp b/searchlib/src/tests/document_store/document_store_test.cpp new file mode 100644 index 00000000000..e6a3d9b5c3d --- /dev/null +++ b/searchlib/src/tests/document_store/document_store_test.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/docstore/documentstore.h> +#include <vespa/searchlib/docstore/cachestats.h> +#include <vespa/document/repo/documenttyperepo.h> + +using namespace search; + +document::DocumentTypeRepo repo; + +struct NullDataStore : IDataStore { + NullDataStore() : IDataStore("") {} + ssize_t read(uint32_t, vespalib::DataBuffer &) const override { return 0; } + void read(const LidVector &, IBufferVisitor &) const override { } + void write(uint64_t, uint32_t, const void *, size_t) override {} + void remove(uint64_t, uint32_t) override {} + void flush(uint64_t) override {} + + uint64_t initFlush(uint64_t syncToken) override { return syncToken; } + + size_t memoryUsed() const override { return 0; } + size_t memoryMeta() const override { return 0; } + size_t getDiskFootprint() const override { return 0; } + size_t getDiskBloat() const override { return 0; } + uint64_t lastSyncToken() const override { return 0; } + uint64_t tentativeLastSyncToken() const override { return 0; } + fastos::TimeStamp getLastFlushTime() const override { return fastos::TimeStamp(); } + void 
accept(IDataStoreVisitor &, IDataStoreVisitorProgress &, bool) override { } + double getVisitCost() const override { return 1.0; } + virtual DataStoreStorageStats getStorageStats() const override { + return DataStoreStorageStats(0, 0, 0.0, 0, 0); + } + virtual std::vector<DataStoreFileChunkStats> + getFileChunkStats() const override { + std::vector<DataStoreFileChunkStats> result; + return result; + } +}; + +TEST_FFF("require that uncache docstore lookups are counted", + DocumentStore::Config(document::CompressionConfig::NONE, 0, 0), + NullDataStore(), DocumentStore(f1, f2)) +{ + EXPECT_EQUAL(0u, f3.getCacheStats().misses); + f3.read(1, repo); + EXPECT_EQUAL(1u, f3.getCacheStats().misses); +} + +TEST_FFF("require that cached docstore lookups are counted", + DocumentStore::Config(document::CompressionConfig::NONE, 100000, 100), + NullDataStore(), DocumentStore(f1, f2)) +{ + EXPECT_EQUAL(0u, f3.getCacheStats().misses); + f3.read(1, repo); + EXPECT_EQUAL(1u, f3.getCacheStats().misses); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/document_store/visitor/.gitignore b/searchlib/src/tests/document_store/visitor/.gitignore new file mode 100644 index 00000000000..c97186f86d7 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/.gitignore @@ -0,0 +1 @@ +searchlib_document_store_visitor_test_app diff --git a/searchlib/src/tests/document_store/visitor/CMakeLists.txt b/searchlib/src/tests/document_store/visitor/CMakeLists.txt new file mode 100644 index 00000000000..976463bdfe8 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_store_visitor_test_app + SOURCES + document_store_visitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_store_visitor_test_app COMMAND searchlib_document_store_visitor_test_app) diff --git a/searchlib/src/tests/document_store/visitor/DESC b/searchlib/src/tests/document_store/visitor/DESC new file mode 100644 index 00000000000..03e9c6681ad --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/DESC @@ -0,0 +1 @@ +Document store visiting test. diff --git a/searchlib/src/tests/document_store/visitor/FILES b/searchlib/src/tests/document_store/visitor/FILES new file mode 100644 index 00000000000..412f9879bb5 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/FILES @@ -0,0 +1 @@ +document_store_visitor_test.cpp diff --git a/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp new file mode 100644 index 00000000000..1898fa35a29 --- /dev/null +++ b/searchlib/src/tests/document_store/visitor/document_store_visitor_test.cpp @@ -0,0 +1,466 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("document_store_visitor_test"); + +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/docstore/documentstore.h> +#include <vespa/searchlib/docstore/logdocumentstore.h> +#include <vespa/searchlib/docstore/cachestats.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/vespalib/io/fileutil.h> + +using namespace search; + +using vespalib::string; +using document::DataType; +using document::Document; +using document::DocumentId; +using document::DocumentType; +using document::DocumentTypeRepo; +using vespalib::asciistream; +using index::DummyFileHeaderContext; + +namespace +{ + +const string doc_type_name = "test"; +const string header_name = doc_type_name + ".header"; +const string body_name = doc_type_name + ".body"; + +document::DocumenttypesConfig +makeDocTypeRepoConfig(void) +{ + const int32_t doc_type_id = 787121340; + document::config_builder::DocumenttypesConfigBuilderHelper builder; + builder.document(doc_type_id, + doc_type_name, + document::config_builder::Struct(header_name), + document::config_builder::Struct(body_name). + addField("main", DataType::T_STRING). 
+ addField("extra", DataType::T_STRING)); + return builder.config(); +} + + +Document::UP +makeDoc(const DocumentTypeRepo &repo, uint32_t i, bool before) +{ + asciistream idstr; + idstr << "id:test:test:: " << i; + DocumentId id(idstr.str()); + const DocumentType *docType = repo.getDocumentType(doc_type_name); + Document::UP doc(new Document(*docType, id)); + ASSERT_TRUE(doc.get()); + asciistream mainstr; + mainstr << "static text" << i << " body something"; + for (uint32_t j = 0; j < 10; ++j) { + mainstr << (j + i * 1000) << " "; + } + mainstr << " and end field"; + doc->set("main", mainstr.c_str()); + if (!before) { + doc->set("extra", "foo"); + } + + return doc; +} + +} + +class MyTlSyncer : public transactionlog::SyncProxy +{ + SerialNum _syncedTo; + +public: + MyTlSyncer(void) + : _syncedTo(0) + { + } + + void + sync(SerialNum syncTo) + { + _syncedTo = syncTo; + } +}; + + +class MyVisitorBase +{ +public: + DocumentTypeRepo &_repo; + uint32_t _visitCount; + uint32_t _visitRmCount; + uint32_t _docIdLimit; + BitVector::UP _valid; + bool _before; + + MyVisitorBase(DocumentTypeRepo &repo, uint32_t docIdLimit, bool before); +}; + +MyVisitorBase::MyVisitorBase(DocumentTypeRepo &repo, + uint32_t docIdLimit, + bool before) + : _repo(repo), + _visitCount(0u), + _visitRmCount(0u), + _docIdLimit(docIdLimit), + _valid(BitVector::create(docIdLimit)), + _before(before) +{ +} + + +class MyVisitor : public MyVisitorBase, + public IDocumentStoreReadVisitor +{ +public: + using MyVisitorBase::MyVisitorBase; + + virtual void + visit(uint32_t lid, const Document &doc); + + virtual void + visit(uint32_t lid); +}; + + +void +MyVisitor::visit(uint32_t lid, const Document &doc) +{ + ++_visitCount; + assert(lid < _docIdLimit); + Document::UP expDoc(makeDoc(_repo, lid, _before)); + EXPECT_TRUE(*expDoc == doc); + _valid->slowSetBit(lid); +} + + +void +MyVisitor::visit(uint32_t lid) +{ + ++_visitRmCount; + assert(lid < _docIdLimit); + _valid->slowClearBit(lid); +} + + +class 
MyRewriteVisitor : public MyVisitorBase, + public IDocumentStoreRewriteVisitor +{ +public: + using MyVisitorBase::MyVisitorBase; + + virtual void + visit(uint32_t lid, Document &doc); +}; + + +void +MyRewriteVisitor::visit(uint32_t lid, Document &doc) +{ + ++_visitCount; + assert(lid < _docIdLimit); + Document::UP expDoc(makeDoc(_repo, lid, _before)); + EXPECT_TRUE(*expDoc == doc); + _valid->slowSetBit(lid); + doc.set("extra", "foo"); +} + + +class MyVisitorProgress : public IDocumentStoreVisitorProgress +{ +public: + double _progress; + uint32_t _updates; + + MyVisitorProgress(); + + virtual void + updateProgress(double progress); + + virtual double + getProgress() const; +}; + + +MyVisitorProgress::MyVisitorProgress() + : _progress(0.0), + _updates(0) +{ +} + + +void +MyVisitorProgress::updateProgress(double progress) +{ + EXPECT_TRUE(progress >= _progress); + _progress = progress; + ++_updates; + LOG(info, + "updateProgress(%6.2f), %u updates", + progress, _updates); +} + + +double +MyVisitorProgress::getProgress() const +{ + return _progress; +} + + +struct Fixture +{ + string _baseDir; + DocumentTypeRepo _repo; + LogDocumentStore::Config _storeConfig; + vespalib::ThreadStackExecutor _executor; + DummyFileHeaderContext _fileHeaderContext; + MyTlSyncer _tlSyncer; + std::unique_ptr<LogDocumentStore> _store; + uint64_t _syncToken; + uint32_t _docIdLimit; + BitVector::UP _valid; + + Fixture(); + + ~Fixture(); + + Document::UP + makeDoc(uint32_t i); + + void + resetDocStore(); + + void + mkdir(); + + void + rmdir(); + + void + setDocIdLimit(uint32_t docIdLimit); + + void + put(const Document &doc, uint32_t lid); + + void + remove(uint32_t lid); + + void + flush(); + + void + populate(uint32_t low, uint32_t high, uint32_t docIdLimit); + + void + applyRemoves(uint32_t rmDocs); + + void + checkRemovePostCond(uint32_t numDocs, + uint32_t docIdLimit, + uint32_t rmDocs, + bool before); +}; + +Fixture::Fixture() + : _baseDir("visitor"), + _repo(makeDocTypeRepoConfig()), + 
_storeConfig(DocumentStore:: + Config(document::CompressionConfig::NONE, 0, 0), + LogDataStore:: + Config(50000, 0.2, 3.0, 0.2, 1, true, + WriteableFileChunk::Config( + document::CompressionConfig(), + 16384, + 64))), + _executor(_storeConfig.getLogConfig().getNumThreads(), 128 * 1024), + _fileHeaderContext(), + _tlSyncer(), + _store(), + _syncToken(0u), + _docIdLimit(0u), + _valid(BitVector::create(0u)) +{ + rmdir(); + mkdir(); + resetDocStore(); +} + + +Fixture::~Fixture() +{ + _store.reset(); + rmdir(); +} + +Document::UP +Fixture::makeDoc(uint32_t i) +{ + return ::makeDoc(_repo, i, true); +} + +void +Fixture::resetDocStore() +{ + _store.reset(new LogDocumentStore(_executor, + _baseDir, + _storeConfig, + GrowStrategy(), + TuneFileSummary(), + _fileHeaderContext, + _tlSyncer, + NULL)); +} + + +void +Fixture::rmdir() +{ + vespalib::rmdir(_baseDir, true); +} + +void +Fixture::mkdir() +{ + vespalib::mkdir(_baseDir, false); +} + + +void +Fixture::setDocIdLimit(uint32_t docIdLimit) +{ + _docIdLimit = docIdLimit; + _valid->resize(_docIdLimit); +} + +void +Fixture::put(const Document &doc, uint32_t lid) +{ + ++_syncToken; + assert(lid < _docIdLimit); + _store->write(_syncToken, doc, lid); + _valid->slowSetBit(lid); +} + + +void +Fixture::remove(uint32_t lid) +{ + ++_syncToken; + assert(lid < _docIdLimit); + _store->remove(_syncToken, lid); + _valid->slowClearBit(lid); +} + + +void +Fixture::flush() +{ + _store->initFlush(_syncToken); + _store->flush(_syncToken); +} + + +void +Fixture::populate(uint32_t low, uint32_t high, uint32_t docIdLimit) +{ + setDocIdLimit(docIdLimit); + for (uint32_t lid = low; lid < high; ++lid) { + Document::UP doc = makeDoc(lid); + put(*doc, lid); + } +} + + +void +Fixture::applyRemoves(uint32_t rmDocs) +{ + for (uint32_t lid = 20; lid < 20 + rmDocs; ++lid) { + remove(lid); + } + put(*makeDoc(25), 25); + remove(25); + put(*makeDoc(25), 25); +} + + +void +Fixture::checkRemovePostCond(uint32_t numDocs, + uint32_t docIdLimit, + uint32_t rmDocs, + 
bool before) +{ + MyVisitor visitor(_repo, docIdLimit, before); + MyVisitorProgress visitorProgress; + EXPECT_EQUAL(0.0, visitorProgress.getProgress()); + EXPECT_EQUAL(0u, visitorProgress._updates); + _store->accept(visitor, visitorProgress, _repo); + EXPECT_EQUAL(numDocs - rmDocs + 1, visitor._visitCount); + EXPECT_EQUAL(rmDocs - 1, visitor._visitRmCount); + EXPECT_EQUAL(1.0, visitorProgress.getProgress()); + EXPECT_NOT_EQUAL(0u, visitorProgress._updates); + EXPECT_TRUE(*_valid == *visitor._valid); +} + + +TEST_F("require that basic visit works", Fixture()) +{ + uint32_t numDocs = 3000; + uint32_t docIdLimit = numDocs + 1; + f.populate(1, docIdLimit, docIdLimit); + f.flush(); + MyVisitor visitor(f._repo, docIdLimit, true); + MyVisitorProgress visitorProgress; + EXPECT_EQUAL(0.0, visitorProgress.getProgress()); + EXPECT_EQUAL(0u, visitorProgress._updates); + f._store->accept(visitor, visitorProgress, f._repo); + EXPECT_EQUAL(numDocs, visitor._visitCount); + EXPECT_EQUAL(0u, visitor._visitRmCount); + EXPECT_EQUAL(1.0, visitorProgress.getProgress()); + EXPECT_NOT_EQUAL(0u, visitorProgress._updates); + EXPECT_TRUE(*f._valid == *visitor._valid); +} + + +TEST_F("require that visit with remove works", Fixture()) +{ + uint32_t numDocs = 1000; + uint32_t docIdLimit = numDocs + 1; + f.populate(1, docIdLimit, docIdLimit); + uint32_t rmDocs = 20; + f.applyRemoves(rmDocs); + f.flush(); + f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, true); +} + +TEST_F("require that visit with rewrite and remove works", Fixture()) +{ + uint32_t numDocs = 1000; + uint32_t docIdLimit = numDocs + 1; + f.populate(1, docIdLimit, docIdLimit); + uint32_t rmDocs = 20; + f.applyRemoves(rmDocs); + f.flush(); + f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, true); + { + MyRewriteVisitor visitor(f._repo, docIdLimit, true); + MyVisitorProgress visitorProgress; + EXPECT_EQUAL(0.0, visitorProgress.getProgress()); + EXPECT_EQUAL(0u, visitorProgress._updates); + f._store->accept(visitor, 
visitorProgress, f._repo); + EXPECT_EQUAL(numDocs - rmDocs + 1, visitor._visitCount); + EXPECT_EQUAL(1.0, visitorProgress.getProgress()); + EXPECT_NOT_EQUAL(0u, visitorProgress._updates); + EXPECT_TRUE(*f._valid == *visitor._valid); + f.flush(); + } + f.checkRemovePostCond(numDocs, docIdLimit, rmDocs, false); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/engine/docsumapi/.gitignore b/searchlib/src/tests/engine/docsumapi/.gitignore new file mode 100644 index 00000000000..1b38a4ff745 --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +docsumapi_test +searchlib_docsumapi_test_app diff --git a/searchlib/src/tests/engine/docsumapi/CMakeLists.txt b/searchlib/src/tests/engine/docsumapi/CMakeLists.txt new file mode 100644 index 00000000000..a8fbe70de4b --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_docsumapi_test_app + SOURCES + docsumapi_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_docsumapi_test_app COMMAND searchlib_docsumapi_test_app) diff --git a/searchlib/src/tests/engine/docsumapi/DESC b/searchlib/src/tests/engine/docsumapi/DESC new file mode 100644 index 00000000000..fa9d72e98be --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/DESC @@ -0,0 +1 @@ +docsumapi test. Take a look at docsumapi.cpp for details. 
diff --git a/searchlib/src/tests/engine/docsumapi/FILES b/searchlib/src/tests/engine/docsumapi/FILES new file mode 100644 index 00000000000..3e2e2e636be --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/FILES @@ -0,0 +1 @@ +docsumapi.cpp diff --git a/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp b/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp new file mode 100644 index 00000000000..d96295bb7ad --- /dev/null +++ b/searchlib/src/tests/engine/docsumapi/docsumapi_test.cpp @@ -0,0 +1,185 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("docsumapi_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/packets.h> +#include <vespa/searchlib/engine/docsumapi.h> +#include <vespa/searchlib/engine/packetconverter.h> + +using namespace search::engine; +using namespace search::fs4transport; + +namespace { + +// light-weight network hop simulation +template <typename T> void copyPacket(T &src, T &dst) { + FNET_DataBuffer buf; + src.Encode(&buf); + dst.Decode(&buf, buf.GetDataLen()); +} + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +public: + void convertToRequest(); + void convertFromReply(); + int Main(); +}; + +document::GlobalId gid0("aaaaaaaaaaaa"); +document::GlobalId gid1("bbbbbbbbbbbb"); + +void +Test::convertToRequest() +{ + const string sessionId("qrserver.0.XXXXXXXXXXXXX.0"); + + FS4Packet_GETDOCSUMSX src; + src.setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS)); + src._features |= GDF_RANKP_QFLAGS; + src.setRanking("seven"); + src._qflags = 5u; + src._features |= GDF_RESCLASSNAME; + src.setResultClassName("resclass"); + src._features |= GDF_PROPERTIES; + src._propsVector.resize(3); + src._propsVector[0].allocEntries(2); + src._propsVector[0].setName("feature", strlen("feature")); + src._propsVector[0].setKey(0, "p1k1", strlen("p1k1")); + 
src._propsVector[0].setValue(0, "p1v1", strlen("p1v1")); + src._propsVector[0].setKey(1, "p1k2", strlen("p1k2")); + src._propsVector[0].setValue(1, "p1v2", strlen("p1v2")); + src._propsVector[1].allocEntries(2); + src._propsVector[1].setName("caches", strlen("caches")); + src._propsVector[1].setKey(0, "p2k1", strlen("p2k1")); + src._propsVector[1].setValue(0, "p2v1", strlen("p2v1")); + src._propsVector[1].setKey(1, "p2k2", strlen("p2k2")); + src._propsVector[1].setValue(1, "p2v2", strlen("p2v2")); + src._propsVector[2].allocEntries(1); + src._propsVector[2].setName("rank", strlen("rank")); + src._propsVector[2].setKey(0, "sessionId", strlen("sessionId")); + src._propsVector[2].setValue(0, sessionId.c_str(), sessionId.size()); + src._features |= GDF_QUERYSTACK; + src._stackItems = 14u; + src.setStackDump("stackdump"); + src._features |= GDF_LOCATION; + src.setLocation("location"); + src._features |= GDF_MLD; + src.AllocateDocIDs(2); + src._docid[0]._gid = gid0; + src._docid[0]._partid = 5; + src._docid[1]._gid = gid1; + src._docid[1]._partid = 6; + + { // full copy + FS4Packet_GETDOCSUMSX cpy; + copyPacket(src, cpy); + + DocsumRequest dst; + PacketConverter::toDocsumRequest(cpy, dst); + EXPECT_EQUAL((dst.getTimeOfDoom() - dst.getStartTime()).ms(), 4u); + EXPECT_EQUAL(dst.ranking, "seven"); + EXPECT_EQUAL(dst.queryFlags, 5u); + EXPECT_EQUAL(dst.resultClassName, "resclass"); + EXPECT_EQUAL(dst.propertiesMap.size(), 3u); + EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k1").get(), std::string("p1v1")); + EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k2").get(), std::string("p1v2")); + EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k1").get(), std::string("p2v1")); + EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k2").get(), std::string("p2v2")); + EXPECT_EQUAL(dst.propertiesMap.matchProperties().lookup("p3k1").get(), std::string("")); + EXPECT_EQUAL(std::string(&dst.stackDump[0], dst.stackDump.size()), "stackdump"); + 
EXPECT_EQUAL(dst.location, "location"); + EXPECT_EQUAL(dst._flags, 0u); + EXPECT_EQUAL(dst.hits.size(), 2u); + EXPECT_EQUAL(dst.hits[0].docid, 0u); + EXPECT_TRUE(dst.hits[0].gid == gid0); + EXPECT_EQUAL(dst.hits[0].path, 5u); + EXPECT_EQUAL(dst.hits[1].docid, 0u); + EXPECT_TRUE(dst.hits[1].gid == gid1); + EXPECT_EQUAL(dst.hits[1].path, 6u); + EXPECT_EQUAL(sessionId, + string(&dst.sessionId[0], dst.sessionId.size())); + } + { // without datetime + FS4Packet_GETDOCSUMSX cpy; + copyPacket(src, cpy); + + DocsumRequest dst; + PacketConverter::toDocsumRequest(cpy, dst); + } + { // without mld + FS4Packet_GETDOCSUMSX cpy; + copyPacket(src, cpy); + cpy._features &= ~GDF_MLD; + + DocsumRequest dst; + PacketConverter::toDocsumRequest(cpy, dst); + EXPECT_EQUAL(dst.useWideHits, false); + EXPECT_EQUAL(dst.hits.size(), 2u); + EXPECT_EQUAL(dst.hits[0].docid, 0u); + EXPECT_TRUE(dst.hits[0].gid == gid0); + EXPECT_EQUAL(dst.hits[1].docid, 0u); + EXPECT_TRUE(dst.hits[1].gid == gid1); + } + { // with ignore row flag + FS4Packet_GETDOCSUMSX tcpy; + copyPacket(src, tcpy); + tcpy._features |= GDF_FLAGS; + tcpy._flags = GDFLAG_IGNORE_ROW; + FS4Packet_GETDOCSUMSX cpy; + copyPacket(tcpy, cpy); + DocsumRequest dst; + PacketConverter::toDocsumRequest(cpy, dst); + EXPECT_EQUAL(dst._flags, static_cast<uint32_t>(GDFLAG_IGNORE_ROW)); + } +} + +void +Test::convertFromReply() +{ + DocsumReply src; + src.docsums.resize(2); + src.docsums[0].docid = 1; + src.docsums[0].gid = gid0; + src.docsums[0].data.resize(2); + src.docsums[0].data.str()[0] = 5; + src.docsums[0].data.str()[1] = 6; + src.docsums[1].docid = 2; + src.docsums[1].gid = gid1; + src.docsums[1].data.resize(3); + src.docsums[1].data.str()[0] = 7; + src.docsums[1].data.str()[1] = 8; + src.docsums[1].data.str()[2] = 9; + + { // test first + FS4Packet_DOCSUM dst; + PacketConverter::fromDocsumReplyElement(src.docsums[0], dst); + EXPECT_EQUAL(dst.getGid(), gid0); + EXPECT_EQUAL(dst.getBuf().size(), 2u); + EXPECT_EQUAL(dst.getBuf().c_str()[0], 
5); + EXPECT_EQUAL(dst.getBuf().c_str()[1], 6); + } + { // test second + FS4Packet_DOCSUM dst; + PacketConverter::fromDocsumReplyElement(src.docsums[1], dst); + EXPECT_EQUAL(dst.getGid(), gid1); + EXPECT_EQUAL(dst.getBuf().size(), 3u); + EXPECT_EQUAL(dst.getBuf().c_str()[0], 7); + EXPECT_EQUAL(dst.getBuf().c_str()[1], 8); + EXPECT_EQUAL(dst.getBuf().c_str()[2], 9); + } +} + +int +Test::Main() +{ + TEST_INIT("docsumapi_test"); + convertToRequest(); + convertFromReply(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/engine/monitorapi/.gitignore b/searchlib/src/tests/engine/monitorapi/.gitignore new file mode 100644 index 00000000000..66fc005087f --- /dev/null +++ b/searchlib/src/tests/engine/monitorapi/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +monitorapi_test +searchlib_monitorapi_test_app diff --git a/searchlib/src/tests/engine/monitorapi/CMakeLists.txt b/searchlib/src/tests/engine/monitorapi/CMakeLists.txt new file mode 100644 index 00000000000..f78a8e04fd1 --- /dev/null +++ b/searchlib/src/tests/engine/monitorapi/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_monitorapi_test_app + SOURCES + monitorapi_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_monitorapi_test_app COMMAND searchlib_monitorapi_test_app) diff --git a/searchlib/src/tests/engine/monitorapi/DESC b/searchlib/src/tests/engine/monitorapi/DESC new file mode 100644 index 00000000000..882636f1952 --- /dev/null +++ b/searchlib/src/tests/engine/monitorapi/DESC @@ -0,0 +1 @@ +monitorapi test. Take a look at monitorapi.cpp for details. 
diff --git a/searchlib/src/tests/engine/monitorapi/FILES b/searchlib/src/tests/engine/monitorapi/FILES new file mode 100644 index 00000000000..16ad6789632 --- /dev/null +++ b/searchlib/src/tests/engine/monitorapi/FILES @@ -0,0 +1 @@ +monitorapi.cpp diff --git a/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp b/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp new file mode 100644 index 00000000000..0df52cbe0d8 --- /dev/null +++ b/searchlib/src/tests/engine/monitorapi/monitorapi_test.cpp @@ -0,0 +1,126 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("monitorapi_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/packets.h> +#include <vespa/searchlib/engine/monitorapi.h> +#include <vespa/searchlib/engine/packetconverter.h> + +using namespace search::engine; +using namespace search::fs4transport; + +namespace { + +bool checkFeature(uint32_t features, uint32_t mask) { + return ((features & mask) != 0); +} + +bool checkNotFeature(uint32_t features, uint32_t mask) { + return !checkFeature(features, mask); +} + +// light-weight network hop simulation +template <typename T> void copyPacket(T &src, T &dst) { + FNET_DataBuffer buf; + src.Encode(&buf); + dst.Decode(&buf, buf.GetDataLen()); +} + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +public: + void convertToRequest(); + void convertFromReply(); + int Main(); +}; + +void +Test::convertToRequest() +{ + FS4Packet_MONITORQUERYX src; + src._features |= MQF_QFLAGS; + src._qflags = 1u; + + { // copy all + FS4Packet_MONITORQUERYX cpy; + copyPacket(src, cpy); + + MonitorRequest dst; + PacketConverter::toMonitorRequest(cpy, dst); + EXPECT_EQUAL(dst.flags, 1u); + } +} + +void +Test::convertFromReply() +{ + MonitorReply src; + src.mld = true; + src.partid = 1u; + src.timestamp = 2u; + src.totalNodes = 3u; + 
src.activeNodes = 4u; + src.totalParts = 5u; + src.activeParts = 6u; + src.flags = 7u; + src.activeDocs = 8u; + src.activeDocsRequested = true; + + { // full copy + MonitorReply cpy = src; + + FS4Packet_MONITORRESULTX dst; + PacketConverter::fromMonitorReply(cpy, dst); + EXPECT_EQUAL(dst._partid, 1u); + EXPECT_EQUAL(dst._timestamp, 2u); + EXPECT_TRUE(checkFeature(dst._features, MRF_MLD)); + EXPECT_EQUAL(dst._totalNodes, 3u); + EXPECT_EQUAL(dst._activeNodes, 4u); + EXPECT_EQUAL(dst._totalParts, 5u); + EXPECT_EQUAL(dst._activeParts, 6u); + EXPECT_TRUE(checkFeature(dst._features, MRF_RFLAGS)); + EXPECT_EQUAL(dst._rflags, 7u); + EXPECT_EQUAL(dst._activeDocs, 8u); + EXPECT_TRUE(checkFeature(dst._features, MRF_ACTIVEDOCS)); + } + { // non-mld + MonitorReply cpy = src; + cpy.mld = false; + + FS4Packet_MONITORRESULTX dst; + PacketConverter::fromMonitorReply(cpy, dst); + EXPECT_TRUE(checkNotFeature(dst._features, MRF_MLD)); + } + { // without flags + MonitorReply cpy = src; + cpy.flags = 0; + + FS4Packet_MONITORRESULTX dst; + PacketConverter::fromMonitorReply(cpy, dst); + EXPECT_TRUE(checkNotFeature(dst._features, MRF_RFLAGS)); + EXPECT_EQUAL(dst._rflags, 0u); + } + { // without activedocs + MonitorReply cpy = src; + cpy.activeDocsRequested = false; + + FS4Packet_MONITORRESULTX dst; + PacketConverter::fromMonitorReply(cpy, dst); + EXPECT_TRUE(checkNotFeature(dst._features, MRF_ACTIVEDOCS)); + EXPECT_EQUAL(dst._activeDocs, 0u); + } +} + +int +Test::Main() +{ + TEST_INIT("monitorapi_test"); + convertToRequest(); + convertFromReply(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/engine/searchapi/.gitignore b/searchlib/src/tests/engine/searchapi/.gitignore new file mode 100644 index 00000000000..92089e63cdd --- /dev/null +++ b/searchlib/src/tests/engine/searchapi/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +searchapi_test +searchlib_searchapi_test_app diff --git a/searchlib/src/tests/engine/searchapi/CMakeLists.txt 
b/searchlib/src/tests/engine/searchapi/CMakeLists.txt new file mode 100644 index 00000000000..89d1b8197a5 --- /dev/null +++ b/searchlib/src/tests/engine/searchapi/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_searchapi_test_app + SOURCES + searchapi_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_searchapi_test_app COMMAND searchlib_searchapi_test_app) diff --git a/searchlib/src/tests/engine/searchapi/DESC b/searchlib/src/tests/engine/searchapi/DESC new file mode 100644 index 00000000000..b006841d75d --- /dev/null +++ b/searchlib/src/tests/engine/searchapi/DESC @@ -0,0 +1 @@ +searchapi test. Take a look at searchapi.cpp for details. diff --git a/searchlib/src/tests/engine/searchapi/FILES b/searchlib/src/tests/engine/searchapi/FILES new file mode 100644 index 00000000000..806f04bbe4c --- /dev/null +++ b/searchlib/src/tests/engine/searchapi/FILES @@ -0,0 +1 @@ +searchapi.cpp diff --git a/searchlib/src/tests/engine/searchapi/searchapi_test.cpp b/searchlib/src/tests/engine/searchapi/searchapi_test.cpp new file mode 100644 index 00000000000..cd040bfaeac --- /dev/null +++ b/searchlib/src/tests/engine/searchapi/searchapi_test.cpp @@ -0,0 +1,267 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("searchapi_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/packets.h> +#include <vespa/searchlib/engine/searchapi.h> +#include <vespa/searchlib/engine/packetconverter.h> + +using namespace search::engine; +using namespace search::fs4transport; + +namespace { + +bool checkFeature(uint32_t features, uint32_t mask) { + return ((features & mask) != 0); +} + +bool checkNotFeature(uint32_t features, uint32_t mask) { + return !checkFeature(features, mask); +} + +// light-weight network hop simulation +template <typename T> void copyPacket(T &src, T &dst) { + FNET_DataBuffer buf; + src.Encode(&buf); + dst.Decode(&buf, buf.GetDataLen()); +} + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +public: + void propertyNames(); + void convertToRequest(); + void convertFromReply(); + int Main(); +}; + +void +Test::propertyNames() +{ + EXPECT_EQUAL(search::MapNames::RANK, "rank"); + EXPECT_EQUAL(search::MapNames::FEATURE, "feature"); + EXPECT_EQUAL(search::MapNames::HIGHLIGHTTERMS, "highlightterms"); + EXPECT_EQUAL(search::MapNames::MATCH, "match"); + EXPECT_EQUAL(search::MapNames::CACHES, "caches"); +} + +void +Test::convertToRequest() +{ + FS4Packet_QUERYX src; + src._offset = 2u; + src._maxhits = 3u; + src.setTimeout(fastos::TimeStamp(4*fastos::TimeStamp::MS)); + src._qflags = 5u; + src._features |= QF_RANKP; + src.setRanking("seven"); + src._features |= QF_PROPERTIES; + src._propsVector.resize(2); + src._propsVector[0].allocEntries(2); + src._propsVector[0].setName("feature", strlen("feature")); + src._propsVector[0].setKey(0, "p1k1", strlen("p1k1")); + src._propsVector[0].setValue(0, "p1v1", strlen("p1v1")); + src._propsVector[0].setKey(1, "p1k2", strlen("p1k2")); + src._propsVector[0].setValue(1, "p1v2", strlen("p1v2")); + src._propsVector[1].allocEntries(2); + src._propsVector[1].setName("caches", strlen("caches")); + 
src._propsVector[1].setKey(0, "p2k1", strlen("p2k1")); + src._propsVector[1].setValue(0, "p2v1", strlen("p2v1")); + src._propsVector[1].setKey(1, "p2k2", strlen("p2k2")); + src._propsVector[1].setValue(1, "p2v2", strlen("p2v2")); + src._features |= QF_SORTSPEC; + src.setSortSpec("sortspec"); + src._features |= QF_AGGRSPEC; + src.setAggrSpec("aggrspec"); + src._features |= QF_GROUPSPEC; + src.setGroupSpec("groupspec"); + src._features |= QF_SESSIONID; + src.setSessionId("sessionid"); + src._features |= QF_LOCATION; + src.setLocation("location"); + src._features |= QF_PARSEDQUERY; + src._numStackItems = 14u; + src.setStackDump("stackdump"); + + { // full copy + FS4Packet_QUERYX cpy; + copyPacket(src, cpy); + + SearchRequest dst; + PacketConverter::toSearchRequest(cpy, dst); + EXPECT_EQUAL(dst.offset, 2u); + EXPECT_EQUAL(dst.maxhits, 3u); + EXPECT_EQUAL((dst.getTimeOfDoom() - dst.getStartTime()).ms(), 4u); + EXPECT_EQUAL(dst.queryFlags, 5u); + EXPECT_EQUAL(vespalib::string("seven"), dst.ranking); + EXPECT_EQUAL(dst.propertiesMap.size(), 2u); + EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k1").get(), std::string("p1v1")); + EXPECT_EQUAL(dst.propertiesMap.featureOverrides().lookup("p1k2").get(), std::string("p1v2")); + EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k1").get(), std::string("p2v1")); + EXPECT_EQUAL(dst.propertiesMap.cacheProperties().lookup("p2k2").get(), std::string("p2v2")); + EXPECT_EQUAL(dst.propertiesMap.matchProperties().lookup("p3k1").get(), std::string("")); + EXPECT_EQUAL(dst.sortSpec, "sortspec"); + EXPECT_EQUAL(std::string(&dst.groupSpec[0], dst.groupSpec.size()), "groupspec"); + EXPECT_EQUAL(std::string(&dst.sessionId[0], dst.sessionId.size()), "sessionid"); + EXPECT_EQUAL(dst.location, "location"); + EXPECT_EQUAL(dst.stackItems, 14u); + EXPECT_EQUAL(std::string(&dst.stackDump[0], dst.stackDump.size()), "stackdump"); + } + { // without datetime + FS4Packet_QUERYX cpy; + copyPacket(src, cpy); + + SearchRequest dst; + 
PacketConverter::toSearchRequest(cpy, dst); + } +} + +void +Test::convertFromReply() +{ + SearchReply src; + src.offset = 1u; + src.totalHitCount = 2u; + src.maxRank = 3; + src.setDistributionKey(4u); + src.sortIndex.push_back(0); + src.sortIndex.push_back(1); + src.sortIndex.push_back(2); + src.sortData.push_back(11); + src.sortData.push_back(22); + src.groupResult.push_back(2); + src.useCoverage = true; + src.coverage = SearchReply::Coverage(5, 3); + src.useWideHits = true; + src.hits.resize(2); + document::GlobalId gid0("aaaaaaaaaaaa"); + document::GlobalId gid1("bbbbbbbbbbbb"); + src.hits[0].gid = gid0; + src.hits[0].metric = 5; + src.hits[0].path = 11; + src.hits[0].setDistributionKey(100); + src.hits[1].gid = gid1; + src.hits[1].metric = 4; + src.hits[1].path = 10; + src.hits[1].setDistributionKey(105); + + { // full copy + SearchReply cpy = src; + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_EQUAL(dst._offset, 1u); + EXPECT_EQUAL(dst._numDocs, 2u); + EXPECT_EQUAL(dst._totNumDocs, 2u); + EXPECT_EQUAL(dst._maxRank, 3); + EXPECT_EQUAL(4u, dst.getDistributionKey()); + EXPECT_TRUE(checkFeature(dst._features, QRF_SORTDATA)); + EXPECT_EQUAL(dst._sortIndex[0], 0u); + EXPECT_EQUAL(dst._sortIndex[1], 1u); + EXPECT_EQUAL(dst._sortIndex[2], 2u); + EXPECT_EQUAL(dst._sortData[0], 11); + EXPECT_EQUAL(dst._sortData[1], 22); + EXPECT_TRUE(checkFeature(dst._features, QRF_GROUPDATA)); + EXPECT_EQUAL(dst._groupDataLen, 1u); + EXPECT_EQUAL(dst._groupData[0], 2); + EXPECT_TRUE(checkFeature(dst._features, QRF_COVERAGE)); + EXPECT_EQUAL(dst._coverageDocs, 3u); + EXPECT_EQUAL(dst._activeDocs, 5u); + EXPECT_TRUE(checkFeature(dst._features, QRF_MLD)); + EXPECT_TRUE(dst._hits[0]._gid == gid0); + EXPECT_EQUAL(dst._hits[0]._metric, 5); + EXPECT_EQUAL(dst._hits[0]._partid, 11u); + EXPECT_EQUAL(dst._hits[0].getDistributionKey(), 100u); + EXPECT_TRUE(dst._hits[1]._gid == gid1); + 
EXPECT_EQUAL(dst._hits[1]._metric, 4); + EXPECT_EQUAL(dst._hits[1]._partid, 10u); + EXPECT_EQUAL(dst._hits[1].getDistributionKey(), 105u); + } + { // not sortdata + SearchReply cpy = src; + cpy.sortIndex.clear(); + cpy.sortData.clear(); + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_TRUE(checkNotFeature(dst._features, QRF_SORTDATA)); + } + { // not groupdata + SearchReply cpy = src; + cpy.groupResult.clear(); + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_TRUE(checkNotFeature(dst._features, QRF_GROUPDATA)); + } + { // non-full coverage + SearchReply cpy = src; + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_TRUE(checkFeature(dst._features, QRF_COVERAGE)); + EXPECT_EQUAL(dst._coverageDocs, 3u); + EXPECT_EQUAL(dst._activeDocs, 5u); + } + { // not coverage + SearchReply cpy = src; + cpy.useCoverage = false; + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_TRUE(checkNotFeature(dst._features, QRF_COVERAGE)); + } + { // non-mld + SearchReply cpy = src; + cpy.useWideHits = false; + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_TRUE(checkNotFeature(dst._features, QRF_MLD)); + EXPECT_TRUE(dst._hits[0]._gid == gid0); + EXPECT_EQUAL(dst._hits[0]._metric, 5); + EXPECT_TRUE(dst._hits[1]._gid == gid1); + EXPECT_EQUAL(dst._hits[1]._metric, 4); + } + { // non-mld not siteid + SearchReply cpy = src; + cpy.useWideHits = false; + + FS4Packet_QUERYRESULTX dst0; + PacketConverter::fromSearchReply(cpy, dst0); + FS4Packet_QUERYRESULTX dst; + copyPacket(dst0, dst); + EXPECT_TRUE(checkNotFeature(dst._features, 
QRF_MLD)); + EXPECT_TRUE(dst._hits[0]._gid == gid0); + EXPECT_EQUAL(dst._hits[0]._metric, 5); + EXPECT_TRUE(dst._hits[1]._gid == gid1); + EXPECT_EQUAL(dst._hits[1]._metric, 4); + } +} + +int +Test::Main() +{ + TEST_INIT("searchapi_test"); + propertyNames(); + convertToRequest(); + convertFromReply(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/engine/transportserver/.gitignore b/searchlib/src/tests/engine/transportserver/.gitignore new file mode 100644 index 00000000000..09d836e0004 --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +transportserver_test +vlog.txt +searchlib_transportserver_test_app diff --git a/searchlib/src/tests/engine/transportserver/CMakeLists.txt b/searchlib/src/tests/engine/transportserver/CMakeLists.txt new file mode 100644 index 00000000000..502279bc728 --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_transportserver_test_app + SOURCES + transportserver_test.cpp + DEPENDS + searchlib +) +vespa_add_test( + NAME searchlib_transportserver_test_app + COMMAND searchlib_transportserver_test_app + ENVIRONMENT "VESPA_LOG_TARGET=file:vlog.txt;VESPA_LOG_LEVEL=\"all -spam\"" +) diff --git a/searchlib/src/tests/engine/transportserver/DESC b/searchlib/src/tests/engine/transportserver/DESC new file mode 100644 index 00000000000..2fb736a9319 --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/DESC @@ -0,0 +1 @@ +transportserver test. Take a look at transportserver.cpp for details. 
diff --git a/searchlib/src/tests/engine/transportserver/FILES b/searchlib/src/tests/engine/transportserver/FILES new file mode 100644 index 00000000000..ec1b60cf739 --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/FILES @@ -0,0 +1 @@ +transportserver.cpp diff --git a/searchlib/src/tests/engine/transportserver/transportserver_test.cpp b/searchlib/src/tests/engine/transportserver/transportserver_test.cpp new file mode 100644 index 00000000000..af4dc4761bc --- /dev/null +++ b/searchlib/src/tests/engine/transportserver/transportserver_test.cpp @@ -0,0 +1,187 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("transportserver_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/document/base/documentid.h> +#include <vespa/searchlib/common/packets.h> +#include <vespa/searchlib/engine/transportserver.h> +#include <vespa/searchlib/engine/searchapi.h> +#include <vespa/searchlib/engine/docsumapi.h> +#include <vespa/searchlib/engine/monitorapi.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/fnet/fnet.h> +#include <vespa/searchlib/engine/errorcodes.h> + +using namespace document; +using namespace vespalib; +using namespace search::engine; +using namespace search::fs4transport; + +class SyncServer : public search::engine::SearchServer, + public search::engine::DocsumServer, + public search::engine::MonitorServer +{ +private: + virtual SearchReply::UP search(SearchRequest::Source request, SearchClient &client); + virtual DocsumReply::UP getDocsums(DocsumRequest::Source request, DocsumClient &client); + virtual MonitorReply::UP ping(MonitorRequest::UP request, MonitorClient &client); + + SyncServer(const SyncServer &); + SyncServer &operator=(const SyncServer &); +public: + SyncServer() {} + virtual ~SyncServer() {} +}; + +SearchReply::UP +SyncServer::search(SearchRequest::Source request, 
SearchClient &) +{ + const SearchRequest &req = *request.get(); + SearchReply::UP reply(new SearchReply()); + SearchReply &ret = *reply; + ret.request = request.release(); + LOG(info, "responding to search request..."); + ret.offset = req.offset; + return reply; +} + +DocsumReply::UP +SyncServer::getDocsums(DocsumRequest::Source request, DocsumClient &) +{ + DocsumReply::UP reply(new DocsumReply()); + DocsumReply &ret = *reply; + ret.request = request.release(); + LOG(info, "responding to docsum request..."); + ret.docsums.resize(1); + ret.docsums[0].setData("data", strlen("data")); + ret.docsums[0].gid = DocumentId(vespalib::make_string("doc::100")).getGlobalId(); + return reply; +} + +MonitorReply::UP +SyncServer::ping(MonitorRequest::UP request, MonitorClient &) +{ + MonitorRequest &req = *request; + MonitorReply::UP reply(new MonitorReply()); + MonitorReply &ret = *reply; + LOG(info, "responding to monitor request..."); + ret.timestamp = req.flags; + return reply; +} + +TEST("transportserver") { + { + SyncServer server; + TransportServer transport(server, server, server, 0, + TransportServer::DEBUG_ALL); + ASSERT_TRUE(transport.start()); + int port = transport.getListenPort(); + ASSERT_TRUE(port > 0); + { + FNET_Context ctx; + FastOS_ThreadPool pool(128 * 1024); + FNET_Transport client; + ASSERT_TRUE(client.Start(&pool)); + + FNET_PacketQueue adminQ; + FNET_Connection *conn = client.Connect(make_string("tcp/localhost:%d", port).c_str(), + &FS4PersistentPacketStreamer::Instance, &adminQ); + ASSERT_TRUE(conn != 0); + { + FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX(); + mq->_qflags = 30; + mq->_features |= MQF_QFLAGS; + conn->PostPacket(mq, FNET_NOID); + FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx); + ASSERT_TRUE(p != 0); + ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX); + FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p; + EXPECT_EQUAL(r->_timestamp, 30u); + p->Free(); + } + { + FNET_PacketQueue q; + FNET_Channel *ch = 
conn->OpenChannel(&q, FNET_Context()); + FS4Packet_QUERYX *qx = new FS4Packet_QUERYX(); + qx->_features |= QF_PARSEDQUERY; + qx->_offset = 100; + ch->Send(qx); + FNET_Packet *p = q.DequeuePacket(60000, &ctx); + ASSERT_TRUE(p != 0); + ASSERT_TRUE(p->GetPCODE() == PCODE_QUERYRESULTX); + FS4Packet_QUERYRESULTX *r = (FS4Packet_QUERYRESULTX*)p; + EXPECT_EQUAL(r->_offset, 100u); + p->Free(); + ch->CloseAndFree(); + } + { + FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX(); + mq->_qflags = 40; + mq->_features |= MQF_QFLAGS; + conn->PostPacket(mq, FNET_NOID); + FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx); + ASSERT_TRUE(p != 0); + ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX); + FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p; + EXPECT_EQUAL(r->_timestamp, 40u); + p->Free(); + } + { + FNET_PacketQueue q; + FNET_Channel *ch = conn->OpenChannel(&q, FNET_Context()); + FS4Packet_GETDOCSUMSX *qdx = new FS4Packet_GETDOCSUMSX(); + ch->Send(qdx); + FNET_Packet *p = q.DequeuePacket(60000, &ctx); + ASSERT_TRUE(p != 0); + ASSERT_TRUE(p->GetPCODE() == PCODE_DOCSUM); + FS4Packet_DOCSUM *r = (FS4Packet_DOCSUM*)p; + EXPECT_EQUAL(r->getGid(), DocumentId("doc::100").getGlobalId()); + p->Free(); + p = q.DequeuePacket(60000, &ctx); + ASSERT_TRUE(p != 0); + ASSERT_TRUE(p->GetPCODE() == PCODE_EOL); + p->Free(); + ch->CloseAndFree(); + } + { + FS4Packet_MONITORQUERYX *mq = new FS4Packet_MONITORQUERYX(); + mq->_qflags = 50; + mq->_features |= MQF_QFLAGS; + conn->PostPacket(mq, FNET_NOID); + FNET_Packet *p = adminQ.DequeuePacket(60000, &ctx); + ASSERT_TRUE(p != 0); + ASSERT_TRUE(p->GetPCODE() == PCODE_MONITORRESULTX); + FS4Packet_MONITORRESULTX *r = (FS4Packet_MONITORRESULTX*)p; + EXPECT_EQUAL(r->_timestamp, 50u); + p->Free(); + } + // shut down client + conn->CloseAdminChannel(); + client.Close(conn); + conn->SubRef(); + client.sync(); + client.ShutDown(true); + pool.Close(); + } + + } +} + +void printError(ErrorCode ecode) { + fprintf(stderr, "error code %u: 
'%s'\n", ecode, getStringFromErrorCode(ecode)); +} + +TEST("print errors") { + printError(ECODE_NO_ERROR); + printError(ECODE_GENERAL_ERROR); + printError(ECODE_QUERY_PARSE_ERROR); + printError(ECODE_ALL_PARTITIONS_DOWN); + printError(ECODE_ILLEGAL_DATASET); + printError(ECODE_OVERLOADED); + printError(ECODE_NOT_IMPLEMENTED); + printError(ECODE_QUERY_NOT_ALLOWED); + printError(ECODE_TIMEOUT); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/.gitignore b/searchlib/src/tests/features/.gitignore new file mode 100644 index 00000000000..1c71377a25e --- /dev/null +++ b/searchlib/src/tests/features/.gitignore @@ -0,0 +1,11 @@ +.depend +Makefile +beta_features_test +featurebenchmark +nativerank_test +prod_features_test +vlog1.txt +vlog2.txt +vlog3.txt +searchlib_prod_features_test_app +searchlib_featurebenchmark_app diff --git a/searchlib/src/tests/features/CMakeLists.txt b/searchlib/src/tests/features/CMakeLists.txt new file mode 100644 index 00000000000..f1703b02c8b --- /dev/null +++ b/searchlib/src/tests/features/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_prod_features_test_app + SOURCES + prod_features.cpp + prod_features_framework.cpp + prod_features_attributematch.cpp + prod_features_fieldmatch.cpp + prod_features_fieldtermmatch.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_prod_features_test_app COMMAND sh prod_features_test.sh) +vespa_add_executable(searchlib_featurebenchmark_app + SOURCES + featurebenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurebenchmark_app COMMAND searchlib_featurebenchmark_app BENCHMARK) diff --git a/searchlib/src/tests/features/DESC b/searchlib/src/tests/features/DESC new file mode 100644 index 00000000000..333541aa0a0 --- /dev/null +++ b/searchlib/src/tests/features/DESC @@ -0,0 +1 @@ +features test. 
Take a look at features.cpp for details. diff --git a/searchlib/src/tests/features/FILES b/searchlib/src/tests/features/FILES new file mode 100644 index 00000000000..6e53d562fc0 --- /dev/null +++ b/searchlib/src/tests/features/FILES @@ -0,0 +1,3 @@ +beta_features.cpp +prod_features.cpp +nativerank.cpp diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt new file mode 100644 index 00000000000..a4319bdae53 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-double.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=double +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 
3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt new file mode 100644 index 00000000000..0371c72f13a --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-float.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=float +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 
9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt new file mode 100644 index 00000000000..0e27edf2e09 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-int.txt @@ -0,0 +1,7 @@ +case=dotProduct 
+numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=int +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 
8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt new file mode 100644 index 00000000000..ca1aa57e738 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-array-long.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=10000000 +numdocs=1000 +numvalues=1000 +collectiontype=array +datatype=long +dotProduct.vector=[0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] diff --git a/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt new file mode 100644 index 00000000000..38c323c667d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/dotproduct/c-100000-1000-wset.txt @@ -0,0 +1,7 @@ +case=dotProduct +numruns=1000000 +numdocs=1000 +numvalues=1000 +collectiontype=wset +datatype=int 
+dotProduct.vector={0:2,1:2,2:2,3:2,4:2,5:2,6:2,7:2,8:2,9:2,10:2,11:2,12:2,13:2,14:2,15:2,16:2,17:2,18:2,19:2,20:2,21:2,22:2,23:2,24:2,25:2,26:2,27:2,28:2,29:2,30:2,31:2,32:2,33:2,34:2,35:2,36:2,37:2,38:2,39:2,40:2,41:2,42:2,43:2,44:2,45:2,46:2,47:2,48:2,49:2,50:2,51:2,52:2,53:2,54:2,55:2,56:2,57:2,58:2,59:2,60:2,61:2,62:2,63:2,64:2,65:2,66:2,67:2,68:2,69:2,70:2,71:2,72:2,73:2,74:2,75:2,76:2,77:2,78:2,79:2,80:2,81:2,82:2,83:2,84:2,85:2,86:2,87:2,88:2,89:2,90:2,91:2,92:2,93:2,94:2,95:2,96:2,97:2,98:2,99:2,100:2,101:2,102:2,103:2,104:2,105:2,106:2,107:2,108:2,109:2,110:2,111:2,112:2,113:2,114:2,115:2,116:2,117:2,118:2,119:2,120:2,121:2,122:2,123:2,124:2,125:2,126:2,127:2,128:2,129:2,130:2,131:2,132:2,133:2,134:2,135:2,136:2,137:2,138:2,139:2,140:2,141:2,142:2,143:2,144:2,145:2,146:2,147:2,148:2,149:2,150:2,151:2,152:2,153:2,154:2,155:2,156:2,157:2,158:2,159:2,160:2,161:2,162:2,163:2,164:2,165:2,166:2,167:2,168:2,169:2,170:2,171:2,172:2,173:2,174:2,175:2,176:2,177:2,178:2,179:2,180:2,181:2,182:2,183:2,184:2,185:2,186:2,187:2,188:2,189:2,190:2,191:2,192:2,193:2,194:2,195:2,196:2,197:2,198:2,199:2,200:2,201:2,202:2,203:2,204:2,205:2,206:2,207:2,208:2,209:2,210:2,211:2,212:2,213:2,214:2,215:2,216:2,217:2,218:2,219:2,220:2,221:2,222:2,223:2,224:2,225:2,226:2,227:2,228:2,229:2,230:2,231:2,232:2,233:2,234:2,235:2,236:2,237:2,238:2,239:2,240:2,241:2,242:2,243:2,244:2,245:2,246:2,247:2,248:2,249:2,250:2,251:2,252:2,253:2,254:2,255:2,256:2,257:2,258:2,259:2,260:2,261:2,262:2,263:2,264:2,265:2,266:2,267:2,268:2,269:2,270:2,271:2,272:2,273:2,274:2,275:2,276:2,277:2,278:2,279:2,280:2,281:2,282:2,283:2,284:2,285:2,286:2,287:2,288:2,289:2,290:2,291:2,292:2,293:2,294:2,295:2,296:2,297:2,298:2,299:2,300:2,301:2,302:2,303:2,304:2,305:2,306:2,307:2,308:2,309:2,310:2,311:2,312:2,313:2,314:2,315:2,316:2,317:2,318:2,319:2,320:2,321:2,322:2,323:2,324:2,325:2,326:2,327:2,328:2,329:2,330:2,331:2,332:2,333:2,334:2,335:2,336:2,337:2,338:2,339:2,340:2,341:2,342:2,343:2,344:2,345:2,346:2,347:2,34
8:2,349:2,350:2,351:2,352:2,353:2,354:2,355:2,356:2,357:2,358:2,359:2,360:2,361:2,362:2,363:2,364:2,365:2,366:2,367:2,368:2,369:2,370:2,371:2,372:2,373:2,374:2,375:2,376:2,377:2,378:2,379:2,380:2,381:2,382:2,383:2,384:2,385:2,386:2,387:2,388:2,389:2,390:2,391:2,392:2,393:2,394:2,395:2,396:2,397:2,398:2,399:2,400:2,401:2,402:2,403:2,404:2,405:2,406:2,407:2,408:2,409:2,410:2,411:2,412:2,413:2,414:2,415:2,416:2,417:2,418:2,419:2,420:2,421:2,422:2,423:2,424:2,425:2,426:2,427:2,428:2,429:2,430:2,431:2,432:2,433:2,434:2,435:2,436:2,437:2,438:2,439:2,440:2,441:2,442:2,443:2,444:2,445:2,446:2,447:2,448:2,449:2,450:2,451:2,452:2,453:2,454:2,455:2,456:2,457:2,458:2,459:2,460:2,461:2,462:2,463:2,464:2,465:2,466:2,467:2,468:2,469:2,470:2,471:2,472:2,473:2,474:2,475:2,476:2,477:2,478:2,479:2,480:2,481:2,482:2,483:2,484:2,485:2,486:2,487:2,488:2,489:2,490:2,491:2,492:2,493:2,494:2,495:2,496:2,497:2,498:2,499:2,500:2,501:2,502:2,503:2,504:2,505:2,506:2,507:2,508:2,509:2,510:2,511:2,512:2,513:2,514:2,515:2,516:2,517:2,518:2,519:2,520:2,521:2,522:2,523:2,524:2,525:2,526:2,527:2,528:2,529:2,530:2,531:2,532:2,533:2,534:2,535:2,536:2,537:2,538:2,539:2,540:2,541:2,542:2,543:2,544:2,545:2,546:2,547:2,548:2,549:2,550:2,551:2,552:2,553:2,554:2,555:2,556:2,557:2,558:2,559:2,560:2,561:2,562:2,563:2,564:2,565:2,566:2,567:2,568:2,569:2,570:2,571:2,572:2,573:2,574:2,575:2,576:2,577:2,578:2,579:2,580:2,581:2,582:2,583:2,584:2,585:2,586:2,587:2,588:2,589:2,590:2,591:2,592:2,593:2,594:2,595:2,596:2,597:2,598:2,599:2,600:2,601:2,602:2,603:2,604:2,605:2,606:2,607:2,608:2,609:2,610:2,611:2,612:2,613:2,614:2,615:2,616:2,617:2,618:2,619:2,620:2,621:2,622:2,623:2,624:2,625:2,626:2,627:2,628:2,629:2,630:2,631:2,632:2,633:2,634:2,635:2,636:2,637:2,638:2,639:2,640:2,641:2,642:2,643:2,644:2,645:2,646:2,647:2,648:2,649:2,650:2,651:2,652:2,653:2,654:2,655:2,656:2,657:2,658:2,659:2,660:2,661:2,662:2,663:2,664:2,665:2,666:2,667:2,668:2,669:2,670:2,671:2,672:2,673:2,674:2,675:2,676:2,677:2,678:2,679:2,680:2,681:
2,682:2,683:2,684:2,685:2,686:2,687:2,688:2,689:2,690:2,691:2,692:2,693:2,694:2,695:2,696:2,697:2,698:2,699:2,700:2,701:2,702:2,703:2,704:2,705:2,706:2,707:2,708:2,709:2,710:2,711:2,712:2,713:2,714:2,715:2,716:2,717:2,718:2,719:2,720:2,721:2,722:2,723:2,724:2,725:2,726:2,727:2,728:2,729:2,730:2,731:2,732:2,733:2,734:2,735:2,736:2,737:2,738:2,739:2,740:2,741:2,742:2,743:2,744:2,745:2,746:2,747:2,748:2,749:2,750:2,751:2,752:2,753:2,754:2,755:2,756:2,757:2,758:2,759:2,760:2,761:2,762:2,763:2,764:2,765:2,766:2,767:2,768:2,769:2,770:2,771:2,772:2,773:2,774:2,775:2,776:2,777:2,778:2,779:2,780:2,781:2,782:2,783:2,784:2,785:2,786:2,787:2,788:2,789:2,790:2,791:2,792:2,793:2,794:2,795:2,796:2,797:2,798:2,799:2,800:2,801:2,802:2,803:2,804:2,805:2,806:2,807:2,808:2,809:2,810:2,811:2,812:2,813:2,814:2,815:2,816:2,817:2,818:2,819:2,820:2,821:2,822:2,823:2,824:2,825:2,826:2,827:2,828:2,829:2,830:2,831:2,832:2,833:2,834:2,835:2,836:2,837:2,838:2,839:2,840:2,841:2,842:2,843:2,844:2,845:2,846:2,847:2,848:2,849:2,850:2,851:2,852:2,853:2,854:2,855:2,856:2,857:2,858:2,859:2,860:2,861:2,862:2,863:2,864:2,865:2,866:2,867:2,868:2,869:2,870:2,871:2,872:2,873:2,874:2,875:2,876:2,877:2,878:2,879:2,880:2,881:2,882:2,883:2,884:2,885:2,886:2,887:2,888:2,889:2,890:2,891:2,892:2,893:2,894:2,895:2,896:2,897:2,898:2,899:2,900:2,901:2,902:2,903:2,904:2,905:2,906:2,907:2,908:2,909:2,910:2,911:2,912:2,913:2,914:2,915:2,916:2,917:2,918:2,919:2,920:2,921:2,922:2,923:2,924:2,925:2,926:2,927:2,928:2,929:2,930:2,931:2,932:2,933:2,934:2,935:2,936:2,937:2,938:2,939:2,940:2,941:2,942:2,943:2,944:2,945:2,946:2,947:2,948:2,949:2,950:2,951:2,952:2,953:2,954:2,955:2,956:2,957:2,958:2,959:2,960:2,961:2,962:2,963:2,964:2,965:2,966:2,967:2,968:2,969:2,970:2,971:2,972:2,973:2,974:2,975:2,976:2,977:2,978:2,979:2,980:2,981:2,982:2,983:2,984:2,985:2,986:2,987:2,988:2,989:2,990:2,991:2,992:2,993:2,994:2,995:2,996:2,997:2,998:2,999:2} diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt 
b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt new file mode 100644 index 00000000000..3b3e0915e9e --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt new file mode 100644 index 00000000000..322784fc409 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt new file mode 100644 index 00000000000..9a31201941c --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt new file mode 100644 index 
00000000000..0a7b99c79fb --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt new file mode 100644 index 00000000000..1f859dc4ac6 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt new file mode 100644 index 00000000000..1d9b6de23a4 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt new file mode 100644 index 00000000000..c50f602a111 --- /dev/null +++ 
b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt new file mode 100644 index 00000000000..163a9bfd96d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt new file mode 100644 index 00000000000..b6a1094140b --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-100.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt new file mode 100644 index 00000000000..d3fc48be0be --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1-callgrind.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar 
+query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt new file mode 100644 index 00000000000..b6d4d2b4bb3 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b 
x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt new file mode 100644 index 00000000000..67d1db34e17 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt new file mode 100644 index 00000000000..838ee6871f0 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100-callgrind.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt new file mode 100644 index 00000000000..3e02b0ee27f --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt new file mode 
100644 index 00000000000..407579b6bee --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x 
x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt new file mode 100644 index 00000000000..57aa1759b23 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x 
x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt new file mode 100644 index 00000000000..d91604f0bb5 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a 
b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt new file mode 100644 index 00000000000..7d388e25cfa --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt new file mode 100644 index 00000000000..7cfc899b1f3 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 
+fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt new file mode 100644 index 00000000000..f06091fbcaa --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-1000.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a 
b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt new file mode 100644 index 00000000000..b62b8b21e7c --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt new file mode 100644 index 00000000000..19f133833aa --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt new file mode 100644 index 00000000000..7dbfc2731a1 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b 
x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt new file mode 100644 index 00000000000..e436ffb270c --- /dev/null +++ 
b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a 
x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt new file mode 100644 index 00000000000..ec2727a7035 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x 
c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt new file mode 100644 index 00000000000..cadd682a817 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x 
x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt new file mode 100644 index 00000000000..66c3203ad25 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x 
x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt new file mode 100644 index 00000000000..c82fba41604 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a 
x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git 
a/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt new file mode 100644 index 00000000000..bd2404eba81 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-10000.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b 
x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c a x x b x x x a x b x x x x x a b x x c +numruns=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt new file mode 100644 index 00000000000..6266271fe4f --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt new file mode 100644 index 00000000000..9f7593f8c76 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt new file mode 100644 index 00000000000..20a26196c44 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-100.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=100 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt new file mode 100644 index 00000000000..126a7f4355d --- /dev/null +++ 
b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-1000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt new file mode 100644 index 00000000000..456762710e1 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-10000.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt new file mode 100644 index 00000000000..2839245ccdd --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-5.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=5 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt new file mode 100644 index 00000000000..a94fb7cecd8 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=50 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt new file mode 100644 index 00000000000..a53dd4fd6a7 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20-500.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) 
+index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 +fieldMatch(bar).maxAlternativeSegmentations=500 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt b/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt new file mode 100644 index 00000000000..82d455795d4 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/c-20.txt @@ -0,0 +1,6 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a x x b x x x a x b x x x x x a b x x c +numruns=10000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt new file mode 100644 index 00000000000..b55e2d60429 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-02.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x +numruns=100000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git 
a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt new file mode 100644 index 00000000000..8f934a3e2a1 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-10.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x a b c x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x x +numruns=100000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt new file mode 100644 index 00000000000..e1b687802f9 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/phrase-50.txt @@ -0,0 +1,7 @@ +case=fieldMatch +feature=fieldMatch(bar) +index=bar +query=a b c +field=a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x 
x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x a b c x x x x x x x +numruns=100000 +fieldMatch(bar).maxAlternativeSegmentations=1000 diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb b/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb new file mode 100644 index 00000000000..ffbbc25e354 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/plot.rb @@ -0,0 +1,30 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+require '../plotlib' + +if ARGV.size == 0 + puts "must specify folder" + exit +end + +folder = ARGV[0] +extra = "" +extra = ARGV[1] if ARGV.size == 2 +field = [20, 100, 1000, 10000] +segmentation = [1, 5, 10, 50, 100, 500, 1000, 10000] + +dat = folder + "/plot.dat" +png = folder + "/plot.png" + +file = File.open(dat, "w") +segmentation.each do |s| + file.write("#{s} ") + field.each do |f| + file.write(extract_data(folder + "/c-#{f}-#{s}.out") + " ") + end + file.write("\n") +end +file.close + +titles = ["fl-20", "fl-100", "fl-1000", "fl-10000"] + +plot_graph(dat, titles, png, "fieldMatch feature (#{extra})", "maxAlternativeSegmentations", "execution time per document (ms)", folder) diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt b/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt new file mode 100644 index 00000000000..a96922e58fb --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/readme.txt @@ -0,0 +1,22 @@ +** Running the benchmark ** +ruby run.rb folder +folder is the place to store the output files. + + +** Generating gnu plots ** +ruby plot.rb folder "description" +folder contains the output files and description is used when setting the title of the graph. + + +** Config file format ** +c-x-y.txt +x is the length of the field and y is the value for maxAlternativeSegmentations. + + +** Running callgrind ** +valgrind --tool=callgrind ../../featurebenchmark -c c-1000-1-callgrind.txt +valgrind --tool=callgrind ../../featurebenchmark -c c-1000-100-callgrind.txt +The numruns config value is reduced in these two config files. + +The output after running callgrind is two files: callgrind.out.x and callgrind.out.y. +Use kcachegrind to look at these two files. 
diff --git a/searchlib/src/tests/features/benchmark/fieldmatch/run.rb b/searchlib/src/tests/features/benchmark/fieldmatch/run.rb new file mode 100644 index 00000000000..d0350c454e8 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/fieldmatch/run.rb @@ -0,0 +1,17 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +if ARGV.size == 0 + puts "must specify folder" + exit +end + +folder = ARGV[0] +cases = [20, 100, 1000, 10000] +segmentations = [1, 5, 10, 50, 100, 500, 1000, 10000] +cases.each do |c| + segmentations.each do |s| + file = "c-#{c}-#{s}" + cmd = "script -c \"../../featurebenchmark -c #{file}.txt\" " + folder + "/#{file}.out" + puts cmd + `#{cmd}` + end +end diff --git a/searchlib/src/tests/features/benchmark/plotlib.rb b/searchlib/src/tests/features/benchmark/plotlib.rb new file mode 100644 index 00000000000..53a1ee984a9 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/plotlib.rb @@ -0,0 +1,36 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+def plot_graph(dat, titles, png, title, xlabel, ylabel, folder) + plot_cmd = ""; + plot_cmd += "set terminal png\n" + plot_cmd += "set output \"#{png}\"\n" + plot_cmd += "set title \"#{title}\"\n" + plot_cmd += "set xlabel \"#{xlabel}\"\n" + plot_cmd += "set ylabel \"#{ylabel}\"\n" + plot_cmd += "set logscale\n" + + plots = [] + c = 2 + titles.each do |title| + plots.push("\"#{dat}\" using 1:#{c} title \"#{title}\" with linespoints") + c += 1 + end + plot_cmd += "plot " + plot_cmd += plots.join(", ") + + plot_cmd_file = File.open(folder + "/plot.cmd", "w") + plot_cmd_file.write(plot_cmd); + plot_cmd_file.close + cmd = "gnuplot " + folder + "/plot.cmd" + puts cmd + puts `#{cmd}` +end + +def extract_data(file_name) + content = IO.readlines(file_name).join + r = /ETPD:\s*(\d+\.\d+)/ + if content =~ r + return $1 + end + return "0" +end + diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt new file mode 100644 index 00000000000..f46508379af --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-1.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt new file mode 100644 index 00000000000..cd9a34865cb --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-10.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt new file mode 100644 index 00000000000..1d3007a14c5 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-100.txt @@ -0,0 +1,4 @@ +case=rankingExpression 
+feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt new file mode 100644 index 00000000000..0a9db3c3539 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-200.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt new file mode 100644 index 00000000000..41600fb943d --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-400.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 
+rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt new file mode 100644 index 00000000000..b4704f8a822 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-5.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt new file mode 100644 index 00000000000..74790ff0a21 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/c-50.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt b/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt new file mode 100644 index 00000000000..57c250137fe --- /dev/null +++ 
b/searchlib/src/tests/features/benchmark/rankingexpression/c-800.txt @@ -0,0 +1,4 @@ +case=rankingExpression +feature=rankingExpression +numruns=1000000 +rankingExpression.rankingScript=1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1+1 diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb b/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb new file mode 100644 index 00000000000..ca586e1176e --- /dev/null 
+++ b/searchlib/src/tests/features/benchmark/rankingexpression/plot.rb @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +require '../plotlib' + +folder = ARGV[0] +extra = "" +extra = ARGV[1] if ARGV.size == 2 +trees = [1, 5, 10, 50, 100, 200, 400, 800] + +dat = folder + "/plot.dat" +png = folder + "/plot.png" + +file = File.open(dat, "w") +trees.each do |t| + file.write("#{t} ") + file.write(extract_data(folder + "/c-#{t}.out") + " ") + file.write("\n") +end +file.close + +titles = ["expression"] + +plot_graph(dat, titles, png, "rankingExpression feature (#{extra})", "number of trees", "execution time per document (ms)", folder) diff --git a/searchlib/src/tests/features/benchmark/rankingexpression/run.rb b/searchlib/src/tests/features/benchmark/rankingexpression/run.rb new file mode 100644 index 00000000000..2f707e35b51 --- /dev/null +++ b/searchlib/src/tests/features/benchmark/rankingexpression/run.rb @@ -0,0 +1,14 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +if ARGV.size == 0 + puts "must specify folder" + exit +end + +folder = ARGV[0] +trees = [1, 5, 10, 50, 100, 200, 400, 800] +trees.each do |t| + file = "c-#{t}" + cmd = "script -c \"../../featurebenchmark -c #{file}.txt\" " + folder + "/#{file}.out" + puts cmd + `#{cmd}` +end diff --git a/searchlib/src/tests/features/beta/.gitignore b/searchlib/src/tests/features/beta/.gitignore new file mode 100644 index 00000000000..3a7ba416343 --- /dev/null +++ b/searchlib/src/tests/features/beta/.gitignore @@ -0,0 +1 @@ +searchlib_beta_features_test_app diff --git a/searchlib/src/tests/features/beta/CMakeLists.txt b/searchlib/src/tests/features/beta/CMakeLists.txt new file mode 100644 index 00000000000..ee7020f01fc --- /dev/null +++ b/searchlib/src/tests/features/beta/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_beta_features_test_app + SOURCES + beta_features.cpp + DEPENDS + searchlib +) +vespa_add_test( + NAME searchlib_beta_features_test_app + COMMAND searchlib_beta_features_test_app + ENVIRONMENT "VESPA_LOG_TARGET=file:vlog1.txt" +) diff --git a/searchlib/src/tests/features/beta/beta_features.cpp b/searchlib/src/tests/features/beta/beta_features.cpp new file mode 100644 index 00000000000..e5642f475de --- /dev/null +++ b/searchlib/src/tests/features/beta/beta_features.cpp @@ -0,0 +1,726 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("beta_features_test"); + +#include <boost/tokenizer.hpp> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributemanager.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/floatbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/features/agefeature.h> +#include <vespa/searchlib/features/attributefeature.h> +#include <vespa/searchlib/features/attributematchfeature.h> +#include <vespa/searchlib/features/fieldlengthfeature.h> +#include <vespa/searchlib/features/fieldmatchfeature.h> +#include <vespa/searchlib/features/fieldtermmatchfeature.h> +#include <vespa/searchlib/features/firstphasefeature.h> +#include <vespa/searchlib/features/flow_completeness_feature.h> +#include <vespa/searchlib/features/jarowinklerdistancefeature.h> +#include <vespa/searchlib/features/matchfeature.h> +#include <vespa/searchlib/features/nowfeature.h> +#include <vespa/searchlib/features/proximityfeature.h> +#include <vespa/searchlib/features/queryfeature.h> +#include 
<vespa/searchlib/features/querycompletenessfeature.h> +#include <vespa/searchlib/features/randomfeature.h> +#include <vespa/searchlib/features/rankingexpressionfeature.h> +#include <vespa/searchlib/features/reverseproximityfeature.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/features/termeditdistancefeature.h> +#include <vespa/searchlib/features/termfeature.h> +#include <vespa/searchlib/features/utils.h> +#include <vespa/searchlib/fef/featurenamebuilder.h> +#include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/fef/test/ftlib.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +//--------------------------------------------------------------------------------------------------------------------- +// TermPositionList +//--------------------------------------------------------------------------------------------------------------------- +typedef std::pair<uint32_t, uint32_t> TermPosition; +class TermPositionList : public std::vector<TermPosition> { +public: + TermPositionList &add(uint32_t termId, uint32_t pos) { + push_back(TermPosition(termId, pos)); + return *this; + } + TermPositionList &clear() { + std::vector<TermPosition>::clear(); + return *this; + } +}; + +//--------------------------------------------------------------------------------------------------------------------- +// Test +//--------------------------------------------------------------------------------------------------------------------- +class Test : public FtTestApp { +public: + int Main(); + void testJaroWinklerDistance(); + void testProximity(); + void testFlowCompleteness(); + void testQueryCompleteness(); + void testReverseProximity(); + void testTermEditDistance(); + +private: + void assertJaroWinklerDistance(const vespalib::string &query, const 
vespalib::string &field, feature_t expected); + void assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss); + void assertTermEditDistance(const vespalib::string &query, const vespalib::string &field, + uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub); + +private: + search::fef::BlueprintFactory _factory; +}; + +TEST_APPHOOK(Test); + +int +Test::Main() +{ + TEST_INIT("beta_features_test"); + + // Configure factory with all known blueprints. + setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + // Test all features. + testJaroWinklerDistance(); TEST_FLUSH(); + testProximity(); TEST_FLUSH(); + testFlowCompleteness(); TEST_FLUSH(); + testQueryCompleteness(); TEST_FLUSH(); + testReverseProximity(); TEST_FLUSH(); + testTermEditDistance(); TEST_FLUSH(); + + TEST_DONE(); + return 0; +} + +void +Test::testJaroWinklerDistance() +{ + { + // Test blueprint. + JaroWinklerDistanceBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "jaroWinklerDistance")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, params.add("foo")); + FT_SETUP_FAIL(pt, params.add("0")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + FT_SETUP_FAIL(pt, ie, params); + FT_SETUP_OK (pt, ie, params.add("foo"), in.add("fieldLength(foo)"), out.add("out")); + FT_SETUP_FAIL(pt, ie, params.add("afoo")); + FT_SETUP_FAIL(pt, ie, params.add("wfoo")); + FT_SETUP_FAIL(pt, ie, params.add("1")); + } + { + FT_DUMP_EMPTY(_factory, "jaroWinklerDistance"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + ie.getBuilder().addField(FieldType::INDEX, 
CollectionType::WEIGHTEDSET, "wbar"); + FT_DUMP_EMPTY(_factory, "jaroWinklerDistance", ie); // must be a single value index field + + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + StringList dump; + FT_DUMP(_factory, "jaroWinklerDistance", ie, dump/*.add("jaroWinklerDistance(bar).out")*/); + } + } + { + // These measures are taken from table 6 in the paper "Overview of Record Linkage and Current Research Directions" + // by William E. Winkler. It is available at: http://www.census.gov/srd/papers/pdf/rrs2006-02.pdf + // + // Note that the strings used as query and field here are transformed into query and field terms, and therefore + // they all need to be unique. The second occurrence of a character in the names below is therefore + // capitalized. A comment is given whenever our result is different from what is presented in the paper (only 2 + // of 17 are actually different). + assertJaroWinklerDistance("shackleford", "shackelford", 1 - 0.982f); + assertJaroWinklerDistance("dunNigham", "cunnigham", 1 - 0.852f); // 3x'n' in query, removed one + assertJaroWinklerDistance("nichlesoN", "nichulsoN", 1 - 0.956f); + assertJaroWinklerDistance("jones", "johnsoN", 1 - 0.832f); + assertJaroWinklerDistance("masSey", "masSie", 1 - 0.933f); + assertJaroWinklerDistance("abroms", "abrAms", 1 - 0.922f); + assertJaroWinklerDistance("hardin", "martinez", 1 - 0.722f); // no measure was given + assertJaroWinklerDistance("itman", "smith", 1 - 0.622f); // no measure was given + assertJaroWinklerDistance("jeraldinE", "geraldinE", 1 - 0.926f); + assertJaroWinklerDistance("marhtA", "marthA", 1 - 0.961f); + assertJaroWinklerDistance("micheLlE", "michael", 1 - 0.921f); + assertJaroWinklerDistance("julies", "juliUs", 1 - 0.933f); + assertJaroWinklerDistance("tanyA", "tonyA", 1 - 0.880f); + assertJaroWinklerDistance("dwayne", "duane", 1 - 0.765f); // was 0.840 in paper + assertJaroWinklerDistance("sean", "suSan", 1 - 0.672f); // was 0.805 in paper + 
assertJaroWinklerDistance("jon", "john", 1 - 0.933f); + assertJaroWinklerDistance("jon", "jan", 1 - 0.800f); // no measure was given + } +} + +void +Test::assertJaroWinklerDistance(const vespalib::string &query, const vespalib::string &field, feature_t expected) +{ + FtFeatureTest ft(_factory, "jaroWinklerDistance(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP(ft, query, StringMap().add("foo", field), 1); + + RankResult res; + ASSERT_TRUE(ft.execute(res.setEpsilon(0.001).addScore("jaroWinklerDistance(foo).out", expected))); +} + +void +Test::testProximity() +{ + + { // Test blueprint. + ProximityBlueprint prototype; + { + EXPECT_TRUE(assertCreateInstance(prototype, "proximity")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + FT_SETUP_FAIL(prototype, params.add("1")); + FT_SETUP_FAIL(prototype, params.add("2")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(prototype, ie, params.add("foo")); + FT_SETUP_FAIL(prototype, ie, params.add("0")); + FT_SETUP_OK (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB")); + FT_SETUP_FAIL(prototype, ie, params.add("2")); + } + + { + FT_DUMP_EMPTY(_factory, "proximity"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "proximity", ie); // must be an index field + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); +#ifdef VISIT_BETA_FEATURES + for (uint32_t a = 0; a < 5; ++a) { + for (uint32_t b = a + 1; b < 6; ++b) { + vespalib::string bn = vespalib::make_string("proximity(bar,%u,%u)", a, b); + dump.add(bn + ".out"); + dump.add(bn + ".posA"); + dump.add(bn + ".posB"); + } + } +#endif + FT_DUMP(_factory, "proximity", 
ie, dump); + } + } + { + // Test executor. + FtFeatureTest ft(_factory, "proximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ASSERT_TRUE(ft.setup()); + + search::fef::test::RankResult exp; + exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX). + addScore("proximity(foo,0,1).posA", util::FEATURE_MAX). + addScore("proximity(foo,0,1).posB", util::FEATURE_MIN); + ASSERT_TRUE(ft.execute(exp, 1)); + } + { + FtFeatureTest ft(_factory, "proximity(foo,0,1)"); + ASSERT_TRUE(!ft.setup()); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 50)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 30)); + search::fef::test::RankResult exp; + exp.addScore("proximity(foo,0,1).out", util::FEATURE_MAX). + addScore("proximity(foo,0,1).posA", util::FEATURE_MAX). 
+ addScore("proximity(foo,0,1).posB", util::FEATURE_MIN); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(exp, 1)); + + ASSERT_TRUE(mdb->addOccurence("foo", 1, 20)); + ASSERT_TRUE(mdb->apply(2)); + ASSERT_TRUE(ft.execute(exp, 2)); + + ASSERT_TRUE(mdb->addOccurence("foo", 0, 10)); + ASSERT_TRUE(mdb->apply(3)); + exp .clear() + .addScore("proximity(foo,0,1).out", 10.0f) + .addScore("proximity(foo,0,1).posA", 10.0f) + .addScore("proximity(foo,0,1).posB", 20.0f); + ASSERT_TRUE(ft.execute(exp, 3)); + } + { + for (int a = 0; a < 10; ++a) { + for (int b = 0; b < 10; ++b) { + FtFeatureTest ft(_factory, "proximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 10)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, a)); + ASSERT_TRUE(mdb->addOccurence("foo", 1, b)); + ASSERT_TRUE(mdb->apply(1)); + + search::fef::test::RankResult exp; + exp .addScore("proximity(foo,0,1).out", a < b ? b - a : util::FEATURE_MAX) + .addScore("proximity(foo,0,1).posA", a < b ? a : util::FEATURE_MAX) + .addScore("proximity(foo,0,1).posB", a < b ? b : util::FEATURE_MIN); + TEST_STATE(vespalib::make_string("a=%u, b=%u", a, b).c_str()); + EXPECT_TRUE(ft.execute(exp)); + } + } + } +} + +void +Test::testQueryCompleteness() +{ + { // Test blueprint. 
+ QueryCompletenessBlueprint prototype; + + EXPECT_TRUE(assertCreateInstance(prototype, "queryCompleteness")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_OK (prototype, ie, params.add("foo"), in, out.add("hit").add("miss")); + FT_SETUP_OK (prototype, ie, params.add("0"), in, out); + FT_SETUP_OK (prototype, ie, params.add("1"), in, out); + FT_SETUP_FAIL(prototype, ie, params.add("2")); + + FT_DUMP_EMPTY(_factory, "queryCompleteness"); + FT_DUMP_EMPTY(_factory, "queryCompleteness", ie); + } + + { // Test executor. + FtFeatureTest ft(_factory, "queryCompleteness(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // add 5 term nodes + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + // from 0 to 5 hits (5 to 0 misses) + for (uint32_t i = 0; i < 6; ++i) { + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 10); + for (uint32_t j = 0; j < i; ++j) { + mdb->addOccurence("foo", j, j); + } + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.addScore("queryCompleteness(foo).hit", (feature_t)(i)); + exp.addScore("queryCompleteness(foo).miss", (feature_t)(5 - i)); + EXPECT_TRUE(ft.execute(exp)); + } + } + { // Test executor. 
+ FtFeatureTest ft(_factory, "queryCompleteness(foo,5,10)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + // before window + assertQueryCompleteness(ft, 4, 0, 1); + // inside window + assertQueryCompleteness(ft, 5, 1, 0); + // inside window + assertQueryCompleteness(ft, 9, 1, 0); + // after window + assertQueryCompleteness(ft, 10, 0, 1); + } +} + +void +Test::assertQueryCompleteness(FtFeatureTest & ft, uint32_t firstOcc, uint32_t hits, uint32_t miss) +{ + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 20); + mdb->addOccurence("foo", 0, firstOcc); + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.addScore("queryCompleteness(foo,5,10).hit", hits); + exp.addScore("queryCompleteness(foo,5,10).miss", miss); + EXPECT_TRUE(ft.execute(exp)); +} + +// BFI implementation: brute force and ignorance +int cntFlow(int m1, int m2, int m3, int m4) +{ + int flow = 0; + + for (int p1p = 0; p1p < 4; p1p++) { + if (((1 << p1p) & m1) == 0) continue; + for (int p2p = 0; p2p < 4; p2p++) { + if (((1 << p2p) & m2) == 0) continue; + int f2 = 1; + if (p2p != p1p) ++f2; + for (int p3p = 0; p3p < 4; p3p++) { + if (((1 << p3p) & m3) == 0) continue; + int f3 = f2; + if (p3p != p1p && p3p != p2p) ++f3; + for (int p4p = 0; p4p < 4; p4p++) { + if (((1 << p4p) & m4) == 0) continue; + int f4 = f3; + if (p4p != p1p && p4p != p2p && p4p != p3p) ++f4; + if (flow < f4) flow = f4; + } + } + } + } + return flow; +} + +void +Test::testFlowCompleteness() +{ + { // Test blueprint. 
+ TEST_STATE("test flow completeness blueprint"); + FlowCompletenessBlueprint prototype; + + EXPECT_TRUE(assertCreateInstance(prototype, "flowCompleteness")); + + StringList params, in, out; + TEST_DO(FT_SETUP_FAIL(prototype, params)); + TEST_DO(FT_SETUP_FAIL(prototype, params.add("foo"))); + TEST_DO(FT_SETUP_FAIL(prototype, params.add("0"))); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + + params.clear(); + params.add("foo"); + + out.add("completeness").add("fieldCompleteness") + .add("queryCompleteness").add("elementWeight") + .add("weight").add("flow"); + + StringList expDump; + for (size_t i = 0; i < out.size(); ++i) { + vespalib::string fn = "flowCompleteness(foo)."; + fn.append(out[i]); + expDump.push_back(fn); + } + + TEST_DO(FT_SETUP_OK(prototype, ie, params, in, out)); + TEST_DO(FT_SETUP_FAIL(prototype, ie, params.add("2"))); + TEST_DO(FT_DUMP_EMPTY(_factory, "flowCompleteness")); +#ifdef notyet + TEST_DO(FT_DUMP(_factory, "flowCompleteness", ie, expDump)); +#endif + } + + { // Test executor. 
+ TEST_STATE("test flow completeness executor"); + + FtFeatureTest ft(_factory, "flowCompleteness(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // add 5 term nodes + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + // from 0 to 5 hits (5 to 0 misses) + for (uint32_t i = 0; i < 6; ++i) { + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 10); + for (uint32_t j = 0; j < i; ++j) { + mdb->addOccurence("foo", j, j); + } + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.setEpsilon(0.000001); + exp.addScore("flowCompleteness(foo)", i * 0.15); + exp.addScore("flowCompleteness(foo).completeness", i * 0.15); // == 0.1*0.5 + 0.2*(1-0.5) + exp.addScore("flowCompleteness(foo).fieldCompleteness", i * 0.1); + exp.addScore("flowCompleteness(foo).queryCompleteness", i * 0.2); + exp.addScore("flowCompleteness(foo).elementWeight", i > 0 ? 
1 : 0); + exp.addScore("flowCompleteness(foo).weight", 100.0); + exp.addScore("flowCompleteness(foo).flow", i); + TEST_STATE("run execute"); + EXPECT_TRUE(ft.execute(exp)); + } + } + + + { // Test executor, pass 2 + TEST_STATE("test flow completeness executor (pass 2)"); + + FtFeatureTest ft(_factory, "flowCompleteness(foo)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // add 4 term nodes + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + // each term will have 1 to 3 positions it matches, + // with various points of overlap + + for (uint32_t t0m = 1; t0m < 15 ; ++t0m) { + + for (uint32_t t1m = 1; t1m < 15 ; ++t1m) { + + for (uint32_t t2m = 1; t2m < 15 ; ++t2m) { + + for (uint32_t t3m = 1; t3m < 15 ; ++t3m) { + + int flow = cntFlow(t0m, t1m, t2m, t3m); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setFieldLength("foo", 4); + for (int pos = 0; pos < 4; ++pos) { + if (((1 << pos) & t0m) != 0) mdb->addOccurence("foo", 0, pos); + if (((1 << pos) & t1m) != 0) mdb->addOccurence("foo", 1, pos); + if (((1 << pos) & t2m) != 0) mdb->addOccurence("foo", 2, pos); + if (((1 << pos) & t3m) != 0) mdb->addOccurence("foo", 3, pos); + } + + ASSERT_TRUE(mdb->apply(1)); + RankResult exp; + exp.setEpsilon(0.0001); + exp.addScore("flowCompleteness(foo)", flow * 0.25); + exp.addScore("flowCompleteness(foo).completeness", flow * 0.25); + exp.addScore("flowCompleteness(foo).fieldCompleteness", flow * 0.25); + exp.addScore("flowCompleteness(foo).queryCompleteness", flow * 0.25); + exp.addScore("flowCompleteness(foo).elementWeight", 1); + exp.addScore("flowCompleteness(foo).weight", 100.0); + exp.addScore("flowCompleteness(foo).flow", flow); + TEST_STATE(vespalib::make_string("execute t0m=%u t1m=%u t2m=%u t3m=%u flow=%u", + t0m, t1m, t2m, t3m, 
flow).c_str()); + ASSERT_TRUE(ft.execute(exp)); + } + } + } + } + } +} + + +void +Test::testReverseProximity() +{ + { // Test blueprint. + ReverseProximityBlueprint prototype; + { + EXPECT_TRUE(assertCreateInstance(prototype, "reverseProximity")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + FT_SETUP_FAIL(prototype, params.add("1")); + FT_SETUP_FAIL(prototype, params.add("2")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(prototype, ie, params.add("foo")); + FT_SETUP_FAIL(prototype, ie, params.add("0")); + FT_SETUP_OK (prototype, ie, params.add("1"), in, out.add("out").add("posA").add("posB")); + FT_SETUP_FAIL(prototype, ie, params.add("2")); + } + + { + FT_DUMP_EMPTY(_factory, "reverseProximity"); + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "reverseProximity", ie); // must be an index field + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); +#ifdef VISIT_BETA_FEATURES + for (uint32_t a = 0; a < 5; ++a) { + for (uint32_t b = a + 1; b < 6; ++b) { + vespalib::string bn = vespalib::make_string("reverseProximity(bar,%u,%u)", a, b); + dump.add(bn + ".out"); + dump.add(bn + ".posA"); + dump.add(bn + ".posB"); + } + } +#endif + FT_DUMP(_factory, "reverseProximity", ie, dump); + } + } + + + { // Test executor. + FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ASSERT_TRUE(ft.setup()); + search::fef::test::RankResult exp; + exp.addScore("reverseProximity(foo,0,1).out", util::FEATURE_MAX). + addScore("reverseProximity(foo,0,1).posA", util::FEATURE_MIN). 
+ addScore("reverseProximity(foo,0,1).posB", util::FEATURE_MAX); + ASSERT_TRUE(ft.execute(exp, 1)); + } + { + FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); ASSERT_TRUE(!ft.setup()); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 50)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 20)); + search::fef::test::RankResult exp; + exp .addScore("reverseProximity(foo,0,1).out", util::FEATURE_MAX) + .addScore("reverseProximity(foo,0,1).posA", util::FEATURE_MIN) + .addScore("reverseProximity(foo,0,1).posB", util::FEATURE_MAX); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(exp, 1)); + + ASSERT_TRUE(mdb->addOccurence("foo", 1, 30)); + ASSERT_TRUE(mdb->apply(2)); + ASSERT_TRUE(ft.execute(exp, 2)); + + ASSERT_TRUE(mdb->addOccurence("foo", 1, 10)); + ASSERT_TRUE(mdb->apply(3)); + exp .clear() + .addScore("reverseProximity(foo,0,1).out", 10.0f) + .addScore("reverseProximity(foo,0,1).posA", 20.0f) + .addScore("reverseProximity(foo,0,1).posB", 10.0f); + ASSERT_TRUE(ft.execute(exp, 3)); + } + { + for (int a = 0; a < 10; ++a) { + for (int b = 0; b < 10; ++b) { + FtFeatureTest ft(_factory, "reverseProximity(foo,0,1)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 10)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, a)); + ASSERT_TRUE(mdb->addOccurence("foo", 1, b)); + ASSERT_TRUE(mdb->apply(1)); + + search::fef::test::RankResult exp; + exp .addScore("reverseProximity(foo,0,1).out", a >= b ? 
a - b : util::FEATURE_MAX) + .addScore("reverseProximity(foo,0,1).posA", a >= b ? a : util::FEATURE_MIN) + .addScore("reverseProximity(foo,0,1).posB", a >= b ? b : util::FEATURE_MAX); + ASSERT_TRUE(ft.execute(exp)); + } + } + } +} + +void +Test::testTermEditDistance() +{ + { // Test blueprint. + TermEditDistanceBlueprint prototype; + { + EXPECT_TRUE(assertCreateInstance(prototype, "termEditDistance")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); + FT_SETUP_FAIL(prototype, params.add("foo")); + FT_SETUP_FAIL(prototype, params.add("0")); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "afoo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + FT_SETUP_FAIL(prototype, ie, params.clear()); + FT_SETUP_OK (prototype, ie, params.add("foo"), in.add("fieldLength(foo)"), out.add("out").add("del").add("ins").add("sub")); + FT_SETUP_FAIL(prototype, ie, params.add("afoo")); + FT_SETUP_FAIL(prototype, ie, params.add("wfoo")); + FT_SETUP_FAIL(prototype, ie, params.add("0")); + } + + { + FT_DUMP_EMPTY(_factory, "termEditDistance"); + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_DUMP_EMPTY(_factory, "termEditDistance", ie); // must be a single-value index field + + StringList dump; +#ifdef VISIT_BETA_FEATURES + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + vespalib::string bn = "termEditDistance(bar)"; + dump.add(bn + ".out"); + dump.add(bn + ".del"); + dump.add(bn + ".ins"); + dump.add(bn + ".sub"); +#endif + FT_DUMP(_factory, "termEditDistance", ie, dump); + } + } + + { // Test executor. 
+ assertTermEditDistance("abcde", "abcde", 0, 0, 0); + assertTermEditDistance("abcde", "abcd.", 0, 0, 1); + assertTermEditDistance("abcde", ".bcd.", 0, 0, 2); + assertTermEditDistance("abcde", ".bc..", 0, 0, 3); + assertTermEditDistance("abcde", "..c..", 0, 0, 4); + assertTermEditDistance("abcd" , "..c..", 0, 1, 3); + assertTermEditDistance("abc", "..c..", 0, 2, 2); + assertTermEditDistance("ab", "..b..", 0, 3, 1); + assertTermEditDistance("a", "..a..", 0, 4, 0); + } +} + +// #pragma GCC diagnostic ignored "-Wstrict-aliasing" + +void +Test::assertTermEditDistance(const vespalib::string &query, const vespalib::string &field, + uint32_t expectedDel, uint32_t expectedIns, uint32_t expectedSub) +{ + // Setup feature test. + vespalib::string feature = "termEditDistance(foo)"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + StringMap foo; + foo.add("foo", field); + FT_SETUP(ft, query, foo, 1); + + // Execute and compare results. 
+ search::fef::test::RankResult exp; + exp .addScore(feature + ".out", (feature_t)(expectedDel*1 + expectedIns*1 + expectedSub*1)) + .addScore(feature + ".del", (feature_t)expectedDel) + .addScore(feature + ".ins", (feature_t)expectedIns) + .addScore(feature + ".sub", (feature_t)expectedSub); + ASSERT_TRUE(ft.execute(exp)); +} diff --git a/searchlib/src/tests/features/element_completeness/.gitignore b/searchlib/src/tests/features/element_completeness/.gitignore new file mode 100644 index 00000000000..9d45fbda0ad --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/.gitignore @@ -0,0 +1 @@ +searchlib_element_completeness_test_app diff --git a/searchlib/src/tests/features/element_completeness/CMakeLists.txt b/searchlib/src/tests/features/element_completeness/CMakeLists.txt new file mode 100644 index 00000000000..aee13befe2d --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_element_completeness_test_app + SOURCES + element_completeness_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_element_completeness_test_app COMMAND searchlib_element_completeness_test_app) diff --git a/searchlib/src/tests/features/element_completeness/FILES b/searchlib/src/tests/features/element_completeness/FILES new file mode 100644 index 00000000000..5b995b34729 --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/FILES @@ -0,0 +1 @@ +element_completeness_test.cpp diff --git a/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp new file mode 100644 index 00000000000..24d1625520d --- /dev/null +++ b/searchlib/src/tests/features/element_completeness/element_completeness_test.cpp @@ -0,0 +1,201 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/element_completeness_feature.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +std::vector<vespalib::string> featureNamesFoo() { + std::vector<vespalib::string> f; + f.push_back("elementCompleteness(foo).completeness"); + f.push_back("elementCompleteness(foo).fieldCompleteness"); + f.push_back("elementCompleteness(foo).queryCompleteness"); + f.push_back("elementCompleteness(foo).elementWeight"); + return f; +} + +const size_t TOTAL = 0; +const size_t FIELD = 1; +const size_t QUERY = 2; +const size_t WEIGHT = 3; + +FtIndex indexFoo() { + FtIndex idx; + idx.field("foo"); + return idx; +} + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + } +}; + +// Dump visitor that checks each visited feature name against featureNamesFoo(), in order. +struct FeatureDumpFixture : public IDumpFeatureVisitor { + std::vector<vespalib::string> expect; + size_t dumped; + virtual void visitDumpFeature(const vespalib::string &name) { + // EXPECT_LESS is non-fatal: when more features are dumped than expected, count the extra one but never index past 'expect'. + if (!EXPECT_LESS(dumped, expect.size())) { + ++dumped; + return; + } + EXPECT_EQUAL(expect[dumped++], name); + } + FeatureDumpFixture() : IDumpFeatureVisitor(), expect(featureNamesFoo()), dumped(0) {} +}; + +struct
RankFixture : BlueprintFactoryFixture { + Properties idxProps; + RankFixture() : BlueprintFactoryFixture(), idxProps() {} + void test(const vespalib::string &queryStr, const FtIndex &index, + feature_t field, feature_t query, int32_t weight = 1, feature_t factor = 0.5, + bool useStaleMatchData = false) + { + std::vector<vespalib::string> names = featureNamesFoo(); + ASSERT_TRUE(names.size() == 4u); + RankResult expect; + expect.addScore(names[TOTAL], field*factor + query*(1-factor)) + .addScore(names[FIELD], field).addScore(names[QUERY], query) + .addScore(names[WEIGHT], (double)weight); + FtFeatureTest ft(factory, names); + ft.getIndexEnv().getProperties().import(idxProps); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz"); + FtTestApp::FT_SETUP(ft, FtUtil::toQuery(queryStr), index, 1); + RankResult actual; + EXPECT_TRUE(ft.executeOnly(actual, useStaleMatchData ? 
2 : 1)); + for (size_t i = 0; i < names.size(); ++i) { + TEST_STATE(names[i].c_str()); + EXPECT_EQUAL(expect.getScore(names[i]), actual.getScore(names[i])); + } + } +}; + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("elementCompleteness"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<ElementCompletenessBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that appropriate features are dumped", ElementCompletenessBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); + EXPECT_EQUAL(f3.expect.size(), f3.dumped); +} + +TEST_FF("require that setup can be done on index field", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo"))); +} + +TEST_FF("require that setup can not be done on attribute field", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar"))); +} + +TEST_FF("require that default config parameters are correct", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo"))); + EXPECT_EQUAL(0u, f1.getParams().fieldId); + EXPECT_EQUAL(0.5, f1.getParams().fieldCompletenessImportance); +} + +TEST_FF("require that blueprint can be configured", ElementCompletenessBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + 
f2.indexEnv.getProperties().add("elementCompleteness(foo).fieldCompletenessImportance", "0.75"); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo"))); + EXPECT_EQUAL(0.75, f1.getParams().fieldCompletenessImportance); +} + +TEST_F("require that no match gives zero outputs", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("y"), 0.0, 0.0, 0)); +} + +TEST_F("require that perfect match gives max outputs", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x"), 1.0, 1.0)); +} + +TEST_F("require that matching half the field gives appropriate outputs", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x y"), 0.5, 1.0)); + TEST_DO(f.test("x y", indexFoo().element("x y a b"), 0.5, 1.0)); +} + +TEST_F("require that matching half the query gives appropriate outputs", RankFixture) { + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5)); + TEST_DO(f.test("x y a b", indexFoo().element("x y"), 1.0, 0.5)); +} + +TEST_F("require that query completeness is affected by query term weight", RankFixture) { + TEST_DO(f.test("x!300 y!100", indexFoo().element("y"), 1.0, 0.25)); + TEST_DO(f.test("x!300 y!100", indexFoo().element("x"), 1.0, 0.75)); +} + +TEST_F("require that field completeness is not affected by duplicate field tokens", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x y y y"), 0.25, 1.00)); + TEST_DO(f.test("x", indexFoo().element("x x y y"), 0.25, 1.00)); + TEST_DO(f.test("x", indexFoo().element("x x x y"), 0.25, 1.00)); + TEST_DO(f.test("x", indexFoo().element("x x x x"), 0.25, 1.00)); +} + +TEST_F("require that field completeness is affected by duplicate query terms", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x x x x"), 0.25, 1.00)); + TEST_DO(f.test("x x", indexFoo().element("x x x x"), 0.50, 1.00)); + TEST_DO(f.test("x x x", indexFoo().element("x x x x"), 0.75, 1.00)); + TEST_DO(f.test("x x x x", indexFoo().element("x x x x"), 1.00, 1.00)); +} + +TEST_F("require that a single field 
token can match multiple query terms", RankFixture) { + TEST_DO(f.test("x", indexFoo().element("x"), 1.00, 1.00)); + TEST_DO(f.test("x x", indexFoo().element("x"), 1.00, 1.00)); + TEST_DO(f.test("x x x", indexFoo().element("x"), 1.00, 1.00)); + TEST_DO(f.test("x x x x", indexFoo().element("x"), 1.00, 1.00)); +} + +TEST_F("require that field completeness importance can be adjusted", RankFixture) { + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.1"); + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.1)); + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.4"); + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.4)); + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.7"); + TEST_DO(f.test("x y", indexFoo().element("x"), 1.0, 0.5, 1, 0.7)); +} + +TEST_F("require that order is not relevant", RankFixture) { + TEST_DO(f.test("x y a b", indexFoo().element("n x n y"), 0.5, 0.5)); + TEST_DO(f.test("a b x y", indexFoo().element("y x n n"), 0.5, 0.5)); + TEST_DO(f.test("a y x b", indexFoo().element("x n y n"), 0.5, 0.5)); +} + +TEST_F("require that element is selected based on completeness times element weight", RankFixture) { + f.idxProps.clear().add("elementCompleteness(foo).fieldCompletenessImportance", "0.0"); + TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 39).element("a b", 19).element("x y a b", 10), 1.0, 1.0, 10, 0.0)); + TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 39).element("a b", 21).element("x y a b", 10), 1.0, 0.5, 21, 0.0)); + TEST_DO(f.test("x y a b", indexFoo().element("x", 39).element("y", 45).element("a b", 21).element("x y a b", 10), 1.0, 0.25, 45, 0.0)); +} + +TEST_F("require that stale match data is ignored", RankFixture) { + TEST_DO(f.test("x y a b", indexFoo().element("x y"), 0.0, 0.0, 0, 0.5, true)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/features/element_similarity_feature/.gitignore b/searchlib/src/tests/features/element_similarity_feature/.gitignore new file mode 100644 index 00000000000..36e60cd547e --- /dev/null +++ b/searchlib/src/tests/features/element_similarity_feature/.gitignore @@ -0,0 +1 @@ +searchlib_element_similarity_feature_test_app diff --git a/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt new file mode 100644 index 00000000000..08e3b04cd73 --- /dev/null +++ b/searchlib/src/tests/features/element_similarity_feature/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_element_similarity_feature_test_app + SOURCES + element_similarity_feature_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_element_similarity_feature_test_app COMMAND searchlib_element_similarity_feature_test_app) diff --git a/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp new file mode 100644 index 00000000000..181f2fb71f3 --- /dev/null +++ b/searchlib/src/tests/features/element_similarity_feature/element_similarity_feature_test.cpp @@ -0,0 +1,371 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/element_similarity_feature.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <initializer_list> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const vespalib::string DEFAULT = "elementSimilarity(foo)"; +const vespalib::string PROXIMITY = "elementSimilarity(foo).proximity"; +const vespalib::string ORDER = "elementSimilarity(foo).order"; +const vespalib::string QUERY = "elementSimilarity(foo).query_coverage"; +const vespalib::string FIELD = "elementSimilarity(foo).field_coverage"; +const vespalib::string WEIGHT = "elementSimilarity(foo).weight"; + +FtIndex indexFoo() { + FtIndex idx; + idx.field("foo"); + return idx; +} + +//----------------------------------------------------------------------------- + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + builder.addField(FieldType::INDEX, CollectionType::ARRAY, "bar"); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "fox"); + set("elementSimilarity(foo).output.proximity", "max(p)"); + set("elementSimilarity(foo).output.order", "max(o)"); + set("elementSimilarity(foo).output.query_coverage", "max(q)"); + set("elementSimilarity(foo).output.field_coverage", "max(f)"); + 
set("elementSimilarity(foo).output.weight", "max(w)"); + set("elementSimilarity(bar).output.default", "avg(1)"); + } + IndexFixture &set(const vespalib::string &key, const vespalib::string &value) { + Properties tmp; + tmp.add(key, value); + indexEnv.getProperties().import(tmp); + return *this; + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + std::vector<vespalib::string> actual; + FeatureDumpFixture() : IDumpFeatureVisitor(), actual() {} + virtual void visitDumpFeature(const vespalib::string &name) { + actual.push_back(name); + } +}; + +struct RankFixture : BlueprintFactoryFixture { + RankFixture() : BlueprintFactoryFixture() {} + double get_feature(const vespalib::string &query, const FtIndex &index, const vespalib::string &select, + const IndexFixture &idx_env = IndexFixture()) + { + std::vector<vespalib::string> names({"elementSimilarity(foo).default", // use 'default' explicitly to verify default output name + "elementSimilarity(foo).proximity", + "elementSimilarity(foo).order", + "elementSimilarity(foo).query_coverage", + "elementSimilarity(foo).field_coverage", + "elementSimilarity(foo).weight"}); + FtFeatureTest ft(factory, names); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().getIndexEnv().getProperties().import(idx_env.indexEnv.getProperties()); + FtTestApp::FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + { + RankResult stale; + EXPECT_TRUE(ft.executeOnly(stale, 2)); + EXPECT_EQUAL(0.0, stale.getScore(select)); + } + RankResult actual; + EXPECT_TRUE(ft.executeOnly(actual, 1)); + return actual.getScore(select); + } +}; + +//----------------------------------------------------------------------------- + +double prox(uint32_t dist) { + return (dist > 8) ? 
0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0))); +} + +double sum(std::initializer_list<double> values) { + double my_sum = 0.0; + for (double value: values) { + my_sum += value; + } + return my_sum; +} + +double comb(std::initializer_list<double> values) { + return (sum(values)/values.size()); +} + +double mix(double proximity, double order, double query, double field) { + return (0.35 * proximity) + (0.15 * order) + (0.30 * query) + (0.20 * field); +} + +//----------------------------------------------------------------------------- + +template <typename A, typename B> +bool cmp_lists_impl(const A &a, const B &b) { + std::vector<typename A::value_type> tmp_a(a.begin(), a.end()); + std::vector<typename B::value_type> tmp_b(b.begin(), b.end()); + std::sort(tmp_a.begin(), tmp_a.end()); + std::sort(tmp_b.begin(), tmp_b.end()); + if (!EXPECT_EQUAL(tmp_a.size(), tmp_b.size())) { + return false; + } + for (size_t i = 0; i < tmp_a.size(); ++i) { + if(!EXPECT_EQUAL(tmp_a[i], tmp_b[i])) { + return false; + } + } + return true; +} + +template <typename T> +void dump_list(const vespalib::string &name, const T &list) { + fprintf(stderr, "list(name: '%s', size: %zu)\n", name.c_str(), list.size()); + std::vector<typename T::value_type> tmp(list.begin(), list.end()); + std::sort(tmp.begin(), tmp.end()); + for (vespalib::string item: tmp) { + fprintf(stderr, " '%s'\n", item.c_str()); + } +} + +template <typename A, typename B> +bool cmp_lists(const A &a, const B &b) { + if(!cmp_lists_impl(a, b)) { + dump_list("expected", a); + dump_list("actual", b); + return false; + } + return true; +}; + +//----------------------------------------------------------------------------- + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("elementSimilarity"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<ElementSimilarityBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that appropriate features are 
dumped", ElementSimilarityBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); + EXPECT_TRUE(cmp_lists(std::vector<vespalib::string>({"elementSimilarity(foo)", + "elementSimilarity(foo).proximity", + "elementSimilarity(foo).order", + "elementSimilarity(foo).query_coverage", + "elementSimilarity(foo).field_coverage", + "elementSimilarity(foo).weight", + "elementSimilarity(bar)"}), + f3.actual)); +} + +bool try_setup(ElementSimilarityBlueprint &blueprint, const IndexFixture &index, const vespalib::string &field) { + DummyDependencyHandler deps(blueprint); + blueprint.setName(vespalib::make_string("%s(%s)", blueprint.getBaseName().c_str(), field.c_str())); + return ((Blueprint&)blueprint).setup(index.indexEnv, std::vector<vespalib::string>(1, field)); +} + +TEST_FF("require that setup can be done on weighted set index field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_TRUE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup can be done on array index field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_TRUE(try_setup(f1, f2, "bar")); +} + +TEST_FF("require that setup can be done on single value index field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_TRUE(try_setup(f1, f2, "baz")); +} + +TEST_FF("require that setup can not be done on single value attribute field", ElementSimilarityBlueprint, IndexFixture) { + EXPECT_FALSE(try_setup(f1, f2, "fox")); +} + +TEST_FF("require that setup will fail if output expression does not contain an aggregator", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "p"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup will fail if output expression contains an unknown aggregator", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "bogus(p)"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup will fail if output expression 
contains an unknown symbol", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "max(bogus)"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_FF("require that setup will fail if output expression is malformed", ElementSimilarityBlueprint, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "max(w+)"); + EXPECT_FALSE(try_setup(f1, f2, "foo")); +} + +TEST_F("require that no match gives zero outputs", RankFixture) { + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), DEFAULT)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), PROXIMITY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), QUERY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), FIELD)); +} + +TEST_F("require that minal perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), DEFAULT)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), FIELD)); +} + +TEST_F("require that larger perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), DEFAULT)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), FIELD)); +} + +TEST_F("require that extra query terms reduces order but not proximity", RankFixture) { + 
EXPECT_EQUAL(1.0, f1.get_feature("x y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y y", indexFoo().element("x"), PROXIMITY)); + + EXPECT_EQUAL(0.0, f1.get_feature("x y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y y", indexFoo().element("x"), ORDER)); +} + +TEST_F("require that extra field terms reduces proximity but not order", RankFixture) { + EXPECT_EQUAL(prox(2), f1.get_feature("x", indexFoo().element("x y"), PROXIMITY)); + EXPECT_EQUAL(prox(3), f1.get_feature("x", indexFoo().element("x y y"), PROXIMITY)); + EXPECT_EQUAL(prox(4), f1.get_feature("x", indexFoo().element("x y y y"), PROXIMITY)); + + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y y"), ORDER)); +} + +TEST_F("require that proximity acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x d x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c d x x e"), PROXIMITY)); +} + +TEST_F("require that field order does not affect proximity score", 
RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("d c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a x b x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a b x x e"), PROXIMITY)); +} + +TEST_F("require that order score acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a b c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a e d c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e d c b a"), ORDER)); +} + +TEST_F("require that proximity does not affect order score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x e x d x c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), 
f1.get_feature("a b c d e", indexFoo().element("e x d x c x b x a"), ORDER)); +} + +TEST_F("require that query coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), QUERY)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d"), QUERY)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c"), QUERY)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a!200 b!200 c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(2.0/7.0, f1.get_feature("a b c!500", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(5.0/7.0, f1.get_feature("a b c!500", indexFoo().element("c"), QUERY)); +} + +TEST_F("require that field coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), FIELD)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a x c d e"), FIELD)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b x x e"), FIELD)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("x x x d e"), FIELD)); +} + +TEST_F("require that first unique match is used per query term", RankFixture) { + EXPECT_EQUAL(prox(3), f1.get_feature("a b", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(2.0/4.0, f1.get_feature("a b", indexFoo().element("a a a b"), FIELD)); + + EXPECT_EQUAL(comb({prox(1), prox(2)}), f1.get_feature("a b a", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(0.5, f1.get_feature("a b a", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b a", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(3.0/4.0, f1.get_feature("a b a", 
indexFoo().element("a a a b"), FIELD)); +} + +TEST_F("require that default score combines individual signals appropriately", RankFixture) { + EXPECT_EQUAL(comb({prox(1), prox(3), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), PROXIMITY)); + EXPECT_EQUAL(comb({1.0, 0.0, 1.0}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), ORDER)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), FIELD)); + EXPECT_EQUAL(mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0), + f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), DEFAULT)); + EXPECT_EQUAL(7.0 * mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0), + f1.get_feature("a b c d e", indexFoo().element("a c x x b x d", 7), DEFAULT)); +} + +TEST_FF("require that max aggregation works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "max(w)"); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2)); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2)); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2)); +} + +TEST_FF("require that avg aggregation works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "avg(w)"); + EXPECT_EQUAL(5.0, f1.get_feature("x", indexFoo().element("x y", 5), DEFAULT, f2)); + EXPECT_EQUAL(4.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2)); + EXPECT_EQUAL(4.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2)); +} + +TEST_FF("require that sum aggregation works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "sum(w)"); + EXPECT_EQUAL(5.0, f1.get_feature("x", 
indexFoo().element("x y", 5), DEFAULT, f2)); + EXPECT_EQUAL(8.0, f1.get_feature("x", indexFoo().element("x y", 5).element("x y", 3), DEFAULT, f2)); + EXPECT_EQUAL(8.0, f1.get_feature("x", indexFoo().element("x y", 3).element("x y", 5), DEFAULT, f2)); +} + +TEST_FF("require that element demultiplexing works", RankFixture, IndexFixture) { + f2.set("elementSimilarity(foo).output.default", "sum(q)"); + EXPECT_EQUAL(sum({0.25, 0.5, 0.5, 0.25, 0.5}), + f1.get_feature("x y z t", indexFoo() + .element("x") + .element("x y") + .element("x z") + .element("y") + .element("x z"), DEFAULT, f2)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/euclidean_distance/.gitignore b/searchlib/src/tests/features/euclidean_distance/.gitignore new file mode 100644 index 00000000000..2d08dd27122 --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/.gitignore @@ -0,0 +1 @@ +searchlib_euclidean_distance_test_app diff --git a/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt new file mode 100644 index 00000000000..d79aa9572bc --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_euclidean_distance_test_app + SOURCES + euclidean_distance_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_euclidean_distance_test_app COMMAND searchlib_euclidean_distance_test_app) diff --git a/searchlib/src/tests/features/euclidean_distance/FILES b/searchlib/src/tests/features/euclidean_distance/FILES new file mode 100644 index 00000000000..4ed7d9969b3 --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/FILES @@ -0,0 +1 @@ +euclidean_distance_test.cpp diff --git a/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp new file mode 100644 index 00000000000..b0d97902728 --- /dev/null +++ b/searchlib/src/tests/features/euclidean_distance/euclidean_distance_test.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include <vespa/fastos/fastos.h>
#include <vespa/vespalib/testkit/test_kit.h>

#include <vespa/searchlib/attribute/attributefactory.h>
#include <vespa/searchlib/attribute/attributevector.h>
#include <vespa/searchlib/attribute/integerbase.h>
#include <vespa/searchlib/attribute/floatbase.h>
#include <vespa/searchlib/features/setup.h>
#include <vespa/searchlib/fef/test/indexenvironment.h>
#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h>
#include <vespa/searchlib/fef/test/queryenvironment.h>
#include <vespa/searchlib/fef/test/ftlib.h>
#include <vespa/searchlib/features/euclidean_distance_feature.h>
#include <vespa/searchlib/fef/fef.h>

using search::feature_t;
using namespace search::fef;
using namespace search::fef::test;
using namespace search::features;
using search::AttributeFactory;
using search::IntegerAttribute;
using search::FloatingPointAttribute;

// Short aliases for the attribute configuration types used below.
using AVC = search::attribute::Config;
using AVBT = search::attribute::BasicType;
using AVCT = search::attribute::CollectionType;
using AttributePtr = search::AttributeVector::SP;
using FTA = FtTestApp;

// Blueprint plus an index environment that knows one array attribute
// field named 'myAttribute'.
struct SetupFixture
{
    EuclideanDistanceBlueprint blueprint;
    IndexEnvironment indexEnv;
    SetupFixture()
        : blueprint(),
          indexEnv()
    {
        indexEnv.getFields().push_back(
                FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "myAttribute", 1));
    }
};

TEST_F("require that blueprint can be created from factory", SetupFixture)
{
    EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "euclideanDistance"));
}

TEST_F("require that setup succeeds with attribute source", SetupFixture)
{
    FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("myAttribute").add("myVector"),
                     StringList(), StringList().add("distance"));
}

// Full execution environment for one feature: registers the search features,
// creates the array attributes 'aint' and 'afloat' with three values each for
// docid 1, and adds the query-side vectors as query properties.
struct ExecFixture
{
    BlueprintFactory factory;
    FtFeatureTest test;
    ExecFixture(const vespalib::string &feature)
        : factory(),
          test(factory, feature)
    {
        setup_search_features(factory);
        setupAttributeVectors();
        setupQueryEnvironment();
        ASSERT_TRUE(test.setup());
    }
    // Creates, registers, fills and commits the two attribute vectors.
    void setupAttributeVectors() {
        std::vector<AttributePtr> vectors;
        vectors.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY)));
        vectors.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY)));

        test.getIndexEnv().getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint", 0));
        test.getIndexEnv().getFields().push_back(FieldInfo(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat", 1));

        for (size_t idx = 0; idx < vectors.size(); ++idx) {
            vectors[idx]->addReservedDoc();
            vectors[idx]->addDocs(1);
            test.getIndexEnv().getAttributeManager().add(vectors[idx]);
        }

        // Document vector stored in 'aint' for docid 1.
        const int intValues[] = { 1, -2, 3 };
        IntegerAttribute *intAttr = static_cast<IntegerAttribute *>(vectors[0].get());
        for (int value : intValues) {
            intAttr->append(1, value, 0);
        }

        // Document vector stored in 'afloat' for docid 1.
        const double floatValues[] = { 1.3, 1.5, -1.7 };
        FloatingPointAttribute *floatAttr = static_cast<FloatingPointAttribute *>(vectors[1].get());
        for (double value : floatValues) {
            floatAttr->append(1, value, 0);
        }

        for (size_t idx = 0; idx < vectors.size(); ++idx) {
            vectors[idx]->commit();
        }
    }
    // Adds the query vectors referenced by the features under test.
    void setupQueryEnvironment() {
        test.getQueryEnv().getProperties().add("euclideanDistance.intquery", "[4 5 -6]");
        test.getQueryEnv().getProperties().add("euclideanDistance.floatquery", "[4.1 15 0.001]");
    }

};

TEST_F("require that distance is calculated for integer vectors",
       ExecFixture("euclideanDistance(aint,intquery)"))
{
    EXPECT_TRUE(f.test.execute(11.789826, 0.000001));
}

TEST_F("require that distance is calculated for floating point vectors",
       ExecFixture("euclideanDistance(afloat,floatquery)"))
{
    EXPECT_TRUE(f.test.execute(13.891846, 0.000001));
}

TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/features/featurebenchmark.cpp b/searchlib/src/tests/features/featurebenchmark.cpp new file mode 100644 index 00000000000..14e43fa7d47 --- /dev/null +++ b/searchlib/src/tests/features/featurebenchmark.cpp @@ -0,0 +1,657 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("featurebenchmark"); + +#include <fstream> +#include <iomanip> +#include <iostream> +#include <string> +#include <boost/tokenizer.hpp> + +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> + +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/features/utils.h> +#include <vespa/searchlib/fef/functiontablefactory.h> +#include <vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/fef/test/ftlib.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::StringAttribute; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; + +typedef AttributeVector::SP AttributePtr; + +class Benchmark : public FtTestApp { +public: + typedef std::vector<std::pair<vespalib::string, vespalib::string> > KeyValueVector; + + class Config { + private: + typedef std::map<vespalib::string, vespalib::string> StringMap; + StringMap _config; + + bool isKnown(const vespalib::string & key) const; + + public: + Config() : _config() {} + Config(const vespalib::string & fileName) : _config() { + init(fileName); + } + void init(const vespalib::string & fileName); + + void add(const vespalib::string & key, const vespalib::string & value) { + _config[key] = value; + } + + void addIfNotFound(const vespalib::string & key, const vespalib::string & value) { + if (_config.count(key) == 0) { + add(key, value); + } + } + + // known config values + vespalib::string getCase(const vespalib::string & fallback 
= "") const { + return getAsStr("case", fallback); + } + vespalib::string getFeature(const vespalib::string & fallback = "") const { + return getAsStr("feature", fallback); + } + vespalib::string getIndex(const vespalib::string & fallback = "") const { + return getAsStr("index", fallback); + } + vespalib::string getQuery(const vespalib::string & fallback = "") const { + return getAsStr("query", fallback); + } + vespalib::string getField(const vespalib::string & fallback = "") const { + return getAsStr("field", fallback); + } + uint32_t getNumRuns(uint32_t fallback = 1000) const { + return getAsUint32("numruns", fallback); + } + + // access "unknown" config values + vespalib::string getAsStr(const vespalib::string & key, const vespalib::string & fallback = "") const { + StringMap::const_iterator itr = _config.find(key); + if (itr != _config.end()) { + return vespalib::string(itr->second); + } + return vespalib::string(fallback); + } + uint32_t getAsUint32(const vespalib::string & key, uint32_t fallback = 0) const { + return util::strToNum<uint32_t>(getAsStr(key, vespalib::make_string("%u", fallback))); + } + double getAsDouble(const vespalib::string & key, double fallback = 0) const { + return util::strToNum<double>(getAsStr(key, vespalib::make_string("%f", fallback))); + } + + KeyValueVector getUnknown() const; + + friend std::ostream & operator << (std::ostream & os, const Config & cfg); + }; + +private: + search::fef::BlueprintFactory _factory; + FastOS_Time _timer; + double _sample; + + void start() { _timer.SetNow(); } + void sample() { _sample = _timer.MilliSecsToNow(); } + void setupPropertyMap(Properties & props, const KeyValueVector & values); + void runFieldMatch(Config & cfg); + void runRankingExpression(Config & cfg); + + AttributePtr createAttributeVector(AVBT dt, const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount); + AttributePtr createAttributeVector(const 
vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount); + AttributePtr createStringAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + const std::vector<vespalib::string> & values); + void runAttributeMatch(Config & cfg); + void runAttribute(Config & cfg); + void runDotProduct(Config & cfg); + void runNativeAttributeMatch(Config & cfg); + void runNativeFieldMatch(Config & cfg); + void runNativeProximity(Config & cfg); + +public: + Benchmark() : _factory(), _timer(), _sample() {} + int Main(); + +}; + +TEST_APPHOOK(Benchmark); + + +bool +Benchmark::Config::isKnown(const vespalib::string & key) const +{ + if (key == vespalib::string("case") || + key == vespalib::string("feature") || + key == vespalib::string("index") || + key == vespalib::string("query") || + key == vespalib::string("field") || + key == vespalib::string("numruns")) + { + return true; + } + return false; +} + +void +Benchmark::Config::init(const vespalib::string & fileName) +{ + std::ifstream is(fileName.c_str()); + if (is.fail()) { + throw std::runtime_error(fileName); + } + + while (is.good()) { + std::string line; + std::getline(is, line); + if (!line.empty()) { + std::vector<vespalib::string> values = FtUtil::tokenize(line, "="); + LOG_ASSERT(values.size() == 2); + add(values[0], values[1]); + } + } +} + +Benchmark::KeyValueVector +Benchmark::Config::getUnknown() const +{ + KeyValueVector retval; + for (StringMap::const_iterator itr = _config.begin(); itr != _config.end(); ++itr) { + if (!isKnown(itr->first)) { + retval.push_back(std::make_pair(itr->first, itr->second)); + } + } + return retval; +} + +std::ostream & operator << (std::ostream & os, const Benchmark::Config & cfg) +{ + std::cout << "getCase: '" << cfg.getCase() << "'" << std::endl; + std::cout << "getFeature: '" << cfg.getFeature() << "'" << std::endl; + std::cout << "getIndex: '" << cfg.getIndex() << "'" << 
std::endl; + std::cout << "getQuery: '" << cfg.getQuery() << "'" << std::endl; + std::cout << "getField: '" << cfg.getField() << "'" << std::endl; + std::cout << "getNumRuns: '" << cfg.getNumRuns() << "'" << std::endl; + + for (StringMap::const_iterator itr = cfg._config.begin(); itr != cfg._config.end(); ++itr) { + os << "'" << itr->first << "'='" << itr->second << "'" << std::endl; + } + return os; +} + + +void +Benchmark::setupPropertyMap(Properties & props, const KeyValueVector & values) +{ + std::cout << "**** setup property map ****" << std::endl; + for (uint32_t i = 0; i < values.size(); ++i) { + std::cout << "'" << values[i].first << "'='" << values[i].second << "'" << std::endl; + props.add(values[i].first, values[i].second); + } + std::cout << "**** setup property map ****" << std::endl; +} + +void +Benchmark::runFieldMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "fieldMatch(foo)"); + cfg.addIfNotFound("index", "foo"); + cfg.addIfNotFound("query", "a b c d"); + cfg.addIfNotFound("field", "a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + vespalib::string index = cfg.getIndex(); + vespalib::string query = cfg.getQuery(); + vespalib::string field = cfg.getField(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + setupFieldMatch(ft, index, query, field, NULL, 0, 0.0f, 0); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +void +Benchmark::runRankingExpression(Config & cfg) +{ + cfg.addIfNotFound("feature", "rankingExpression"); + cfg.addIfNotFound("rankingExpression.rankingScript", "1 + 1 + 1 + 1"); + + std::cout << "**** 
config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +AttributePtr +Benchmark::createAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount) +{ + return createAttributeVector(AVBT::INT32, name, ctype, numDocs, value, valueCount); +} + +AttributePtr +Benchmark::createAttributeVector(AVBT dt, const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + AttributeVector::largeint_t value, uint32_t valueCount) +{ + AttributePtr a; + if (ctype == "single") { + a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::SINGLE)); + std::cout << "create single int32" << std::endl; + } else if (ctype == "array") { + a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::ARRAY)); + std::cout << "create array int32" << std::endl; + } else if (ctype == "wset") { + a = AttributeFactory::createAttribute(name, AVC(dt, AVCT::WSET)); + std::cout << "create wset int32" << std::endl; + } + + a->addDocs(numDocs); + IntegerAttribute * ia = static_cast<IntegerAttribute *>(a.get()); + for (uint32_t i = 0; i < numDocs; ++i) { + if (ctype == "single") { + ia->update(i, value); + } else { + for (uint32_t j = 0; j < valueCount; ++j) { + if (ctype == "array") { + ia->append(i, value, 0); + } else { + ia->append(i, value + j, j); + } + } + } + } + + a->commit(); + return a; +} + +AttributePtr +Benchmark::createStringAttributeVector(const vespalib::string & name, const vespalib::string & ctype, uint32_t numDocs, + const 
std::vector<vespalib::string> & values) +{ + AttributePtr a; + if (ctype == "single") { + a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::SINGLE)); + std::cout << "create single string" << std::endl; + } else if (ctype == "array") { + a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::ARRAY)); + std::cout << "create array string" << std::endl; + } else if (ctype == "wset") { + a = AttributeFactory::createAttribute(name, AVC(AVBT::STRING, AVCT::WSET)); + std::cout << "create wset string" << std::endl; + } + + a->addDocs(numDocs); + StringAttribute * sa = static_cast<StringAttribute *>(a.get()); + for (uint32_t i = 0; i < numDocs; ++i) { + if (ctype == "single") { + sa->update(i, values[0]); + } else { + for (uint32_t j = 0; j < values.size(); ++j) { + sa->append(i, values[j], j); + } + } + } + + a->commit(); + return a; +} + +void +Benchmark::runAttributeMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "attributeMatch(foo)"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = 1000000; + uint32_t numDocs = 1000000; + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getAttributeManager().add(createAttributeVector("foo", "single", numDocs, 10, 10)); + ft.getQueryEnv().getBuilder().addAttributeNode("foo"); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("foo", 0, 0); + mdb->apply(0); + TermFieldMatchData *amd = mdb->getTermFieldMatchData(0, 0); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + { + amd->reset(0); // preserve old behavior + TermFieldMatchDataPosition pos; + 
pos.setElementWeight(i % numDocs); + amd->appendPosition(pos); + } + ft.executeOnly(i % numDocs); + } + sample(); +} + +void +Benchmark::runAttribute(Config & cfg) +{ + cfg.addIfNotFound("feature", "attribute(foo,str4)"); + cfg.addIfNotFound("numruns", "10000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + uint32_t numDocs = cfg.getAsUint32("numdocs", 1000); + StringList values; + values.add("str0").add("str1").add("str2").add("str3").add("str4") + .add("str5").add("str6").add("str7").add("str8").add("str9"); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("foo", "wset", numDocs, values)); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(i % numDocs); + } + sample(); +} + +void +Benchmark::runDotProduct(Config & cfg) +{ + cfg.addIfNotFound("feature", "dotProduct(wsstr,vector)"); + cfg.addIfNotFound("numruns", "1000000"); + cfg.addIfNotFound("numdocs", "1000"); + cfg.addIfNotFound("numvalues", "10"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + vespalib::string collectionType = cfg.getAsStr("collectiontype", "wset"); + vespalib::string dataType = cfg.getAsStr("datatype", "string"); + uint32_t numRuns = cfg.getNumRuns(); + uint32_t numDocs = cfg.getAsUint32("numdocs", 1000); + uint32_t numValues = cfg.getAsUint32("numvalues", 10); + FtFeatureTest ft(_factory, feature); + 
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, + collectionType == "wset" ? CollectionType::WEIGHTEDSET : CollectionType::ARRAY, + "wsstr"); + if (dataType == "string") { + StringList values; + for (uint32_t i = 0; i < numValues; ++i) { + values.add(vespalib::make_string("str%u", i)); + } + + ft.getIndexEnv().getAttributeManager().add(createStringAttributeVector("wsstr", collectionType, numDocs, values)); + } else if (dataType == "int") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT32, "wsstr", collectionType, numDocs, 0, numValues)); + } else if (dataType == "long") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::INT64, "wsstr", collectionType, numDocs, 0, numValues)); + } else if (dataType == "float") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::FLOAT, "wsstr", collectionType, numDocs, 0, numValues)); + } else if (dataType == "double") { + ft.getIndexEnv().getAttributeManager().add(createAttributeVector(AVBT::DOUBLE, "wsstr", collectionType, numDocs, 0, numValues)); + } else { + std::cerr << "Illegal data type '" << dataType << std::endl; + } + ft.getQueryEnv().getProperties().add("dotProduct.vector", cfg.getAsStr("dotProduct.vector", "(str0:1)")); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(i % numDocs); + } + sample(); +} + +void +Benchmark::runNativeAttributeMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "nativeAttributeMatch(foo)"); + cfg.addIfNotFound("numruns", "10000000"); + cfg.addIfNotFound("numdocs", "1000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + uint32_t numDocs = 
cfg.getAsUint32("numdocs"); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend + ft.getQueryEnv().getBuilder().addAttributeNode("foo")->setWeight(search::query::Weight(100)); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("foo", 0, 0); + mdb->apply(0); + + TermFieldMatchData *amd = mdb->getTermFieldMatchData(0, 0); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + uint32_t docId = i % numDocs; + { + amd->reset(docId); + TermFieldMatchDataPosition pos; + pos.setElementWeight(docId); + amd->appendPosition(pos); + } + ft.executeOnly(docId); + } + sample(); +} + +void +Benchmark::runNativeFieldMatch(Config & cfg) +{ + cfg.addIfNotFound("feature", "nativeFieldMatch(foo)"); + cfg.addIfNotFound("numruns", "10000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend + std::vector<vespalib::string> searchedFields; + searchedFields.push_back("foo"); + ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // setup occurrence data + mdb->setFieldLength("foo", 100); + mdb->addOccurence("foo", 0, 2); + 
mdb->addOccurence("foo", 0, 8); + mdb->addOccurence("foo", 0, 32); + mdb->addOccurence("foo", 0, 64); + ASSERT_TRUE(mdb->apply(0)); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +void +Benchmark::runNativeProximity(Config & cfg) +{ + cfg.addIfNotFound("feature", "nativeProximity(foo)"); + cfg.addIfNotFound("numruns", "10000000"); + + std::cout << "**** config ****" << std::endl; + std::cout << cfg << std::endl; + std::cout << "**** config ****" << std::endl; + + vespalib::string feature = cfg.getFeature(); + uint32_t numRuns = cfg.getNumRuns(); + + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); // same as backend + std::vector<vespalib::string> searchedFields; + searchedFields.push_back("foo"); + ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); // termId 0 + ft.getQueryEnv().getBuilder().addIndexNode(searchedFields); // termId 1 + setupPropertyMap(ft.getIndexEnv().getProperties(), cfg.getUnknown()); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // setup occurrence data + mdb->setFieldLength("foo", 100); + mdb->addOccurence("foo", 0, 2); + mdb->addOccurence("foo", 0, 16); + mdb->addOccurence("foo", 0, 32); + mdb->addOccurence("foo", 1, 6); + mdb->addOccurence("foo", 1, 12); + mdb->addOccurence("foo", 1, 30); + ASSERT_TRUE(mdb->apply(0)); + + start(); + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + for (uint32_t i = 0; i < numRuns; ++i) { + ft.executeOnly(0); + } + sample(); +} + +int +Benchmark::Main() +{ + TEST_INIT("featurebenchmark"); + + // Configure factory with all known blueprints. 
+ setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + int idx = 1; + char opt; + const char * arg; + bool optError = false; + vespalib::string file; + vespalib::string feature; + while ((opt = GetOpt("c:f:", arg, idx)) != -1) { + switch (opt) { + case 'c': + file.assign(arg); + break; + case 'f': + feature.assign(arg); + break; + default: + optError = true; + break; + } + } + + if (_argc != idx || optError) { + //usage(); + return -1; + } + + Config cfg; + if (file.empty()) { + cfg.add("case", feature); + } else { + cfg.init(file); + } + + if (cfg.getCase() == vespalib::string("fieldMatch")) { + runFieldMatch(cfg); + } else if (cfg.getCase() == vespalib::string("rankingExpression")) { + runRankingExpression(cfg); + } else if (cfg.getCase() == vespalib::string("attributeMatch")) { + runAttributeMatch(cfg); + } else if (cfg.getCase() == vespalib::string("attribute")) { + runAttribute(cfg); + } else if (cfg.getCase() == vespalib::string("dotProduct")) { + runDotProduct(cfg); + } else if (cfg.getCase() == vespalib::string("nativeAttributeMatch")) { + runNativeAttributeMatch(cfg); + } else if (cfg.getCase() == vespalib::string("nativeFieldMatch")) { + runNativeFieldMatch(cfg); + } else if (cfg.getCase() == vespalib::string("nativeProximity")) { + runNativeProximity(cfg); + } else { + std::cout << "feature case '" << cfg.getCase() << "' is not known" << std::endl; + } + + std::cout << "TET: " << _sample << " (ms)" << std::endl; + std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / cfg.getNumRuns() << " (ms)" << std::endl; + std::cout << "**** '" << cfg.getFeature() << "' ****" << std::endl; + + TEST_DONE(); + return 0; +} + diff --git a/searchlib/src/tests/features/item_raw_score/.gitignore b/searchlib/src/tests/features/item_raw_score/.gitignore new file mode 100644 index 00000000000..29711c1533d --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/.gitignore @@ -0,0 +1 @@ +searchlib_item_raw_score_test_app diff --git 
a/searchlib/src/tests/features/item_raw_score/CMakeLists.txt b/searchlib/src/tests/features/item_raw_score/CMakeLists.txt new file mode 100644 index 00000000000..24ef339133c --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_item_raw_score_test_app + SOURCES + item_raw_score_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_item_raw_score_test_app COMMAND searchlib_item_raw_score_test_app) diff --git a/searchlib/src/tests/features/item_raw_score/FILES b/searchlib/src/tests/features/item_raw_score/FILES new file mode 100644 index 00000000000..bce307ff6c1 --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/FILES @@ -0,0 +1 @@ +item_raw_score_test.cpp diff --git a/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp new file mode 100644 index 00000000000..20f9449062d --- /dev/null +++ b/searchlib/src/tests/features/item_raw_score/item_raw_score_test.cpp @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/item_raw_score_feature.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const vespalib::string featureName("itemRawScore(label)"); + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void visitDumpFeature(const vespalib::string &) { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +struct Labels { + virtual void inject(Properties &p) const = 0; + virtual ~Labels() {} +}; +struct NoLabel : public Labels { + virtual void inject(Properties &) const {} +}; +struct SingleLabel : public Labels { + vespalib::string label; + uint32_t uid; + SingleLabel(const vespalib::string &l, uint32_t x) : label(l), uid(x) {} + virtual void inject(Properties &p) const { + vespalib::asciistream key; + key << "vespa.label." 
<< label << ".id"; + vespalib::asciistream value; + value << uid; + p.add(key.str(), value.str()); + } +}; + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; + RankProgram::UP rankProgram; + MatchDataLayout mdl; + std::vector<TermFieldHandle> fooHandles; + std::vector<TermFieldHandle> barHandles; + RankFixture(size_t fooCnt, size_t barCnt, const Labels &labels) + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + rankProgram(), mdl(), fooHandles(), barHandles() + { + for (size_t i = 0; i < fooCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("foo")->id(); + fooHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.setUniqueId(i + 1); + term.addField(fieldId).setHandle(fooHandles.back()); + queryEnv.getTerms().push_back(term); + } + for (size_t i = 0; i < barCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("bar")->id(); + barHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.setUniqueId(fooCnt + i + 1); + term.addField(fieldId).setHandle(barHandles.back()); + queryEnv.getTerms().push_back(term); + } + labels.inject(queryEnv.getProperties()); + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + rankProgram = rankSetup.create_first_phase_program(); + rankProgram->setup(mdl, queryEnv); + } + feature_t getScore(uint32_t docId) { + rankProgram->run(docId); + return *Utils::getScoreFeature(*rankProgram); + } + void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { + rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score); + } + void setFooScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, fooHandles.size()); + setScore(fooHandles[i], docId, score); + } + void setBarScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, barHandles.size()); + setScore(barHandles[i], docId, score); + } +}; + 
+TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("itemRawScore"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<ItemRawScoreBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", ItemRawScoreBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on random label", ItemRawScoreBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(random_label)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "random_label"))); +} + +TEST_FF("require that no label gives 0.0 item raw score", NoLabel(), RankFixture(2, 2, f1)) { + EXPECT_EQUAL(0.0, f2.getScore(10)); +} + +TEST_FF("require that unrelated label gives 0.0 item raw score", SingleLabel("unrelated", 1), RankFixture(2, 2, f1)) { + EXPECT_EQUAL(0.0, f2.getScore(10)); +} + +TEST_FF("require that item raw score can be obtained", SingleLabel("label", 1), RankFixture(2, 2, f1)) { + f2.setFooScore(0, 10, 5.0); + EXPECT_EQUAL(5.0, f2.getScore(10)); +} + +TEST_FF("require that other raw scores are ignored", SingleLabel("label", 2), RankFixture(2, 2, f1)) { + f2.setFooScore(0, 10, 1.0); + f2.setFooScore(1, 10, 2.0); + f2.setBarScore(0, 10, 5.0); + f2.setBarScore(1, 10, 6.0); + EXPECT_EQUAL(2.0, f2.getScore(10)); +} + +TEST_FF("require that stale raw score is ignored", SingleLabel("label", 2), RankFixture(2, 2, f1)) { + f2.setFooScore(0, 10, 1.0); + f2.setFooScore(1, 5, 2.0); + EXPECT_EQUAL(0.0, f2.getScore(10)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/native_dot_product/.gitignore b/searchlib/src/tests/features/native_dot_product/.gitignore new file mode 100644 index 00000000000..d95f15f4492 --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/.gitignore @@ -0,0 +1 @@ 
+searchlib_native_dot_product_test_app diff --git a/searchlib/src/tests/features/native_dot_product/CMakeLists.txt b/searchlib/src/tests/features/native_dot_product/CMakeLists.txt new file mode 100644 index 00000000000..2dad758c82d --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_native_dot_product_test_app + SOURCES + native_dot_product_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_native_dot_product_test_app COMMAND searchlib_native_dot_product_test_app) diff --git a/searchlib/src/tests/features/native_dot_product/FILES b/searchlib/src/tests/features/native_dot_product/FILES new file mode 100644 index 00000000000..ab007656448 --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/FILES @@ -0,0 +1 @@ +native_dot_product_test.cpp diff --git a/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp new file mode 100644 index 00000000000..3e3702cceec --- /dev/null +++ b/searchlib/src/tests/features/native_dot_product/native_dot_product_test.cpp @@ -0,0 +1,191 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/native_dot_product_feature.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const std::string featureName("nativeDotProduct(foo)"); + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void visitDumpFeature(const vespalib::string &) { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +std::vector<uint32_t> vec() { + std::vector<uint32_t> ret; + return ret; +} + +std::vector<uint32_t> vec(uint32_t w1) { + std::vector<uint32_t> ret; + ret.push_back(w1); + return ret; +} + +std::vector<uint32_t> vec(uint32_t w1, uint32_t w2) { + std::vector<uint32_t> ret; + ret.push_back(w1); + ret.push_back(w2); + return ret; +} + +std::vector<uint32_t> vec(uint32_t w1, uint32_t w2, uint32_t w3) { + std::vector<uint32_t> ret; + ret.push_back(w1); + ret.push_back(w2); + ret.push_back(w3); + return ret; +} + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; 
+ RankProgram::UP rankProgram; + MatchDataLayout mdl; + std::vector<TermFieldHandle> fooHandles; + std::vector<TermFieldHandle> barHandles; + RankFixture(const std::vector<uint32_t> &fooWeights, + const std::vector<uint32_t> &barWeights) + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + rankProgram(), mdl(), fooHandles(), barHandles() + { + for (size_t i = 0; i < fooWeights.size(); ++i) { + uint32_t fieldId = indexEnv.getFieldByName("foo")->id(); + fooHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.addField(fieldId).setHandle(fooHandles.back()); + term.setWeight(search::query::Weight(fooWeights[i])); + queryEnv.getTerms().push_back(term); + } + for (size_t i = 0; i < barWeights.size(); ++i) { + uint32_t fieldId = indexEnv.getFieldByName("bar")->id(); + barHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.addField(fieldId).setHandle(barHandles.back()); + term.setWeight(search::query::Weight(barWeights[i])); + queryEnv.getTerms().push_back(term); + } + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + rankProgram = rankSetup.create_first_phase_program(); + rankProgram->setup(mdl, queryEnv); + } + feature_t getScore(uint32_t docId) { + rankProgram->run(docId); + return *Utils::getScoreFeature(*rankProgram); + } + void setFooWeight(uint32_t i, uint32_t docId, int32_t index_weight) { + ASSERT_LESS(i, fooHandles.size()); + TermFieldMatchDataPosition pos; + pos.setElementWeight(index_weight); + rankProgram->match_data().resolveTermField(fooHandles[i])->reset(docId); + rankProgram->match_data().resolveTermField(fooHandles[i])->appendPosition(pos); + } + void setBarWeight(uint32_t i, uint32_t docId, int32_t index_weight) { + ASSERT_LESS(i, barHandles.size()); + TermFieldMatchDataPosition pos; + pos.setElementWeight(index_weight); + rankProgram->match_data().resolveTermField(barHandles[i])->reset(docId); + 
rankProgram->match_data().resolveTermField(barHandles[i])->appendPosition(pos); + } +}; + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("nativeDotProduct"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<NativeDotProductBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", NativeDotProductBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on index field", NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo"))); +} + +TEST_FF("require that setup can be done on attribute field", NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar"))); +} + +TEST_FF("require that setup fails for unknown field", NativeDotProductBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown"))); +} + +TEST_F("require that not searching a field will give it 0.0 dot product", RankFixture(vec(), vec(1, 2, 3))) { + EXPECT_EQUAL(0.0, f1.getScore(10)); +} + +TEST_F("require that dot product works for single match", RankFixture(vec(5), vec())) { + f1.setFooWeight(0, 10, 7); + EXPECT_EQUAL(35, f1.getScore(10)); +} + +TEST_F("require that dot product works for multiple matches", RankFixture(vec(1, 3, 5), vec())) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, 4); + f1.setFooWeight(2, 10, 6); + EXPECT_EQUAL(44, f1.getScore(10)); +} + 
+TEST_F("require that stale data is ignored", RankFixture(vec(1, 3, 5), vec())) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 9, 4); + f1.setFooWeight(2, 10, 6); + EXPECT_EQUAL(32, f1.getScore(10)); +} + +TEST_F("require that data from other fields is ignored", RankFixture(vec(1, 3), vec(5, 7))) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, 4); + f1.setBarWeight(0, 10, 6); + f1.setBarWeight(1, 10, 8); + EXPECT_EQUAL(14, f1.getScore(10)); +} + +TEST_F("require that negative weights in the index works", RankFixture(vec(1, 3), vec())) { + f1.setFooWeight(0, 10, 2); + f1.setFooWeight(1, 10, -4); + EXPECT_EQUAL(-10, f1.getScore(10)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp new file mode 100644 index 00000000000..b0bac4b576d --- /dev/null +++ b/searchlib/src/tests/features/prod_features.cpp @@ -0,0 +1,1937 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("prod_features_test"); + +#include "prod_features.h" +#include <boost/tokenizer.hpp> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/floatbase.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/features/agefeature.h> +#include <vespa/searchlib/features/array_parser.hpp> +#include <vespa/searchlib/features/attributefeature.h> +#include <vespa/searchlib/features/attributematchfeature.h> +#include <vespa/searchlib/features/closenessfeature.h> +#include <vespa/searchlib/features/distancefeature.h> +#include <vespa/searchlib/features/dotproductfeature.h> +#include <vespa/searchlib/features/fieldlengthfeature.h> +#include <vespa/searchlib/features/fieldmatchfeature.h> +#include <vespa/searchlib/features/fieldtermmatchfeature.h> +#include <vespa/searchlib/features/firstphasefeature.h> +#include <vespa/searchlib/features/foreachfeature.h> +#include <vespa/searchlib/features/freshnessfeature.h> +#include <vespa/searchlib/features/matchesfeature.h> +#include <vespa/searchlib/features/matchfeature.h> +#include <vespa/searchlib/features/nowfeature.h> +#include <vespa/searchlib/features/queryfeature.h> +#include <vespa/searchlib/features/querytermcountfeature.h> +#include <vespa/searchlib/features/randomfeature.h> +#include <vespa/searchlib/features/rankingexpressionfeature.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/features/termfeature.h> +#include <vespa/searchlib/features/utils.h> +#include <vespa/searchlib/features/valuefeature.h> +#include <vespa/searchlib/features/weighted_set_parser.hpp> +#include 
<vespa/searchlib/fef/featurenamebuilder.h> +#include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/searchlib/fef/queryproperties.h> +#include <vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/vespalib/geo/zcurve.h> +#include <vespa/vespalib/util/string_hash.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::FloatingPointAttribute; +using search::StringAttribute; +using search::WeightedSetStringExtAttribute; +using search::attribute::WeightedEnumContent; + +typedef AttributeVector::SP AttributePtr; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; + +const double EPS = 10e-6; + + +TEST_APPHOOK(Test); + +int +Test::Main() +{ + TEST_INIT("prod_features_test"); + + // Configure factory with all known blueprints. + setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + // Test all features. 
+ TEST_DO(testFramework()); TEST_FLUSH(); + TEST_DO(testFtLib()); TEST_FLUSH(); + TEST_DO(testAge()); TEST_FLUSH(); + TEST_DO(testAttribute()); TEST_FLUSH(); + TEST_DO(testAttributeMatch()); TEST_FLUSH(); + TEST_DO(testCloseness()); TEST_FLUSH(); + TEST_DO(testDistance()); TEST_FLUSH(); + TEST_DO(testDistanceToPath()); TEST_FLUSH(); + TEST_DO(testDotProduct()); TEST_FLUSH(); + TEST_DO(testFieldLength()); TEST_FLUSH(); + TEST_DO(testFieldMatch()); TEST_FLUSH(); + TEST_DO(testFieldTermMatch()); TEST_FLUSH(); + TEST_DO(testFirstPhase()); TEST_FLUSH(); + TEST_DO(testForeach()); TEST_FLUSH(); + TEST_DO(testFreshness()); TEST_FLUSH(); + TEST_DO(testMatch()); TEST_FLUSH(); + TEST_DO(testMatches()); TEST_FLUSH(); + TEST_DO(testNow()); TEST_FLUSH(); + TEST_DO(testQuery()); TEST_FLUSH(); + TEST_DO(testQueryTermCount()); TEST_FLUSH(); + TEST_DO(testRandom()); TEST_FLUSH(); + TEST_DO(testRankingExpression()); TEST_FLUSH(); + TEST_DO(testTerm()); TEST_FLUSH(); + TEST_DO(testTermDistance()); TEST_FLUSH(); + TEST_DO(testUtils()); TEST_FLUSH(); + + TEST_DONE(); + return 0; +} + + +void +Test::testFtLib() +{ + { // toQuery + FtQuery q = FtUtil::toQuery("a b!50 0.5:c!200%0.5 d%0.3 e!300 0.3:f "); + ASSERT_TRUE(q.size() == 6); + EXPECT_EQUAL(q[0].term, vespalib::string("a")); + EXPECT_EQUAL(q[0].termWeight.percent(), 100); + EXPECT_APPROX(q[0].connexity, 0.1f, EPS); + EXPECT_APPROX(q[0].significance, 0.1f, EPS); + EXPECT_EQUAL(q[1].term, vespalib::string("b")); + EXPECT_EQUAL(q[1].termWeight.percent(), 50); + EXPECT_APPROX(q[1].connexity, 0.1f, EPS); + EXPECT_APPROX(q[1].significance, 0.1f, EPS); + EXPECT_EQUAL(q[2].term, vespalib::string("c")); + EXPECT_EQUAL(q[2].termWeight.percent(), 200); + EXPECT_APPROX(q[2].connexity, 0.5f, EPS); + EXPECT_APPROX(q[2].significance, 0.5f, EPS); + EXPECT_EQUAL(q[3].term, vespalib::string("d")); + EXPECT_EQUAL(q[3].termWeight.percent(), 100); + EXPECT_APPROX(q[3].connexity, 0.1f, EPS); + EXPECT_APPROX(q[3].significance, 0.3f, EPS); + 
EXPECT_EQUAL(q[4].term, vespalib::string("e")); + EXPECT_EQUAL(q[4].termWeight.percent(), 300); + EXPECT_APPROX(q[4].connexity, 0.1f, EPS); + EXPECT_APPROX(q[4].significance, 0.1f, EPS); + EXPECT_EQUAL(q[5].term, vespalib::string("f")); + EXPECT_EQUAL(q[5].termWeight.percent(), 100); + EXPECT_APPROX(q[5].connexity, 0.3f, EPS); + EXPECT_APPROX(q[5].significance, 0.1f, EPS); + } + { // toRankResult + RankResult rr = toRankResult("foo", "a:0.5 b:-0.5 c:2 d:3 "); + std::vector<vespalib::string> keys = rr.getKeys(); + ASSERT_TRUE(keys.size() == 4); + EXPECT_EQUAL(keys[0], vespalib::string("foo.a")); + EXPECT_EQUAL(keys[1], vespalib::string("foo.b")); + EXPECT_EQUAL(keys[2], vespalib::string("foo.c")); + EXPECT_EQUAL(keys[3], vespalib::string("foo.d")); + EXPECT_APPROX(rr.getScore("foo.a"), 0.5f, EPS); + EXPECT_APPROX(rr.getScore("foo.b"), -0.5f, EPS); + EXPECT_APPROX(rr.getScore("foo.c"), 2.0f, EPS); + EXPECT_APPROX(rr.getScore("foo.d"), 3.0f, EPS); + } +} + + +void +Test::testAge() +{ + { // Test blueprint + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "datetime2"); + + AgeBlueprint pt; + EXPECT_TRUE(assertCreateInstance(pt, "age")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, idx_env, params); + FT_SETUP_OK(pt, idx_env, params.add("datetime"), in.add("now"), out.add("out")); + FT_SETUP_FAIL(pt, idx_env, params.add("datetime2")); + + FT_DUMP_EMPTY(_factory, "age"); + } + + { // Test executor + assertAge(0, "doctime", 60, 120); + assertAge(60, "doctime", 180, 120); + assertAge(15000000000, "doctime", 20000000000, 5000000000); + } +} + +void +Test::assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime) +{ + vespalib::string feature = "age(" + attr + ")"; + FtFeatureTest ft(_factory, feature); + setupForAgeTest(ft, docTime); + 
ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME, + vespalib::make_string("%" PRIu64, now)); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expAge))); +} + +void +Test::setupForAgeTest(FtFeatureTest & ft, uint64_t docTime) +{ + AttributePtr doctime = AttributeFactory::createAttribute("doctime", AVC(AVBT::INT64, AVCT::SINGLE)); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "doctime"); + doctime->addReservedDoc(); + doctime->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(doctime); + (static_cast<IntegerAttribute *>(doctime.get()))->update(1, docTime); + doctime->commit(); +} + +void +Test::testAttribute() +{ + AttributeBlueprint prototype; + { + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + + EXPECT_TRUE(assertCreateInstance(prototype, "attribute")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, idx_env, params); // expects 1 - 2 params + + FT_SETUP_OK(prototype, idx_env, params.add("bar"), in, + out.add("value").add("weight").add("contains").add("count")); + FT_SETUP_OK(prototype, idx_env, params.add("0"), in, out); + + FT_DUMP_EMPTY(_factory, "attribute"); + } + { // single attributes + RankResult exp; + exp.addScore("attribute(sint)", 10). + addScore("attribute(sint,0)", 10). + addScore("attribute(sfloat)", 60.5f). + addScore("attribute(sstr)", (feature_t)vespalib::hash_code("foo")). + addScore("attribute(sint).count", 1). + addScore("attribute(sfloat).count", 1). + addScore("attribute(sstr).count", 1). + addScore("attribute(udefint)", search::attribute::getUndefined<feature_t>()). + addScore("attribute(udeffloat)", search::attribute::getUndefined<feature_t>()). 
+ addScore("attribute(udefstr)", (feature_t)vespalib::hash_code("")); + + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat"). + addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } + { // array attributes + RankResult exp; + exp.addScore("attribute(aint)", 0). + addScore("attribute(aint,0)", 20). + addScore("attribute(aint,1)", 30). + addScore("attribute(aint,2)", 0). + addScore("attribute(afloat,0)", 70.5f). + addScore("attribute(afloat,1)", 80.5f). + addScore("attribute(astr,0)", (feature_t)vespalib::hash_code("bar")). + addScore("attribute(astr,1)", (feature_t)vespalib::hash_code("baz")). + addScore("attribute(aint).count", 2). + addScore("attribute(aint,0).count", 0). + addScore("attribute(afloat).count", 2). + addScore("attribute(afloat,0).count", 0). + addScore("attribute(astr).count", 2). + addScore("attribute(astr,0).count", 0); + + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"). + addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat"). + addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } + { // weighted set attributes + RankResult exp; + exp.addScore("attribute(wsint).value", 0). + addScore("attribute(wsint).weight", 0). + addScore("attribute(wsint).contains", 0). + addScore("attribute(wsint,100).value", 0). + addScore("attribute(wsint,100).weight", 0). 
+ addScore("attribute(wsint,100).contains", 0). + addScore("attribute(wsint,40).value", 40). + addScore("attribute(wsint,40).weight", 10). + addScore("attribute(wsint,40).contains", 1). + addScore("attribute(wsint,50).value", 50). + addScore("attribute(wsint,50).weight", 20). + addScore("attribute(wsint,50).contains", 1). + addScore("attribute(wsfloat).value", 0). + addScore("attribute(wsfloat).weight", 0). + addScore("attribute(wsfloat).contains", 0). + addScore("attribute(wsfloat,1000.5).value", 0). + addScore("attribute(wsfloat,1000.5).weight", 0). + addScore("attribute(wsfloat,1000.5).contains", 0). + addScore("attribute(wsfloat,90.5).value", 90.5f). + addScore("attribute(wsfloat,90.5).weight", -30). + addScore("attribute(wsfloat,90.5).contains", 1). + addScore("attribute(wsfloat,100.5).value", 100.5f). + addScore("attribute(wsfloat,100.5).weight", -40). + addScore("attribute(wsfloat,100.5).contains", 1). + addScore("attribute(wsstr).value", 0). + addScore("attribute(wsstr).weight", 0). + addScore("attribute(wsstr).contains", 0). + addScore("attribute(wsstr,foo).value", 0). + addScore("attribute(wsstr,foo).weight", 0). + addScore("attribute(wsstr,foo).contains", 0). + addScore("attribute(wsstr,qux).value", (feature_t)vespalib::hash_code("qux")). + addScore("attribute(wsstr,qux).weight", 11). + addScore("attribute(wsstr,qux).contains", 1). + addScore("attribute(wsstr,quux).value", (feature_t)vespalib::hash_code("quux")). + addScore("attribute(wsstr,quux).weight", 12). + addScore("attribute(wsstr,quux).contains", 1). + addScore("attribute(wsint).count", 2). + addScore("attribute(wsint,40).count", 0). + addScore("attribute(wsfloat).count", 2). + addScore("attribute(wsfloat,90.5).count", 0). + addScore("attribute(wsstr).count", 2). + addScore("attribute(wsstr,qux).count", 0); + + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"). 
+ addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat"). + addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } + { // unique only attribute + RankResult exp; + exp.addScore("attribute(unique).value", 0). + addScore("attribute(unique).weight", 0). + addScore("attribute(unique).contains", 0). + addScore("attribute(unique).count", 0); + + FtFeatureTest ft(_factory, exp.getKeys()); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.setup()); + //ASSERT_TRUE(ft.execute(exp)); + } +} + + +void +Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env) +{ + // setup an original attribute manager with attributes + std::vector<AttributePtr> avs; + avs.push_back(AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE))); // 0 + avs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); // 1 + avs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET))); // 2 + avs.push_back(AttributeFactory::createAttribute("sfloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 3 + avs.push_back(AttributeFactory::createAttribute("afloat", AVC(AVBT::FLOAT, AVCT::ARRAY))); // 4 + avs.push_back(AttributeFactory::createAttribute("wsfloat",AVC(AVBT::FLOAT, AVCT::WSET))); // 5 + avs.push_back(AttributeFactory::createAttribute("sstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 6 + avs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); // 7 + avs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); // 8 + avs.push_back(AttributeFactory::createAttribute("udefint", AVC(AVBT::INT32, AVCT::SINGLE))); // 9 + avs.push_back(AttributeFactory::createAttribute("udeffloat", AVC(AVBT::FLOAT, AVCT::SINGLE))); // 10 + avs.push_back(AttributeFactory::createAttribute("udefstr", AVC(AVBT::STRING, AVCT::SINGLE))); // 11 + + // simulate a unique only attribute as 
specified in sd + AVC cfg(AVBT::INT32, AVCT::SINGLE); + cfg.setFastSearch(true); + avs.push_back(AttributeFactory::createAttribute("unique", cfg)); // 9 + + if (setup_env) { + // register attributes in index environment + ft.getIndexEnv().getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "afloat") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "astr") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefint") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udeffloat") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "udefstr") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique"); + } + + for (uint32_t i = 0; i < avs.size(); ++i) { + avs[i]->addReservedDoc(); + avs[i]->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(avs[i]); + } + + // integer attributes + (static_cast<IntegerAttribute *>(avs[0].get()))->update(1, 10); + (static_cast<IntegerAttribute *>(avs[1].get()))->append(1, 20, 0); + (static_cast<IntegerAttribute *>(avs[1].get()))->append(1, 30, 0); + (static_cast<IntegerAttribute *>(avs[2].get()))->append(1, 40, 10); + (static_cast<IntegerAttribute *>(avs[2].get()))->append(1, 50, 20); + (static_cast<IntegerAttribute *>(avs[9].get()))->update(1, search::attribute::getUndefined<int32_t>()); + // feature_t attributes + (static_cast<FloatingPointAttribute *>(avs[3].get()))->update(1, 60.5f); + (static_cast<FloatingPointAttribute *>(avs[4].get()))->append(1, 70.5f, 0); + (static_cast<FloatingPointAttribute 
*>(avs[4].get()))->append(1, 80.5f, 0); + (static_cast<FloatingPointAttribute *>(avs[5].get()))->append(1, 90.5f, -30); + (static_cast<FloatingPointAttribute *>(avs[5].get()))->append(1, 100.5f, -40); + (static_cast<FloatingPointAttribute *>(avs[10].get()))->update(1, search::attribute::getUndefined<float>()); + // string attributes + (static_cast<StringAttribute *>(avs[6].get()))->update(1, "foo"); + (static_cast<StringAttribute *>(avs[7].get()))->append(1, "bar", 0); + (static_cast<StringAttribute *>(avs[7].get()))->append(1, "baz", 0); + (static_cast<StringAttribute *>(avs[8].get()))->append(1, "qux", 11); + (static_cast<StringAttribute *>(avs[8].get()))->append(1, "quux", 12); + (static_cast<StringAttribute *>(avs[11].get()))->update(1, ""); + + for (uint32_t i = 0; i < avs.size() - 1; ++i) { // do not commit the noupdate attribute + avs[i]->commit(); + } + + // save 'sint' and load it into 'unique' (only way to set a noupdate attribute) + ASSERT_TRUE(avs[0]->saveAs(avs[9]->getBaseFileName())); + ASSERT_TRUE(avs[9]->load()); +} + +void +Test::testCloseness() +{ + { // Test blueprint. + ClosenessBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "closeness")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("name"), in.add("distance(name)"), out.add("out").add("logscale")); + + FT_DUMP_EMPTY(_factory, "closeness"); + } + + { // Test executor. 
+ assertCloseness(1, "pos", 0); + assertCloseness(0.8, "pos", 1802661); + assertCloseness(0, "pos", 9013306); + // use non-existing attribute -> default distance + assertCloseness(0, "no", 0); + + // use non-default maxDistance + assertCloseness(1, "pos", 0, 100); + assertCloseness(0.5, "pos", 50, 100); + assertCloseness(0, "pos", 100, 100); + assertCloseness(0, "pos", 101, 100); + + // test logscale using halfResponse (define that x = 10 should give 0.5 -> s = -10^2/(2*10 - 100) = 1.25 (scale distance)) + assertCloseness(1, "pos", 0, 100, 10); + assertCloseness(0.5, "pos", 10, 100, 10); + assertCloseness(0, "pos", 100, 100, 10); + assertCloseness(0, "pos", 101, 100, 10); + } +} + +void +Test::assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance, double halfResponse) +{ + vespalib::string feature = "closeness(" + attr + ")"; + FtFeatureTest ft(_factory, feature); + std::vector<std::pair<int32_t, int32_t> > positions; + int32_t x = 0; + positions.push_back(std::make_pair(x, x)); + setupForDistanceTest(ft, "pos", positions, false); + ft.getQueryEnv().getLocation().setXPosition((int)distance); + ft.getQueryEnv().getLocation().setValid(true); + if (maxDistance > 0) { + ft.getIndexEnv().getProperties().add(feature + ".maxDistance", + vespalib::make_string("%u", (unsigned int)maxDistance)); + } + if (halfResponse > 0) { + ft.getIndexEnv().getProperties().add(feature + ".halfResponse", + vespalib::make_string("%f", halfResponse)); + feature.append(".logscale"); + } + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore(feature, exp))); +} + +void +Test::testFieldLength() +{ + FieldLengthBlueprint pt; + + { // Test blueprint. 
+ EXPECT_TRUE(assertCreateInstance(pt, "fieldLength")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FtIndexEnvironment ie; + ie.getBuilder() + .addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + FT_SETUP_FAIL(pt, params.add("qux")); // does not exists + FT_SETUP_FAIL(pt, params.clear().add("bar")); // not an index + FT_SETUP_FAIL(pt, params.clear().add("afoo")); // wrong collection type + FT_SETUP_FAIL(pt, params.clear().add("wfoo")); // wrong collection type + FT_SETUP_OK(pt, ie, params.clear().add("foo"), in, out.add("out")); + + FT_DUMP_EMPTY(_factory, "fieldLength"); + FT_DUMP_EMPTY(_factory, "fieldLength", ie); + } + + { // Test executor. + for (uint32_t i = 0; i < 10; ++i) { + StringList features; + features.add("fieldLength(foo)").add("fieldLength(baz)"); + FtFeatureTest ft(_factory, features); + ASSERT_TRUE(!ft.setup()); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"). 
+ addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar").addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->addOccurence("foo", 0, i)); + ASSERT_TRUE(mdb->setFieldLength("foo", i + 10)); + ASSERT_TRUE(mdb->addOccurence("baz", 0, i)); + ASSERT_TRUE(mdb->setFieldLength("baz", i + 20)); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(RankResult() + .addScore("fieldLength(foo)", (feature_t)i + 10) + .addScore("fieldLength(baz)", (feature_t)i + 20))); + } + } +} + + +void +Test::assertFieldMatch(const vespalib::string & spec, + const vespalib::string & query, + const vespalib::string & field, + const fieldmatch::Params * params, + uint32_t totalTermWeight, + feature_t totalSignificance) +{ + LOG(info, "assertFieldMatch('%s', '%s', '%s', (%u))", spec.c_str(), query.c_str(), field.c_str(), totalTermWeight); + + // Setup feature test. + vespalib::string feature = "fieldMatch(foo)"; + FtFeatureTest ft(_factory, feature); + + setupFieldMatch(ft, "foo", query, field, params, totalTermWeight, totalSignificance, 1); + + // Execute and compare results. + RankResult rr = toRankResult(feature, spec); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr)); +} + +void +Test::assertFieldMatch(const vespalib::string & spec, + const vespalib::string & query, + const vespalib::string & field, + uint32_t totalTermWeight) +{ + assertFieldMatch(spec, query, field, NULL, totalTermWeight); +} + +void +Test::assertFieldMatchTS(const vespalib::string & spec, + const vespalib::string & query, + const vespalib::string & field, + feature_t totalSignificance) +{ + assertFieldMatch(spec, query, field, NULL, 0, totalSignificance); +} + + +void +Test::testFirstPhase() +{ + { // Test blueprint. 
+ FirstPhaseBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "firstPhase")); + + FtIndexEnvironment ie; + ie.getProperties().add(indexproperties::rank::FirstPhase::NAME, "random"); // override nativeRank dependency + + StringList params, in, out; + FT_SETUP_OK(pt, ie, params, in.add("random"), out.add("score")); + FT_SETUP_FAIL(pt, params.add("foo")); + params.clear(); + + FT_DUMP(_factory, "firstPhase", ie, StringList().add("firstPhase")); + } + + { // Test executor. + FtFeatureTest ft(_factory, "firstPhase"); + ft.getIndexEnv().getProperties().add(indexproperties::rank::FirstPhase::NAME, "value(10)"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(10.0f)); + } +} + +void +Test::testForeach() +{ + { // Test blueprint. + ForeachBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "foreach")); + + StringList params, in, out; + out.add("value"); + FT_SETUP_FAIL(pt, params); + // illegal dimension + FT_SETUP_FAIL(pt, params.add("squares").add("N").add("foo").add("true").add("sum")); + // illegal condition + FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("false").add("sum")); + // illegal operation + FT_SETUP_FAIL(pt, params.clear().add("fields").add("N").add("foo").add("true").add("dotproduct")); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz"); + + // various dimensions + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo(N)").add("true").add("sum"), + in.clear().add("foo(0)").add("foo(1)").add("foo(2)").add("foo(3)").add("foo(4)"). + add("foo(5)").add("foo(6)").add("foo(7)").add("foo(8)").add("foo(9)"). 
+ add("foo(10)").add("foo(11)").add("foo(12)").add("foo(13)").add("foo(14)").add("foo(15)"), out); + ie.getProperties().add("foreach.maxTerms", "1"); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), + in.clear().add("foo"), out); + FT_SETUP_OK(pt, ie, params.clear().add("fields").add("N").add("foo(N)").add("true").add("sum"), + in.clear().add("foo(foo)").add("foo(bar)"), out); + FT_SETUP_OK(pt, ie, params.clear().add("attributes").add("N").add("foo(N)").add("true").add("sum"), + in.clear().add("foo(baz)"), out); + + // various conditions + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in.clear().add("foo"), out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("<4").add("sum"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add(">4").add("sum"), in, out); + // various operations + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("sum"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("product"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("average"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("max"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("min"), in, out); + FT_SETUP_OK(pt, ie, params.clear().add("terms").add("N").add("foo").add("true").add("count"), in, out); + + FT_DUMP_EMPTY(_factory, "foreach"); + } + { // Test executor + // single loop + assertForeachOperation( 16.5, "true", "sum"); + assertForeachOperation(-2106, "true", "product"); + assertForeachOperation( 3.3, "true", "average"); + assertForeachOperation( 8, "true", "max"); + assertForeachOperation( -4.5, "true", "min"); + assertForeachOperation( 5, "true", "count"); + + assertForeachOperation(3, "\">4\"", "count"); + 
assertForeachOperation(2, "\">4.5\"", "count"); + assertForeachOperation(2, "\"<4\"", "count"); + assertForeachOperation(2, "\"<4.5\"", "count"); + assertForeachOperation(4, "\">0\"", "count"); + assertForeachOperation(1, "\"<0\"", "count"); + assertForeachOperation(4, "\">-4.5\"", "count"); + assertForeachOperation(1, "\"<-4.4\"", "count"); + + { // average without any values + FtFeatureTest ft(_factory, "foreach(fields,N,value(N),true,average)"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(0)); + } + + { // double loop + vespalib::string feature = + "foreach(fields,N,foreach(attributes,M,rankingExpression(\"value(N)+value(M)\"),true,product),true,sum)"; + LOG(info, "double loop feature: '%s'", feature.c_str()); + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getProperties().add("foreach.maxTerms", "1"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "1"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "3"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "4"); + // ((1 + 3) * (1 + 4)) + ((2 + 3) * (2 + 4)) = 4 * 5 + 5 * 6 = 20 + 30 = 50 + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(50)); + ASSERT_TRUE(ft.execute(50)); // check that reset works + } + } +} + +void +Test::assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op) +{ + vespalib::string feature = "foreach(fields,N,value(N)," + cond + "," + op + ")"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "4.5"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "2"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "8"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, 
"6.5"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "-4.5"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); // check that reset works +} + + +void +Test::testFreshness() +{ + { // Test blueprint. + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "name"); + + FreshnessBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "freshness")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, idx_env, params); + FT_SETUP_OK(pt, idx_env, params.add("name"), in.add("age(name)"), out.add("out").add("logscale")); + + FT_DUMP_EMPTY(_factory, "freshness"); + } + + { // Test executor. + assertFreshness(1, "doctime", 0); + assertFreshness(0.5, "doctime", 3*15*24*60*60); + assertFreshness(0, "doctime", 3*30*24*60*60); + // use non-default maxAge + assertFreshness(1, "doctime", 0, 120); + assertFreshness(0.75, "doctime", 30, 120); + assertFreshness(0.5, "doctime", 60, 120); + assertFreshness(0, "doctime", 120, 120); + assertFreshness(0, "doctime", 121, 120); + + // test logscale + assertFreshness(1, "doctime", 0, 0, 0, true); + assertFreshness(0.5, "doctime", 7*24*60*60, 0, 0, true); + assertFreshness(0, "doctime", 3*30*24*60*60, 0, 0, true); + // use non-default maxAge & halfResponse + assertFreshness(1, "doctime", 0, 120, 30, true); + assertFreshness(0.5, "doctime", 30, 120, 30, true); // half response after 30 secs + assertFreshness(0, "doctime", 120, 120, 30, true); + assertFreshness(0, "doctime", 121, 120, 30, true); + // test invalid half response + assertFreshness(0.5, "doctime", 1, 120, 0.5, true); // half response is set to 1 + assertFreshness(0.5, "doctime", 59, 120, 70, true); // half response is set to 120/2 - 1 + } +} + +void +Test::assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge, double halfResponse, bool logScale) +{ + vespalib::string feature = "freshness(" + attr + ")"; 
+ FtFeatureTest ft(_factory, feature); + setupForAgeTest(ft, 60); // time = 60 + if (maxAge > 0) { + ft.getIndexEnv().getProperties().add("freshness(" + attr + ").maxAge", + vespalib::make_string("%u", maxAge)); + } + if (halfResponse > 0) { + ft.getIndexEnv().getProperties().add("freshness(" + attr + ").halfResponse", + vespalib::make_string("%f", halfResponse)); + } + if (logScale) { + feature.append(".logscale"); + } + ft.getQueryEnv().getProperties().add(queryproperties::now::SystemTime::NAME, + vespalib::make_string("%u", age + 60)); // now = age + 60 + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore(feature, expFreshness).setEpsilon(EPS))); +} + +void +Test::testDistance() +{ + { // Test blueprint. + DistanceBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "distance")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("pos"), in, out.add("out")); + + FT_DUMP_EMPTY(_factory, "distance"); + } + + { // Test executor. 
+ + { // test 2D single location (zcurve) + assert2DZDistance(static_cast<feature_t>(sqrt(650.0f)), "5:-5", 10, 20); + assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), "5:-5", 10, -20); + assert2DZDistance(static_cast<feature_t>(sqrt(450.0f)), "5:-5", -10, -20); + assert2DZDistance(static_cast<feature_t>(sqrt(850.0f)), "5:-5", -10, 20); + assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), "5:-5", 15, -20, 0x80000000); // 2^31 + } + + { // test 2D multi location (zcurve) + vespalib::string positions = "5:-5,35:0,5:40,35:-40"; + assert2DZDistance(static_cast<feature_t>(sqrt(425.0f)), positions, 10, 20); + assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), positions, 10, -20); + assert2DZDistance(static_cast<feature_t>(sqrt(450.0f)), positions, -10, -20); + assert2DZDistance(static_cast<feature_t>(sqrt(625.0f)), positions, -10, 20); + assert2DZDistance(static_cast<feature_t>(sqrt(250.0f)), positions, 15, -20, 0x80000000); // 2^31 + assert2DZDistance(static_cast<feature_t>(sqrt(425.0f)), positions, 45, -20, 0x80000000); // 2^31 + } + + { // test default distance + { // non-existing attribute + FtFeatureTest ft(_factory, "distance(pos)"); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + { // wrong attribute type (float) + FtFeatureTest ft(_factory, "distance(pos)"); + AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)); + pos->commit(); + ft.getIndexEnv().getAttributeManager().add(pos); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + { // wrong attribute type (string) + FtFeatureTest ft(_factory, "distance(pos)"); + AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE)); + pos->commit(); + ft.getIndexEnv().getAttributeManager().add(pos); + 
ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + { // wrong attribute collection type (weighted set) + FtFeatureTest ft(_factory, "distance(pos)"); + AttributePtr pos = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET)); + pos->commit(); + ft.getIndexEnv().getAttributeManager().add(pos); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("distance(pos)", 6400000000.0))); + } + } + } +} + +void +Test::setupForDistanceTest(FtFeatureTest &ft, const vespalib::string & attrName, + const std::vector<std::pair<int32_t, int32_t> > & positions, bool zcurve) +{ + AttributePtr pos = AttributeFactory::createAttribute(attrName, AVC(AVBT::INT64, AVCT::ARRAY)); + + pos->addReservedDoc(); + pos->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(pos); + + IntegerAttribute * ia = static_cast<IntegerAttribute *>(pos.get()); + for (uint32_t i = 0; i < positions.size(); ++i) { + if (zcurve) { + ia->append(1, vespalib::geo::ZCurve::encode(positions[i].first, positions[i].second), 0); + } else { + ia->append(1, positions[i].first, 0); + } + } + + pos->commit(); +} + +void +Test::assert2DZDistance(feature_t exp, const vespalib::string & positions, + int32_t xquery, int32_t yquery, uint32_t xAspect) +{ + LOG(info, "assert2DZDistance(%g, %s, %d, %d, %u)", exp, positions.c_str(), xquery, yquery, xAspect); + FtFeatureTest ft(_factory, "distance(pos)"); + std::vector<vespalib::string> ta = FtUtil::tokenize(positions, ","); + std::vector<std::pair<int32_t, int32_t> > pos; + for (uint32_t i = 0; i < ta.size(); ++i) { + std::vector<vespalib::string> tb = FtUtil::tokenize(ta[i], ":"); + int32_t x = util::strToNum<int32_t>(tb[0]); + int32_t y = util::strToNum<int32_t>(tb[1]); + pos.push_back(std::make_pair(x, y)); + } + setupForDistanceTest(ft, "pos", pos, true); + 
ft.getQueryEnv().getLocation().setXPosition(xquery); + ft.getQueryEnv().getLocation().setYPosition(yquery); + ft.getQueryEnv().getLocation().setXAspect(xAspect); + ft.getQueryEnv().getLocation().setValid(true); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().setEpsilon(1e-4). + addScore("distance(pos)", exp))); +} + +void +Test::testDistanceToPath() +{ + { + // Test blueprint. + DistanceToPathBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "distanceToPath")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("pos"), in, out.add("distance").add("traveled").add("product")); + FT_SETUP_FAIL(pt, params.add("foo")); + + FT_DUMP_EMPTY(_factory, "distanceToPath"); + } + + { + // Test executor. + std::vector<std::pair<int32_t, int32_t> > pos; + pos.push_back(std::make_pair(0, 0)); + + // invalid path + assertDistanceToPath(pos, "a"); + assertDistanceToPath(pos, "("); + assertDistanceToPath(pos, "(a"); + assertDistanceToPath(pos, "(a)"); + assertDistanceToPath(pos, "(-1)"); + assertDistanceToPath(pos, "(-1,1)"); + assertDistanceToPath(pos, "(-1,1,1)"); + assertDistanceToPath(pos, "(-1 1 1 1)"); + + // path on either side of document + assertDistanceToPath(pos, "(-1,1,1,1)", 1, 0.5, 2); + assertDistanceToPath(pos, "(-1,-1,1,-1)", 1, 0.5, -2); + + // zero length path + assertDistanceToPath(pos, "(0,0,0,0)", 0, 0); + assertDistanceToPath(pos, "(0,0,0,0,0,0)", 0, 0); + assertDistanceToPath(pos, "(0,1,0,1)", 1, 0); + assertDistanceToPath(pos, "(0,1,0,1,0,1)", 1, 0); + + // path crosses document + assertDistanceToPath(pos, "(-1,1,1,-1)", 0, 0.5); + assertDistanceToPath(pos, "(-2,2,2,-2)", 0, 0.5); + assertDistanceToPath(pos, "(-1,1,3,-3)", 0, 0.25); + + // intersection outside segments + assertDistanceToPath(pos, "(1,0,2,0)", 1, 0); // before + assertDistanceToPath(pos, "(0,1,0,2)", 1, 0); + assertDistanceToPath(pos, "(-2,0,-1,0)", 1, 1); // after + assertDistanceToPath(pos, "(0,-2,0,-1)", 1, 1); + + // various paths 
+ assertDistanceToPath(pos, "(-3,1,2,1,2,-2,-2,-2)", 1, 0.25, 5); + assertDistanceToPath(pos, "(-3,2,2,2,2,-1,0,-1)", 1, 1, 2); + + // multiple document locations + pos.push_back(std::make_pair(0, 1)); + assertDistanceToPath(pos, "(-1,1,1,1)", 0, 0.5); + assertDistanceToPath(pos, "(-2,-1,-1,1)", 1, 1, 2); + assertDistanceToPath(pos, "(-1,0.25,1,0.25)", 0.25, 0.5, 0.5); + + { + // Test defaults. + RankResult res; + res.addScore("distanceToPath(pos).distance", DistanceExecutor::DEFAULT_DISTANCE); + res.addScore("distanceToPath(pos).traveled", 1); + { + // Non-existing attribute. + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + { + // Wrong attribute type (float). + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::FLOAT, AVCT::SINGLE)); + att->commit(); + ft.getIndexEnv().getAttributeManager().add(att); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + { + // Wrong attribute type (string). + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::STRING, AVCT::SINGLE)); + att->commit(); + ft.getIndexEnv().getAttributeManager().add(att); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + { + // Wrong attribute collection type (weighted set). 
+ FtFeatureTest ft(_factory, "distanceToPath(pos)"); + AttributePtr att = AttributeFactory::createAttribute("pos", AVC(AVBT::INT64, AVCT::WSET)); + att->commit(); + ft.getIndexEnv().getAttributeManager().add(att); + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", "0 0 1 1"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(res)); + } + } + } +} + +void +Test::assertDistanceToPath(const std::vector<std::pair<int32_t, int32_t> > pos, + const vespalib::string &path, feature_t distance, feature_t traveled, feature_t product) +{ + LOG(info, "Testing distance to path '%s' with %zd document locations.", path.c_str(), pos.size()); + + FtFeatureTest ft(_factory, "distanceToPath(pos)"); + setupForDistanceTest(ft, "pos", pos, true); + + ft.getQueryEnv().getProperties().add("distanceToPath(pos).path", path); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult() + .addScore("distanceToPath(pos).distance", distance) + .addScore("distanceToPath(pos).traveled", traveled) + .addScore("distanceToPath(pos).product", product))); +} + +void +Test::setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, const vespalib::string & docType) +{ + AttributePtr type = AttributeFactory::createAttribute(attrName, AVC(AVBT::STRING, AVCT::SINGLE)); + + type->addReservedDoc(); + type->addDocs(1); + ft.getIndexEnv().getAttributeManager().add(type); + + (static_cast<StringAttribute *>(type.get()))->update(1, docType); + type->commit(); +} + +void +Test::testDotProduct() +{ + { // Test blueprint. 
+ FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "attribute"); + + DotProductBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "dotProduct")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, idx_env, params); + FT_SETUP_OK(pt, idx_env, params.add("attribute").add("vector"), in, out.add("scalar")); + + FT_DUMP_EMPTY(_factory, "dotProduct"); + } + + { // Test vector parser + { // string enum vector + FtFeatureTest ft(_factory, "value(0)"); + setupForDotProductTest(ft); + search::AttributeGuard::UP ag(ft.getIndexEnv().getAttributeManager().getAttribute("wsstr")); + const search::attribute::IAttributeVector * sv = ag->operator->(); + EXPECT_TRUE(sv->hasEnum()); + search::attribute::EnumHandle e; + { + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse("", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + WeightedSetParser::parse("()", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + WeightedSetParser::parse("(a;1)", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + WeightedSetParser::parse("(a:1)", out); + EXPECT_EQUAL(out.getVector().size(), 1u); + EXPECT_TRUE(sv->findEnum("a", e)); + EXPECT_EQUAL(out.getVector()[0].first, e); + EXPECT_EQUAL(out.getVector()[0].second, 1.0); + } + std::vector<vespalib::string> v = {"(b:2.5,c:-3.5)", "{b:2.5,c:-3.5}"}; + for(const vespalib::string & s : v) { + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse(s, out); + EXPECT_EQUAL(out.getVector().size(), 2u); + EXPECT_TRUE(sv->findEnum("b", e)); + EXPECT_EQUAL(out.getVector()[0].first, e); + EXPECT_EQUAL(out.getVector()[0].second, 2.5); + EXPECT_TRUE(sv->findEnum("c", e)); + EXPECT_EQUAL(out.getVector()[1].first, e); + EXPECT_EQUAL(out.getVector()[1].second, -3.5); + } + { // test funky syntax + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse("( a: 1, b:2 ,c: , :3)", out); + EXPECT_EQUAL(out.getVector().size(), 3u); + EXPECT_TRUE(sv->findEnum("a", e)); + 
EXPECT_EQUAL(out.getVector()[0].first, e); + EXPECT_EQUAL(out.getVector()[0].second, 1); + EXPECT_TRUE(sv->findEnum("b", e)); + EXPECT_EQUAL(out.getVector()[1].first, e); + EXPECT_EQUAL(out.getVector()[1].second, 2); + EXPECT_TRUE(sv->findEnum("c", e)); + EXPECT_EQUAL(out.getVector()[2].first, e); + EXPECT_EQUAL(out.getVector()[2].second, 0); + } + { // strings not in attribute vector + dotproduct::wset::EnumVector out(sv); + WeightedSetParser::parse("(not:1)", out); + EXPECT_EQUAL(out.getVector().size(), 0u); + } + } + { // string vector + dotproduct::wset::StringVector out; + WeightedSetParser::parse("(b:2.5,c:-3.5)", out); + EXPECT_EQUAL(out.getVector().size(), 2u); + EXPECT_EQUAL(out.getVector()[0].first, "b"); + EXPECT_EQUAL(out.getVector()[0].second, 2.5); + EXPECT_EQUAL(out.getVector()[1].first, "c"); + EXPECT_EQUAL(out.getVector()[1].second, -3.5); + } + { // integer vector + dotproduct::wset::IntegerVector out; + WeightedSetParser::parse("(20:2.5,30:-3.5)", out); + EXPECT_EQUAL(out.getVector().size(), 2u); + EXPECT_EQUAL(out.getVector()[0].first, 20); + EXPECT_EQUAL(out.getVector()[0].second, 2.5); + EXPECT_EQUAL(out.getVector()[1].first, 30); + EXPECT_EQUAL(out.getVector()[1].second, -3.5); + } + } + { // Array parser + std::vector<vespalib::string> v = {"(0:2,7:-3,1:-3)", "{0:2,7:-3,1:-3}", "[2 -3 0 0 0 0 0 -3]"}; + for(const vespalib::string & s : v) { + std::vector<int32_t> out; + ArrayParser::parse(s, out); + EXPECT_EQUAL(8u, out.size()); + EXPECT_EQUAL(2, out[0]); + EXPECT_EQUAL(-3, out[1]); + EXPECT_EQUAL(0, out[2]); + EXPECT_EQUAL(0, out[3]); + EXPECT_EQUAL(0, out[4]); + EXPECT_EQUAL(0, out[5]); + EXPECT_EQUAL(0, out[6]); + EXPECT_EQUAL(-3, out[7]); + } + } + { + vespalib::string s = "[[1:3]]"; + std::vector<int32_t> out; + ArrayParser::parse(s, out); + EXPECT_EQUAL(0u, out.size()); + } + + { // Test executor. 
+ { // string enum attribute + // docId = 1 + assertDotProduct(0, "()"); + assertDotProduct(0, "(f:5)"); + assertDotProduct(0, "(f:5,g:5)"); + assertDotProduct(-5, "(a:-5)"); + assertDotProduct(25, "(e:5)"); + assertDotProduct(-5.5, "(a:-5.5)"); + assertDotProduct(27.5, "(e:5.5)"); + assertDotProduct(55, "(a:1,b:2,c:3,d:4,e:5)"); + assertDotProduct(20, "(b:10,b:15)"); + // docId = 2 + assertDotProduct(0, "()", 2); + assertDotProduct(0, "(a:1,b:2,c:3,d:4,e:5)", 2); + } + { // string attribute + assertDotProduct(0, "(f:5,g:5)", 1, "wsextstr"); + assertDotProduct(550, "(a:1,b:2,c:3,d:4,e:5)", 1, "wsextstr"); + } + { // integer attribute + assertDotProduct(0, "()", 1, "wsint"); + assertDotProduct(0, "(6:5,7:5)", 1, "wsint"); + assertDotProduct(55, "(1:1,2:2,3:3,4:4,5:5)", 1, "wsint"); + } + std::vector<const char *> attributes = {"arrint", "arrfloat", "arrint_fast", "arrfloat_fast"}; + for (const char * name : attributes) { + assertDotProduct(0, "()", 1, name); + assertDotProduct(0, "(6:5,7:5)", 1, name); + assertDotProduct(55, "(0:1,1:2,2:3,3:4,4:5)", 1, name); + assertDotProduct(55, "[1 2 3 4 5]", 1, name); + assertDotProduct(41, "{3:4,4:5}", 1, name); + } + { // float array attribute + assertDotProduct(55, "[1.0 2.0 3.0 4.0 5.0]", 1, "arrfloat"); + assertDotProduct(41, "{3:4,4:5.0}", 1, "arrfloat"); + } + { // Sparse float array attribute. 
+ assertDotProduct(17, "(0:1,3:4,50:97)", 1, "arrfloat"); + } + + assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint"); // attribute of the wrong type + assertDotProduct(17, "(0:1,3:4,50:97)", 1, "sint", "arrfloat"); // attribute override + assertDotProduct(0, "(0:1,3:4,50:97)", 1, "sint", "arrfloat_non_existing"); // incorrect attribute override + } + { // Test that correct executor is created + FtFeatureTest ft(_factory, "value(0)"); + setupForDotProductTest(ft); + ft.getQueryEnv().getProperties().add("dotProduct.vector", "(a:1)"); + ParameterList params; + params.push_back(Parameter(ParameterType::ATTRIBUTE, "wsstr")); + params.push_back(Parameter(ParameterType::STRING, "vector")); + DotProductBlueprint bp; + DummyDependencyHandler deps(bp); + EXPECT_TRUE(bp.setup(ft.getIndexEnv(), params)); + FeatureExecutor::LP exc = bp.createExecutor(ft.getQueryEnv()); + // check that we have the optimized enum version + dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> * myExc = + dynamic_cast<dotproduct::wset::DotProductExecutor<dotproduct::wset::EnumVector, WeightedEnumContent> *>(exc.get()); + EXPECT_TRUE(myExc != nullptr); + EXPECT_EQUAL(1u, deps.output.size()); + } +} + +void +Test::assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId, + const vespalib::string & attribute, const vespalib::string & attributeOverride) +{ + RankResult rr; + rr.addScore("dotProduct(" + attribute + ",vector)", exp); + FtFeatureTest ft(_factory, rr.getKeys()); + setupForDotProductTest(ft); + ft.getQueryEnv().getProperties().add("dotProduct.vector", vector); + if ( ! attributeOverride.empty() ) { + ft.getQueryEnv().getProperties().add("dotProduct." 
+ attribute + ".override.name", attributeOverride); + } + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(rr, docId)); +} + +void +Test::setupForDotProductTest(FtFeatureTest & ft) +{ + struct Config { + const char * name; + AVBT dataType; + AVCT collectionType; + bool fastSearch; + }; + std::vector<Config> cfgList = { {"wsint", AVBT::INT32, AVCT::WSET, false}, + {"arrint", AVBT::INT32, AVCT::ARRAY, false}, + {"arrfloat", AVBT::FLOAT, AVCT::ARRAY, false}, + {"arrint_fast", AVBT::INT32, AVCT::ARRAY, true}, + {"arrfloat_fast", AVBT::FLOAT, AVCT::ARRAY, true} + }; + AttributePtr a = AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET)); + AttributePtr c = AttributeFactory::createAttribute("sint", AVC(AVBT::INT32, AVCT::SINGLE)); + AttributePtr d(new search::WeightedSetStringExtAttribute("wsextstr")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsextstr"); + for (const Config & cfg : cfgList) { + AttributePtr baf = AttributeFactory::createAttribute(cfg.name, AVC(cfg.dataType, + cfg.collectionType, + cfg.fastSearch)); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, + cfg.collectionType==AVCT::ARRAY + ? 
CollectionType::ARRAY + : CollectionType::WEIGHTEDSET, + cfg.name); + baf->addReservedDoc(); + baf->addDocs(2); + ft.getIndexEnv().getAttributeManager().add(baf); + for (size_t i(1); i < 6; i++) { + IntegerAttribute * ia = dynamic_cast<IntegerAttribute *>(baf.get()); + if (ia) { + ia->append(1, i, i); + } else { + FloatingPointAttribute * fa = dynamic_cast<FloatingPointAttribute *>(baf.get()); + fa->append(1, i, i); + } + } + baf->commit(); + } + + a->addReservedDoc(); + c->addReservedDoc(); + a->addDocs(2); + c->addDocs(2); + ft.getIndexEnv().getAttributeManager().add(a); + ft.getIndexEnv().getAttributeManager().add(c); + ft.getIndexEnv().getAttributeManager().add(d); + + StringAttribute * sa = static_cast<StringAttribute *>(a.get()); + sa->append(1, "a", 1); + sa->append(1, "b", 2); + sa->append(1, "c", 3); + sa->append(1, "d", 4); + sa->append(1, "e", 5); + + WeightedSetStringExtAttribute * ea = static_cast<WeightedSetStringExtAttribute *>(d.get()); + EXPECT_TRUE(!ea->hasEnum()); + uint32_t docId; + ea->addDoc(docId); // reserved doc + ea->addDoc(docId); + ea->add("a", 10); + ea->add("b", 20); + ea->add("c", 30); + ea->add("d", 40); + ea->add("e", 50); + ea->addDoc(docId); + + a->commit(); + c->commit(); +} + +void +Test::testNow() +{ + { + // Test blueprint. + NowBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "now")); + + StringList params, in, out; + FT_SETUP_OK (pt, params, in, out.add("out")); + FT_SETUP_FAIL(pt, params.add("foo")); + + FT_DUMP(_factory, "now", StringList().add("now")); + } + + { + // Test executor. 
+ FtFeatureTest ft(_factory, "now"); + ASSERT_TRUE(ft.setup()); + + RankResult res; + res.addScore("now", 0.0f); + for (uint32_t i = 1; i <= 10; ++i) { + feature_t last = res.getScore("now"); + res.clear(); + ASSERT_TRUE(ft.executeOnly(res, i)); + ASSERT_TRUE(last <= res.getScore("now")); + } + } + + { + // Test executor with ms resolution + FtFeatureTest ft(_factory, "now"); + ft.getQueryEnv().getProperties().add("vespa.now", "15000000000"); + ASSERT_TRUE(ft.setup()); + + RankResult res; + ASSERT_TRUE(ft.executeOnly(res, 0)); + feature_t now = 15000000000; + ASSERT_EQUAL(now, res.getScore("now")); + } +} + + +void +Test::testMatch() +{ + { // Test blueprint. + MatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "match")); + + FtFeatureTest ft(_factory, ""); + setupForAttributeTest(ft); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + + FtIndexEnvironment idx_env; + idx_env.getBuilder() + .addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::INDEX, CollectionType::ARRAY, "bar") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint") + .addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight")); + FT_SETUP_OK(pt, idx_env, params, in + .add("fieldMatch(foo)") + 
.add("elementCompleteness(bar)") + .add("elementCompleteness(baz)") + .add("attributeMatch(sint)") + .add("attributeMatch(aint)") + .add("attributeMatch(wsint)"), out + .add("weight.foo") + .add("weight.bar") + .add("weight.baz") + .add("weight.sint") + .add("weight.aint") + .add("weight.wsint")); + FT_SETUP_FAIL(pt, idx_env, params.add("1")); // expects 0 parameters + + FT_DUMP_EMPTY(_factory, "match"); + } + + { // Test executor + FtFeatureTest ft(_factory, "match"); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); + + ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", "100"); // assign weight to all fields, simulate sd behaviour + ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", "200"); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.sint", "300"); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.aint", "400"); + + // search in field 'foo' + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0 + + // search in field 'sint' + ft.getQueryEnv().getBuilder().addAttributeNode("sint"); // term id 1 + setupForAttributeTest(ft, false); + + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // add hit for field 'foo' for search term 0 + ASSERT_TRUE(mdb->setFieldLength("foo", 1)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); + ASSERT_TRUE(mdb->setWeight("sint", 1, 0)); + ASSERT_TRUE(mdb->apply(1)); + + RankResult rr = toRankResult("match", "score:1 
totalWeight:400 weight.foo:100 weight.bar:200 weight.baz:100 weight.sint:300 weight.aint:400 weight.wsint:100"); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr)); + } + + { // Test executor + FtFeatureTest ft(_factory, "match"); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + // search in field 'foo' + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // term id 0 + ASSERT_TRUE(ft.setup()); + + // must create this so that term match data is configured with the term data object + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // no hits on docId 1 + RankResult rr = toRankResult("match", "score:0 totalWeight:0 weight.foo:100"); + ASSERT_TRUE(ft.execute(rr, 1)); + } +} + +void +Test::testMatches() +{ + { // Test blueprint. + MatchesBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "matches")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + + StringList params, in, out; + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // expects 1-2 parameters + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params.add("baz")); // cannot find the field + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo"), in, out.add("out")); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), in, out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("1"), in, out); + + FT_DUMP_EMPTY(_factory, "matches"); + } + { // Test executor for index fields + EXPECT_TRUE(assertMatches(0, "x", "a")); + EXPECT_TRUE(assertMatches(1, "a", "a")); + EXPECT_TRUE(assertMatches(1, "a b", "a b")); + // change docId to indicate no matches in the field + EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo)", 2)); + // specify termIdx as second parameter + 
EXPECT_TRUE(assertMatches(0, "x", "a", "matches(foo,0)")); + EXPECT_TRUE(assertMatches(1, "a", "a", "matches(foo,0)")); + EXPECT_TRUE(assertMatches(0, "a", "a", "matches(foo,1)")); + EXPECT_TRUE(assertMatches(0, "x b", "a b", "matches(foo,0)")); + EXPECT_TRUE(assertMatches(1, "x b", "a b", "matches(foo,1)")); + } + { // Test executor for attribute fields + FtFeatureTest ft(_factory, StringList().add("matches(foo)"). + add("matches(baz)"). + add("matches(foo,0)"). + add("matches(foo,1)"). + add("matches(foo,2)"). + add("matches(foo,3)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz"); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL); // query term 0, hit in foo + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != NULL); // query term 1, hit in bar + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL); // query term 2, hit in foo + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("foo", 0, 0); + mdb->setWeight("bar", 1, 0); + mdb->apply(1); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo)", 1))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(baz)", 0))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,0)", 1))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,1)", 0))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,2)", 0))); + EXPECT_TRUE(ft.execute(RankResult().addScore("matches(foo,3)", 0))); + } +} + +bool +Test::assertMatches(uint32_t output, + const vespalib::string & query, + const vespalib::string & field, + const vespalib::string & feature, + uint32_t docId) +{ + LOG(info, "assertMatches(%u, '%s', '%s', '%s')", output, query.c_str(), 
field.c_str(), feature.c_str()); + + // Setup feature test. + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + std::map<vespalib::string, std::vector<vespalib::string> > index; + index["foo"] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + ASSERT_TRUE(ft.execute(output, EPS, docId)); + // Execute and compare results. + if (!EXPECT_TRUE(ft.execute(output, EPS, docId))) return false; + return true; +} + + +void +Test::testQuery() +{ + { // Test blueprint. + QueryBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "query")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_OK(pt, params.add("foo"), in, out.add("out")); + + FT_DUMP_EMPTY(_factory, "query"); + } + + { // Test executor. + RankResult exp; + exp.addScore("query(def1)", 1.0). + addScore("query(def2)", 2.0). + addScore("query(def3)", 0.0). + addScore("query(val1)", 1.1). + addScore("query(val2)", 2.2). + addScore("query(hash1)", vespalib::hash_code("foo")). + addScore("query(hash2)", vespalib::hash_code("2")). + addScore("query(hash3)", vespalib::hash_code("foo")). + addScore("query(hash4)", vespalib::hash_code("'foo")); + FtFeatureTest ft(_factory, exp.getKeys()); + ft.getIndexEnv().getProperties() + .add("query(def1)", "1.0") + .add("$def2", "2.0"); + ft.getQueryEnv().getProperties() + .add("val1", "1.1") + .add("$val2", "2.2") + .add("hash1", "foo") + .add("hash2", "'2") + .add("hash3", "'foo") + .add("hash4", "''foo"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(exp)); + } +} + +void +Test::testQueryTermCount() +{ + { // Test blueprint. + QueryTermCountBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "queryTermCount")); + + StringList params, in, out; + FT_SETUP_OK(pt, params, in, out.add("out")); + FT_SETUP_FAIL(pt, params.add("foo")); + + StringList dump; + FT_DUMP(_factory, "queryTermCount", dump.add("queryTermCount")); + } + + { // Test executor. 
+ FtFeatureTest ft(_factory, "queryTermCount"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 0))); + } + + { // Test executor. + FtFeatureTest ft(_factory, "queryTermCount"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 1))); + } + + { // Test executor. + FtFeatureTest ft(_factory, "queryTermCount"); + ft.getQueryEnv().getBuilder().addAllFields(); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(RankResult().addScore("queryTermCount", 2))); + } +} + +void +Test::testRandom() +{ + { // Test blueprint. + RandomBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "random")); + + StringList params, in, out; + FT_SETUP_OK (pt, params, in, out.add("out").add("match")); + FT_SETUP_OK (pt, params.add("1"), in, out); + FT_SETUP_FAIL(pt, params.add("2")); + + FT_DUMP_EMPTY(_factory, "random"); + } + + { // Test executor (seed specified through config) + FtFeatureTest ft(_factory, "random"); + ft.getIndexEnv().getProperties().add("random.seed", "100"); + ASSERT_TRUE(ft.setup()); + search::Rand48 rnd; + rnd.srand48(100); + for (uint32_t i = 0; i < 5; ++i) { + feature_t exp = rnd.lrand48() / (feature_t)0x80000000u; + ASSERT_TRUE(ft.execute(exp, EPS, i + 1)); + } + } + { // Test executor (current time used as seed) + FtFeatureTest ft(_factory, "random"); + ASSERT_TRUE(ft.setup()); + RankResult rr; + rr.addScore("random", 1.0f); + for (uint32_t i = 0; i < 5; ++i) { + feature_t last = rr.getScore("random"); + rr.clear(); + ASSERT_TRUE(ft.executeOnly(rr, i + 1)); + ASSERT_TRUE(last != rr.getScore("random")); + } + } + { // Test executor (random.match) + FtFeatureTest ft(_factory, "random.match"); + ft.getQueryEnv().getProperties().add("random.match.seed", "100"); + ASSERT_TRUE(ft.setup()); + search::Rand48 rnd; + for (uint32_t i = 1; i <= 5; ++i) { + rnd.srand48(100 + i); // seed + lid + 
feature_t exp = rnd.lrand48() / (feature_t)0x80000000u; + ASSERT_TRUE(ft.execute(exp, EPS, i)); + } + } +} + + +void +Test::testRankingExpression() +{ + { // Test blueprint. + RankingExpressionBlueprint prototype; + + EXPECT_TRUE(assertCreateInstance(prototype, "rankingExpression")); + + StringList params, in, out; + FT_SETUP_FAIL(prototype, params); // requires config to run without params + FT_SETUP_OK (prototype, params.add("foo.out"), in.add("foo.out"), out.add("out")); + FT_SETUP_FAIL(prototype, params.add("bar.out")); + FT_SETUP_OK (prototype, params.clear().add("log((1 + 2)- 3 * 4 / 5 )"), in.clear(), out); + FT_SETUP_OK (prototype, + params.clear().add("if(if(f1.out<1,0,1)<if(f2.out<2,0,1),f3.out,3)"), + in.clear().add("f1.out").add("f2.out").add("f3.out"), out); + + FT_DUMP_EMPTY(_factory, "rankingExpression"); + } + + { // Test executor. + { + FtFeatureTest ft(_factory, getExpression("if(1<2,3,4)")); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + FtFeatureTest ft(_factory, getExpression("sqrt(100)")); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(10.0f)); + } + { + FtFeatureTest ft(_factory, getExpression("mysum(value(4),value(4))")); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(8.0f)); + } + { + FtFeatureTest ft(_factory, getExpression("if(mysum(value(4),value(4))>3+4,1,0)")); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(1.0f)); + } + { + FtFeatureTest ft(_factory, "rankingExpression"); + ft.getIndexEnv().getProperties().add("rankingExpression.rankingScript", "if(1<2,3,4)"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + FtFeatureTest ft(_factory, "rankingExpression(foo)"); + ft.getIndexEnv().getProperties().add("rankingExpression(foo).rankingScript", "if(1<2,3,4)"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + FtFeatureTest ft(_factory, "rankingExpression"); + ft.getIndexEnv().getProperties() + .add("rankingExpression.rankingScript", "if(") + 
.add("rankingExpression.rankingScript", "1<") + .add("rankingExpression.rankingScript", "2,") + .add("rankingExpression.rankingScript", "3,") + .add("rankingExpression.rankingScript", "4)"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(3.0f)); + } + { + // test interpreted expression + vespalib::string my_expr("3.0 + value(4.0) + sum(tensorFromWeightedSet(query(my_tensor)))"); + FtFeatureTest ft(_factory, getExpression(my_expr)); + ft.getQueryEnv().getProperties().add("my_tensor", "{a:1,b:2,c:3}"); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(13.0)); + } + } +} + +vespalib::string +Test::getExpression(const vespalib::string ¶meter) const +{ + typedef search::fef::FeatureNameBuilder FNB; + return FNB().baseName("rankingExpression").parameter(parameter).buildName(); +} + +void +Test::testTerm() +{ + { + // Test blueprint. + TermBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "term")); + + StringList params, in, out; + FT_SETUP_OK (pt, params.add("0"), in, out.add("connectedness").add("significance").add("weight")); + FT_SETUP_FAIL(pt, params.add("1")); + } + { + StringList dump; + for (uint32_t term = 0; term < 3; ++term) { + vespalib::string bn = vespalib::make_string("term(%u)", term); + dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight"); + } + FtIndexEnvironment ie; + ie.getProperties().add("term.numTerms", "3"); + FT_DUMP(_factory, "term", ie, dump); // check override + + for (uint32_t term = 3; term < 5; ++term) { + vespalib::string bn = vespalib::make_string("term(%u)", term); + dump.add(bn + ".connectedness").add(bn + ".significance").add(bn + ".weight"); + } + FT_DUMP(_factory, "term", dump); // check default + } + } + + { + // Test executor. + FtFeatureTest ft(_factory, "term(0)"); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp .addScore("term(0).connectedness", 0) + .addScore("term(0).significance", 0) + .addScore("term(0).weight", 0); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor. 
+ FtFeatureTest ft(_factory, StringList().add("term(1)").add("term(2)")); + ft.getIndexEnv().getBuilder() + .addField(FieldType::INDEX, CollectionType::SINGLE, "idx1") // field 0 + .addField(FieldType::INDEX, CollectionType::SINGLE, "idx2") // field 1 + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "attr"); // field 2 + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0); + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(1).setWeight(search::query::Weight(200)).lookupField(0)->setDocFreq(0.5); + ft.getQueryEnv().getBuilder().addAttributeNode("attr")->setUniqueId(2).setWeight(search::query::Weight(400)).lookupField(2)->setDocFreq(0.25); + // setup connectedness between term 1 and term 0 + ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0"); + ft.getQueryEnv().getProperties().add("vespa.term.1.connexity", "0.7"); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp.addScore("term(1).significance", util::getSignificance(0.50)). + addScore("term(1).weight", 200.0f). + addScore("term(1).connectedness", 0.7f). + addScore("term(2).significance", util::getSignificance(0.25)). + addScore("term(2).weight", 400.0f). + addScore("term(2).connectedness", 0.1f). // default connectedness + setEpsilon(10e-6); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor. 
+ FtFeatureTest ft(_factory, "term(0)"); + ft.getQueryEnv().getBuilder().addAllFields().setUniqueId(0); + // setup significance for term 0 + ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.3"); + ASSERT_TRUE(ft.setup()); + + ASSERT_TRUE(ft.execute(RankResult().addScore("term(0).significance", 0.3f).setEpsilon(10e-6))); + } +} + +void +Test::testTermDistance() +{ + { // test blueprint + TermDistanceBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "termDistance")); + + StringList params, in, out; + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, ie, params.add("baz").add("0").add("0")); + FT_SETUP_FAIL(pt, ie, params.clear().add("bar").add("0").add("0")); + + FT_SETUP_OK(pt, ie, params.clear().add("foo").add("0").add("0"), + in, out.add("forward").add("forwardTermPosition") + .add("reverse").add("reverseTermPosition")); + } + { + FT_DUMP_EMPTY(_factory, "termDistance"); + } + } + + { // test executor + typedef TermDistanceCalculator::Result Result; + const uint32_t UV = TermDistanceCalculator::UNDEFINED_VALUE; + + EXPECT_TRUE(assertTermDistance(Result(), "a b", "x x")); + EXPECT_TRUE(assertTermDistance(Result(), "a b", "a x")); + EXPECT_TRUE(assertTermDistance(Result(), "a b", "x b")); + EXPECT_TRUE(assertTermDistance(Result(), "a", "a b")); + EXPECT_TRUE(assertTermDistance(Result(), "a", "a a")); + EXPECT_TRUE(assertTermDistance(Result(1,0,UV,UV), "a b", "a b")); + EXPECT_TRUE(assertTermDistance(Result(2,0,UV,UV), "a b", "a x b")); + EXPECT_TRUE(assertTermDistance(Result(UV,UV,1,0), "a b", "b a")); + EXPECT_TRUE(assertTermDistance(Result(UV,UV,2,0), "a b", "b x a")); + EXPECT_TRUE(assertTermDistance(Result(2,18,1,20), "a b", "a x x x x x b x x x x a x x x b x x a x b a")); + EXPECT_TRUE(assertTermDistance(Result(1,0,2,1), "a b", "a b x a x x b x x x a x x x x 
b x x x x x a")); + EXPECT_TRUE(assertTermDistance(Result(1,0,1,1), "a b", "a b a b a")); // first best is kept + EXPECT_TRUE(assertTermDistance(Result(1,0,1,0), "a a", "a a")); + EXPECT_TRUE(assertTermDistance(Result(2,0,2,0), "a a", "a x a")); + } +} + +bool +Test::assertTermDistance(const TermDistanceCalculator::Result & exp, + const vespalib::string & query, + const vespalib::string & field, + uint32_t docId) +{ + LOG(info, "assertTermDistance('%s', '%s')", query.c_str(), field.c_str()); + + vespalib::string feature = "termDistance(foo,0,1)"; + FtFeatureTest ft(_factory, feature); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + RankResult rr; + rr.addScore(feature + ".forward", exp.forwardDist); + rr.addScore(feature + ".forwardTermPosition", exp.forwardTermPos); + rr.addScore(feature + ".reverse", exp.reverseDist); + rr.addScore(feature + ".reverseTermPosition", exp.reverseTermPos); + if (!EXPECT_TRUE(ft.execute(rr, docId))) { + return false; + } + return true; +} + +void +Test::testUtils() +{ + { // getSignificance + EXPECT_APPROX(util::getSignificance(0.0), 1, EPS); + EXPECT_APPROX(util::getSignificance(0.0 + 1.0e-7), 1, EPS); + EXPECT_APPROX(util::getSignificance(1.0), 0.5, EPS); + EXPECT_APPROX(util::getSignificance(1.0 + 1.0e-7), 0.5, EPS); + feature_t last = 1; + for (uint32_t i = 2; i <= 100; i = i + 1) { + feature_t s = util::getSignificance(i * 1.0e-6); + EXPECT_GREATER(s, 0); + EXPECT_LESS(s, 1); + EXPECT_LESS(s, last); + last = s; + } + for (uint32_t i = 999900; i <= 1000000; i = i + 1) { + feature_t s = util::getSignificance(i * 1.0e-6); + EXPECT_GREATER(s, 0); + EXPECT_LESS(s, 1); + EXPECT_LESS(s, last); + last = s; + } + } +} + diff --git a/searchlib/src/tests/features/prod_features.h b/searchlib/src/tests/features/prod_features.h new file mode 100644 index 00000000000..dd15981af1f 
--- /dev/null +++ b/searchlib/src/tests/features/prod_features.h @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/searchlib/features/distancetopathfeature.h> +#include <vespa/searchlib/features/termdistancefeature.h> +#include <vespa/searchlib/fef/test/ftlib.h> + +class Test : public FtTestApp +{ +public: + int Main(); + void testFramework(); + void testFtLib(); + void testAge(); + void testAttribute(); + void testAttributeMatch(); + void testCloseness(); + void testDistance(); + void testDistanceToPath(); + void testDotProduct(); + void testFieldLength(); + void testFieldMatch(); + void testFieldTermMatch(); + void testFirstPhase(); + void testForeach(); + void testFreshness(); + void testMatch(); + void testMatches(); + void testNow(); + void testQuery(); + void testQueryTermCount(); + void testRandom(); + void testRankingExpression(); + void testTerm(); + void testTermDistance(); + void testUtils(); + +private: + void + testFieldMatchBluePrint(); + + void + testFieldMatchExecutor(); + + void + testFieldMatchExecutorOutOfOrder(); + + void + testFieldMatchExecutorSegments(); + + void + testFieldMatchExecutorGaps(); + + void + testFieldMatchExecutorHead(); + + void + testFieldMatchExecutorTail(); + + void + testFieldMatchExecutorLongestSequence(); + + void + testFieldMatchExecutorMatches(); + + void + testFieldMatchExecutorCompleteness(); + + void + testFieldMatchExecutorOrderness(); + + void + testFieldMatchExecutorRelatedness(); + + void + testFieldMatchExecutorLongestSequenceRatio(); + + void + testFieldMatchExecutorEarliness(); + + void + testFieldMatchExecutorWeight(); + + void + testFieldMatchExecutorSignificance(); + + void + testFieldMatchExecutorImportance(); + + void + testFieldMatchExecutorOccurrence(); + + void + testFieldMatchExecutorAbsoluteOccurrence(); + + void + testFieldMatchExecutorWeightedOccurrence(); + + void + 
testFieldMatchExecutorWeightedAbsoluteOccurrence(); + + void + testFieldMatchExecutorSignificantOccurrence(); + + void + testFieldMatchExecutorUnweightedProximity(); + + void + testFieldMatchExecutorReverseProximity(); + + void + testFieldMatchExecutorAbsoluteProximity(); + + void + testFieldMatchExecutorMultiSegmentProximity(); + + void + testFieldMatchExecutorSegmentDistance(); + + void + testFieldMatchExecutorSegmentProximity(); + + void + testFieldMatchExecutorSegmentStarts(); + + void + testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery(); + + void + testFieldMatchExecutorQueryRepeats(); + + void + testFieldMatchExecutorZeroCases(); + + void + testFieldMatchExecutorExceedingIterationLimit(); + + void + testFieldMatchExecutorRemaining(); + + + void assertAge(feature_t expAge, const vespalib::string & attr, uint64_t now, uint64_t docTime); + void setupForAgeTest(FtFeatureTest & ft, uint64_t docTime); + void setupForAttributeTest(FtFeatureTest &ft, bool setup_env = true); + void assertCloseness(feature_t exp, const vespalib::string & attr, double distance, double maxDistance = 0, double halfResponse = 0); + void setupForDistanceTest(FtFeatureTest & ft, const vespalib::string & attrName, + const std::vector<std::pair<int32_t, int32_t> > & positions, bool zcurve); + void assert2DZDistance(feature_t exp, const vespalib::string & positions, + int32_t xquery, int32_t yquery, uint32_t xAspect = 0); + void assertDistanceToPath(const std::vector<std::pair<int32_t, int32_t> > pos, const vespalib::string &path, + feature_t distance = search::features::DistanceToPathExecutor::DEFAULT_DISTANCE, + feature_t traveled = 1, feature_t product = 0); + void setupForDocumentTest(FtFeatureTest &ft, const vespalib::string & attrName, const vespalib::string & docType); + void assertDotProduct(feature_t exp, const vespalib::string & vector, uint32_t docId = 1, + const vespalib::string & attribute = "wsstr", const vespalib::string & attributeOverride=""); + void 
setupForDotProductTest(FtFeatureTest & ft); + void assertFieldMatch(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field, + const search::features::fieldmatch::Params * params = NULL, uint32_t totalTermWeight = 0, feature_t totalSignificance = 0.0f); + void assertFieldMatch(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field, + uint32_t totalTermWeight); + void assertFieldMatchTS(const vespalib::string & spec, const vespalib::string & query, const vespalib::string & field, + feature_t totalSignificance); + vespalib::string getExpression(const vespalib::string ¶meter) const; + void assertForeachOperation(feature_t exp, const vespalib::string & cond, const vespalib::string & op); + void assertFreshness(feature_t expFreshness, const vespalib::string & attr, uint32_t age, uint32_t maxAge = 0, double halfResponse = 0, bool logScale = false); + bool assertTermDistance(const search::features::TermDistanceCalculator::Result & exp, const vespalib::string & query, + const vespalib::string & field, uint32_t docId = 1); + bool assertMatches(uint32_t output, const vespalib::string & query, const vespalib::string & field, + const vespalib::string & feature = "matches(foo)", uint32_t docId = 1); + +private: + search::fef::BlueprintFactory _factory; +}; + diff --git a/searchlib/src/tests/features/prod_features_attributematch.cpp b/searchlib/src/tests/features/prod_features_attributematch.cpp new file mode 100644 index 00000000000..06b2b859709 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_attributematch.cpp @@ -0,0 +1,300 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".prod_features_attributematch"); + +#include "prod_features.h" +#include <vespa/searchlib/features/attributematchfeature.h> +#include <vespa/searchlib/attribute/attributefactory.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; +using search::AttributeFactory; + +typedef AttributeVector::SP AttributePtr; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; + + +void +Test::testAttributeMatch() +{ + AttributeMatchBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "attributeMatch")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); // expects 1 param + FT_SETUP_FAIL(pt, params.add("foo")); // field must exists + + FtIndexEnvironment idx_env; + idx_env.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(pt, idx_env, params); // field must be an attribute + idx_env.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); + + FT_SETUP_OK(pt, idx_env, params.clear().add("sint"), in, out + .add("completeness") + .add("queryCompleteness") + .add("fieldCompleteness") + .add("normalizedWeight") + .add("normalizedWeightedWeight") + .add("weight") + .add("significance") + .add("importance") + .add("matches") + .add("totalWeight") + .add("averageWeight")); + + FT_DUMP_EMPTY(_factory, "attributeMatch"); + + FT_DUMP(_factory, "attributeMatch", idx_env, out.clear() + .add("attributeMatch(sint)") + .add("attributeMatch(sint).completeness") + .add("attributeMatch(sint).queryCompleteness") + .add("attributeMatch(sint).fieldCompleteness") + .add("attributeMatch(sint).normalizedWeight") + .add("attributeMatch(sint).normalizedWeightedWeight") + .add("attributeMatch(sint).weight") + .add("attributeMatch(sint).significance") + .add("attributeMatch(sint).importance") + 
.add("attributeMatch(sint).matches") + .add("attributeMatch(sint).totalWeight") + .add("attributeMatch(sint).averageWeight")); + } + + { // single attributes + FtFeatureTest ft(_factory, StringList(). + add("attributeMatch(sint)").add("attributeMatch(sfloat)").add("attributeMatch(sstr)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint"); // 2 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sfloat"); // 1 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sstr"); // 0 matches + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 0, hit in sint + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 1, .. + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 2, .. + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sint") != NULL); // query term 3, .. 
+ ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("sfloat") != NULL); // query term 4, hit in sfloat + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")) != NULL); + ft.getQueryEnv().getTerms()[0].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[0].setUniqueId(0); + ft.getQueryEnv().getTerms()[1].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[1].setUniqueId(1); + ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(10)); + ft.getQueryEnv().getTerms()[2].setUniqueId(1); + ft.getQueryEnv().getTerms()[3].setWeight(search::query::Weight(10)); + ft.getQueryEnv().getTerms()[3].setUniqueId(1); + ft.getQueryEnv().getTerms()[4].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[4].setUniqueId(1); + ft.getQueryEnv().getTerms()[5].setWeight(search::query::Weight(20)); + ft.getQueryEnv().getTerms()[5].setUniqueId(1); + ft.getQueryEnv().getProperties().add("vespa.term.0.significance", "0.5"); // change significance for term 0 + ft.getQueryEnv().getProperties().add("vespa.term.1.significance", "0.1"); // change significance for all other terms + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("sint", 0, 0); + mdb->setWeight("sint", 1, 0); + mdb->setWeight("sfloat", 4, 0); + mdb->apply(1); + RankResult exp; + exp.addScore("attributeMatch(sint)", 0.5f). // same as completeness + addScore("attributeMatch(sint).matches", 2). + addScore("attributeMatch(sint).totalWeight", 0). + addScore("attributeMatch(sint).averageWeight", 0). + addScore("attributeMatch(sint).completeness", 0.5f). + addScore("attributeMatch(sint).queryCompleteness", 0.5f). + addScore("attributeMatch(sint).fieldCompleteness", 1). + addScore("attributeMatch(sint).normalizedWeight", 0). + addScore("attributeMatch(sint).normalizedWeightedWeight", 0). + addScore("attributeMatch(sint).weight", 0.4). + addScore("attributeMatch(sint).significance", 0.6). 
+ addScore("attributeMatch(sint).importance", 0.5). + addScore("attributeMatch(sfloat)", 1). // same as completeness + addScore("attributeMatch(sfloat).matches", 1). + addScore("attributeMatch(sfloat).totalWeight", 0). + addScore("attributeMatch(sfloat).averageWeight", 0). + addScore("attributeMatch(sfloat).completeness", 1). + addScore("attributeMatch(sfloat).queryCompleteness", 1). + addScore("attributeMatch(sfloat).fieldCompleteness", 1). + addScore("attributeMatch(sfloat).normalizedWeight", 0). + addScore("attributeMatch(sfloat).normalizedWeightedWeight", 0). + addScore("attributeMatch(sfloat).weight", 0.2). + addScore("attributeMatch(sfloat).significance", 0.1). + addScore("attributeMatch(sfloat).importance", 0.15). + addScore("attributeMatch(sstr)", 0). // same as completeness + addScore("attributeMatch(sstr).matches", 0). + addScore("attributeMatch(sstr).totalWeight", 0). + addScore("attributeMatch(sstr).averageWeight", 0). + addScore("attributeMatch(sstr).completeness", 0). + addScore("attributeMatch(sstr).queryCompleteness", 0). + addScore("attributeMatch(sstr).fieldCompleteness", 0). + addScore("attributeMatch(sstr).normalizedWeight", 0). + addScore("attributeMatch(sstr).normalizedWeightedWeight", 0). + addScore("attributeMatch(sstr).weight", 0). + addScore("attributeMatch(sstr).significance", 0). + addScore("attributeMatch(sstr).importance", 0). 
+ setEpsilon(10e-6); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); + } + + { // array attributes + + FtFeatureTest ft(_factory, StringList().add("attributeMatch(aint)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); // 1 matches + ft.getIndexEnv().getProperties().add("attributeMatch(aint).fieldCompletenessImportance", "0.5"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("aint") != NULL); // 0 + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("aint", 0, 0); + mdb->apply(1); + RankResult exp; + exp.addScore("attributeMatch(aint)", 0.75f) // same as completeness + .addScore("attributeMatch(aint).matches", 1) + .addScore("attributeMatch(aint).totalWeight", 0) + .addScore("attributeMatch(aint).averageWeight", 0) + .addScore("attributeMatch(aint).completeness", 0.75f) + .addScore("attributeMatch(aint).queryCompleteness", 1) + .addScore("attributeMatch(aint).fieldCompleteness", 0.5f) + .addScore("attributeMatch(aint).normalizedWeight", 0) + .addScore("attributeMatch(aint).normalizedWeightedWeight", 0); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); + } + + { // weighted set attributes + FtFeatureTest ft(_factory, StringList(). 
+ add("attributeMatch(wsint)").add("attributeMatch(wsfloat)").add("attributeMatch(wsstr)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint"); // 2 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsfloat"); // 1 matches + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsstr"); // 0 matches + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getProperties().add("attributeMatch(wsint).maxWeight", "100"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsint") != NULL); // 0 + ft.getQueryEnv().getTerms()[0].setWeight(search::query::Weight(2)); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsint") != NULL); // 1 + ft.getQueryEnv().getTerms()[1].setWeight(search::query::Weight(3)); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wsfloat") != NULL); // 2 + ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(0)); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")) != NULL); + ft.getQueryEnv().getTerms()[3].setWeight(search::query::Weight(0)); + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->setWeight("wsint", 0, 10); + mdb->setWeight("wsint", 1, 20); + mdb->setWeight("wsfloat", 2, -30); + mdb->apply(1); + RankResult exp; + + // test all three attributes + exp.addScore("attributeMatch(wsint)", 1). // same as completeness + addScore("attributeMatch(wsint).matches", 2). + addScore("attributeMatch(wsint).totalWeight", 30). + addScore("attributeMatch(wsint).averageWeight", 15). + addScore("attributeMatch(wsint).completeness", 1). + addScore("attributeMatch(wsint).queryCompleteness", 1). + addScore("attributeMatch(wsint).fieldCompleteness", 1). + addScore("attributeMatch(wsint).normalizedWeight", 0.1f). 
+ addScore("attributeMatch(wsint).normalizedWeightedWeight", 0.16f). + addScore("attributeMatch(wsfloat)", 0.95). // same as completeness + addScore("attributeMatch(wsfloat).matches", 1). + addScore("attributeMatch(wsfloat).totalWeight", -30). + addScore("attributeMatch(wsfloat).averageWeight", -30). + addScore("attributeMatch(wsfloat).completeness", 0.95). + addScore("attributeMatch(wsfloat).queryCompleteness", 1). + addScore("attributeMatch(wsfloat).fieldCompleteness", 0). + addScore("attributeMatch(wsfloat).normalizedWeight", 0). + addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 0). + addScore("attributeMatch(wsstr)", 0). // same as completeness + addScore("attributeMatch(wsstr).matches", 0). + addScore("attributeMatch(wsstr).totalWeight", 0). + addScore("attributeMatch(wsstr).averageWeight", 0). + addScore("attributeMatch(wsstr).completeness", 0). + addScore("attributeMatch(wsstr).queryCompleteness", 0). + addScore("attributeMatch(wsstr).fieldCompleteness", 0). + addScore("attributeMatch(wsstr).normalizedWeight", 0). + addScore("attributeMatch(wsstr).normalizedWeightedWeight", 0). + setEpsilon(10e-6); + ASSERT_TRUE(ft.execute(exp)); + ASSERT_TRUE(ft.execute(exp)); + + // test fieldCompleteness + mdb->setWeight("wsint", 0, 0); + mdb->setWeight("wsint", 1, 15); + mdb->apply(1); + exp.clear(). + addScore("attributeMatch(wsint).fieldCompleteness", 0.5f); + ASSERT_TRUE(ft.execute(exp)); + + // test that normalized values lies in the interval [0,1]. + mdb->setWeight("wsfloat", 2, 1000); + mdb->apply(1); + ft.getQueryEnv().getTerms()[2].setWeight(search::query::Weight(100)); + exp.clear(). + addScore("attributeMatch(wsfloat).normalizedWeight", 1). 
+ addScore("attributeMatch(wsfloat).normalizedWeightedWeight", 1); + ASSERT_TRUE(ft.execute(exp)); + } + + { // unique only attribute + FtFeatureTest ft(_factory, "attributeMatch(unique)"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "unique"); + setupForAttributeTest(ft); + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("unique") != NULL); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp.addScore("attributeMatch(unique)", 0). // same as completeness + addScore("attributeMatch(unique).matches", 0). + addScore("attributeMatch(unique).totalWeight", 0). + addScore("attributeMatch(unique).averageWeight", 0). + addScore("attributeMatch(unique).completeness", 0). + addScore("attributeMatch(unique).queryCompleteness", 0). + addScore("attributeMatch(unique).fieldCompleteness", 0). + addScore("attributeMatch(unique).normalizedWeight", 0). + addScore("attributeMatch(unique).normalizedWeightedWeight", 0); + ASSERT_TRUE(ft.execute(exp)); + } + { + FtFeatureTest ft(_factory, StringList().add("attributeMatch(aint)").add("attributeMatch(wint)")); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wint"); + + // setup an array and wset attributes with 0 elements + AttributePtr aint = AttributeFactory::createAttribute("aint", AVC (AVBT::INT32, AVCT::ARRAY)); + AttributePtr wint = AttributeFactory::createAttribute("wint", AVC(AVBT::INT32, AVCT::WSET)); + aint->addReservedDoc(); + wint->addReservedDoc(); + ft.getIndexEnv().getAttributeManager().add(aint); + ft.getIndexEnv().getAttributeManager().add(wint); + aint->addDocs(1); + aint->commit(); + ASSERT_TRUE(aint->getValueCount(0) == 0); + wint->addDocs(1); + wint->commit(); + ASSERT_TRUE(wint->getValueCount(0) == 0); + + ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("aint") != NULL); + 
ASSERT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("wint") != NULL); + ASSERT_TRUE(ft.setup()); + + RankResult exp; + exp.addScore("attributeMatch(aint)", 0). // same as completeness + addScore("attributeMatch(aint).completeness", 0). + addScore("attributeMatch(aint).fieldCompleteness", 0). + addScore("attributeMatch(wint)", 0). // same as completeness + addScore("attributeMatch(wint).completeness", 0). + addScore("attributeMatch(wint).fieldCompleteness", 0); + ASSERT_TRUE(ft.execute(exp)); + } +} diff --git a/searchlib/src/tests/features/prod_features_fieldmatch.cpp b/searchlib/src/tests/features/prod_features_fieldmatch.cpp new file mode 100644 index 00000000000..e26d6a92fa6 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_fieldmatch.cpp @@ -0,0 +1,1079 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".prod_features_fieldmatch"); + +#include <vespa/searchlib/fef/test/ftlib.h> + +#include "prod_features.h" + +#include <vespa/searchlib/features/fieldmatchfeature.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +using search::AttributeVector; + +void +Test::testFieldMatch() +{ + testFieldMatchBluePrint(); + testFieldMatchExecutor(); +} + + +void +Test::testFieldMatchBluePrint() +{ + FieldMatchBlueprint pt; + StringList out; + out.add("score"). + add("proximity"). + add("completeness"). + add("queryCompleteness"). + add("fieldCompleteness"). + add("orderness"). + add("relatedness"). + add("earliness"). + add("longestSequenceRatio"). + add("segmentProximity"). + add("unweightedProximity"). + add("absoluteProximity"). + add("occurrence"). + add("absoluteOccurrence"). + add("weightedOccurrence"). + add("weightedAbsoluteOccurrence"). + add("significantOccurrence"). + + add("weight"). + add("significance"). + add("importance"). + + add("segments"). 
+ add("matches"). + add("outOfOrder"). + add("gaps"). + add("gapLength"). + add("longestSequence"). + add("head"). + add("tail"). + add("segmentDistance"). + add("degradedMatches"); + { + EXPECT_TRUE(assertCreateInstance(pt, "fieldMatch")); + + StringList params, in; + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, params.add("foo")); + FT_SETUP_FAIL(pt, params.add("bar")); + params.clear(); + + { + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_SETUP_FAIL(pt, ie, params.add("foo")); + FT_SETUP_FAIL(pt, ie, params.add("abar")); + FT_SETUP_FAIL(pt, ie, params.add("wbar")); + + FT_SETUP_OK(pt, ie, params.clear().add("bar"), in, out); + } + + { // test illegal proximity table + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + Properties & p = ie.getProperties(); + p.add("fieldMatch(foo).proximityLimit", "1"); + + // too few elements, should be 3 (1*2 + 1) + p.add("fieldMatch(foo).proximityTable", "0.5"); + p.add("fieldMatch(foo).proximityTable", "1.0"); + FT_SETUP_FAIL(pt, ie, params); + + // too many elements, should be 3 (1*2 + 1) + p.add("fieldMatch(foo).proximityTable", "1.0"); + p.add("fieldMatch(foo).proximityTable", "0.5"); + FT_SETUP_FAIL(pt, ie, params); + } + } + { // test dumping with a regular index field + FT_DUMP_EMPTY(_factory, "fieldMatch"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be an index field + + ie.getBuilder().addField(FieldType::INDEX, CollectionType::ARRAY, "abar"); + FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be single value + + 
ie.getBuilder().addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wbar"); + FT_DUMP_EMPTY(_factory, "fieldMatch", ie); // must be single value + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + vespalib::string bn = "fieldMatch(bar)"; + dump.add(bn); + for (uint32_t i = 1; i < out.size(); ++i) { + dump.add(bn + "." + out[i]); + } + FT_DUMP(_factory, "fieldMatch", ie, dump); + } + + { // test dumping with a filter index field + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ie.getFields()[0].setFilter(true); + + StringList dump; + vespalib::string bn = "fieldMatch(foo)"; + dump.add(bn); + dump.add(bn + ".completeness"); + dump.add(bn + ".queryCompleteness"); + dump.add(bn + ".weight"); + dump.add(bn + ".matches"); + dump.add(bn + ".degradedMatches"); + FT_DUMP(_factory, "fieldMatch", ie, dump); + } +} + + +void +Test::testFieldMatchExecutor() +{ + testFieldMatchExecutorOutOfOrder(); + testFieldMatchExecutorSegments(); + testFieldMatchExecutorGaps(); + testFieldMatchExecutorHead(); + testFieldMatchExecutorTail(); + testFieldMatchExecutorLongestSequence(); + testFieldMatchExecutorMatches(); + testFieldMatchExecutorCompleteness(); + testFieldMatchExecutorOrderness(); + testFieldMatchExecutorRelatedness(); + testFieldMatchExecutorLongestSequenceRatio(); + testFieldMatchExecutorEarliness(); + testFieldMatchExecutorWeight(); + testFieldMatchExecutorSignificance(); + testFieldMatchExecutorImportance(); + testFieldMatchExecutorOccurrence(); + testFieldMatchExecutorAbsoluteOccurrence(); + testFieldMatchExecutorWeightedOccurrence(); + testFieldMatchExecutorWeightedAbsoluteOccurrence(); + testFieldMatchExecutorSignificantOccurrence(); + testFieldMatchExecutorUnweightedProximity(); + testFieldMatchExecutorReverseProximity(); + testFieldMatchExecutorAbsoluteProximity(); + testFieldMatchExecutorMultiSegmentProximity(); + testFieldMatchExecutorSegmentDistance(); + 
testFieldMatchExecutorSegmentProximity(); + testFieldMatchExecutorSegmentStarts(); + testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery(); + testFieldMatchExecutorQueryRepeats(); + testFieldMatchExecutorZeroCases(); + testFieldMatchExecutorExceedingIterationLimit(); + testFieldMatchExecutorRemaining(); +} + + +void +Test::testFieldMatchExecutorOutOfOrder() +{ + assertFieldMatch("outOfOrder:0","a","a"); + assertFieldMatch("outOfOrder:0","a b c","a b c"); + assertFieldMatch("outOfOrder:1","a b c","a c b"); + assertFieldMatch("outOfOrder:2","a b c","c b a"); + assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); + assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); + assertFieldMatch("outOfOrder:2","a b c d e","c x a b x x x x x e x x d"); +} + + +void +Test::testFieldMatchExecutorSegments() +{ + assertFieldMatch("segments:1","a","a"); + assertFieldMatch("segments:1","a b c","a b c"); + assertFieldMatch("segments:1","a b c","a x x b c"); + assertFieldMatch("segments:2","a b c","a x x x x x x x x x x x x x x x x x x x b c"); + assertFieldMatch("segments:2","a b c","b c x x x x x x x x x x x x x x x x x x x a"); + assertFieldMatch("segments:2 gaps:1","a b c","x x x a x x x x x x x x x x x x x x x x x x x b x x c x x"); + assertFieldMatch("segments:2 gaps:0 outOfOrder:0","a b c","b c x x x x x x x x x x x x x x x x x x x a"); + assertFieldMatch("segments:2 gaps:1","a b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x"); + assertFieldMatch("segments:2 gaps:1","a y y b c","x x x b x x c x x x x x x x x x x x x x x x x x x x a x x"); +} + + +void +Test::testFieldMatchExecutorGaps() +{ + assertFieldMatch("gaps:0","a","a"); + assertFieldMatch("gaps:0","x�a","a"); // TODO: which char ? 
+ assertFieldMatch("gaps:0 gapLength:0","a b c","a b c"); + assertFieldMatch("gaps:1 gapLength:1","a b","b a"); + assertFieldMatch("gaps:1 gapLength:1","a b c","a x b c"); + assertFieldMatch("gaps:1 gapLength:3","a b c","a x X Xb c"); + assertFieldMatch("gaps:2 gapLength:2 outOfOrder:1","a b c","a c b"); + assertFieldMatch("gaps:2 gapLength:2 outOfOrder:0","a b c","a x b x c"); + assertFieldMatch("gaps:2 gapLength:5 outOfOrder:1","a b c","a x c x b"); + assertFieldMatch("gaps:3 outOfOrder:2 segments:1","a b c d e","x d x x b c x x a e"); + assertFieldMatch("gaps:0","y a b c","a b c x"); +} + + +void +Test::testFieldMatchExecutorHead() +{ + assertFieldMatch("head:0","a","a"); + //assertFieldMatch("head:0","y","a"); // no hit, executor will not run + assertFieldMatch("head:1","a","x a"); + assertFieldMatch("head:2","a b c","x x a b c"); + assertFieldMatch("head:2","a b c","x x c x x a b"); + assertFieldMatch("head:2","a b c","x x c x x x x x x x x x x x x x x x a b"); +} + + +void +Test::testFieldMatchExecutorTail() +{ + assertFieldMatch("tail:0","a","a"); + //assertFieldMatch("tail:0","y","a"); // no hit, executor will not run + assertFieldMatch("tail:1","a","a x"); + assertFieldMatch("tail:2","a b c","a b c x x"); + assertFieldMatch("tail:2","a b c","x x x c x x x x a b x x"); + assertFieldMatch("tail:0","a b c","x x c x x x x x x x x x x x x x x x a b"); +} + +void +Test::testFieldMatchExecutorLongestSequence() +{ + assertFieldMatch("longestSequence:1","a","a"); + assertFieldMatch("longestSequence:1","a","a b c"); + assertFieldMatch("longestSequence:1","b","a b c"); + assertFieldMatch("longestSequence:3","a b c","x x a b c x x a b x"); + assertFieldMatch("longestSequence:3 segments:1","a b c","x x a b x x a b c x"); + assertFieldMatch("longestSequence:2","a b c d","x x c d x x a b x"); + assertFieldMatch("longestSequence:2","a b c d","x x a b x c d x x"); + assertFieldMatch("longestSequence:2","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x"); + 
assertFieldMatch("longestSequence:4 segments:1","a b c d","x x a b x x x x x x x x x x x x x x x x x c d x x a b c d"); +} + + +void +Test::testFieldMatchExecutorMatches() +{ + assertFieldMatch("matches:1 queryCompleteness:1 fieldCompleteness:1","a","a"); + assertFieldMatch("matches:3 queryCompleteness:1 fieldCompleteness:1","a b c","a b c"); + assertFieldMatch("matches:3 queryCompleteness:1 fieldCompleteness:0.5","a b c","a b c a b d"); + assertFieldMatch("matches:3 queryCompleteness:0.5 fieldCompleteness:0.25","a y y b c y","a x x b c x a x a b x x"); +} + + +void +Test::testFieldMatchExecutorCompleteness() +{ + assertFieldMatch("completeness:1 queryCompleteness:1 fieldCompleteness:1","a","a"); + assertFieldMatch("completeness:0 queryCompleteness:0 fieldCompleteness:0","a","x"); + assertFieldMatch("completeness:0 queryCompleteness:0 fieldCompleteness:0","y","a"); + assertFieldMatch("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a","a a"); + assertFieldMatch("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1","a a","a"); + assertFieldMatch("completeness:1 queryCompleteness:1 fieldCompleteness:1","a b c","a b c"); + assertFieldMatch("completeness:0.525 queryCompleteness:0.5 fieldCompleteness:1","a b c d","a b"); + assertFieldMatch("completeness:0.975 queryCompleteness:1 fieldCompleteness:0.5","a b","a b c d"); + assertFieldMatch("completeness:0.97 queryCompleteness:1 fieldCompleteness:0.4","a b","a b c d e"); +} + + +void +Test::testFieldMatchExecutorOrderness() +{ + assertFieldMatch("orderness:1", "a","a"); + // Note: we have no hits -> orderness: 0(1) + assertFieldMatch("orderness:0", "a","x"); + assertFieldMatch("orderness:0", "a a a","a"); // Oh well... 
+ assertFieldMatch("orderness:1", "a","a a a"); + assertFieldMatch("orderness:0", "a b","b a"); + assertFieldMatch("orderness:0.5","a b c","b a c"); + assertFieldMatch("orderness:0.5","a b c d","c b d x x x x x x x x x x x x x x x x x x x x x a"); +} + + +void +Test::testFieldMatchExecutorRelatedness() +{ + assertFieldMatch("relatedness:1", "a","a"); + assertFieldMatch("relatedness:0", "a","x"); + assertFieldMatch("relatedness:1", "a b","a b"); + assertFieldMatch("relatedness:1", "a b c","a b c"); + assertFieldMatch("relatedness:0.5","a b c","a b x x x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("relatedness:0.5","a y b y y y c","a b x x x x x x x x x x x x x x x x x x x x x x x c"); +} + + +void +Test::testFieldMatchExecutorLongestSequenceRatio() +{ + assertFieldMatch("longestSequenceRatio:1", "a","a"); + assertFieldMatch("longestSequenceRatio:0", "a","x"); + assertFieldMatch("longestSequenceRatio:1", "a a","a"); + assertFieldMatch("longestSequenceRatio:1", "a","a a"); + assertFieldMatch("longestSequenceRatio:1", "a b","a b"); + assertFieldMatch("longestSequenceRatio:1", "a y"," a x"); + assertFieldMatch("longestSequenceRatio:0.5","a b","a x b"); + assertFieldMatch("longestSequenceRatio:0.75","a b c d","x x a b x a x c d a b c x d x"); +} + + +void +Test::testFieldMatchExecutorEarliness() +{ + assertFieldMatch("earliness:1", "a","a"); + assertFieldMatch("earliness:0", "a","x"); + assertFieldMatch("earliness:1", "a","a a a"); + assertFieldMatch("earliness:1", "a a a","a"); + assertFieldMatch("earliness:0.8", "b","a b c"); + assertFieldMatch("earliness:0.8", "b","a b"); + assertFieldMatch("earliness:0.9091","a b c","x b c x x x x x a x x x"); + assertFieldMatch("earliness:0.2", "a b c","x b c a x x x x a x x x x x x x a b c x x"); +} + + +void +Test::testFieldMatchExecutorWeight() +{ + assertFieldMatch("weight:1", "a","a"); + assertFieldMatch("weight:0", "y","a"); + assertFieldMatch("weight:0.3333","a a a","a"); + assertFieldMatch("weight:1", 
"a","a a a"); + assertFieldMatch("weight:1", "a b c","a b c"); + assertFieldMatch("weight:1", "a b c","x x a b x a x c x x a b x c c x"); + + assertFieldMatch("weight:0.3333","a b c","a"); + assertFieldMatch("weight:0.6667","a b c","a b"); + + assertFieldMatch("weight:1", "a b c!200","a b c"); // Best + assertFieldMatch("weight:0.75","a b c!200","b c"); // Middle + assertFieldMatch("weight:0.5", "a b c!200","a b"); // Worst + + assertFieldMatch("weight:1","a!300 b c!200","a b c"); // Best too + + assertFieldMatch("weight:1", "a b c!50","a b c"); // Best + assertFieldMatch("weight:0.6","a b c!50","b c"); // Worse + assertFieldMatch("weight:0.4","a b c!50","b"); // Worse + assertFieldMatch("weight:0.2","a b c!50","c"); // Worst + assertFieldMatch("weight:0.8","a b c!50","a b"); // Middle + + assertFieldMatch("weight:1", "a b c!0","a b c"); // Best + assertFieldMatch("weight:0.5","a b c!0","b c"); // Worst + assertFieldMatch("weight:1", "a b c!0","a b"); // As good as best + assertFieldMatch("weight:0", "a b c!0","c"); // No contribution + + assertFieldMatch("weight:0","a!0 b!0","a b"); + assertFieldMatch("weight:0","a!0 b!0",""); + + // The query also has other terms having a total weight of 300 + // so we add a weight parameter which is the sum of the weights of this query terms + 300 + assertFieldMatch("weight:0.25", "a","a",400); + assertFieldMatch("weight:0", "y","a",400); + assertFieldMatch("weight:0.1667","a a a","a",600); + assertFieldMatch("weight:0.25", "a","a a a",400); + assertFieldMatch("weight:0.5", "a b c","a b c",600); + assertFieldMatch("weight:0.5", "a b c","x x a b x a x c x x a b x c c x",600); + + assertFieldMatch("weight:0.1667","a b c","a",600); + assertFieldMatch("weight:0.3333","a b c","a b",600); + + assertFieldMatch("weight:0.5714","a b c!200","a b c",700); // Best + assertFieldMatch("weight:0.4286","a b c!200","b c",700); // Middle + assertFieldMatch("weight:0.2857","a b c!200","a b",700); // Worst + + 
assertFieldMatch("weight:0.6667","a!300 b c!200","a b c",900); // Better than best + + assertFieldMatch("weight:0.4545","a b c!50","a b c",550); // Best + assertFieldMatch("weight:0.2727","a b c!50","b c",550); // Worse + assertFieldMatch("weight:0.1818","a b c!50","b",550); // Worse + assertFieldMatch("weight:0.0909","a b c!50","c",550); // Worst + assertFieldMatch("weight:0.3636","a b c!50","a b",550); // Middle + + assertFieldMatch("weight:0.4","a b c!0","a b c",500); // Best + assertFieldMatch("weight:0.2","a b c!0","b c",500); // Worst + assertFieldMatch("weight:0.4","a b c!0","a b",500); // As good as best + assertFieldMatch("weight:0", "a b c!0","c",500); // No contribution + + assertFieldMatch("weight:0","a!0 b!0","a b",300); + assertFieldMatch("weight:0","a!0 b!0","",300); +} + + +void +Test::testFieldMatchExecutorSignificance() +{ + assertFieldMatch("significance:1", "a","a"); + assertFieldMatch("significance:0", "a","x"); + assertFieldMatch("significance:0.3333","a a a","a"); + assertFieldMatch("significance:1", "a","a a a"); + assertFieldMatch("significance:1", "a b c","a b c"); + assertFieldMatch("significance:1", "a b c","x x a b x a x c x x a b x c c x"); + + assertFieldMatch("significance:0.3333","a b c","a"); + assertFieldMatch("significance:0.6667","a b c","a b"); + + assertFieldMatch("significance:1", "a b c%0.2","a b c"); // Best + assertFieldMatch("significance:0.75","a b c%0.2","b c"); // Middle + assertFieldMatch("significance:0.5", "a b c%0.2","a b"); // Worst + + assertFieldMatch("significance:1","a%0.3 b c%0.2","a b c"); // Best too + + assertFieldMatch("significance:1", "a b c%0.05","a b c"); // Best + assertFieldMatch("significance:0.6","a b c%0.05","b c"); // Worse + assertFieldMatch("significance:0.4","a b c%0.05","b"); // Worse + assertFieldMatch("significance:0.2","a b c%0.05","c"); // Worst + assertFieldMatch("significance:0.8","a b c%0.05","a b"); // Middle + + assertFieldMatch("significance:1", "a b c%0","a b c"); // Best + 
assertFieldMatch("significance:0.5","a b c%0","b c"); // Worst + assertFieldMatch("significance:1", "a b c%0","a b"); // As good as best + assertFieldMatch("significance:0", "a b c%0","c"); // No contribution + + assertFieldMatch("significance:0","a%0 b%0","a b"); + assertFieldMatch("significance:0","a%0 b%0",""); + + // The query also has other terms having a total significance of 0.3 + // so we add a significance parameter which is the sum of the significances of this query terms + 0.3 + assertFieldMatchTS("significance:0.25", "a","a",0.4f); + assertFieldMatchTS("significance:0", "y","a",0.4f); + assertFieldMatchTS("significance:0.1667","a a a","a",0.6f); + assertFieldMatchTS("significance:0.25", "a","a a a",0.4f); + assertFieldMatchTS("significance:0.5", "a b c","a b c",0.6f); + assertFieldMatchTS("significance:0.5", "a b c","x x a b x a x c x x a b x c c x",0.6f); + + assertFieldMatchTS("significance:0.1667","a b c","a",0.6f); + assertFieldMatchTS("significance:0.3333","a b c","a b",0.6f); + + assertFieldMatchTS("significance:0.5714","a b c%0.2","a b c",0.7f); // Best + assertFieldMatchTS("significance:0.4286","a b c%0.2","b c",0.7f); // Middle + assertFieldMatchTS("significance:0.2857","a b c%0.2","a b",0.7f); // Worst + + assertFieldMatchTS("significance:0.6667","a%0.3 b c%0.2","a b c",0.9f); // Better than best + + assertFieldMatchTS("significance:0.4545","a b c%0.05","a b c",0.55f); // Best + assertFieldMatchTS("significance:0.2727","a b c%0.05","b c",0.55f); // Worse + assertFieldMatchTS("significance:0.1818","a b c%0.05","b",0.55f); // Worse + assertFieldMatchTS("significance:0.0909","a b c%0.05","c",0.55f); // Worst + assertFieldMatchTS("significance:0.3636","a b c%0.05","a b",0.55f); // Middle + + assertFieldMatchTS("significance:0.4","a b c%0","a b c",0.5f); // Best + assertFieldMatchTS("significance:0.2","a b c%0","b c",0.5f); // Worst + assertFieldMatchTS("significance:0.4","a b c%0","a b",0.5f); // As good as best + 
assertFieldMatchTS("significance:0", "a b c%0","c",0.5f); // No contribution + + assertFieldMatchTS("significance:0","a%0 b%0","a b",0.3f); + assertFieldMatchTS("significance:0","a%0 b%0","",0.3f); +} + + +void +Test::testFieldMatchExecutorImportance() +{ + assertFieldMatch("importance:0.75","a b c", "a x x b x c c c",600); + assertFieldMatch("importance:0.85","a b!500 c","a x x b x c c c",1000); + + // Twice as common - twice as weighty, but total weight has the extra 300 - less than the previous + assertFieldMatch("importance:0.7857","a b!200%0.05 c","a x x b x c c c",700); + // Here higher importancy exactly offsets the lowered uniqueness + assertFieldMatch("importance:0.85","a b!500%0.5 c","a x x b x c c c",1000); +} + + +void +Test::testFieldMatchExecutorOccurrence() +{ + assertFieldMatch("occurrence:0","a","x"); + assertFieldMatch("occurrence:1","a","a"); + assertFieldMatch("occurrence:0","a a a","x"); + assertFieldMatch("occurrence:1","a a a","a"); + assertFieldMatch("occurrence:1","a a a","a a a"); + assertFieldMatch("occurrence:1","a a a","a a a a"); + assertFieldMatch("occurrence:0.3571","a","x x x a x x a x a x x x a a"); + assertFieldMatch("occurrence:1","a","a a a a a a a a a a a a a a"); + assertFieldMatch("occurrence:1","a b","a b b a a a a a b a a b a a"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a b b", &params); + assertFieldMatch("occurrence:0.9231","a b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("occurrence:0.6", "a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("occurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length +} + + +void 
+Test::testFieldMatchExecutorAbsoluteOccurrence() +{ + assertFieldMatch("absoluteOccurrence:0", "a","x"); + assertFieldMatch("absoluteOccurrence:0.01","a","a"); + assertFieldMatch("absoluteOccurrence:0","a a a","x"); + assertFieldMatch("absoluteOccurrence:0.01", "a a a","a"); + assertFieldMatch("absoluteOccurrence:0.03", "a a a","a a a"); + assertFieldMatch("absoluteOccurrence:0.04", "a a a","a a a a"); + assertFieldMatch("absoluteOccurrence:0.05","a","x x x a x x a x a x x x a a"); + assertFieldMatch("absoluteOccurrence:0.14","a","a a a a a a a a a a a a a a"); + assertFieldMatch("absoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a b b", &params); + assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("absoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("absoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("absoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length +} + + +void +Test::testFieldMatchExecutorWeightedOccurrence() +{ + assertFieldMatch("weightedOccurrence:0","a!200","x"); + assertFieldMatch("weightedOccurrence:1","a!200","a"); + assertFieldMatch("weightedOccurrence:0","a!200 a a","x"); + assertFieldMatch("weightedOccurrence:1","a!200 a a","a"); + assertFieldMatch("weightedOccurrence:1","a a a","a a a"); + assertFieldMatch("weightedOccurrence:1","a!200 a a","a a a a"); + assertFieldMatch("weightedOccurrence:0.3571","a!200","x x x a x x a x a x x x a a"); + assertFieldMatch("weightedOccurrence:1","a!200","a a a a a a a a a a a a a a"); + assertFieldMatch("weightedOccurrence:0.5","a b","a b 
b a a a a a b a a b a a"); + + assertFieldMatch("weightedOccurrence:0.5714","a!200 b","a b b a a a a a b a a b a a"); + assertFieldMatch("weightedOccurrence:0.6753","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher + assertFieldMatch("weightedOccurrence:0.4286","a b!200","a b b a a a a a b a a b a a"); // Should be lower + assertFieldMatch("weightedOccurrence:0.3061","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower + + assertFieldMatch("weightedOccurrence:0.30","a b", "a a b b b b x x x x"); + assertFieldMatch("weightedOccurrence:0.3333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher + assertFieldMatch("weightedOccurrence:0.2667","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower + assertFieldMatch("weightedOccurrence:0.2667","a b!50", "a a b b b b x x x x"); // Same relative + + assertFieldMatch("weightedOccurrence:0","a!0 b!0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a b b", &params); + assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("weightedOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("weightedOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("weightedOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length + + assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a b b", &params); + assertFieldMatch("weightedOccurrence:0.4667","a b!200","a a a a a a a a a a b b", &params); + assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b", &params); // Starting to cut off + 
assertFieldMatch("weightedOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("weightedOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("weightedOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length +} + + +void +Test::testFieldMatchExecutorWeightedAbsoluteOccurrence() +{ + assertFieldMatch("weightedAbsoluteOccurrence:0", "a!200","x"); + assertFieldMatch("weightedAbsoluteOccurrence:0.01", "a!200","a"); + assertFieldMatch("weightedAbsoluteOccurrence:0", "a!200 a a","x"); + assertFieldMatch("weightedAbsoluteOccurrence:0.01", "a!200 a a","a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.03", "a a a","a a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.04", "a!200 a a","a a a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.05", "a!200","x x x a x x a x a x x x a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.14", "a!200","a a a a a a a a a a a a a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.07","a b","a b b a a a a a b a a b a a"); + + assertFieldMatch("weightedAbsoluteOccurrence:0.08", "a!200 b","a b b a a a a a b a a b a a"); + assertFieldMatch("weightedAbsoluteOccurrence:0.0945","a!1000 b","a b b a a a a a b a a b a a"); // Should be higher + assertFieldMatch("weightedAbsoluteOccurrence:0.06", "a b!200","a b b a a a a a b a a b a a"); // Should be lower + assertFieldMatch("weightedAbsoluteOccurrence:0.0429","a b!2000","a b b a a a a a b a a b a a"); // Should be even lower + + assertFieldMatch("weightedAbsoluteOccurrence:0.03", "a b", "a a b b b b x x x x"); + assertFieldMatch("weightedAbsoluteOccurrence:0.0333","a b!200","a a b b b b x x x x"); // More frequent is more important - higher + assertFieldMatch("weightedAbsoluteOccurrence:0.0267","a!200 b","a a b b b b x x x x"); // Less frequent is more important - lower + 
assertFieldMatch("weightedAbsoluteOccurrence:0.0267","a b!50", "a a b b b b x x x x"); // Same relative + + assertFieldMatch("weightedAbsoluteOccurrence:0","a!0 b!0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a b b", &params); + assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("weightedAbsoluteOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("weightedAbsoluteOccurrence:1", "a b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length + + assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a b b", &params); + assertFieldMatch("weightedAbsoluteOccurrence:0.4667","a b!200","a a a a a a a a a a b b", &params); + assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("weightedAbsoluteOccurrence:0.7333","a!200 b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("weightedAbsoluteOccurrence:1", "a!200 b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length +} + + +void +Test::testFieldMatchExecutorSignificantOccurrence() +{ + assertFieldMatch("significantOccurrence:0","a%0.2","x"); + assertFieldMatch("significantOccurrence:1","a%0.2","a"); + assertFieldMatch("significantOccurrence:0","a%0.2 a a","x"); + assertFieldMatch("significantOccurrence:1","a%0.2 a 
a","a"); + assertFieldMatch("significantOccurrence:1","a a a","a a a"); + assertFieldMatch("significantOccurrence:1","a%0.2 a a","a a a a"); + assertFieldMatch("significantOccurrence:0.3571","a%0.2","x x x a x x a x a x x x a a"); + assertFieldMatch("significantOccurrence:1","a%0.2","a a a a a a a a a a a a a a"); + assertFieldMatch("significantOccurrence:0.5","a b","a b b a a a a a b a a b a a"); + + assertFieldMatch("significantOccurrence:0.5714","a%0.2 b","a b b a a a a a b a a b a a"); + assertFieldMatch("significantOccurrence:0.6753","a%1 b","a b b a a a a a b a a b a a"); // Should be higher + assertFieldMatch("significantOccurrence:0.4286","a b%0.2","a b b a a a a a b a a b a a"); // Should be lower + assertFieldMatch("significantOccurrence:0.3247","a b%1","a b b a a a a a b a a b a a"); // Should be even lower + + assertFieldMatch("significantOccurrence:0.30","a b", "a a b b b b x x x x"); + assertFieldMatch("significantOccurrence:0.3333","a b%0.2","a a b b b b x x x x"); // More frequent is more important - higher + assertFieldMatch("significantOccurrence:0.2667","a%0.2 b","a a b b b b x x x x"); // Less frequent is more important - lower + assertFieldMatch("significantOccurrence:0.2667","a b%0.05", "a a b b b b x x x x"); // Same relative + + assertFieldMatch("significantOccurrence:0","a%0 b%0", "a a b b b b x x x x"); + + // tests going beyond the occurrence limit + fieldmatch::Params params; + params.setMaxOccurrences(10); + assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a b b", &params); + assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("significantOccurrence:0.6","a b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("significantOccurrence:1", "a b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("significantOccurrence:1", "a b","a a a a a a a a a a a b b b b b b 
b b b b b", &params); // Field is too large to consider field length + + assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a b b", &params); + assertFieldMatch("significantOccurrence:0.4667","a b%0.2","a a a a a a a a a a b b", &params); + assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a b b", &params); // Starting to cut off + assertFieldMatch("significantOccurrence:0.7333","a%0.2 b","a a a a a a a a a a a a a a a a a a a a a b b", &params); // Way beyond cutoff for a + assertFieldMatch("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a b b b b b b b b b b", &params); // Exactly no cutoff + assertFieldMatch("significantOccurrence:1", "a%0.2 b","a a a a a a a a a a a b b b b b b b b b b b", &params); // Field is too large to consider field length +} + +void +Test::testFieldMatchExecutorUnweightedProximity() +{ + assertFieldMatch("unweightedProximity:1", "a","a"); + assertFieldMatch("unweightedProximity:1", "a b c","a b c"); + assertFieldMatch("unweightedProximity:1", "a b c","a b c x"); + assertFieldMatch("unweightedProximity:1", "y a b c","a b c x"); + assertFieldMatch("unweightedProximity:1", "y a b c","a b c x"); + assertFieldMatch("unweightedProximity:0.855","y a b c","a b x c x"); + assertFieldMatch("unweightedProximity:0.750","y a b c","a b x x c x"); + assertFieldMatch("unweightedProximity:0.71", "y a b c","a x b x c x"); // Should be slightly worse than the previous one + assertFieldMatch("unweightedProximity:0.605","y a b c","a x b x x c x"); + assertFieldMatch("unweightedProximity:0.53", "y a b c","a x b x x x c x"); + assertFieldMatch("unweightedProximity:0.5", "y a b c","a x x b x x c x"); +} + + +void +Test::testFieldMatchExecutorReverseProximity() +{ + assertFieldMatch("unweightedProximity:0.33", "a b","b a"); + assertFieldMatch("unweightedProximity:0.62", "a b c","c a b"); + assertFieldMatch("unweightedProximity:0.585", "y a b c","c x a b"); + assertFieldMatch("unweightedProximity:0.33", "a b c","c b a"); + 
assertFieldMatch("unweightedProximity:0.6875","a b c d e","a b d c e"); + assertFieldMatch("unweightedProximity:0.9275","a b c d e","a b x c d e"); +} + + +void +Test::testFieldMatchExecutorAbsoluteProximity() +{ + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b","a b"); + assertFieldMatch("absoluteProximity:0.3 proximity:1", "a 0.3:b","a b"); + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a 0.0:b","a b"); + assertFieldMatch("absoluteProximity:1 proximity:1", "a 1.0:b","a b"); + assertFieldMatch("absoluteProximity:0.033 proximity:0.33", "a b","b a"); + assertFieldMatch("absoluteProximity:0.0108 proximity:0.0359","a 0.3:b","b a"); // Should be worse than the previous one + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a 0.0:b","b a"); + assertFieldMatch("absoluteProximity:0 proximity:0", "a 1.0:b","b a"); + + assertFieldMatch("absoluteProximity:0.0605 proximity:0.605", "a b c","a x b x x c"); + assertFieldMatch("absoluteProximity:0.0701 proximity:0.2003","a 0.5:b 0.2:c","a x b x x c"); // Most important is close, less important is far: Better + assertFieldMatch("absoluteProximity:0.0605 proximity:0.605", "a b c","a x x b x c"); + assertFieldMatch("absoluteProximity:0.0582 proximity:0.1663","a 0.5:b 0.2:c","a x x b x c"); // Most important is far, less important is close: Worse + + assertFieldMatch("absoluteProximity:0.0727 proximity:0.7267","a b c d","a b x x x x x c d"); + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b 0:c d","a b x x x x x c d"); // Should be better because the gap is unimportant + + // test with another proximity table + std::vector<feature_t> pt; + pt.push_back(0.2); + pt.push_back(0.4); + pt.push_back(0.6); + pt.push_back(0.8); + pt.push_back(1.0); + pt.push_back(0.8); + pt.push_back(0.6); + pt.push_back(0.4); + pt.push_back(0.2); + fieldmatch::Params params; + params.setProximityLimit(4); + params.setProximityTable(pt); + assertFieldMatch("absoluteProximity:0.07 proximity:0.7", "a b c","a x b x x c", 
&params); + assertFieldMatch("absoluteProximity:0.1179 proximity:0.3369","a 0.5:b 0.2:c","a x b x x c", &params); // Most important is close, less important is far: Better + assertFieldMatch("absoluteProximity:0.07 proximity:0.7", "a b c","a x x b x c", &params); + assertFieldMatch("absoluteProximity:0.0834 proximity:0.2384","a 0.5:b 0.2:c","a x x b x c", &params); // Most important is far, less important is close: Worse +} + + +void +Test::testFieldMatchExecutorMultiSegmentProximity() +{ + assertFieldMatch("absoluteProximity:0.1 proximity:1", "a b c", "a b x x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("absoluteProximity:0.05 proximity:0.5","a b c", "a x x b x x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("absoluteProximity:0.075 proximity:0.75","a b c d","a x x b x x x x x x x x x x x x x x x x x x x x x x c d"); +} + + +void +Test::testFieldMatchExecutorSegmentDistance() +{ + assertFieldMatch("segmentDistance:13 absoluteProximity:0.1", "a b c","a b x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:13 absoluteProximity:0.5", "a 0.5:b c","a b x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:13 absoluteProximity:0.1", "a b c","b c x x x x x x x x x x a"); + assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","b x x x x x x x x x x x a x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:13 absoluteProximity:0.006","a b c","a x x x x x x x x x x x b x x x x x x x x c"); + assertFieldMatch("segmentDistance:24 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x c"); + assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","a x x x x x x x x x x x b x x x x x x x x x x c"); + assertFieldMatch("segmentDistance:25 absoluteProximity:0.1", "a b c","c x x x x x x x x x x x b x x x x x x x x x x a"); +} + + +void +Test::testFieldMatchExecutorSegmentProximity() +{ + assertFieldMatch("segmentProximity:1", "a","a"); + assertFieldMatch("segmentProximity:0", "a","x"); + 
assertFieldMatch("segmentProximity:1", "a","a x"); + assertFieldMatch("segmentProximity:0", "a b","a x x x x x x x x x x x x x x x x x x x x x x x b"); + assertFieldMatch("segmentProximity:0.4","a b","a x x x x x x x x x x x x x x x x x x x x x x b x x x x x x x x x x x x x x x x"); + assertFieldMatch("segmentProximity:0", "a b c","a b x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("segmentProximity:0.4","a b c","a b x x x x x x x x x x x x x x x x x x x x x c x x x x x x x x x x x x x x x x"); + assertFieldMatch("segmentProximity:0.4","a b c","b c x x x x x x x x x x x x x x x x x x x x x a x x x x x x x x x x x x x x x x"); +} + + +void +Test::testFieldMatchExecutorSegmentStarts() +{ +#ifdef FIELDMATCH_OUTPUTS_SEGMENTSTARTS + // Test cases where we choose between multiple different segmentations + { // test segmentSelection + assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1 segmentStarts:19,41", + "a b c d e","x a b x c x x x x x x x x x x x x x x a b c x x x x x x x x x e x d x c d x x x c d e"); + // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 + // 0 1 2 3 4 + // Should choose - - - - - + + assertFieldMatch("segments:1 absoluteProximity:0.0778 proximity:0.778","a b c d e f","x x a b b b c f e d a b c d x e x x x x x f d e f a b c a a b b c c d d e e f f"); + + // Prefer one segment with ok proximity or two segments with great proximity + assertFieldMatch("segments:1 segmentStarts:0","a b c d","a b x c d x x x x x x x x x x x a b x x x x x x x x x x x c d"); + assertFieldMatch("segments:1 segmentStarts:0","a b c d","a b x x x x x x x x c d x x x x x x x x x x x a b x x x x x x x x x x x c d"); + } +#endif +} + + +void +Test::testFieldMatchExecutorMoreThanASegmentLengthOfUnmatchedQuery() +{ + assertFieldMatch("absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y","a b"); + assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1","a b c d y y y y y y y y y y y y y y 
y","a b x x x x x x x x x x x x x x x x x x c d"); + assertFieldMatch("segments:2 absoluteProximity:0.1 proximity:1","a b y y y y y y y y y y y y y y y c d","a b x x x x x x x x x x x x x x x x x x c d"); +} + + +void +Test::testFieldMatchExecutorQueryRepeats() +{ + // Not really handled perfectly, but good enough + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0", "a a a","a"); + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a a b c c"); + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a a b c c","a b c"); + assertFieldMatch("absoluteProximity:0.1 proximity:1 head:0 tail:0 gapLength:0","a b a b","a b a b"); + assertFieldMatch("absoluteProximity:0.0903 proximity:0.9033 head:0 tail:0 gapLength:1","a b a b","a b x a b"); + // Both terms take the same segment: + assertFieldMatch("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:18","a a","x x x a x x x x x x x x x x x x x x a x x x"); + // But not when the second is preferable + assertFieldMatch("absoluteProximity:0.1 proximity:1 segments:2 gapLength:0 head:3 tail:3","a b b a","x x x a b x x x x x x x x x x x x x x b a x x x"); + assertFieldMatch("matches:2 fieldCompleteness:1","a b b b","a b"); +} + + +void +Test::testFieldMatchExecutorZeroCases() +{ + // Note: we have no hits -> absoluteProximity:0(0.1) proximity:0(1) + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","y","a"); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","a","x"); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","","x"); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","y",""); + assertFieldMatch("absoluteProximity:0 proximity:0 matches:0","",""); +} + + +void +Test::testFieldMatchExecutorExceedingIterationLimit() +{ + // Segments found: a x x b and c d + { + fieldmatch::Params params; + params.setMaxAlternativeSegmentations(0); + assertFieldMatch("matches:4 tail:0 
proximity:0.75 absoluteProximity:0.075","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", &params); + } + + // Segments found: a x b and c d + { + fieldmatch::Params params; + params.setMaxAlternativeSegmentations(1); + assertFieldMatch("matches:4 tail:0 proximity:0.855 absoluteProximity:0.0855","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", &params); + } + + // Segments found: a b and c d + { + fieldmatch::Params params; + params.setMaxAlternativeSegmentations(2); + assertFieldMatch("matches:4 tail:0 proximity:1 absoluteProximity:0.1","a b c d","a x x b x x x a x b x x x x x a b x x x x x x x x x x x x x x x x x c d", &params); + } +} + + +void +Test::testFieldMatchExecutorRemaining() +{ + + { // test match (aka score) + // Ordered by decreasing match score per query + assertFieldMatch("score:1", "a","a"); + assertFieldMatch("score:0.9339","a","a x"); + assertFieldMatch("score:0", "a","x"); + assertFieldMatch("score:0.9243","a","x a"); + assertFieldMatch("score:0.9025","a","x a x"); + + assertFieldMatch("score:1", "a b","a b"); + assertFieldMatch("score:0.9558","a b","a b x"); + assertFieldMatch("score:0.9463","a b","x a b"); + assertFieldMatch("score:0.1296","a b","a x x x x x x x x x x x x x x x x x x x x x x b"); + assertFieldMatch("score:0.1288","a b","a x x x x x x x x x x x x x x x x x x x x x x x x x x x b"); + + assertFieldMatch("score:0.8647","a b c","x x a x b x x x x x x x x a b c x x x x x x x x c x x"); + assertFieldMatch("score:0.861", "a b c","x x a x b x x x x x x x x x x a b c x x x x x x c x x"); + assertFieldMatch("score:0.4869","a b c","a b x x x x x x x x x x x x x x x x x x x x x x c x x"); + assertFieldMatch("score:0.4853","a b c","x x a x b x x x x x x x x x x b a c x x x x x x c x x"); + assertFieldMatch("score:0.3621","a b c","a x b x x x x x x x x x x x x x x x x x x x x x c x x"); + assertFieldMatch("score:0.3619","a b c","x x a x b x x x x x x x x x x x x x x x x x x x c x 
x"); + assertFieldMatch("score:0.3584","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x c"); + assertFieldMatch("score:0.3421","a b c","x x a x b x x x x x x x x x x x x x x x x x x x x x x"); + + assertFieldMatch("score:0.3474","a b c","x x a x b x x x x x x x x x x x x x x b x x x b x b x"); + } + + { // test repeated match + // gap==1 caused by finding two possible segments due to repeated matching + assertFieldMatch("fieldCompleteness:1 queryCompleteness:0.6667 segments:1 earliness:1 gaps:1", + "pizza hut pizza","pizza hut"); + } + + //------------------- extra tests -------------------// + + { // test with a query on an attribute field + LOG(info, "Query on an attribute field"); + vespalib::string feature = "fieldMatch(foo)"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + ft.getQueryEnv().getBuilder().addAttributeNode("bar"); + ASSERT_TRUE(ft.setup()); + ASSERT_TRUE(ft.execute(toRankResult(feature, "score:0"))); + } + + + { // test with query on another index field as well + LOG(info, "Query on an another index field"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // search on 'foo' (0) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("bar")); // search on 'bar' (1) + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // add occurrence for 'foo' with query=a + ASSERT_TRUE(mdb->setFieldLength("foo", 1)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // a + + // add occurrence for 'bar' with query=a + ASSERT_TRUE(mdb->setFieldLength("bar", 2)); + 
ASSERT_TRUE(mdb->addOccurence("bar", 1, 1)); // x a + + ASSERT_TRUE(mdb->apply(1)); + + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:1 matches:1 queryCompleteness:1 fieldCompleteness:1"))); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0"), 2)); // another docid -> no hit -> default values + } + + { // search on more than one document + LOG(info, "Query on more than one document"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // 'a' (0) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // 'b' (1) + ASSERT_TRUE(ft.setup()); + + // check that we get the same results as this + // assertFieldMatch("score:1", "a b","a b"); + // assertFieldMatch("score:0.9558","a b","a b x"); + // assertFieldMatch("score:0.932", "a b","x a b"); + + { // docid 1: "a b" + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 2)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a' + ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b' + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:1 matches:2"), 1)); + } + { // docid 2: "a b x" + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 3)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 0)); // 'a' + ASSERT_TRUE(mdb->addOccurence("foo", 1, 1)); // 'b' + ASSERT_TRUE(mdb->apply(1)); + RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9558 matches:2"); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr, 1)); + } + { // docid 3: "x a b" + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 3)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 1)); // 'a' + ASSERT_TRUE(mdb->addOccurence("foo", 1, 2)); // 'b' + 
ASSERT_TRUE(mdb->apply(2)); + RankResult rr = toRankResult("fieldMatch(foo)", "score:0.9463 matches:2"); + rr.setEpsilon(1e-4); // same as java tests + ASSERT_TRUE(ft.execute(rr, 2)); + } + } + + { // test where not all hits have position information + LOG(info, "Not all hits have position information"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(200)); // search for 'a' (termId 0) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(400)); // search for 'b' (termId 1) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(600)); // search for 'c' (termId 2) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(800)); // search for 'd' (termId 3) + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("bar"))->setWeight(search::query::Weight(1000)); // search for 'e' (termId 4) + ASSERT_TRUE(ft.setup()); + + assertFieldMatch("score:0.3389 completeness:0.5083 degradedMatches:0", "a b c d", "x a b"); + + // field: x a b + { // no pos occ for term b -> score is somewhat degraded (lower .occurrence) + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + // add occurrence with query term 'a' + ASSERT_TRUE(mdb->setFieldLength("foo", 3)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 1)); + // add hit with query term 'b' + mdb->getTermFieldMatchData(1, 0)->reset(1); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", + "score:0.3231 completeness:0.5083 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:1"). 
+ setEpsilon(1e-4))); + } + { // no pos occ for term a & b + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + // add hit with query term 'a' + mdb->getTermFieldMatchData(0, 0)->reset(1); + // add hit with query term 'b' + mdb->getTermFieldMatchData(1, 0)->reset(1); + ASSERT_TRUE(mdb->apply(1)); + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", + "score:0 completeness:0.475 queryCompleteness:0.5 weight:0.2 matches:2 degradedMatches:2"). + setEpsilon(1e-4))); + } + } + + { // invalid field length + LOG(info, "We have an invalid field length"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo"))->setWeight(search::query::Weight(100)); // search for 'a' (termId 0) + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + // add occurrence with query term 'a' + ASSERT_TRUE(mdb->setFieldLength("foo", search::fef::FieldPositionsIterator::UNKNOWN_LENGTH)); // invalid field length + ASSERT_TRUE(mdb->addOccurence("foo", 0, 10)); + + ASSERT_TRUE(mdb->apply(1)); + + ASSERT_TRUE(ft.execute(toRankResult("fieldMatch(foo)", "score:0 matches:1 degradedMatches:0"))); + } + + { // test default values when we do not have hits in the field + LOG(info, "Default values when we have no hits"); + FtFeatureTest ft(_factory, StringList().add("fieldMatch(foo)")); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); // search on 'foo' (0) + ASSERT_TRUE(ft.setup()); + + // must create this so that term match data is configured with the term data object + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + + RankResult rr = toRankResult("fieldMatch(foo)", + "score:0 " + "proximity:0 " + "completeness:0 " + "queryCompleteness:0 " + "fieldCompleteness:0 " + 
"orderness:0 " + "relatedness:0 " + "earliness:0 " + "longestSequenceRatio:0 " + "segmentProximity:0 " + "unweightedProximity:0 " + "absoluteProximity:0 " + "occurrence:0 " + "absoluteOccurrence:0 " + "weightedOccurrence:0 " + "weightedAbsoluteOccurrence:0 " + "significantOccurrence:0 " + "weight:0 " + "significance:0 " + "importance:0 " + "segments:0 " + "matches:0 " + "outOfOrder:0 " + "gaps:0 " + "gapLength:0 " + "longestSequence:0 " + "head:0 " + "tail:0 " + "segmentDistance:0 ") + .setEpsilon(10e-6); + + ASSERT_TRUE(ft.execute(rr, 1)); // another docid -> no hit -> default values + } +} diff --git a/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp b/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp new file mode 100644 index 00000000000..04caadd2029 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_fieldtermmatch.cpp @@ -0,0 +1,113 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".prod_features_fieldtermmatch"); + +#include "prod_features.h" +#include <vespa/searchlib/features/fieldtermmatchfeature.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +void +Test::testFieldTermMatch() +{ + { + // Test blueprint. 
+ FieldTermMatchBlueprint pt; + { + EXPECT_TRUE(assertCreateInstance(pt, "fieldTermMatch")); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params); + FT_SETUP_FAIL(pt, params.add("foo")); + FT_SETUP_FAIL(pt, params.add("0")); + FT_SETUP_FAIL(pt, params.add("1")); + params.clear(); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FT_SETUP_FAIL(pt, ie, params.add("foo")); + FT_SETUP_OK (pt, ie, params.add("0"), in, + out.add("firstPosition") + .add("lastPosition") + .add("occurrences").add("weight").add("exactness")); + FT_SETUP_FAIL(pt, ie, params.add("1")); + } + { + FT_DUMP_EMPTY(_factory, "fieldTermMatch"); + + FtIndexEnvironment ie; + ie.getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "foo"); + FT_DUMP_EMPTY(_factory, "fieldTermMatch", ie); // must be an index field + + StringList dump; + ie.getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + for (uint32_t term = 0; term < 5; ++term) { + vespalib::string bn = vespalib::make_string("fieldTermMatch(bar,%u)", term); + dump.add(bn + ".firstPosition").add(bn + ".occurrences").add(bn + ".weight"); + } + FT_DUMP(_factory, "fieldTermMatch", ie, dump); + + ie.getProperties().add("fieldTermMatch.numTerms", "0"); + FT_DUMP_EMPTY(_factory, "fieldTermMatch", ie); + + ie.getProperties().add("fieldTermMatch.numTerms.bar", "5"); + FT_DUMP(_factory, "fieldTermMatch", ie, dump); + } + } + + { // Test executor. + FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ASSERT_TRUE(ft.setup()); + RankResult exp; + exp .addScore("fieldTermMatch(foo,0).firstPosition", 1000000) + .addScore("fieldTermMatch(foo,0).lastPosition", 1000000) + .addScore("fieldTermMatch(foo,0).occurrences", 0) + .addScore("fieldTermMatch(foo,0).weight", 0) + .addScore("fieldTermMatch(foo,0).exactness", 0); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor. 
+ FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addAllFields(); + ASSERT_TRUE(ft.setup()); + + search::fef::test::MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + ASSERT_TRUE(mdb->setFieldLength("foo", 100)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 10)); + ASSERT_TRUE(mdb->addOccurence("foo", 0, 20)); + ASSERT_TRUE(mdb->apply(1)); + + search::fef::test::RankResult exp; + exp .addScore("fieldTermMatch(foo,0).firstPosition", 10) + .addScore("fieldTermMatch(foo,0).lastPosition", 20) + .addScore("fieldTermMatch(foo,0).occurrences", 2) + .addScore("fieldTermMatch(foo,0).weight", 2) + .addScore("fieldTermMatch(foo,0).exactness", 1); + ASSERT_TRUE(ft.execute(exp)); + } + { + // Test executor (match without position information) + FtFeatureTest ft(_factory, "fieldTermMatch(foo,0)"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getQueryEnv().getBuilder().addIndexNode(StringList().add("foo")); + ASSERT_TRUE(ft.setup()); + + // make sure the term match data is initialized with the term data + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + mdb->getTermFieldMatchData(0, 0)->reset(1); + + search::fef::test::RankResult exp; + exp .addScore("fieldTermMatch(foo,0).firstPosition", 1000000) + .addScore("fieldTermMatch(foo,0).lastPosition", 1000000) + .addScore("fieldTermMatch(foo,0).occurrences", 1) + .addScore("fieldTermMatch(foo,0).weight", 0) + .addScore("fieldTermMatch(foo,0).exactness", 0); + ASSERT_TRUE(ft.execute(exp)); + } +} diff --git a/searchlib/src/tests/features/prod_features_framework.cpp b/searchlib/src/tests/features/prod_features_framework.cpp new file mode 100644 index 00000000000..5ce5e2c3177 --- /dev/null +++ b/searchlib/src/tests/features/prod_features_framework.cpp @@ -0,0 +1,174 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".prod_features_framework"); + +#include "prod_features.h" +#include <vespa/searchlib/features/valuefeature.h> + +using namespace search::features; +using namespace search::fef; +using namespace search::fef::test; + +void +Test::testFramework() +{ + LOG(info, "testFramework()"); + IndexEnvironment indexEnv; + { // test index environment builder + IndexEnvironmentBuilder ieb(indexEnv); + ieb.addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "baz"); + { + const FieldInfo * info = indexEnv.getFieldByName("foo"); + ASSERT_TRUE(info != NULL); + EXPECT_EQUAL(info->id(), 0u); + EXPECT_TRUE(info->type() == FieldType::INDEX); + EXPECT_TRUE(info->collection() == CollectionType::SINGLE); + } + { + const FieldInfo * info = indexEnv.getFieldByName("bar"); + ASSERT_TRUE(info != NULL); + EXPECT_EQUAL(info->id(), 1u); + EXPECT_TRUE(info->type() == FieldType::ATTRIBUTE); + EXPECT_TRUE(info->collection() == CollectionType::WEIGHTEDSET); + } + { + const FieldInfo * info = indexEnv.getFieldByName("baz"); + ASSERT_TRUE(info != NULL); + EXPECT_EQUAL(info->id(), 2u); + EXPECT_TRUE(info->type() == FieldType::INDEX); + EXPECT_TRUE(info->collection() == CollectionType::ARRAY); + } + ASSERT_TRUE(indexEnv.getFieldByName("qux") == NULL); + } + + QueryEnvironment queryEnv(&indexEnv); + MatchDataLayout layout; + { // test query environment builder + QueryEnvironmentBuilder qeb(queryEnv, layout); + { + SimpleTermData &tr = qeb.addAllFields(); + ASSERT_TRUE(tr.lookupField(0) != 0); + ASSERT_TRUE(tr.lookupField(1) != 0); + ASSERT_TRUE(tr.lookupField(2) != 0); + EXPECT_TRUE(tr.lookupField(3) == 0); + EXPECT_TRUE(tr.lookupField(0)->getHandle() == 0u); + EXPECT_TRUE(tr.lookupField(1)->getHandle() == 1u); + EXPECT_TRUE(tr.lookupField(2)->getHandle() == 2u); + const 
ITermData *tp = queryEnv.getTerm(0); + ASSERT_TRUE(tp != NULL); + EXPECT_EQUAL(tp, &tr); + } + { + SimpleTermData *tr = qeb.addAttributeNode("bar"); + ASSERT_TRUE(tr != 0); + ASSERT_TRUE(tr->lookupField(1) != 0); + EXPECT_TRUE(tr->lookupField(0) == 0); + EXPECT_TRUE(tr->lookupField(2) == 0); + EXPECT_TRUE(tr->lookupField(3) == 0); + EXPECT_TRUE(tr->lookupField(1)->getHandle() == 3u); + const ITermData *tp = queryEnv.getTerm(1); + ASSERT_TRUE(tp != NULL); + EXPECT_EQUAL(tp, tr); + } + } + + MatchData::UP data = layout.createMatchData(); + EXPECT_EQUAL(data->getNumTermFields(), 4u); + EXPECT_EQUAL(data->getNumFeatures(), 0u); + + { // check match data access + MatchDataBuilder mdb(queryEnv, *data); + + // setup some occurence lists + ASSERT_TRUE(mdb.addOccurence("foo", 0, 20)); + ASSERT_TRUE(mdb.addOccurence("foo", 0, 10)); + ASSERT_TRUE(mdb.setFieldLength("foo", 50)); + ASSERT_TRUE(mdb.addOccurence("baz", 0, 15)); + ASSERT_TRUE(mdb.addOccurence("baz", 0, 5)); + ASSERT_TRUE(mdb.setFieldLength("baz", 100)); + ASSERT_TRUE(mdb.apply(100)); + + { + { + TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 0); + ASSERT_TRUE(tfmd != NULL); + + FieldPositionsIterator itr = tfmd->getIterator(); // foo (index) + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(itr.getFieldLength(), 50u); + EXPECT_EQUAL(itr.getPosition(), 10u); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(itr.getPosition(), 20u); + itr.next(); + ASSERT_TRUE(!itr.valid()); + } + { + TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 1); + ASSERT_TRUE(tfmd != NULL); + + FieldPositionsIterator itr = tfmd->getIterator(); // bar (attribute) + ASSERT_TRUE(!itr.valid()); + } + { + TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(0, 2); + ASSERT_TRUE(tfmd != NULL); + + FieldPositionsIterator itr = tfmd->getIterator(); // baz (index) + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(itr.getFieldLength(), 100u); + EXPECT_EQUAL(itr.getPosition(), 5u); + itr.next(); + ASSERT_TRUE(itr.valid()); + 
EXPECT_EQUAL(itr.getPosition(), 15u); + itr.next(); + ASSERT_TRUE(!itr.valid()); + } + } + { + TermFieldMatchData *tfmd = mdb.getTermFieldMatchData(1, 1); + ASSERT_TRUE(tfmd != NULL); + + FieldPositionsIterator itr = tfmd->getIterator(); // bar (attribute) + ASSERT_TRUE(!itr.valid()); + } + } + { // check that data is cleared + MatchDataBuilder mdb(queryEnv, *data); + EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 0)->getDocId(), TermFieldMatchData::invalidId()); + EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 1)->getDocId(), TermFieldMatchData::invalidId()); + EXPECT_EQUAL(mdb.getTermFieldMatchData(0, 2)->getDocId(), TermFieldMatchData::invalidId()); + EXPECT_EQUAL(mdb.getTermFieldMatchData(1, 1)->getDocId(), TermFieldMatchData::invalidId()); + + // test illegal things + ASSERT_TRUE(!mdb.addOccurence("foo", 1, 10)); // invalid term/field combination + } + + BlueprintFactory factory; + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + Properties overrides; + + { // test feature test runner + FeatureTest ft(factory, indexEnv, queryEnv, layout, + StringList().add("value(10)").add("value(20)").add("value(30)"), overrides); + MatchDataBuilder::UP mdb1 = ft.createMatchDataBuilder(); + EXPECT_TRUE(mdb1.get() == NULL); + EXPECT_TRUE(!ft.execute(RankResult().addScore("value(10)", 10.0f))); + ASSERT_TRUE(ft.setup()); + MatchDataBuilder::UP mdb2 = ft.createMatchDataBuilder(); + EXPECT_TRUE(mdb2.get() != NULL); + + EXPECT_TRUE(ft.execute(RankResult().addScore("value(10)", 10.0f).addScore("value(20)", 20.0f))); + EXPECT_TRUE(!ft.execute(RankResult().addScore("value(10)", 20.0f))); + EXPECT_TRUE(!ft.execute(RankResult().addScore("value(5)", 5.0f))); + } + { // test simple constructor + MatchDataLayout mdl; // match data layout cannot be reused + FeatureTest ft(factory, indexEnv, queryEnv, mdl, "value(10)", overrides); + ASSERT_TRUE(ft.setup()); + EXPECT_TRUE(ft.execute(10.0f)); + } +} diff --git a/searchlib/src/tests/features/prod_features_test.sh 
b/searchlib/src/tests/features/prod_features_test.sh new file mode 100755 index 00000000000..bec2b49807f --- /dev/null +++ b/searchlib/src/tests/features/prod_features_test.sh @@ -0,0 +1,3 @@ +#!/bin/bash +VESPA_LOG_TARGET=file:vlog2.txt $VALGRIND ./searchlib_prod_features_test_app +rm -rf *.dat diff --git a/searchlib/src/tests/features/ranking_expression/.gitignore b/searchlib/src/tests/features/ranking_expression/.gitignore new file mode 100644 index 00000000000..63ab51e663a --- /dev/null +++ b/searchlib/src/tests/features/ranking_expression/.gitignore @@ -0,0 +1 @@ +searchlib_ranking_expression_test_app diff --git a/searchlib/src/tests/features/ranking_expression/CMakeLists.txt b/searchlib/src/tests/features/ranking_expression/CMakeLists.txt new file mode 100644 index 00000000000..4caddaa7bd8 --- /dev/null +++ b/searchlib/src/tests/features/ranking_expression/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_ranking_expression_test_app + SOURCES + ranking_expression_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_ranking_expression_test_app COMMAND searchlib_ranking_expression_test_app) diff --git a/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp b/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp new file mode 100644 index 00000000000..64fb3477951 --- /dev/null +++ b/searchlib/src/tests/features/ranking_expression/ranking_expression_test.cpp @@ -0,0 +1,90 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/searchlib/fef/feature_type.h> +#include <vespa/searchlib/fef/featurenameparser.h> +#include <vespa/searchlib/features/rankingexpressionfeature.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> + +using namespace search::features; +using namespace search::fef::test; +using namespace search::fef; +using namespace vespalib::eval; + +using TypeMap = std::map<vespalib::string,vespalib::string>; + +struct SetupResult { + IndexEnvironment index_env; + RankingExpressionBlueprint rank; + DummyDependencyHandler deps; + bool setup_ok; + SetupResult(const TypeMap &object_inputs, + const vespalib::string &expression) + : index_env(), rank(), deps(rank), setup_ok(false) + { + rank.setName("self"); + index_env.getProperties().add("self.rankingScript", expression); + for (const auto &input: object_inputs) { + deps.define_object_input(input.first, ValueType::from_spec(input.second)); + } + setup_ok = rank.setup(index_env, {}); + EXPECT_TRUE(!deps.accept_type_mismatch); + } +}; + +void verify_output_type(const TypeMap &object_inputs, + const vespalib::string &expression, const FeatureType &expect) +{ + SetupResult result(object_inputs, expression); + EXPECT_TRUE(result.setup_ok); + EXPECT_EQUAL(1u, result.deps.output.size()); + ASSERT_EQUAL(1u, result.deps.output_type.size()); + if (expect.is_object()) { + EXPECT_EQUAL(expect.type(), result.deps.output_type[0].type()); + } else { + EXPECT_TRUE(!result.deps.output_type[0].is_object()); + } +} + +void verify_setup_fail(const TypeMap &object_inputs, + const vespalib::string &expression) +{ + SetupResult result(object_inputs, expression); + EXPECT_TRUE(!result.setup_ok); + EXPECT_EQUAL(0u, result.deps.output.size()); +} + +TEST("require that expression with only number inputs produce number output (compiled)") { + 
TEST_DO(verify_output_type({}, "a*b", FeatureType::number())); +} + +TEST("require that expression with object input produces object output (interpreted)") { + TEST_DO(verify_output_type({{"b", "double"}}, "a*b", FeatureType::object(ValueType::double_type()))); +} + +TEST("require that expression with internal tensor operations produce object output (interpreted)") { + TEST_DO(verify_output_type({}, "a*b*sum({{x:1}:5,{x:2}:7})", FeatureType::object(ValueType::double_type()))); +} + +TEST("require that ranking expression can resolve to concrete complex type") { + TEST_DO(verify_output_type({{"a", "tensor(x{},y{})"}, {"b", "tensor(y{},z{})"}}, "a*b", + FeatureType::object(ValueType::from_spec("tensor(x{},y{},z{})")))); +} + +TEST("require that ranking expression can resolve to abstract complex type") { + TEST_DO(verify_output_type({{"a", "tensor"}}, "a*b", FeatureType::object(ValueType::from_spec("tensor")))); +} + +TEST("require that ranking expression can resolve to 'any' type") { + TEST_DO(verify_output_type({{"a", "tensor(x{},y{})"}, {"b", "tensor"}}, "a*b", + FeatureType::object(ValueType::from_spec("any")))); +} + +TEST("require that setup fails for incompatible types") { + TEST_DO(verify_setup_fail({{"a", "tensor(x{},y{})"}, {"b", "tensor(y[10],z{})"}}, "a*b")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/raw_score/.gitignore b/searchlib/src/tests/features/raw_score/.gitignore new file mode 100644 index 00000000000..a1b2d4e3f16 --- /dev/null +++ b/searchlib/src/tests/features/raw_score/.gitignore @@ -0,0 +1 @@ +searchlib_raw_score_test_app diff --git a/searchlib/src/tests/features/raw_score/CMakeLists.txt b/searchlib/src/tests/features/raw_score/CMakeLists.txt new file mode 100644 index 00000000000..a672b7b071d --- /dev/null +++ b/searchlib/src/tests/features/raw_score/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_raw_score_test_app + SOURCES + raw_score_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_raw_score_test_app COMMAND searchlib_raw_score_test_app) diff --git a/searchlib/src/tests/features/raw_score/FILES b/searchlib/src/tests/features/raw_score/FILES new file mode 100644 index 00000000000..479927259ee --- /dev/null +++ b/searchlib/src/tests/features/raw_score/FILES @@ -0,0 +1 @@ +raw_score_test.cpp diff --git a/searchlib/src/tests/features/raw_score/raw_score_test.cpp b/searchlib/src/tests/features/raw_score/raw_score_test.cpp new file mode 100644 index 00000000000..0a15ff69318 --- /dev/null +++ b/searchlib/src/tests/features/raw_score/raw_score_test.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/raw_score_feature.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +const std::string featureName("rawScore(foo)"); + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void 
visitDumpFeature(const vespalib::string &) { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; + RankProgram::UP rankProgram; + MatchDataLayout mdl; + std::vector<TermFieldHandle> fooHandles; + std::vector<TermFieldHandle> barHandles; + RankFixture(size_t fooCnt, size_t barCnt) + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + rankProgram(), mdl(), fooHandles(), barHandles() + { + for (size_t i = 0; i < fooCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("foo")->id(); + fooHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.addField(fieldId).setHandle(fooHandles.back()); + queryEnv.getTerms().push_back(term); + } + for (size_t i = 0; i < barCnt; ++i) { + uint32_t fieldId = indexEnv.getFieldByName("bar")->id(); + barHandles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.addField(fieldId).setHandle(barHandles.back()); + queryEnv.getTerms().push_back(term); + } + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + rankProgram = rankSetup.create_first_phase_program(); + rankProgram->setup(mdl, queryEnv); + } + feature_t getScore(uint32_t docId) { + rankProgram->run(docId); + return *Utils::getScoreFeature(*rankProgram); + } + void setScore(TermFieldHandle handle, uint32_t docId, feature_t score) { + rankProgram->match_data().resolveTermField(handle)->setRawScore(docId, score); + } + void setFooScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, fooHandles.size()); + setScore(fooHandles[i], docId, score); + } + void setBarScore(uint32_t i, uint32_t docId, feature_t score) { + ASSERT_LESS(i, barHandles.size()); + setScore(barHandles[i], docId, score); + } +}; + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { 
+ Blueprint::SP bp = f.factory.createBlueprint("rawScore"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<RawScoreBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", RawScoreBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on index field", RawScoreBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo"))); +} + +TEST_FF("require that setup can be done on attribute field", RawScoreBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar"))); +} + +TEST_FF("require that setup fails for unknown field", RawScoreBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "unknown"))); +} + +TEST_F("require that not searching a filed will give it 0.0 raw score", RankFixture(0, 3)) { + EXPECT_EQUAL(0.0, f1.getScore(10)); +} + +TEST_F("require that raw score can be obtained", RankFixture(1, 0)) { + f1.setFooScore(0, 10, 5.0); + EXPECT_EQUAL(5.0, f1.getScore(10)); +} + +TEST_F("require that multiple raw scores are accumulated", RankFixture(3, 0)) { + f1.setFooScore(0, 10, 1.0); + f1.setFooScore(1, 10, 2.0); + f1.setFooScore(2, 10, 3.0); + EXPECT_EQUAL(6.0, f1.getScore(10)); +} + +TEST_F("require that stale raw scores are ignored", RankFixture(3, 0)) { + f1.setFooScore(0, 10, 1.0); + f1.setFooScore(1, 9, 2.0); + f1.setFooScore(2, 10, 3.0); + EXPECT_EQUAL(4.0, f1.getScore(10)); +} + +TEST_F("require that raw scores from other fields are ignored", RankFixture(2, 
2)) { + f1.setFooScore(0, 10, 1.0); + f1.setFooScore(1, 10, 2.0); + f1.setBarScore(0, 10, 5.0); + f1.setBarScore(1, 10, 6.0); + EXPECT_EQUAL(3.0, f1.getScore(10)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/subqueries/.gitignore b/searchlib/src/tests/features/subqueries/.gitignore new file mode 100644 index 00000000000..63dc19177d1 --- /dev/null +++ b/searchlib/src/tests/features/subqueries/.gitignore @@ -0,0 +1 @@ +searchlib_subqueries_test_app diff --git a/searchlib/src/tests/features/subqueries/CMakeLists.txt b/searchlib/src/tests/features/subqueries/CMakeLists.txt new file mode 100644 index 00000000000..45845e8ec1b --- /dev/null +++ b/searchlib/src/tests/features/subqueries/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_subqueries_test_app + SOURCES + subqueries_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_subqueries_test_app COMMAND searchlib_subqueries_test_app) diff --git a/searchlib/src/tests/features/subqueries/subqueries_test.cpp b/searchlib/src/tests/features/subqueries/subqueries_test.cpp new file mode 100644 index 00000000000..160ec404b20 --- /dev/null +++ b/searchlib/src/tests/features/subqueries/subqueries_test.cpp @@ -0,0 +1,162 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/subqueries_feature.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + virtual void visitDumpFeature(const vespalib::string &) { + TEST_ERROR("no features should be dumped"); + } + FeatureDumpFixture() : IDumpFeatureVisitor() {} +}; + +struct RankFixture : BlueprintFactoryFixture, IndexFixture { + QueryEnvironment queryEnv; + RankSetup rankSetup; + RankProgram::UP rankProgram; + MatchDataLayout mdl; + std::vector<TermFieldHandle> fooHandles; + std::vector<TermFieldHandle> barHandles; + RankFixture(size_t fooCnt, size_t barCnt, + std::string featureName = "subqueries(foo)") + : queryEnv(&indexEnv), rankSetup(factory, indexEnv), + rankProgram(), mdl(), fooHandles(), barHandles() + { + fooHandles = addFields(fooCnt, indexEnv.getFieldByName("foo")->id()); + barHandles = addFields(barCnt, indexEnv.getFieldByName("bar")->id()); + rankSetup.setFirstPhaseRank(featureName); + rankSetup.setIgnoreDefaultRankFeatures(true); + ASSERT_TRUE(rankSetup.compile()); + rankProgram = 
rankSetup.create_first_phase_program(); + rankProgram->setup(mdl, queryEnv); + } + std::vector<TermFieldHandle> addFields(size_t count, uint32_t fieldId) { + std::vector<TermFieldHandle> handles; + for (size_t i = 0; i < count; ++i) { + handles.push_back(mdl.allocTermField(fieldId)); + SimpleTermData term; + term.addField(fieldId).setHandle(handles.back()); + queryEnv.getTerms().push_back(term); + } + return handles; + } + feature_t getSubqueries(uint32_t docId) { + rankProgram->run(docId); + return *Utils::getScoreFeature(*rankProgram); + } + void setSubqueries(TermFieldHandle handle, uint32_t docId, + uint64_t subqueries) { + rankProgram->match_data().resolveTermField(handle)->setSubqueries(docId, subqueries); + } + void setFooSubqueries(uint32_t i, uint32_t docId, uint64_t subqueries) { + ASSERT_LESS(i, fooHandles.size()); + setSubqueries(fooHandles[i], docId, subqueries); + } + void setBarSubqueries(uint32_t i, uint32_t docId, uint64_t subqueries) { + ASSERT_LESS(i, barHandles.size()); + setSubqueries(barHandles[i], docId, subqueries); + } +}; + +TEST_F("require that blueprint can be created from factory", + BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("subqueries"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<SubqueriesBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that no features are dumped", + SubqueriesBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); +} + +TEST_FF("require that setup can be done on index field", + SubqueriesBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, {"foo"})); +} + +TEST_FF("require that setup can be done on attribute field", + SubqueriesBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + 
EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, {"bar"})); +} + +TEST_FF("require that setup fails for unknown field", + SubqueriesBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(unknown)", f1.getBaseName().c_str())); + EXPECT_FALSE(((Blueprint&)f1).setup(f2.indexEnv, {"unknown"})); +} + +TEST_F("require that not searching a field will give it 0 subqueries", + RankFixture(0, 3)) { + EXPECT_EQUAL(0, f1.getSubqueries(10)); +} + +TEST_F("require that subqueries can be obtained", RankFixture(1, 0)) { + f1.setFooSubqueries(0, 10, 0x1234); + EXPECT_EQUAL(0x1234, f1.getSubqueries(10)); +} + +TEST_F("require that msb subqueries can be obtained", + RankFixture(1, 0, "subqueries(foo).msb")) { + f1.setFooSubqueries(0, 10, 0x123412345678ULL); + EXPECT_EQUAL(0x1234, f1.getSubqueries(10)); +} + +TEST_F("require that multiple subqueries are accumulated", RankFixture(3, 0)) { + f1.setFooSubqueries(0, 10, 1); + f1.setFooSubqueries(1, 10, 2); + f1.setFooSubqueries(2, 10, 4); + EXPECT_EQUAL(7, f1.getSubqueries(10)); +} + +TEST_F("require that stale subqueries are ignored", RankFixture(3, 0)) { + f1.setFooSubqueries(0, 10, 1); + f1.setFooSubqueries(1, 9, 2); + f1.setFooSubqueries(2, 10, 4); + EXPECT_EQUAL(5, f1.getSubqueries(10)); +} + +TEST_F("require that subqueries from other fields are ignored", + RankFixture(2, 2)) { + f1.setFooSubqueries(0, 10, 1); + f1.setFooSubqueries(1, 10, 2); + f1.setBarSubqueries(0, 10, 4); + f1.setBarSubqueries(1, 10, 8); + EXPECT_EQUAL(3, f1.getSubqueries(10)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/tensor/.gitignore b/searchlib/src/tests/features/tensor/.gitignore new file mode 100644 index 00000000000..ae6d6dfb414 --- /dev/null +++ b/searchlib/src/tests/features/tensor/.gitignore @@ -0,0 +1 @@ +searchlib_tensor_test_app diff --git a/searchlib/src/tests/features/tensor/CMakeLists.txt b/searchlib/src/tests/features/tensor/CMakeLists.txt new file mode 100644 
index 00000000000..33f7d44d8fe --- /dev/null +++ b/searchlib/src/tests/features/tensor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensor_test_app + SOURCES + tensor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensor_test_app COMMAND searchlib_tensor_test_app) diff --git a/searchlib/src/tests/features/tensor/FILES b/searchlib/src/tests/features/tensor/FILES new file mode 100644 index 00000000000..6ece9b360b5 --- /dev/null +++ b/searchlib/src/tests/features/tensor/FILES @@ -0,0 +1 @@ +tensor_test.cpp diff --git a/searchlib/src/tests/features/tensor/tensor_test.cpp b/searchlib/src/tests/features/tensor/tensor_test.cpp new file mode 100644 index 00000000000..caceea0f47b --- /dev/null +++ b/searchlib/src/tests/features/tensor/tensor_test.cpp @@ -0,0 +1,237 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> + +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/tensorattribute.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/vespalib/tensor/tensor_factory.h> +#include <vespa/vespalib/tensor/default_tensor.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/searchlib/attribute/tensorattribute.h> +#include <vespa/vespalib/eval/interpreted_function.h> +#include <vespa/vespalib/tensor/default_tensor_engine.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::indexproperties; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::attribute::TensorAttribute; +using search::AttributeVector; +using vespalib::eval::Value; +using vespalib::eval::Function; +using vespalib::tensor::Tensor; +using vespalib::tensor::TensorCells; +using vespalib::tensor::DenseTensorCells; +using vespalib::tensor::TensorDimensions; +using vespalib::tensor::TensorFactory; +using vespalib::tensor::TensorType; +using vespalib::eval::InterpretedFunction; +using vespalib::tensor::DefaultTensorEngine; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +namespace +{ + +Tensor::UP createTensor(const TensorCells &cells, + const TensorDimensions &dimensions) { + vespalib::tensor::DefaultTensor::builder builder; + return 
TensorFactory::create(cells, dimensions, builder); +} + +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + ExecFixture(const vespalib::string &feature) + : factory(), + test(factory, feature) + { + setup_search_features(factory); + setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + void addAttributeField(const vespalib::string &attrName) { + test.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, attrName); + } + AttributeVector::SP createStringAttribute(const vespalib::string &attrName) { + addAttributeField(attrName); + return AttributeFactory::createAttribute(attrName, AVC(AVBT::STRING, AVCT::SINGLE)); + } + AttributeVector::SP createTensorAttribute(const vespalib::string &attrName, const vespalib::string &type) { + addAttributeField(attrName); + AVC config(AVBT::TENSOR, AVCT::SINGLE); + config.setTensorType(TensorType::fromSpec(type)); + return AttributeFactory::createAttribute(attrName, config); + } + void setAttributeTensorType(const vespalib::string &attrName, const vespalib::string &type) { + type::Attribute::set(test.getIndexEnv().getProperties(), attrName, type); + } + void setQueryTensorType(const vespalib::string &queryFeatureName, const vespalib::string &type) { + type::QueryFeature::set(test.getIndexEnv().getProperties(), queryFeatureName, type); + } + void setupAttributeVectors() { + std::vector<AttributePtr> attrs; + attrs.push_back(createTensorAttribute("tensorattr", "tensor(x{})")); + attrs.push_back(createStringAttribute("singlestr")); + attrs.push_back(createTensorAttribute("wrongtype", "tensor(y{})")); + addAttributeField("null"); + setAttributeTensorType("tensorattr", "tensor(x{})"); + setAttributeTensorType("wrongtype", "tensor(x{})"); + setAttributeTensorType("null", "tensor(x{})"); + + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(2); + attr->clearDoc(1); + attr->clearDoc(2); + attr->commit(); + 
test.getIndexEnv().getAttributeManager().add(attr); + } + + TensorAttribute *tensorAttr = + dynamic_cast<TensorAttribute *>(attrs[0].get()); + + tensorAttr->setTensor(1, *createTensor({ {{{"x", "a"}}, 3}, + {{{"x", "b"}}, 5}, + {{{"x", "c"}}, 7} }, + { "x" })); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + void setQueryTensor(const vespalib::string &tensorName, + const vespalib::string &tensorTypeSpec, + const TensorCells &cells, + const TensorDimensions &dimensions) + { + auto tensor = createTensor(cells, dimensions); + vespalib::nbostream stream; + vespalib::tensor::TypedBinaryFormat::serialize(stream, *tensor); + test.getQueryEnv().getProperties().add(tensorName, + vespalib::stringref(stream.peek(), stream.size())); + setQueryTensorType(tensorName, tensorTypeSpec); + } + + void setupQueryEnvironment() { + setQueryTensor("tensorquery", + "tensor(q{})", + { {{{"q", "d"}}, 11 }, + {{{"q", "e"}}, 13 }, + {{{"q", "f"}}, 17 } }, + { "q" }); + setQueryTensor("mappedtensorquery", + "tensor(x[2])", + { {{{"x", "0"},{"y", "0"}}, 11 }, + {{{"x", "0"},{"y", "1"}}, 13 }, + {{{"x", "1"},{"y", "0"}}, 17 } }, + { "x", "y" }); + setQueryTensorType("null", "tensor(q{})"); + } + const Tensor &extractTensor() { + const Value::CREF *value = test.resolveObjectFeature(); + ASSERT_TRUE(value != nullptr); + ASSERT_TRUE(value->get().is_tensor()); + return static_cast<const Tensor &>(*value->get().as_tensor()); + } + const Tensor &execute(uint32_t docId = 1) { + test.executeOnly(docId); + return extractTensor(); + } +}; + +struct AsTensor { + InterpretedFunction ifun; + InterpretedFunction::Context ctx; + const Value *result; + explicit AsTensor(const vespalib::string &expr) + : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx)) + { + ASSERT_TRUE(result->is_tensor()); + } + bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); } +}; + +std::ostream &operator<<(std::ostream 
&os, const AsTensor &my_tensor) { + os << my_tensor.result->as_tensor(); + return os; +} + +TEST_F("require that tensor attribute can be extracted as tensor in attribute feature", + ExecFixture("attribute(tensorattr)")) +{ + EXPECT_EQUAL(AsTensor("{ {x:b}:5, {x:c}:7, {x:a}:3 }"), f.execute()); +} + +TEST_F("require that tensor from query can be extracted as tensor in query feature", + ExecFixture("query(tensorquery)")) +{ + EXPECT_EQUAL(AsTensor("{ {q:f}:17, {q:d}:11, {q:e}:13 }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute does not exists", + ExecFixture("attribute(null)")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if tensor type is wrong", + ExecFixture("attribute(wrongtype)")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if query parameter is not found", + ExecFixture("query(null)")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if document has no tensor", + ExecFixture("attribute(tensorattr)")) { + EXPECT_EQUAL(AsTensor("{ }"), f.execute(2)); +} + +struct AsDenseTensor { + Tensor::UP tensor; + explicit AsDenseTensor(const DenseTensorCells &cells) + : tensor(TensorFactory::createDense(cells)) + { + ASSERT_TRUE(!!tensor); + } + bool operator==(const Tensor &rhs) const { return tensor->equals(rhs); } +}; + + +std::ostream &operator<<(std::ostream &os, const AsDenseTensor &my_tensor) { + os << *my_tensor.tensor; + return os; +} + +TEST_F("require that tensor from query is mapped", + ExecFixture("query(mappedtensorquery)")) { + EXPECT_EQUAL(AsDenseTensor({ {{{"x", 0}}, 24}, + {{{"x", 1}}, 17} }), + f.execute()); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/tensor_from_labels/.gitignore b/searchlib/src/tests/features/tensor_from_labels/.gitignore new file mode 100644 index 00000000000..0e241941ca3 --- /dev/null +++ 
b/searchlib/src/tests/features/tensor_from_labels/.gitignore @@ -0,0 +1 @@ +searchlib_tensor_from_labels_test_app diff --git a/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt new file mode 100644 index 00000000000..db1814a0f66 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_labels/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_tensor_from_labels_test_app + SOURCES + tensor_from_labels_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensor_from_labels_test_app COMMAND searchlib_tensor_from_labels_test_app) diff --git a/searchlib/src/tests/features/tensor_from_labels/FILES b/searchlib/src/tests/features/tensor_from_labels/FILES new file mode 100644 index 00000000000..daecb2bbf5b --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_labels/FILES @@ -0,0 +1 @@ +tensor_from_labels_test.cpp diff --git a/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp new file mode 100644 index 00000000000..b15ffb956ce --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_labels/tensor_from_labels_test.cpp @@ -0,0 +1,211 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/interpreted_function.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/default_tensor_engine.h> + +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/features/tensor_from_labels_feature.h> +#include <vespa/searchlib/fef/fef.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::StringAttribute; +using vespalib::eval::Value; +using vespalib::eval::Function; +using vespalib::eval::InterpretedFunction; +using vespalib::tensor::Tensor; +using vespalib::tensor::DefaultTensorEngine; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +struct SetupFixture +{ + TensorFromLabelsBlueprint blueprint; + IndexEnvironment indexEnv; + SetupFixture() + : blueprint(), + indexEnv() + { + } +}; + +TEST_F("require that blueprint can be created from factory", SetupFixture) +{ + EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "tensorFromLabels")); +} + +TEST_F("require that setup fails if source spec is invalid", SetupFixture) +{ + FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)")); +} + +TEST_F("require that setup 
succeeds with attribute source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("attribute(foo)"), + StringList(), StringList().add("tensor")); +} + +TEST_F("require that setup succeeds with query source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("query(foo)"), + StringList(), StringList().add("tensor")); +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + ExecFixture(const vespalib::string &feature) + : factory(), + test(factory, feature) + { + setup_search_features(factory); + setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + void setupAttributeVectors() { + std::vector<AttributePtr> attrs; + attrs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("aint", AVC(AVBT::INT32, AVCT::ARRAY))); + attrs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); + + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(1); + test.getIndexEnv().getAttributeManager().add(attr); + } + + StringAttribute *astr = static_cast<StringAttribute *>(attrs[0].get()); + // Note that the weight parameter is not used + astr->append(1, "a", 0); + astr->append(1, "b", 0); + astr->append(1, "c", 0); + + IntegerAttribute *aint = static_cast<IntegerAttribute *>(attrs[1].get()); + aint->append(1, 3, 0); + aint->append(1, 5, 0); + aint->append(1, 7, 0); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + void setupQueryEnvironment() { + test.getQueryEnv().getProperties().add("astr_query", "[d e f]"); + test.getQueryEnv().getProperties().add("aint_query", "[11 13 17]"); + } + const Tensor &extractTensor() { + const Value::CREF *value = test.resolveObjectFeature(); + ASSERT_TRUE(value != nullptr); + ASSERT_TRUE(value->get().is_tensor()); + return static_cast<const Tensor &>(*value->get().as_tensor()); + } + const Tensor 
&execute() { + test.executeOnly(); + return extractTensor(); + } +}; + +struct AsTensor { + InterpretedFunction ifun; + InterpretedFunction::Context ctx; + const Value *result; + explicit AsTensor(const vespalib::string &expr) + : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx)) + { + ASSERT_TRUE(result->is_tensor()); + } + bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); } +}; + +std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) { + os << my_tensor.result->as_tensor(); + return os; +} + +// Tests for attribute source: + +TEST_F("require that array string attribute can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(attribute(astr))")) +{ + EXPECT_EQUAL(AsTensor("{ {astr:a}:1, {astr:b}:1, {astr:c}:1 }"), f.execute()); +} + +TEST_F("require that array string attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromLabels(attribute(astr),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:a}:1, {dim:b}:1, {dim:c}:1 }"), f.execute()); +} + +TEST_F("require that array integer attribute can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(attribute(aint))")) +{ + EXPECT_EQUAL(AsTensor("{ {aint:7}:1, {aint:3}:1, {aint:5}:1 }"), f.execute()); +} + +TEST_F("require that array attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromLabels(attribute(aint),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:7}:1, {dim:3}:1, {dim:5}:1 }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute does not exists", + ExecFixture("tensorFromLabels(attribute(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute type is not supported", + ExecFixture("tensorFromLabels(attribute(wsstr))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + + +// Tests for query source: + 
+TEST_F("require that string array from query can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(query(astr_query))")) +{ + EXPECT_EQUAL(AsTensor("{ {astr_query:d}:1, {astr_query:e}:1, {astr_query:f}:1 }"), f.execute()); +} + +TEST_F("require that integer array from query can be converted to tensor (default dimension)", + ExecFixture("tensorFromLabels(query(aint_query))")) +{ + EXPECT_EQUAL(AsTensor("{ {aint_query:13}:1, {aint_query:17}:1, {aint_query:11}:1 }"), f.execute()); +} + +TEST_F("require that string array from query can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromLabels(query(astr_query),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:d}:1, {dim:e}:1, {dim:f}:1 }"), f.execute()); +} + +TEST_F("require that empty tensor is created if query parameter is not found", + ExecFixture("tensorFromLabels(query(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore b/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore new file mode 100644 index 00000000000..a56eade053e --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/.gitignore @@ -0,0 +1 @@ +searchlib_tensor_from_weighted_set_test_app diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt new file mode 100644 index 00000000000..7c38b301679 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_tensor_from_weighted_set_test_app + SOURCES + tensor_from_weighted_set_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tensor_from_weighted_set_test_app COMMAND searchlib_tensor_from_weighted_set_test_app) diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/FILES b/searchlib/src/tests/features/tensor_from_weighted_set/FILES new file mode 100644 index 00000000000..639a54230b1 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/FILES @@ -0,0 +1 @@ +tensor_from_weighted_set_test.cpp diff --git a/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp new file mode 100644 index 00000000000..163fd5b5389 --- /dev/null +++ b/searchlib/src/tests/features/tensor_from_weighted_set/tensor_from_weighted_set_test.cpp @@ -0,0 +1,198 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/eval/function.h> +#include <vespa/vespalib/eval/interpreted_function.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/default_tensor_engine.h> + +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/features/tensor_from_weighted_set_feature.h> +#include <vespa/searchlib/fef/fef.h> + +using search::feature_t; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::AttributeFactory; +using search::IntegerAttribute; +using search::StringAttribute; +using vespalib::eval::Value; +using vespalib::eval::Function; +using vespalib::eval::InterpretedFunction; +using vespalib::tensor::Tensor; +using vespalib::tensor::DefaultTensorEngine; + +typedef search::attribute::Config AVC; +typedef search::attribute::BasicType AVBT; +typedef search::attribute::CollectionType AVCT; +typedef search::AttributeVector::SP AttributePtr; +typedef FtTestApp FTA; + +struct SetupFixture +{ + TensorFromWeightedSetBlueprint blueprint; + IndexEnvironment indexEnv; + SetupFixture() + : blueprint(), + indexEnv() + { + } +}; + +TEST_F("require that blueprint can be created from factory", SetupFixture) +{ + EXPECT_TRUE(FTA::assertCreateInstance(f.blueprint, "tensorFromWeightedSet")); +} + +TEST_F("require that setup fails if source spec is invalid", SetupFixture) +{ + FTA::FT_SETUP_FAIL(f.blueprint, f.indexEnv, StringList().add("source(foo)")); +} + +TEST_F("require 
that setup succeeds with attribute source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("attribute(foo)"), + StringList(), StringList().add("tensor")); +} + +TEST_F("require that setup succeeds with query source", SetupFixture) +{ + FTA::FT_SETUP_OK(f.blueprint, f.indexEnv, StringList().add("query(foo)"), + StringList(), StringList().add("tensor")); +} + +struct ExecFixture +{ + BlueprintFactory factory; + FtFeatureTest test; + ExecFixture(const vespalib::string &feature) + : factory(), + test(factory, feature) + { + setup_search_features(factory); + setupAttributeVectors(); + setupQueryEnvironment(); + ASSERT_TRUE(test.setup()); + } + void setupAttributeVectors() { + std::vector<AttributePtr> attrs; + attrs.push_back(AttributeFactory::createAttribute("wsstr", AVC(AVBT::STRING, AVCT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("wsint", AVC(AVBT::INT32, AVCT::WSET))); + attrs.push_back(AttributeFactory::createAttribute("astr", AVC(AVBT::STRING, AVCT::ARRAY))); + + for (const auto &attr : attrs) { + attr->addReservedDoc(); + attr->addDocs(1); + test.getIndexEnv().getAttributeManager().add(attr); + } + + StringAttribute *wsstr = static_cast<StringAttribute *>(attrs[0].get()); + wsstr->append(1, "a", 3); + wsstr->append(1, "b", 5); + wsstr->append(1, "c", 7); + + IntegerAttribute *wsint = static_cast<IntegerAttribute *>(attrs[1].get()); + wsint->append(1, 11, 3); + wsint->append(1, 13, 5); + wsint->append(1, 17, 7); + + for (const auto &attr : attrs) { + attr->commit(); + } + } + void setupQueryEnvironment() { + test.getQueryEnv().getProperties().add("wsquery", "{d:11,e:13,f:17}"); + } + const Tensor &extractTensor() { + const Value::CREF *value = test.resolveObjectFeature(); + ASSERT_TRUE(value != nullptr); + ASSERT_TRUE(value->get().is_tensor()); + return static_cast<const Tensor &>(*value->get().as_tensor()); + } + const Tensor &execute() { + test.executeOnly(); + return extractTensor(); + } +}; + +struct AsTensor { 
+ InterpretedFunction ifun; + InterpretedFunction::Context ctx; + const Value *result; + explicit AsTensor(const vespalib::string &expr) + : ifun(DefaultTensorEngine::ref(), Function::parse(expr)), ctx(), result(&ifun.eval(ctx)) + { + ASSERT_TRUE(result->is_tensor()); + } + bool operator==(const Tensor &rhs) const { return static_cast<const Tensor &>(*result->as_tensor()).equals(rhs); } +}; + +std::ostream &operator<<(std::ostream &os, const AsTensor &my_tensor) { + os << my_tensor.result->as_tensor(); + return os; +} + +TEST_F("require that weighted set string attribute can be converted to tensor (default dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsstr))")) +{ + EXPECT_EQUAL(AsTensor("{ {wsstr:b}:5, {wsstr:c}:7, {wsstr:a}:3 }"), f.execute()); +} + +TEST_F("require that weighted set string attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsstr),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:a}:3, {dim:b}:5, {dim:c}:7 }"), f.execute()); +} + +TEST_F("require that weighted set integer attribute can be converted to tensor (default dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsint))")) +{ + EXPECT_EQUAL(AsTensor("{ {wsint:13}:5, {wsint:17}:7, {wsint:11}:3 }"), f.execute()); +} + +TEST_F("require that weighted set integer attribute can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromWeightedSet(attribute(wsint),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:17}:7, {dim:11}:3, {dim:13}:5 }"), f.execute()); +} + +TEST_F("require that weighted set from query can be converted to tensor (default dimension)", + ExecFixture("tensorFromWeightedSet(query(wsquery))")) +{ + EXPECT_EQUAL(AsTensor("{ {wsquery:f}:17, {wsquery:d}:11, {wsquery:e}:13 }"), f.execute()); +} + +TEST_F("require that weighted set from query can be converted to tensor (explicit dimension)", + ExecFixture("tensorFromWeightedSet(query(wsquery),dim)")) +{ + EXPECT_EQUAL(AsTensor("{ {dim:d}:11, {dim:e}:13, 
{dim:f}:17 }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute does not exists", + ExecFixture("tensorFromWeightedSet(attribute(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if attribute type is not supported", + ExecFixture("tensorFromWeightedSet(attribute(astr))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_F("require that empty tensor is created if query parameter is not found", + ExecFixture("tensorFromWeightedSet(query(null))")) +{ + EXPECT_EQUAL(AsTensor("{ }"), f.execute()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/text_similarity_feature/.gitignore b/searchlib/src/tests/features/text_similarity_feature/.gitignore new file mode 100644 index 00000000000..9ffa5b46a43 --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/.gitignore @@ -0,0 +1 @@ +searchlib_text_similarity_feature_test_app diff --git a/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt new file mode 100644 index 00000000000..e0cb043c8f1 --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_text_similarity_feature_test_app + SOURCES + text_similarity_feature_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_text_similarity_feature_test_app COMMAND searchlib_text_similarity_feature_test_app) diff --git a/searchlib/src/tests/features/text_similarity_feature/FILES b/searchlib/src/tests/features/text_similarity_feature/FILES new file mode 100644 index 00000000000..dfa5173742d --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/FILES @@ -0,0 +1 @@ +text_similarity_feature_test.cpp diff --git a/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp new file mode 100644 index 00000000000..6a6b9d0a48e --- /dev/null +++ b/searchlib/src/tests/features/text_similarity_feature/text_similarity_feature_test.cpp @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/features/text_similarity_feature.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <initializer_list> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +std::vector<vespalib::string> featureNamesFoo() { + std::vector<vespalib::string> f; + f.push_back("textSimilarity(foo).score"); + f.push_back("textSimilarity(foo).proximity"); + f.push_back("textSimilarity(foo).order"); + f.push_back("textSimilarity(foo).queryCoverage"); + f.push_back("textSimilarity(foo).fieldCoverage"); + return f; +} + +const size_t SCORE = 0; +const size_t PROXIMITY = 1; +const size_t ORDER = 2; +const size_t QUERY = 3; +const size_t FIELD = 4; + +FtIndex indexFoo() { + FtIndex idx; + idx.field("foo"); + return idx; +} + +struct BlueprintFactoryFixture { + BlueprintFactory factory; + BlueprintFactoryFixture() : factory() + { + setup_search_features(factory); + } +}; + +struct IndexFixture { + IndexEnvironment indexEnv; + IndexFixture() : indexEnv() + { + IndexEnvironmentBuilder builder(indexEnv); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + builder.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "bar"); + builder.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "baz"); + } +}; + +struct FeatureDumpFixture : public IDumpFeatureVisitor { + std::vector<vespalib::string> expect; + size_t dumped; + virtual void visitDumpFeature(const vespalib::string &name) { + EXPECT_LESS(dumped, expect.size()); + EXPECT_EQUAL(expect[dumped++], name); + } + FeatureDumpFixture() : IDumpFeatureVisitor(), expect(featureNamesFoo()), 
dumped(0) {} +}; + +struct RankFixture : BlueprintFactoryFixture { + RankFixture() : BlueprintFactoryFixture() {} + double get_feature(const vespalib::string &query, const FtIndex &index, size_t select, + bool useStaleMatchData = false) + { + std::vector<vespalib::string> names = featureNamesFoo(); + ASSERT_TRUE(names.size() == 5u); + FtFeatureTest ft(factory, names); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + FtTestApp::FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + RankResult actual; + EXPECT_TRUE(ft.executeOnly(actual, useStaleMatchData ? 2 : 1)); + return actual.getScore(names[select]); + } +}; + +double prox(uint32_t dist) { + return (dist > 8) ? 0 : (1.0 - (((dist-1)/8.0) * ((dist-1)/8.0))); +} + +double comb(std::initializer_list<double> values) { + double sum = 0.0; + for (double value: values) { + sum += value; + } + return (sum/values.size()); +} + +double mix(double proximity, double order, double query, double field) { + return (0.35 * proximity) + (0.15 * order) + (0.30 * query) + (0.20 * field); +} + +TEST_F("require that blueprint can be created from factory", BlueprintFactoryFixture) { + Blueprint::SP bp = f.factory.createBlueprint("textSimilarity"); + EXPECT_TRUE(bp.get() != 0); + EXPECT_TRUE(dynamic_cast<TextSimilarityBlueprint*>(bp.get()) != 0); +} + +TEST_FFF("require that appropriate features are dumped", TextSimilarityBlueprint, IndexFixture, FeatureDumpFixture) { + f1.visitDumpFeatures(f2.indexEnv, f3); + EXPECT_EQUAL(f3.expect.size(), f3.dumped); +} + +TEST_FF("require that setup can be done on single value index field", TextSimilarityBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(foo)", f1.getBaseName().c_str())); + EXPECT_TRUE(((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "foo"))); +} + +TEST_FF("require that setup can not be done on weighted set index field", TextSimilarityBlueprint, IndexFixture) { + 
DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(bar)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "bar"))); +} + +TEST_FF("require that setup can not be done on single value attribute field", TextSimilarityBlueprint, IndexFixture) { + DummyDependencyHandler deps(f1); + f1.setName(vespalib::make_string("%s(baz)", f1.getBaseName().c_str())); + EXPECT_TRUE(!((Blueprint&)f1).setup(f2.indexEnv, std::vector<vespalib::string>(1, "baz"))); +} + +TEST_F("require that no match gives zero outputs", RankFixture) { + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), SCORE)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), PROXIMITY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), QUERY)); + EXPECT_EQUAL(0.0, f1.get_feature("x", indexFoo().element("y"), FIELD)); +} + +TEST_F("require that minal perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), SCORE)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x"), FIELD)); +} + +TEST_F("require that larger perfect match gives max outputs", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), SCORE)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), QUERY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e f g", indexFoo().element("a b c d e f g"), 
FIELD)); +} + +TEST_F("require that extra query terms reduces order but not proximity", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("x y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y", indexFoo().element("x"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("x y y y", indexFoo().element("x"), PROXIMITY)); + + EXPECT_EQUAL(0.0, f1.get_feature("x y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y", indexFoo().element("x"), ORDER)); + EXPECT_EQUAL(0.0, f1.get_feature("x y y y", indexFoo().element("x"), ORDER)); +} + +TEST_F("require that extra field terms reduces proximity but not order", RankFixture) { + EXPECT_EQUAL(prox(2), f1.get_feature("x", indexFoo().element("x y"), PROXIMITY)); + EXPECT_EQUAL(prox(3), f1.get_feature("x", indexFoo().element("x y y"), PROXIMITY)); + EXPECT_EQUAL(prox(4), f1.get_feature("x", indexFoo().element("x y y y"), PROXIMITY)); + + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("x", indexFoo().element("x y y y"), ORDER)); +} + +TEST_F("require that proximity acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("a x x x b c d e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x d x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("a 
x b x c d x x e"), PROXIMITY)); +} + +TEST_F("require that field order does not affect proximity score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("d c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(3), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(4), prox(1), prox(1), prox(1)}), f1.get_feature("a b c d e", indexFoo().element("d x x x c a b e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(2), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a x b x e"), PROXIMITY)); + EXPECT_EQUAL(comb({prox(2), prox(2), prox(1), prox(3)}), f1.get_feature("a b c d e", indexFoo().element("d x c x a b x x e"), PROXIMITY)); +} + +TEST_F("require that order score acts as expected", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a b c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a c e d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b a e d c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e d c b a"), ORDER)); +} + +TEST_F("require that proximity does not affect order score", RankFixture) { + EXPECT_EQUAL(1.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), ORDER)); + EXPECT_EQUAL(comb({1.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("a x b x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 1.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("b x a x c x e x d"), ORDER)); + EXPECT_EQUAL(comb({0.0, 1.0, 0.0, 0.0}), f1.get_feature("a b 
c d e", indexFoo().element("b x a x e x d x c"), ORDER)); + EXPECT_EQUAL(comb({0.0, 0.0, 0.0, 0.0}), f1.get_feature("a b c d e", indexFoo().element("e x d x c x b x a"), ORDER)); +} + +TEST_F("require that query coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), QUERY)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d"), QUERY)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c"), QUERY)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a!200 b!200 c d e", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(2.0/7.0, f1.get_feature("a b c!500", indexFoo().element("a b"), QUERY)); + EXPECT_EQUAL(5.0/7.0, f1.get_feature("a b c!500", indexFoo().element("c"), QUERY)); +} + +TEST_F("require that field coverage acts as expected", RankFixture) { + EXPECT_EQUAL(5.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b c d e"), FIELD)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a x c d e"), FIELD)); + EXPECT_EQUAL(3.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a b x x e"), FIELD)); + EXPECT_EQUAL(2.0/5.0, f1.get_feature("a b c d e", indexFoo().element("x x x d e"), FIELD)); +} + +TEST_F("require that first unique match is used per query term", RankFixture) { + EXPECT_EQUAL(prox(3), f1.get_feature("a b", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a b", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(2.0/4.0, f1.get_feature("a b", indexFoo().element("a a a b"), FIELD)); + + EXPECT_EQUAL(comb({prox(1), prox(2)}), f1.get_feature("a b a", indexFoo().element("a a a b"), PROXIMITY)); + EXPECT_EQUAL(0.5, f1.get_feature("a b a", indexFoo().element("a a a b"), ORDER)); + EXPECT_EQUAL(1.0, f1.get_feature("a 
b a", indexFoo().element("a a a b"), QUERY)); + EXPECT_EQUAL(3.0/4.0, f1.get_feature("a b a", indexFoo().element("a a a b"), FIELD)); +} + +TEST_F("require that overall score combines individual signals appropriately", RankFixture) { + EXPECT_EQUAL(comb({prox(1), prox(3), prox(2)}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), PROXIMITY)); + EXPECT_EQUAL(comb({1.0, 0.0, 1.0}), f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), ORDER)); + EXPECT_EQUAL(4.0/5.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), QUERY)); + EXPECT_EQUAL(4.0/7.0, f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), FIELD)); + EXPECT_EQUAL(mix(comb({prox(1), prox(3), prox(2)}), comb({1.0, 0.0, 1.0}), 4.0/5.0, 4.0/7.0), + f1.get_feature("a b c d e", indexFoo().element("a c x x b x d"), SCORE)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/features/util/.gitignore b/searchlib/src/tests/features/util/.gitignore new file mode 100644 index 00000000000..14e50fdaf47 --- /dev/null +++ b/searchlib/src/tests/features/util/.gitignore @@ -0,0 +1 @@ +searchlib_util_test_app diff --git a/searchlib/src/tests/features/util/CMakeLists.txt b/searchlib/src/tests/features/util/CMakeLists.txt new file mode 100644 index 00000000000..95a0bf3b45d --- /dev/null +++ b/searchlib/src/tests/features/util/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_util_test_app + SOURCES + util_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_util_test_app COMMAND searchlib_util_test_app) diff --git a/searchlib/src/tests/features/util/FILES b/searchlib/src/tests/features/util/FILES new file mode 100644 index 00000000000..f0bd0a06305 --- /dev/null +++ b/searchlib/src/tests/features/util/FILES @@ -0,0 +1 @@ +util_test.cpp diff --git a/searchlib/src/tests/features/util/util_test.cpp b/searchlib/src/tests/features/util/util_test.cpp new file mode 100644 index 00000000000..d2f97631d0f --- /dev/null +++ b/searchlib/src/tests/features/util/util_test.cpp @@ -0,0 +1,40 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/utils.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> + +using namespace search; +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +SimpleTermData make_term(uint32_t uid) { + SimpleTermData term; + term.setUniqueId(uid); + return term; +} + +struct TermLabelFixture { + IndexEnvironment indexEnv; + QueryEnvironment queryEnv; + TermLabelFixture() : indexEnv(), queryEnv(&indexEnv) { + queryEnv.getTerms().push_back(make_term(5)); + queryEnv.getTerms().push_back(make_term(0)); + queryEnv.getTerms().push_back(make_term(10)); + queryEnv.getProperties().add("vespa.label.foo.id", "5"); + queryEnv.getProperties().add("vespa.label.bar.id", "0"); // undefined uid + queryEnv.getProperties().add("vespa.label.baz.id", "10"); + queryEnv.getProperties().add("vespa.label.fox.id", "7"); // non-existing + } +}; + +TEST_F("require that label can be mapped to term", TermLabelFixture) { + EXPECT_EQUAL((ITermData*)&f1.queryEnv.getTerms()[0], util::getTermByLabel(f1.queryEnv, "foo")); + EXPECT_EQUAL((ITermData*)0, 
util::getTermByLabel(f1.queryEnv, "bar")); + EXPECT_EQUAL((ITermData*)&f1.queryEnv.getTerms()[2], util::getTermByLabel(f1.queryEnv, "baz")); + EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "fox")); + EXPECT_EQUAL((ITermData*)0, util::getTermByLabel(f1.queryEnv, "unknown")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/.gitignore b/searchlib/src/tests/fef/.gitignore new file mode 100644 index 00000000000..ff604ccaf00 --- /dev/null +++ b/searchlib/src/tests/fef/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +fef_test +searchlib_fef_test_app diff --git a/searchlib/src/tests/fef/CMakeLists.txt b/searchlib/src/tests/fef/CMakeLists.txt new file mode 100644 index 00000000000..a239ba972c3 --- /dev/null +++ b/searchlib/src/tests/fef/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fef_test_app + SOURCES + fef_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fef_test_app COMMAND searchlib_fef_test_app) diff --git a/searchlib/src/tests/fef/DESC b/searchlib/src/tests/fef/DESC new file mode 100644 index 00000000000..431ee7a1a1f --- /dev/null +++ b/searchlib/src/tests/fef/DESC @@ -0,0 +1 @@ +fef test. Take a look at fef.cpp for details. 
diff --git a/searchlib/src/tests/fef/FILES b/searchlib/src/tests/fef/FILES new file mode 100644 index 00000000000..7e6752e501e --- /dev/null +++ b/searchlib/src/tests/fef/FILES @@ -0,0 +1 @@ +fef.cpp diff --git a/searchlib/src/tests/fef/attributecontent/.gitignore b/searchlib/src/tests/fef/attributecontent/.gitignore new file mode 100644 index 00000000000..dd57ee57362 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +attributecontent_test +searchlib_attributecontent_test_app diff --git a/searchlib/src/tests/fef/attributecontent/CMakeLists.txt b/searchlib/src/tests/fef/attributecontent/CMakeLists.txt new file mode 100644 index 00000000000..84cdb3d4fce --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_attributecontent_test_app + SOURCES + attributecontent_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_attributecontent_test_app COMMAND searchlib_attributecontent_test_app) diff --git a/searchlib/src/tests/fef/attributecontent/DESC b/searchlib/src/tests/fef/attributecontent/DESC new file mode 100644 index 00000000000..fa1c457c573 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/DESC @@ -0,0 +1 @@ +attributecontent test. Take a look at attributecontent.cpp for details. 
diff --git a/searchlib/src/tests/fef/attributecontent/FILES b/searchlib/src/tests/fef/attributecontent/FILES new file mode 100644 index 00000000000..4325e907b45 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/FILES @@ -0,0 +1 @@ +attributecontent.cpp diff --git a/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp b/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp new file mode 100644 index 00000000000..66430994016 --- /dev/null +++ b/searchlib/src/tests/fef/attributecontent/attributecontent_test.cpp @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("attributecontent_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchcommon/attribute/attributecontent.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/integerbase.h> + +#include <vespa/searchlib/attribute/attributevector.hpp> + +using namespace search::attribute; + +namespace search { +namespace fef { + +class Test : public vespalib::TestApp { +private: + void testWriteAndRead(); + void testFill(); + +public: + int Main(); +}; + +void +Test::testWriteAndRead() +{ + typedef search::attribute::AttributeContent<uint32_t> UintContent; + UintContent buf; + EXPECT_EQUAL(buf.capacity(), 16u); + EXPECT_EQUAL(buf.size(), 0u); + + uint32_t i; + uint32_t * data; + const uint32_t * itr; + for (i = 0, data = buf.data(); i < 16; ++i, ++data) { + *data = i; + } + buf.setSize(16); + EXPECT_EQUAL(buf.size(), 16u); + for (i = 0, itr = buf.begin(); itr != buf.end(); ++i, ++itr) { + EXPECT_EQUAL(*itr, i); + EXPECT_EQUAL(buf[i], i); + } + EXPECT_EQUAL(i, 16u); + + buf.allocate(10); + EXPECT_EQUAL(buf.capacity(), 16u); + EXPECT_EQUAL(buf.size(), 16u); + buf.allocate(32); + EXPECT_EQUAL(buf.capacity(), 32u); + EXPECT_EQUAL(buf.size(), 0u); + + for (i = 0, data = 
buf.data(); i < 32; ++i, ++data) { + *data = i; + } + buf.setSize(32); + EXPECT_EQUAL(buf.size(), 32u); + for (i = 0, itr = buf.begin(); itr != buf.end(); ++i, ++itr) { + EXPECT_EQUAL(*itr, i); + EXPECT_EQUAL(buf[i], i); + } + EXPECT_EQUAL(i, 32u); +} + +void +Test::testFill() +{ + Config cfg(BasicType::INT32, CollectionType::ARRAY); + AttributeVector::SP av = AttributeFactory::createAttribute("aint32", cfg); + av->addDocs(2); + IntegerAttribute * ia = static_cast<IntegerAttribute *>(av.get()); + ia->append(0, 10, 0); + ia->append(1, 20, 0); + ia->append(1, 30, 0); + av->commit(); + const IAttributeVector & iav = *av.get(); + IntegerContent buf; + buf.fill(iav, 0); + EXPECT_EQUAL(1u, buf.size()); + EXPECT_EQUAL(10, buf[0]); + buf.fill(iav, 1); + EXPECT_EQUAL(2u, buf.size()); + EXPECT_EQUAL(20, buf[0]); + EXPECT_EQUAL(30, buf[1]); + buf.fill(iav, 0); + EXPECT_EQUAL(1u, buf.size()); + EXPECT_EQUAL(10, buf[0]); +} + +int +Test::Main() +{ + TEST_INIT("attributecontent_test"); + + testWriteAndRead(); + testFill(); + + TEST_DONE(); +} + +} // namespace fef +} // namespace search + +TEST_APPHOOK(search::fef::Test); diff --git a/searchlib/src/tests/fef/featurenamebuilder/.gitignore b/searchlib/src/tests/fef/featurenamebuilder/.gitignore new file mode 100644 index 00000000000..781f49956a9 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +featurenamebuilder_test +searchlib_featurenamebuilder_test_app diff --git a/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt b/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt new file mode 100644 index 00000000000..167642c1337 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_featurenamebuilder_test_app + SOURCES + featurenamebuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurenamebuilder_test_app COMMAND searchlib_featurenamebuilder_test_app) diff --git a/searchlib/src/tests/fef/featurenamebuilder/DESC b/searchlib/src/tests/fef/featurenamebuilder/DESC new file mode 100644 index 00000000000..38abf1af794 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/DESC @@ -0,0 +1 @@ +featurenamebuilder test. Take a look at featurenamebuilder.cpp for details. diff --git a/searchlib/src/tests/fef/featurenamebuilder/FILES b/searchlib/src/tests/fef/featurenamebuilder/FILES new file mode 100644 index 00000000000..71df1d1033f --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/FILES @@ -0,0 +1 @@ +featurenamebuilder.cpp diff --git a/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp b/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp new file mode 100644 index 00000000000..0e574c776b5 --- /dev/null +++ b/searchlib/src/tests/fef/featurenamebuilder/featurenamebuilder_test.cpp @@ -0,0 +1,78 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("featurenamebuilder_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/fef/featurenamebuilder.h> + +using namespace search::fef; + +typedef FeatureNameBuilder B; + +TEST_SETUP(Test); + +int +Test::Main() +{ + TEST_INIT("featurenamebuilder_test"); + + // normal cases + EXPECT_EQUAL(B().baseName("foo").buildName(), "foo"); + EXPECT_EQUAL(B().baseName("foo").output("out").buildName(), "foo.out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").buildName(), "foo(a,b)"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").buildName(), "foo(a,b).out"); + + // empty base = empty name + EXPECT_EQUAL(B().baseName("").buildName(), ""); + EXPECT_EQUAL(B().baseName("").output("out").buildName(), ""); + EXPECT_EQUAL(B().baseName("").parameter("a").parameter("b").buildName(), ""); + EXPECT_EQUAL(B().baseName("").parameter("a").parameter("b").output("out").buildName(), ""); + + // quoting + EXPECT_EQUAL(B().baseName("foo").parameter("a,b").output("out").buildName(), "foo(\"a,b\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a\\").output("out").buildName(), "foo(\"a\\\\\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a)").output("out").buildName(), "foo(\"a)\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter(" ").output("out").buildName(), "foo(\" \").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("\"").output("out").buildName(), "foo(\"\\\"\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("\\\t\n\r\f\x15").output("out").buildName(), "foo(\"\\\\\\t\\n\\r\\f\\x15\").out"); + EXPECT_EQUAL(B().baseName("foo").parameter("\\\t\n\r\f\x20").output("out").buildName(), "foo(\"\\\\\\t\\n\\r\\f \").out"); + + // empty parameters + EXPECT_EQUAL(B().baseName("foo").parameter("").output("out").buildName(), "foo().out"); + EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("").output("out").buildName(), 
"foo(,).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("").parameter("").output("out").buildName(), "foo(,,).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("").parameter("x").parameter("").output("out").buildName(), "foo(,x,).out"); + + // test change components + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").buildName(), "foo(a,b).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").baseName("bar").buildName(), "bar(a,b).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").clearParameters().buildName(), "foo.out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").clearParameters().parameter("x").buildName(), "foo(x).out"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").output("").buildName(), "foo(a,b)"); + EXPECT_EQUAL(B().baseName("foo").parameter("a").parameter("b").output("out").output("len").buildName(), "foo(a,b).len"); + + // test exact quote vs non-quote + EXPECT_EQUAL(B().baseName("foo").parameter("a").buildName(), "foo(a)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a").buildName(), "foo(\" a\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("a.out").buildName(), "foo(a.out)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a.out").buildName(), "foo(\" a.out\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a,b)").buildName(), "foo(bar(a,b))"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a, b)").buildName(), "foo(\"bar(a, b)\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a,b).out").buildName(), "foo(bar(a,b).out)"); + EXPECT_EQUAL(B().baseName("foo").parameter("bar(a, b).out").buildName(), "foo(\"bar(a, b).out\")"); + + // test non-exact quote vs non-quote + EXPECT_EQUAL(B().baseName("foo").parameter(" \t\n\r\f", false).buildName(), "foo()"); + EXPECT_EQUAL(B().baseName("foo").parameter(" \t\n\r\fbar ", false).buildName(), "foo(bar)"); + 
EXPECT_EQUAL(B().baseName("foo").parameter(" bar ", false).buildName(), "foo(bar)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a b ", false).buildName(), "foo(\" a b \")"); + EXPECT_EQUAL(B().baseName("foo").parameter("a%", false).buildName(), "foo(\"a%\")"); + EXPECT_EQUAL(B().baseName("foo").parameter("foo\"\\", false).buildName(), "foo(\"foo\\\"\\\\\")"); + EXPECT_EQUAL(B().baseName("foo").parameter(" a . out ", false).buildName(), "foo(a.out)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) ", false).buildName(), "foo(bar(a,b))"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out ", false).buildName(), "foo(bar(a,b).out)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out.2 ", false).buildName(), "foo(bar(a,b).out.2)"); + EXPECT_EQUAL(B().baseName("foo").parameter(" bar ( a , b ) . out . 2 ", false).buildName(), "foo(\" bar ( a , b ) . out . 2 \")"); + + TEST_DONE(); +} diff --git a/searchlib/src/tests/fef/featurenameparser/.gitignore b/searchlib/src/tests/fef/featurenameparser/.gitignore new file mode 100644 index 00000000000..f16080e9791 --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +featurenameparser_test +searchlib_featurenameparser_test_app diff --git a/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt b/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt new file mode 100644 index 00000000000..e313ee24deb --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_featurenameparser_test_app + SOURCES + featurenameparser_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurenameparser_test_app COMMAND searchlib_featurenameparser_test_app) diff --git a/searchlib/src/tests/fef/featurenameparser/DESC b/searchlib/src/tests/fef/featurenameparser/DESC new file mode 100644 index 00000000000..4c3da4e47a2 --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/DESC @@ -0,0 +1 @@ +featurenameparser test. Take a look at featurenameparser.cpp for details. diff --git a/searchlib/src/tests/fef/featurenameparser/FILES b/searchlib/src/tests/fef/featurenameparser/FILES new file mode 100644 index 00000000000..4567d5b7ccc --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/FILES @@ -0,0 +1 @@ +featurenameparser.cpp diff --git a/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp b/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp new file mode 100644 index 00000000000..2824f5ef8fc --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/featurenameparser_test.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("featurenameparser_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/fef/featurenameparser.h> +#include <vector> +#include <string> + +using namespace search::fef; + +struct ParamList { + std::vector<vespalib::string> list; + ParamList() : list() {} + ParamList(const std::vector<vespalib::string> &l) : list(l) {} + ParamList &add(const vespalib::string &str) { + list.push_back(str); + return *this; + } + bool operator==(const ParamList &rhs) const { + return rhs.list == list; + } +}; + +std::ostream &operator<<(std::ostream &os, const ParamList &pl) { + os << std::endl; + for (uint32_t i = 0; i < pl.list.size(); ++i) { + os << " " << pl.list[i] << std::endl; + } + return os; +} + +class Test : public vespalib::TestApp +{ +public: + bool testParse(const vespalib::string &input, bool valid, + const vespalib::string &base, ParamList pl, + const vespalib::string &output); + void testFile(const vespalib::string &name); + int Main(); +}; + +bool +Test::testParse(const vespalib::string &input, bool valid, + const vespalib::string &base, ParamList pl, + const vespalib::string &output) +{ + bool ok = true; + FeatureNameParser parser(input); + if (!parser.valid()) { + LOG(warning, "parse error: input:'%s', rest:'%s'", + input.c_str(), input.substr(parser.parsedBytes()).c_str()); + } + ok &= EXPECT_EQUAL(parser.valid(), valid); + ok &= EXPECT_EQUAL(parser.baseName(), base); + ok &= EXPECT_EQUAL(ParamList(parser.parameters()), pl); + ok &= EXPECT_EQUAL(parser.output(), output); + return ok; +} + +void +Test::testFile(const vespalib::string &name) +{ + char buf[4096]; + uint32_t lineN = 0; + FILE *f = fopen(name.c_str(), "r"); + ASSERT_TRUE(f != 0); + while (fgets(buf, sizeof(buf), f) != NULL) { + ++lineN; + vespalib::string line(buf); + if (*line.rbegin() == '\n') { + line.resize(line.size() - 1); + } + if (line.empty() || line[0] == '#') { + continue; + } + uint32_t idx = 
line.find("<=>"); + if (!EXPECT_TRUE(idx < line.size())) { + LOG(error, "(%s:%u): malformed line: '%s'", + name.c_str(), lineN, line.c_str()); + } else { + vespalib::string input = line.substr(0, idx); + vespalib::string expect = line.substr(idx + strlen("<=>")); + if (!EXPECT_EQUAL(FeatureNameParser(input).featureName(), expect)) { + LOG(error, "(%s:%u): test failed: '%s'", + name.c_str(), lineN, line.c_str()); + } + } + } + ASSERT_TRUE(!ferror(f)); + fclose(f); +} + +int +Test::Main() +{ + TEST_INIT("featurenameparser_test"); + + // normal cases + EXPECT_TRUE(testParse("foo", true, "foo", ParamList(), "")); + EXPECT_TRUE(testParse("foo.out", true, "foo", ParamList(), "out")); + EXPECT_TRUE(testParse("foo(a)", true, "foo", ParamList().add("a"), "")); + EXPECT_TRUE(testParse("foo(a,b)", true, "foo", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo(a,b).out", true, "foo", ParamList().add("a").add("b"), "out")); + + // @ in feature name (for macros) + EXPECT_TRUE(testParse("foo@", true, "foo@", ParamList(), "")); + EXPECT_TRUE(testParse("foo@.out", true, "foo@", ParamList(), "out")); + EXPECT_TRUE(testParse("foo@(a)", true, "foo@", ParamList().add("a"), "")); + EXPECT_TRUE(testParse("foo@(a,b)", true, "foo@", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo@(a,b).out", true, "foo@", ParamList().add("a").add("b"), "out")); + + // $ in feature name (for macros) + EXPECT_TRUE(testParse("foo$", true, "foo$", ParamList(), "")); + EXPECT_TRUE(testParse("foo$.out", true, "foo$", ParamList(), "out")); + EXPECT_TRUE(testParse("foo$(a)", true, "foo$", ParamList().add("a"), "")); + EXPECT_TRUE(testParse("foo$(a,b)", true, "foo$", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo$(a,b).out", true, "foo$", ParamList().add("a").add("b"), "out")); + + // de-quoting of parameters + EXPECT_TRUE(testParse("foo(a,\"b\")", true, "foo", ParamList().add("a").add("b"), "")); + EXPECT_TRUE(testParse("foo(a,\" b \")", true, "foo", 
ParamList().add("a").add(" b "), "")); + EXPECT_TRUE(testParse("foo( \"a\" , \" b \" )", true, "foo", ParamList().add("a").add(" b "), "")); + EXPECT_TRUE(testParse("foo(\"\\\"\\\\\\t\\n\\r\\f\\x20\")", true, "foo", ParamList().add("\"\\\t\n\r\f "), "")); + + // only default output if '.' not specified + EXPECT_TRUE(testParse("foo.", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a,b).", false, "", ParamList(), "")); + + // string cannot end in parameter list + EXPECT_TRUE(testParse("foo(", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a\\", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a\\)", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a,", false, "", ParamList(), "")); + EXPECT_TRUE(testParse("foo(a,b", false, "", ParamList(), "")); + + // empty parameters + EXPECT_TRUE(testParse("foo()", true, "foo", ParamList().add(""), "")); + EXPECT_TRUE(testParse("foo(,)", true, "foo", ParamList().add("").add(""), "")); + EXPECT_TRUE(testParse("foo(,,)", true, "foo", ParamList().add("").add("").add(""), "")); + EXPECT_TRUE(testParse("foo(,x,)", true, "foo", ParamList().add("").add("x").add(""), "")); + EXPECT_TRUE(testParse("foo( )", true, "foo", ParamList().add(""), "")); + EXPECT_TRUE(testParse("foo( , , )", true, "foo", ParamList().add("").add("").add(""), "")); + EXPECT_TRUE(testParse("foo( \t , \n , \r , \f )", true, "foo", ParamList().add("").add("").add("").add(""), "")); + + testFile("parsetest.txt"); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/fef/featurenameparser/parsetest.txt b/searchlib/src/tests/fef/featurenameparser/parsetest.txt new file mode 100644 index 00000000000..ce9db595eca --- /dev/null +++ b/searchlib/src/tests/fef/featurenameparser/parsetest.txt @@ -0,0 +1,55 @@ +# This file is used to test feature name parsing. The file format is +# as follows: Empty lines and lines starting with '#' will be +# ignored. 
Other lines must be on the form +# "<input>'<=>'<expected_output>". The parser will be run on the +# input, and the normalized feature name will be compared to the +# expected output. If they match the test passes, if they don't match +# the test fails. The normalized feature name in the case of a parse +# error is the empty string. When parsing this file, no whitespace +# skipping is allowed inside the input or the expected output. To +# simplify things, the byte sequence '<=>' may not be used anywhere +# else than as a separator between the input and the expected +# output. Malformed lines will result in a failed test. + +# basic normalization + foo . out <=>foo.out + foo ( a , b ) . out <=>foo(a,b).out + foo ( a , b , "") . out <=>foo(a,b,).out + foo ( bar ( a ) , b , "") . out <=>foo(bar(a),b,).out + +# basic parse errors +<=> + <=> +foo(<=> +foo(,<=> +foo().<=> +foo(a b)<=> +foo(bar(a b))<=> +foo . a . b<=> + +#quoting +foo("a b")<=>foo("a b") +foo(bar("a b"))<=>foo(bar("a b")) +foo("\"bar\"")<=>foo("\"bar\"") +foo( "bar(x)" )<=>foo(bar(x)) +foo( "bar( x )" )<=>foo("bar( x )") +foo("xyz")<=>foo(xyz) +foo("\\\t\n\r\f\x10")<=>foo("\\\t\n\r\f\x10") +foo("\y")<=> +foo("\x05")<=>foo("\x05") +foo("\x00")<=> +foo("\")<=> +foo("abc<=> +foo("\x5")<=> +foo("\x31\x32\x33")<=>foo(123) + +# my current favorite pair :) +foo("bar(\"x\")")<=>foo("bar(\"x\")") +foo("bar(\"x \")")<=>foo(bar("x ")) + +# might want to disallow non-printables inside quotes... +foo(" ")<=>foo("\t") + +#some more fancy normalization tests + foo ( a , b ) . out <=>foo(a,b).out + foo ( "", bar ( baz ( a, "" ) , "" ) , b , " ") . 
out <=>foo(,bar(baz(a,),),b," ").out diff --git a/searchlib/src/tests/fef/featureoverride/.gitignore b/searchlib/src/tests/fef/featureoverride/.gitignore new file mode 100644 index 00000000000..35285582ceb --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +featureoverride_test +searchlib_featureoverride_test_app diff --git a/searchlib/src/tests/fef/featureoverride/CMakeLists.txt b/searchlib/src/tests/fef/featureoverride/CMakeLists.txt new file mode 100644 index 00000000000..23370d51d22 --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_featureoverride_test_app + SOURCES + featureoverride.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featureoverride_test_app COMMAND searchlib_featureoverride_test_app) diff --git a/searchlib/src/tests/fef/featureoverride/DESC b/searchlib/src/tests/fef/featureoverride/DESC new file mode 100644 index 00000000000..1605959dae6 --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/DESC @@ -0,0 +1 @@ +featureoverride test. Take a look at featureoverride.cpp for details. diff --git a/searchlib/src/tests/fef/featureoverride/FILES b/searchlib/src/tests/fef/featureoverride/FILES new file mode 100644 index 00000000000..864ca65657a --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/FILES @@ -0,0 +1 @@ +featureoverride.cpp diff --git a/searchlib/src/tests/fef/featureoverride/featureoverride.cpp b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp new file mode 100644 index 00000000000..b0929f50fa9 --- /dev/null +++ b/searchlib/src/tests/fef/featureoverride/featureoverride.cpp @@ -0,0 +1,175 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("featureoverride_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/fef/fef.h> + +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/plugin/double.h> +#include <vespa/searchlib/fef/test/plugin/sum.h> +#include <vespa/searchlib/features/valuefeature.h> + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using search::feature_t; + +typedef FeatureExecutor::LP FESP; +typedef Blueprint::SP BPSP; + +struct Fixture +{ + MatchDataLayout mdl; + std::vector<FeatureExecutor::LP> executors; + MatchData::UP md; + Fixture() : mdl(), executors(), md() {} + Fixture &add(FeatureExecutor::LP &executor, size_t outCnt) { + executor->inputs_done(); + for (uint32_t outIdx = 0; outIdx < outCnt; ++outIdx) { + executor->bindOutput(mdl.allocFeature()); + } + executor->outputs_done(); + executors.push_back(executor); + return *this; + } + Fixture &run() { + md = mdl.createMatchData(); + for (const auto &executor : executors) { + executor->execute(*md); + } + return *this; + } + feature_t resolveFeature(FeatureHandle handle) { + return *md->resolveFeature(handle); + } + FESP createValueExecutor() { + std::vector<feature_t> values; + values.push_back(1.0); + values.push_back(2.0); + values.push_back(3.0); + return FESP(new ValueExecutor(values)); + } +}; + +TEST_F("test decorator - single override", Fixture) +{ + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 1, 50.0)); + f.add(fe, 3).run(); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 50.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0); +} + +TEST_F("test decorator - multiple overrides", Fixture) +{ + FESP fe = f.createValueExecutor(); + fe = FESP(new 
FeatureOverrider(fe, 0, 50.0)); + fe = FESP(new FeatureOverrider(fe, 2, 100.0)); + f.add(fe, 3).run(); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 50.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 2.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 100.0); +} + +TEST_F("test decorator - non-existing override", Fixture) +{ + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 1000, 50.0)); + f.add(fe, 3).run(); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 2.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0); +} + +TEST_F("test decorator - transitive override", Fixture) +{ + FeatureExecutor::SharedInputs inputs; + FESP fe = f.createValueExecutor(); + fe = FESP(new FeatureOverrider(fe, 1, 50.0)); + f.add(fe, 3); + EXPECT_EQUAL(fe->outputs().size(), 3u); + + FESP fe2 = FESP(new DoubleExecutor(3)); + fe2->bind_shared_inputs(inputs); + fe2->addInput(fe->outputs()[0]); + fe2->addInput(fe->outputs()[1]); + fe2->addInput(fe->outputs()[2]); + fe2 = FESP(new FeatureOverrider(fe2, 2, 10.0)); + f.add(fe2, 3).run(); + EXPECT_EQUAL(fe2->outputs().size(), 3u); + + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[0]), 1.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[1]), 50.0); + EXPECT_EQUAL(f.resolveFeature(fe->outputs()[2]), 3.0); + EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[0]), 2.0); + EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[1]), 100.0); + EXPECT_EQUAL(f.resolveFeature(fe2->outputs()[2]), 10.0); +} + +TEST("test overrides") +{ + BlueprintFactory bf; + bf.addPrototype(BPSP(new ValueBlueprint())); + bf.addPrototype(BPSP(new DoubleBlueprint())); + bf.addPrototype(BPSP(new SumBlueprint())); + + IndexEnvironment idxEnv; + RankSetup rs(bf, idxEnv); + + rs.addDumpFeature("value(1,2,3)"); + rs.addDumpFeature("double(value(1))"); + rs.addDumpFeature("double(value(2))"); + 
rs.addDumpFeature("double(value(3))"); + rs.addDumpFeature("mysum(value(2),value(2))"); + rs.addDumpFeature("mysum(value(1),value(2),value(3))"); + EXPECT_TRUE(rs.compile()); + + RankProgram::UP rankProgram = rs.create_dump_program(); + + MatchDataLayout mdl; + QueryEnvironment queryEnv; + Properties overrides; + + overrides.add("value(2)", "20.0"); + overrides.add("value(1,2,3).1", "4.0"); + overrides.add("value(1,2,3).2", "6.0"); + overrides.add("bogus(feature)", "10.0"); + + rankProgram->setup(mdl, queryEnv, overrides); + rankProgram->run(2); + + std::map<vespalib::string, feature_t> res = Utils::getAllFeatures(*rankProgram); + + EXPECT_EQUAL(res.size(), 20u); + EXPECT_APPROX(res["value(1)"], 1.0, 1e-6); + EXPECT_APPROX(res["value(1).0"], 1.0, 1e-6); + EXPECT_APPROX(res["value(2)"], 20.0, 1e-6); + EXPECT_APPROX(res["value(2).0"], 20.0, 1e-6); + EXPECT_APPROX(res["value(3)"], 3.0, 1e-6); + EXPECT_APPROX(res["value(3).0"], 3.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3)"], 1.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3).0"], 1.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3).1"], 4.0, 1e-6); + EXPECT_APPROX(res["value(1,2,3).2"], 6.0, 1e-6); + EXPECT_APPROX(res["mysum(value(2),value(2))"], 40.0, 1e-6); + EXPECT_APPROX(res["mysum(value(2),value(2)).out"], 40.0, 1e-6); + EXPECT_APPROX(res["mysum(value(1),value(2),value(3))"], 24.0, 1e-6); + EXPECT_APPROX(res["mysum(value(1),value(2),value(3)).out"], 24.0, 1e-6); + EXPECT_APPROX(res["double(value(1))"], 2.0, 1e-6); + EXPECT_APPROX(res["double(value(1)).0"], 2.0, 1e-6); + EXPECT_APPROX(res["double(value(2))"], 40.0, 1e-6); + EXPECT_APPROX(res["double(value(2)).0"], 40.0, 1e-6); + EXPECT_APPROX(res["double(value(3))"], 6.0, 1e-6); + EXPECT_APPROX(res["double(value(3)).0"], 6.0, 1e-6); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/fef_test.cpp b/searchlib/src/tests/fef/fef_test.cpp new file mode 100644 index 00000000000..b3107e57fae --- /dev/null +++ b/searchlib/src/tests/fef/fef_test.cpp @@ -0,0 
+1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fef_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/objectstore.h> + +using namespace search::fef; +using std::shared_ptr; +using search::feature_t; + +class Test : public vespalib::TestApp +{ +public: + void testLayout(); + void testObjectStore(); + void testTermFieldMatchDataAppend(); + int Main(); +}; + +void +Test::testLayout() +{ + { + TermFieldMatchData tmd; + EXPECT_EQUAL(IllegalFieldId, tmd.getFieldId()); + EXPECT_EQUAL(TermFieldMatchData::invalidId(), tmd.getDocId()); + } + MatchDataLayout mdl; + EXPECT_EQUAL(mdl.allocTermField(0), 0u); + EXPECT_EQUAL(mdl.allocTermField(42), 1u); + EXPECT_EQUAL(mdl.allocTermField(IllegalFieldId), 2u); + EXPECT_EQUAL(mdl.allocFeature(), 0u); + EXPECT_EQUAL(mdl.allocFeature(), 1u); + EXPECT_EQUAL(mdl.allocFeature(), 2u); + + MatchData::UP md = mdl.createMatchData(); + EXPECT_EQUAL(TermFieldMatchData::invalidId(), md->getDocId()); + EXPECT_EQUAL(md->getNumTermFields(), 3u); + EXPECT_EQUAL(md->getNumFeatures(), 3u); + TermFieldMatchData *t0 = md->resolveTermField(0); + TermFieldMatchData *t1 = md->resolveTermField(1); + TermFieldMatchData *t2 = md->resolveTermField(2); + EXPECT_EQUAL(t1, t0 + 1); + EXPECT_EQUAL(t2, t1 + 1); + EXPECT_EQUAL(0u, t0->getFieldId()); + EXPECT_EQUAL(42u, t1->getFieldId()); + EXPECT_EQUAL(IllegalFieldId, t2->getFieldId()); + feature_t *f0 = md->resolveFeature(0); + feature_t *f1 = md->resolveFeature(1); + feature_t *f2 = md->resolveFeature(2); + EXPECT_EQUAL(f1, f0 + 1); + EXPECT_EQUAL(f2, f1 + 1); + EXPECT_TRUE((void*)t2 < (void*)f0 || (void*)f2 < (void*)t0); +} + +void +Test::testObjectStore() +{ + ObjectStore s; + class Object : public Anything { + }; + Anything::UP u1(new Object()); + Anything::UP u11(new Object()); + 
Anything::UP u2(new Object()); + const Anything * o1(u1.get()); + const Anything * o11(u11.get()); + const Anything * o2(u2.get()); + EXPECT_TRUE(nullptr == s.get("a")); + s.add("a", std::move(u1)); + EXPECT_EQUAL(o1, s.get("a")); + EXPECT_TRUE(nullptr == s.get("b")); + s.add("b", std::move(u2)); + EXPECT_EQUAL(o1, s.get("a")); + EXPECT_EQUAL(o2, s.get("b")); + s.add("a", std::move(u11)); + EXPECT_EQUAL(o11, s.get("a")); +} + +void +Test::testTermFieldMatchDataAppend() +{ + TermFieldMatchData tmd; + EXPECT_EQUAL(0u, tmd.size()); + EXPECT_EQUAL(1u, tmd.capacity()); + TermFieldMatchDataPosition pos; + tmd.appendPosition(pos); + EXPECT_EQUAL(1u, tmd.size()); + EXPECT_EQUAL(1u, tmd.capacity()); + tmd.appendPosition(pos); + EXPECT_EQUAL(2u, tmd.size()); + EXPECT_EQUAL(2u, tmd.capacity()); + for (size_t i(2); i < std::numeric_limits<uint16_t>::max(); i++) { + EXPECT_EQUAL(i, tmd.size()); + EXPECT_EQUAL(std::min(size_t(std::numeric_limits<uint16_t>::max()), vespalib::roundUp2inN(i)), tmd.capacity()); + tmd.appendPosition(pos); + } + EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.size()); + EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.capacity()); + tmd.appendPosition(pos); + EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.size()); + EXPECT_EQUAL(std::numeric_limits<uint16_t>::max(), tmd.capacity()); +} + +int +Test::Main() +{ + TEST_INIT("fef_test"); + testLayout(); + testObjectStore(); + testTermFieldMatchDataAppend(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/fef/object_passing/.gitignore b/searchlib/src/tests/fef/object_passing/.gitignore new file mode 100644 index 00000000000..64b250201a8 --- /dev/null +++ b/searchlib/src/tests/fef/object_passing/.gitignore @@ -0,0 +1 @@ +searchlib_object_passing_test_app diff --git a/searchlib/src/tests/fef/object_passing/CMakeLists.txt b/searchlib/src/tests/fef/object_passing/CMakeLists.txt new file mode 100644 index 00000000000..2334711f015 --- /dev/null +++ 
b/searchlib/src/tests/fef/object_passing/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_object_passing_test_app + SOURCES + object_passing_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_object_passing_test_app COMMAND searchlib_object_passing_test_app) diff --git a/searchlib/src/tests/fef/object_passing/object_passing_test.cpp b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp new file mode 100644 index 00000000000..69c681d8f60 --- /dev/null +++ b/searchlib/src/tests/fef/object_passing/object_passing_test.cpp @@ -0,0 +1,128 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/features/valuefeature.h> +#include <vespa/searchlib/fef/blueprintfactory.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/plugin/sum.h> +#include <vespa/searchlib/fef/rank_program.h> +#include <vespa/searchlib/fef/verify_feature.h> +#include <vespa/vespalib/eval/value_type.h> +#include <vespa/searchlib/fef/feature_type.h> + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; +using vespalib::eval::ValueType; + +struct ProxyExecutor : FeatureExecutor { + double number_value; + vespalib::eval::Value::UP object_value; + ProxyExecutor() : number_value(0.0), object_value() {} + bool isPure() override { return true; } + void execute(search::fef::MatchData &md) override { + double was_object = 0.0; + if (md.feature_is_object(inputs()[0])) { + was_object = 1.0; + number_value = md.resolve_object_feature(inputs()[0])->get().as_double(); + object_value.reset(new 
vespalib::eval::DoubleValue(number_value)); + } else { + number_value = *md.resolveFeature(inputs()[0]); + object_value.reset(new vespalib::eval::DoubleValue(number_value)); + } + if (md.feature_is_object(outputs()[0])) { + *md.resolve_object_feature(outputs()[0]) = *object_value; + } else { + *md.resolveFeature(outputs()[0]) = number_value; + } + *md.resolveFeature(outputs()[1]) = was_object; + } +}; + +struct ProxyBlueprint : Blueprint { + vespalib::string name; + AcceptInput accept_input; + bool object_output; + ProxyBlueprint(const vespalib::string &name_in, AcceptInput accept_input_in, bool object_output_in) + : Blueprint(name_in), name(name_in), accept_input(accept_input_in), object_output(object_output_in) {} + void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {} + Blueprint::UP createInstance() const override { + return Blueprint::UP(new ProxyBlueprint(name, accept_input, object_output)); + } + bool setup(const IIndexEnvironment &, const std::vector<vespalib::string> ¶ms) override { + ASSERT_EQUAL(1u, params.size()); + defineInput(params[0], accept_input); + describeOutput("value", "the value", object_output ? 
FeatureType::object(ValueType::double_type()) : FeatureType::number()); + describeOutput("was_object", "whether input was object", FeatureType::number()); + return true; + } + FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const override { + return FeatureExecutor::LP(new ProxyExecutor()); + } +}; + +struct Fixture { + BlueprintFactory factory; + IndexEnvironment indexEnv; + + explicit Fixture() { + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("box", Blueprint::AcceptInput::NUMBER, true))); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("maybe_box", Blueprint::AcceptInput::ANY, true))); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("unbox", Blueprint::AcceptInput::OBJECT, false))); + factory.addPrototype(Blueprint::SP(new ProxyBlueprint("maybe_unbox", Blueprint::AcceptInput::ANY, false))); + } + + double eval(const vespalib::string &feature) { + BlueprintResolver::SP resolver(new BlueprintResolver(factory, indexEnv)); + resolver->addSeed(feature); + if (!resolver->compile()) { + return vespalib::eval::error_value; + } + MatchDataLayout mdl; + QueryEnvironment queryEnv(&indexEnv); + Properties overrides; + RankProgram program(resolver); + program.setup(mdl, queryEnv, overrides); + program.run(1); + std::vector<vespalib::string> names; + std::vector<FeatureHandle> handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(1u, names.size()); + EXPECT_EQUAL(names.size(), handles.size()); + const auto &md = program.match_data(); + EXPECT_TRUE(!md.feature_is_object(handles[0])); // verifies auto-unboxing + return *md.resolveFeature(handles[0]); + } + + bool verify(const vespalib::string &feature) { + return verifyFeature(factory, indexEnv, feature, "unit test"); + } +}; + +TEST_F("require that values can be boxed and unboxed", Fixture()) { + EXPECT_EQUAL(3.0, f1.eval("box(value(3))")); + EXPECT_EQUAL(0.0, f1.eval("box(value(3)).was_object")); + 
EXPECT_EQUAL(3.0, f1.eval("unbox(box(value(3)))")); + EXPECT_EQUAL(1.0, f1.eval("unbox(box(value(3))).was_object")); + EXPECT_EQUAL(3.0, f1.eval("box(unbox(box(value(3))))")); + EXPECT_EQUAL(0.0, f1.eval("box(unbox(box(value(3)))).was_object")); +} + +TEST_F("require that output features may be either objects or numbers", Fixture()) { + EXPECT_TRUE(f1.verify("value(3)")); + EXPECT_TRUE(f1.verify("box(value(3))")); +} + +TEST_F("require that feature input/output types must be compatible", Fixture()) { + EXPECT_TRUE(!f1.verify("unbox(value(3))")); + EXPECT_TRUE(f1.verify("maybe_unbox(value(3))")); + EXPECT_TRUE(f1.verify("unbox(box(value(3)))")); + EXPECT_TRUE(!f1.verify("unbox(box(box(value(3))))")); + EXPECT_TRUE(f1.verify("unbox(maybe_box(box(value(3))))")); + EXPECT_TRUE(f1.verify("unbox(box(unbox(box(value(3)))))")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/parameter/.gitignore b/searchlib/src/tests/fef/parameter/.gitignore new file mode 100644 index 00000000000..17cf6c69953 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +parameter_test +searchlib_parameter_test_app diff --git a/searchlib/src/tests/fef/parameter/CMakeLists.txt b/searchlib/src/tests/fef/parameter/CMakeLists.txt new file mode 100644 index 00000000000..dcd45390ce3 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_parameter_test_app + SOURCES + parameter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_parameter_test_app NO_VALGRIND COMMAND searchlib_parameter_test_app) diff --git a/searchlib/src/tests/fef/parameter/DESC b/searchlib/src/tests/fef/parameter/DESC new file mode 100644 index 00000000000..738e0dbd512 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/DESC @@ -0,0 +1 @@ +parameter test. 
Take a look at parameter.cpp for details. diff --git a/searchlib/src/tests/fef/parameter/FILES b/searchlib/src/tests/fef/parameter/FILES new file mode 100644 index 00000000000..20c9e0c9ba0 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/FILES @@ -0,0 +1 @@ +parameter.cpp diff --git a/searchlib/src/tests/fef/parameter/parameter_test.cpp b/searchlib/src/tests/fef/parameter/parameter_test.cpp new file mode 100644 index 00000000000..4d6741937d5 --- /dev/null +++ b/searchlib/src/tests/fef/parameter/parameter_test.cpp @@ -0,0 +1,267 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("parameter_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/fef/parametervalidator.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/indexenvironmentbuilder.h> + +using namespace search::fef::test; + +namespace search { +namespace fef { + +class StringList : public std::vector<vespalib::string> { +public: + StringList & add(const vespalib::string & str) { push_back(str); return *this; } +}; + +class ParameterTest : public vespalib::TestApp { +private: + typedef ParameterDescriptions PDS; + typedef ParameterType PT; + typedef Parameter P; + typedef StringList SL; + typedef ParameterValidator::Result PVR; + + bool assertParameter(const Parameter & exp, const Parameter & act); + bool validate(const IIndexEnvironment & env, + const std::vector<vespalib::string> & params, + const ParameterDescriptions & descs); + bool validate(const IIndexEnvironment & env, + const std::vector<vespalib::string> & params, + const ParameterDescriptions & descs, + const ParameterValidator::Result & result); + + void testDescriptions(); + void testValidator(); + void testParameters(); + +public: + int Main(); +}; + +bool +ParameterTest::assertParameter(const Parameter & exp, const Parameter & act) 
+{ + bool retval = true; + if (!EXPECT_EQUAL(exp.getType(), act.getType())) retval = false; + if (!EXPECT_EQUAL(exp.getValue(), act.getValue())) retval = false; + if (!EXPECT_EQUAL(exp.asDouble(), act.asDouble())) retval = false; + if (!EXPECT_EQUAL(exp.asInteger(), act.asInteger())) retval = false; + if (!EXPECT_EQUAL(exp.asField(), act.asField())) retval = false; + return retval; +} + +bool +ParameterTest::validate(const IIndexEnvironment & env, + const std::vector<vespalib::string> & params, + const ParameterDescriptions & descs) +{ + ParameterValidator pv(env, params, descs); + ParameterValidator::Result result = pv.validate(); + LOG(info, "validate(%s)", result.getError().c_str()); + return result.valid(); +} + +bool +ParameterTest::validate(const IIndexEnvironment & env, + const std::vector<vespalib::string> & params, + const ParameterDescriptions & descs, + const ParameterValidator::Result & result) +{ + if (!validate(env, params, descs)) return false; + ParameterValidator pv(env, params, descs); + ParameterValidator::Result actual = pv.validate(); + if (!EXPECT_EQUAL(result.getTag(), actual.getTag())) return false; + if (!EXPECT_EQUAL(result.getParameters().size(), actual.getParameters().size())) return false; + bool retval = true; + for (size_t i = 0; i < result.getParameters().size(); ++i) { + if (!assertParameter(result.getParameters()[i], actual.getParameters()[i])) retval = false; + } + return retval; +} + +void +ParameterTest::testDescriptions() +{ + PDS descs = PDS(). + desc().indexField(ParameterCollection::SINGLE).indexField(ParameterCollection::ARRAY).indexField(ParameterCollection::WEIGHTEDSET).attribute(ParameterCollection::ANY).attributeField(ParameterCollection::ANY).field(). + desc(5).feature().number().string().attribute(ParameterCollection::ANY). 
+ desc().string().number().repeat(2); + const PDS::DescriptionVector & v = descs.getDescriptions(); + EXPECT_EQUAL(v.size(), 3u); + EXPECT_EQUAL(v[0].getTag(), 0u); + EXPECT_TRUE(!v[0].hasRepeat()); + EXPECT_EQUAL(v[0].getParams().size(), 6u); + EXPECT_EQUAL(v[0].getParam(0).type, ParameterType::INDEX_FIELD); + EXPECT_EQUAL(v[0].getParam(1).type, ParameterType::INDEX_FIELD); + EXPECT_EQUAL(v[0].getParam(2).type, ParameterType::INDEX_FIELD); + EXPECT_EQUAL(v[0].getParam(3).type, ParameterType::ATTRIBUTE); + EXPECT_EQUAL(v[0].getParam(4).type, ParameterType::ATTRIBUTE_FIELD); + EXPECT_EQUAL(v[0].getParam(5).type, ParameterType::FIELD); + EXPECT_EQUAL(v[0].getParam(0).collection, ParameterCollection::SINGLE); + EXPECT_EQUAL(v[0].getParam(1).collection, ParameterCollection::ARRAY); + EXPECT_EQUAL(v[0].getParam(2).collection, ParameterCollection::WEIGHTEDSET); + EXPECT_EQUAL(v[0].getParam(3).collection, ParameterCollection::ANY); + EXPECT_EQUAL(v[0].getParam(4).collection, ParameterCollection::ANY); + EXPECT_EQUAL(v[0].getParam(5).collection, ParameterCollection::ANY); + + EXPECT_EQUAL(v[1].getTag(), 5u); + EXPECT_TRUE(!v[1].hasRepeat()); + EXPECT_EQUAL(v[1].getParams().size(), 4u); + EXPECT_EQUAL(v[1].getParam(0).type, ParameterType::FEATURE); + EXPECT_EQUAL(v[1].getParam(1).type, ParameterType::NUMBER); + EXPECT_EQUAL(v[1].getParam(2).type, ParameterType::STRING); + EXPECT_EQUAL(v[1].getParam(3).type, ParameterType::ATTRIBUTE); + + EXPECT_EQUAL(v[2].getTag(), 6u); + EXPECT_TRUE(v[2].hasRepeat()); + EXPECT_EQUAL(v[2].getParams().size(), 2u); + EXPECT_EQUAL(v[2].getParam(0).type, ParameterType::STRING); + EXPECT_EQUAL(v[2].getParam(1).type, ParameterType::NUMBER); + EXPECT_EQUAL(v[2].getParam(2).type, ParameterType::STRING); + EXPECT_EQUAL(v[2].getParam(3).type, ParameterType::NUMBER); + EXPECT_EQUAL(v[2].getParam(4).type, ParameterType::STRING); + EXPECT_EQUAL(v[2].getParam(5).type, ParameterType::NUMBER); +} + +void +ParameterTest::testValidator() +{ + 
IndexEnvironment env; + IndexEnvironmentBuilder builder(env); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo") + .addField(FieldType::INDEX, CollectionType::SINGLE, "hybrid"); + env.getFields().back().addAttribute(); // 'hybrid' field can also be accessed as an attribute + + // valid + EXPECT_TRUE(validate(env, SL(), PDS().desc())); + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().field())); + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().field())); + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ARRAY))); + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET))); + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("hybrid"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().feature())); + EXPECT_TRUE(validate(env, SL().add("123"), PDS().desc().number())); + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().string())); + // first fail but second pass + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc().field().desc().string())); + + // not valid + EXPECT_FALSE(validate(env, SL().add("baz"), 
PDS().desc().string().string())); + EXPECT_FALSE(validate(env, SL().add("baz").add("baz"), PDS().desc().string())); + EXPECT_FALSE(validate(env, SL().add("baz"), PDS().desc().field())); + EXPECT_FALSE(validate(env, SL().add("bar"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::NONE))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ARRAY))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET))); + EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::NONE))); + EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_FALSE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET))); + EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::NONE))); + EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::SINGLE))); + EXPECT_FALSE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ARRAY))); + EXPECT_FALSE(validate(env, SL().add("unknown"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("unknown"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().attribute(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("foo"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("hybrid"), PDS().desc().attributeField(ParameterCollection::ANY))); + EXPECT_FALSE(validate(env, SL().add("12a"), PDS().desc().number())); + EXPECT_FALSE(validate(env, SL().add("a12"), PDS().desc().number())); + + // test repeat + PDS d1 = PDS().desc().field().repeat(); + EXPECT_TRUE(validate(env, SL(), d1)); + 
EXPECT_TRUE(validate(env, SL().add("foo"), d1)); + EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), d1)); + EXPECT_TRUE(!validate(env, SL().add("foo").add("bar").add("baz"), d1)); + PDS d2 = PDS().desc().string().attribute(ParameterCollection::ANY).indexField(ParameterCollection::SINGLE).repeat(2); + EXPECT_TRUE(validate(env, SL().add("str"), d2)); + EXPECT_TRUE(validate(env, SL().add("str").add("bar").add("foo"), d2)); + EXPECT_TRUE(validate(env, SL().add("str").add("bar").add("foo").add("bar").add("foo"), d2)); + EXPECT_TRUE(!validate(env, SL().add("str").add("bar"), d2)); + EXPECT_TRUE(!validate(env, SL().add("str").add("bar").add("foo").add("bar"), d2)); +} + +void +ParameterTest::testParameters() +{ + IndexEnvironment env; + IndexEnvironmentBuilder builder(env); + builder.addField(FieldType::INDEX, CollectionType::SINGLE, "foo") + .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "bar") + .addField(FieldType::INDEX, CollectionType::ARRAY, "afoo") + .addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "wfoo"); + + const FieldInfo * foo = env.getFieldByName("foo"); + const FieldInfo * bar = env.getFieldByName("bar"); + const FieldInfo * afoo = env.getFieldByName("afoo"); + const FieldInfo * wfoo = env.getFieldByName("wfoo"); + + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().field(), + PVR().addParameter(P(PT::FIELD, "foo").setField(foo)))); // field + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::SINGLE), + PVR().addParameter(P(PT::INDEX_FIELD, "foo").setField(foo)))); // index field + EXPECT_TRUE(validate(env, SL().add("foo"), PDS().desc().indexField(ParameterCollection::ANY), + PVR().addParameter(P(PT::INDEX_FIELD, "foo").setField(foo)))); // index field + EXPECT_TRUE(validate(env, SL().add("afoo"), PDS().desc().indexField(ParameterCollection::ARRAY), + PVR().addParameter(P(PT::INDEX_FIELD, "afoo").setField(afoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("afoo"), 
PDS().desc().indexField(ParameterCollection::ANY), + PVR().addParameter(P(PT::INDEX_FIELD, "afoo").setField(afoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::WEIGHTEDSET), + PVR().addParameter(P(PT::INDEX_FIELD, "wfoo").setField(wfoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("wfoo"), PDS().desc().indexField(ParameterCollection::ANY), + PVR().addParameter(P(PT::INDEX_FIELD, "wfoo").setField(wfoo)))); // index field + EXPECT_TRUE(validate(env, SL().add("bar"), PDS().desc().attribute(ParameterCollection::ANY), + PVR().addParameter(P(PT::ATTRIBUTE, "bar").setField(bar)))); // attribute field + EXPECT_TRUE(validate(env, SL().add("feature"), PDS().desc().feature(), + PVR().addParameter(P(PT::FEATURE, "feature")))); // feature + EXPECT_TRUE(validate(env, SL().add("string"), PDS().desc().string(), + PVR().addParameter(P(PT::STRING, "string")))); // string + + // numbers + EXPECT_TRUE(validate(env, SL().add("-100"), PDS().desc().number(), + PVR().addParameter(P(PT::NUMBER, "-100").setDouble(-100).setInteger(-100)))); + EXPECT_TRUE(validate(env, SL().add("100"), PDS().desc().number(), + PVR().addParameter(P(PT::NUMBER, "100").setDouble(100).setInteger(100)))); + EXPECT_TRUE(validate(env, SL().add("100.16"), PDS().desc().number(), + PVR().addParameter(P(PT::NUMBER, "100.16").setDouble(100.16).setInteger(100)))); + + EXPECT_TRUE(validate(env, SL(), PDS().desc(), PVR())); // no param + EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), PDS().desc().string().string(), + PVR().addParameter(P(PT::STRING, "foo")).addParameter(P(PT::STRING, "bar")))); // multiple params + EXPECT_TRUE(validate(env, SL().add("foo").add("bar"), PDS().desc().string().repeat(), + PVR().addParameter(P(PT::STRING, "foo")).addParameter(P(PT::STRING, "bar")))); // repeat + EXPECT_TRUE(validate(env, SL().add("baz"), PDS().desc(10).field().desc(20).string(), + PVR(20).addParameter(P(PT::STRING, "baz")))); // second desc 
matching +} + +int +ParameterTest::Main() +{ + TEST_INIT("parameter_test"); + + testDescriptions(); + testValidator(); + testParameters(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::ParameterTest); + diff --git a/searchlib/src/tests/fef/phrasesplitter/.gitignore b/searchlib/src/tests/fef/phrasesplitter/.gitignore new file mode 100644 index 00000000000..418f9961840 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +benchmark +phrasesplitter_test +searchlib_phrasesplitter_test_app +searchlib_benchmark_app diff --git a/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt new file mode 100644 index 00000000000..aa16f3e0a0d --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_phrasesplitter_test_app + SOURCES + phrasesplitter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_phrasesplitter_test_app COMMAND searchlib_phrasesplitter_test_app) +vespa_add_executable(searchlib_benchmark_app + SOURCES + benchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_benchmark_app COMMAND searchlib_benchmark_app BENCHMARK) diff --git a/searchlib/src/tests/fef/phrasesplitter/DESC b/searchlib/src/tests/fef/phrasesplitter/DESC new file mode 100644 index 00000000000..fba49bdb8c0 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/DESC @@ -0,0 +1 @@ +phrasesplitter test. Take a look at phrasesplitter.cpp for details. 
diff --git a/searchlib/src/tests/fef/phrasesplitter/FILES b/searchlib/src/tests/fef/phrasesplitter/FILES new file mode 100644 index 00000000000..be37941d0c8 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/FILES @@ -0,0 +1 @@ +phrasesplitter.cpp diff --git a/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp new file mode 100644 index 00000000000..ca90b1de261 --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp @@ -0,0 +1,84 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("phrasesplitter_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <iomanip> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/phrasesplitter.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> + +namespace search { +namespace fef { + +// Benchmark application that times repeated PhraseSplitter::update() calls +// on a single three-term phrase carrying a configurable number of positions. +class Benchmark : public vespalib::TestApp +{ +private: + FastOS_Time _timer; // set by start(), read by sample() + double _sample; // elapsed time captured by sample(); Main() prints it labelled "(ms)" + + void start() { _timer.SetNow(); } + void sample() { _sample = _timer.MilliSecsToNow(); } + // Runs the timed loop: numRuns update() calls over a phrase term holding numPositions positions. + void run(size_t numRuns, size_t numPositions); + +public: + Benchmark() : _timer(), _sample(0) {} + // Entry point; requires exactly two arguments: <numRuns> <numPositions>. + int Main(); +}; + +// Builds a query environment with one phrase term (unique id 1, phrase length 3) in field 0, +// appends numPositions positions to its match data, and then times numRuns calls to +// PhraseSplitter::update(). Only the update loop lies between start() and sample(). +void +Benchmark::run(size_t numRuns, size_t numPositions) +{ + test::QueryEnvironment qe; + std::vector<SimpleTermData> &terms = qe.getTerms(); + MatchDataLayout mdl; + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(1); + terms.back().setPhraseLength(3); // phrase with 3 terms + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + MatchData::UP md = mdl.createMatchData(); + TermFieldMatchData *tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle()); + for (size_t i = 0; i < numPositions; ++i) { + tmd->appendPosition(TermFieldMatchDataPosition(0, i, 0, numPositions)); + } + + PhraseSplitter ps(qe, 0); + + std::cout << "Start benchmark with numRuns(" << 
numRuns << ") and numPositions(" << numPositions << ")" << std::endl; + + start(); + + for (size_t i = 0; i < numRuns; ++i) { + ps.update(*md); + } + + sample(); +} + +int +Benchmark::Main() +{ + + TEST_INIT("benchmark"); + + if (_argc != 3) { + std::cout << "Must specify <numRuns> and <numPositions>" << std::endl; + return 0; + } + + size_t numRuns = strtoull(_argv[1], NULL, 10); + size_t numPositions = strtoull(_argv[2], NULL, 10); + + run(numRuns, numPositions); + + std::cout << "TET: " << _sample << " (ms)" << std::endl; + std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / numRuns << " (ms)" << std::endl; + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::Benchmark); diff --git a/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp new file mode 100644 index 00000000000..0fa6f27022e --- /dev/null +++ b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp @@ -0,0 +1,242 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("phrasesplitter_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/phrasesplitter.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> + +namespace search { +namespace fef { + +class PhraseSplitterTest : public vespalib::TestApp +{ +private: + void assertTermData(const ITermData * td, uint32_t uniqueId, uint32_t numTerms, + uint32_t fieldId, uint32_t termHandle); + void testCopyTermFieldMatchData(); + void testSplitter(); + void testSplitterUpdate(); + +public: + int Main(); +}; + +void +PhraseSplitterTest::assertTermData(const ITermData *td, uint32_t uniqueId, uint32_t numTerms, + uint32_t fieldId, uint32_t tfHandle) +{ + // fprintf(stderr, "checking uid=%d numterms=%d field=%d handle=%d\n", uniqueId, numTerms, fieldId, tfHandle); + EXPECT_EQUAL(uniqueId, td->getUniqueId()); + EXPECT_EQUAL(numTerms, td->getPhraseLength()); + EXPECT_EQUAL(tfHandle, td->lookupField(fieldId)->getHandle()); +} + +void +PhraseSplitterTest::testCopyTermFieldMatchData() +{ + TermFieldMatchData src; + src.reset(1); + src.appendPosition(TermFieldMatchDataPosition(0, 5, 0, 1000)); + src.appendPosition(TermFieldMatchDataPosition(0, 15, 0, 1000)); + + SimpleTermData td; + TermFieldMatchData dst; + dst.reset(0); + // dst.setTermData(&td); + dst.appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000)); + { + FieldPositionsIterator itr = dst.getIterator(); + EXPECT_EQUAL(itr.getPosition(), 10u); + itr.next(); + ASSERT_TRUE(!itr.valid()); + } + + PhraseSplitter::copyTermFieldMatchData(dst, src, 2); + + EXPECT_EQUAL(dst.getDocId(), 1u); + { + TermFieldMatchData::PositionsIterator itr = dst.begin(); + EXPECT_EQUAL(itr->getPosition(), 7u); + ++itr; + EXPECT_EQUAL(itr->getPosition(), 17u); + ++itr; + ASSERT_TRUE(itr == dst.end()); + } + { + FieldPositionsIterator itr = dst.getIterator(); + EXPECT_EQUAL(itr.getPosition(), 7u); + 
itr.next(); + EXPECT_EQUAL(itr.getPosition(), 17u); + itr.next(); + ASSERT_TRUE(!itr.valid()); + } +} + +void +PhraseSplitterTest::testSplitter() +{ + { // single term + test::QueryEnvironment qe; + std::vector<SimpleTermData> &terms = qe.getTerms(); + MatchDataLayout mdl; + terms.push_back(SimpleTermData()); + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 0); + ASSERT_TRUE(ps.getNumTerms() == 1); + ps.update(*md); + // check that nothing is served from the splitter + EXPECT_EQUAL(ps.getTerm(0), &terms[0]); + TermFieldHandle handle = terms[0].lookupField(0)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + } + { // single phrase + test::QueryEnvironment qe; + std::vector<SimpleTermData> & terms = qe.getTerms(); + MatchDataLayout mdl; + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(1); + terms.back().setPhraseLength(3); + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + terms.back().addField(7).setHandle(mdl.allocTermField(7)); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 7); + ASSERT_TRUE(ps.getNumTerms() == 3); + ps.update(*md); + // check that all is served from the splitter + for (size_t i = 0; i < 3; ++i) { + // fprintf(stderr, "checking term %d\n", (int)i); + const ITermData *td = ps.getTerm(i); + EXPECT_NOT_EQUAL(td, &terms[0]); + EXPECT_NOT_EQUAL(td->lookupField(7), (ITermFieldData *)0); + EXPECT_EQUAL(td->lookupField(0), (ITermFieldData *)0); + TEST_DO(assertTermData(td, 1, 1, 7, i + 4)); // skipHandles = 4 + EXPECT_NOT_EQUAL(td->lookupField(7)->getHandle(), + terms[0].lookupField(7)->getHandle()); + EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(7)->getHandle()), + md->resolveTermField(terms[0].lookupField(7)->getHandle())); + } + } + { // combination + test::QueryEnvironment qe; + std::vector<SimpleTermData> &terms = qe.getTerms(); + MatchDataLayout mdl; + for (size_t i = 0; i < 
3; ++i) { + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(i); + terms.back().setPhraseLength(1); + terms.back().addField(4).setHandle(mdl.allocTermField(4)); + terms.back().addField(7).setHandle(mdl.allocTermField(7)); + // fprintf(stderr, "setup B term %p #f %zd\n", &terms.back(), terms.back().numFields()); + } + terms[1].setPhraseLength(3); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 4); + ASSERT_TRUE(ps.getNumTerms() == 5); + ps.update(*md); + { // first term + // fprintf(stderr, "first term\n"); + EXPECT_EQUAL(ps.getTerm(0), &terms[0]); + TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 4, 0)); + TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 7, 1)); + + TermFieldHandle handle = terms[0].lookupField(4)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + handle = terms[0].lookupField(7)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + } + for (size_t i = 0; i < 3; ++i) { // phrase + // fprintf(stderr, "phrase term %zd\n", i); + const ITermData *td = ps.getTerm(i + 1); + EXPECT_NOT_EQUAL(td, &terms[1]); + TEST_DO(assertTermData(td, 1, 1, 4, i + 11)); // skipHandles == 11 + EXPECT_EQUAL(td->lookupField(7), (ITermFieldData *)0); + EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(4)->getHandle()), + md->resolveTermField(terms[1].lookupField(4)->getHandle())); + } + { // last term + // fprintf(stderr, "last term\n"); + EXPECT_EQUAL(ps.getTerm(4), &terms[2]); + TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 4, 4)); + TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 7, 5)); + + // fprintf(stderr, "inspect term %p #f %zd\n", &terms[2], terms[2].numFields()); + fflush(stderr); + TermFieldHandle handle = terms[2].lookupField(4)->getHandle(); + EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); + } + } +} + +void +PhraseSplitterTest::testSplitterUpdate() +{ + { + test::QueryEnvironment qe; + std::vector<SimpleTermData> &terms = qe.getTerms(); + 
MatchDataLayout mdl; + for (size_t i = 0; i < 3; ++i) { + terms.push_back(SimpleTermData()); + terms.back().setUniqueId(i); + terms.back().setPhraseLength(1); + terms.back().addField(0).setHandle(mdl.allocTermField(0)); + } + terms[0].setPhraseLength(2); + terms[2].setPhraseLength(2); + MatchData::UP md = mdl.createMatchData(); + PhraseSplitter ps(qe, 0); + ASSERT_TRUE(ps.getNumTerms() == 5); + { // first phrase + TermFieldMatchData * tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle()); + tmd->appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000)); + } + { // first term + TermFieldMatchData * tmd = md->resolveTermField(terms[1].lookupField(0)->getHandle()); + tmd->appendPosition(TermFieldMatchDataPosition(0, 20, 0, 1000)); + } + { // second phrase + TermFieldMatchData * tmd = md->resolveTermField(terms[2].lookupField(0)->getHandle()); + tmd->appendPosition(TermFieldMatchDataPosition(0, 30, 0, 1000)); + } + ps.update(*md); + for (size_t i = 0; i < 2; ++i) { // first phrase + const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i)->lookupField(0)->getHandle()); + TermFieldMatchData::PositionsIterator itr = tmd->begin(); + EXPECT_EQUAL((itr++)->getPosition(), 10 + i); + ASSERT_TRUE(itr == tmd->end()); + } + { // first term + TermFieldMatchData * tmd = md->resolveTermField(ps.getTerm(2)->lookupField(0)->getHandle()); + TermFieldMatchData::PositionsIterator itr = tmd->begin(); + EXPECT_EQUAL((itr++)->getPosition(), 20u); + ASSERT_TRUE(itr == tmd->end()); + } + for (size_t i = 0; i < 2; ++i) { // second phrase + const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i + 3)->lookupField(0)->getHandle()); + TermFieldMatchData::PositionsIterator itr = tmd->begin(); + EXPECT_EQUAL((itr++)->getPosition(), 30 + i); + ASSERT_TRUE(itr == tmd->end()); + } + } +} + +int +PhraseSplitterTest::Main() +{ + + TEST_INIT("phrasesplitter_test"); + + testCopyTermFieldMatchData(); + testSplitter(); + testSplitterUpdate(); + + TEST_DONE(); +} + +} +} + 
+TEST_APPHOOK(search::fef::PhraseSplitterTest); diff --git a/searchlib/src/tests/fef/properties/.gitignore b/searchlib/src/tests/fef/properties/.gitignore new file mode 100644 index 00000000000..00f94794fa3 --- /dev/null +++ b/searchlib/src/tests/fef/properties/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +properties_test +searchlib_properties_test_app diff --git a/searchlib/src/tests/fef/properties/CMakeLists.txt b/searchlib/src/tests/fef/properties/CMakeLists.txt new file mode 100644 index 00000000000..0b74b10cb31 --- /dev/null +++ b/searchlib/src/tests/fef/properties/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_properties_test_app + SOURCES + properties_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_properties_test_app COMMAND searchlib_properties_test_app) diff --git a/searchlib/src/tests/fef/properties/DESC b/searchlib/src/tests/fef/properties/DESC new file mode 100644 index 00000000000..02faa4cb727 --- /dev/null +++ b/searchlib/src/tests/fef/properties/DESC @@ -0,0 +1 @@ +properties test. Take a look at properties.cpp for details. diff --git a/searchlib/src/tests/fef/properties/FILES b/searchlib/src/tests/fef/properties/FILES new file mode 100644 index 00000000000..61054fa62c2 --- /dev/null +++ b/searchlib/src/tests/fef/properties/FILES @@ -0,0 +1 @@ +properties.cpp diff --git a/searchlib/src/tests/fef/properties/properties_test.cpp b/searchlib/src/tests/fef/properties/properties_test.cpp new file mode 100644 index 00000000000..a08d511b418 --- /dev/null +++ b/searchlib/src/tests/fef/properties/properties_test.cpp @@ -0,0 +1,425 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/searchlib/fef/properties.h> +#include <limits> + +using namespace search::fef; +using namespace search::fef::indexproperties; + +struct CopyVisitor : public IPropertiesVisitor +{ + Properties &dst; + CopyVisitor(Properties &p) : dst(p) {} + virtual void visitProperty(const Property::Value &key, + const Property &values) + { + for (uint32_t i = 0; i < values.size(); ++i) { + dst.add(key, values.getAt(i)); + } + } +}; + +Properties make_props(std::initializer_list<std::pair<const char *, std::initializer_list<const char *> > > entries) { + Properties props; + for (const auto &entry: entries) { + vespalib::string key = entry.first; + for (vespalib::string value: entry.second) { + props.add(key, value); + } + } + return props; +} + +TEST("require that namespace visitation works") { + Properties props = make_props({ {"foo", {"outside"}}, + {"foo.a", {"a_value"}}, + {"foo.b", {"b_value"}}, + {"foo.", {"outside"}} + }); + Properties result; + CopyVisitor copy_visitor(result); + props.visitNamespace("foo", copy_visitor); + EXPECT_EQUAL(2u, result.numKeys()); + EXPECT_EQUAL(result.lookup("a").get(), Property::Value("a_value")); + EXPECT_EQUAL(result.lookup("b").get(), Property::Value("b_value")); +} + +TEST("test stuff") { + { // empty lookup result + Property p; + + EXPECT_EQUAL(p.found(), false); + EXPECT_EQUAL(p.get(), Property::Value("")); + EXPECT_EQUAL(p.get("fb"), Property::Value("fb")); + EXPECT_EQUAL(p.size(), 0u); + EXPECT_EQUAL(p.getAt(0), Property::Value("")); + } + { // add / count / remove + Properties p = make_props({ {"a", {"a1", "a2", "a3"}}, + {"b", {"b1", "b2"}}, + {"c", {"c1"}} + }); + const Properties &pc = p; + + EXPECT_EQUAL(pc.numKeys(), 3u); + EXPECT_EQUAL(pc.numValues(), 6u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 2u); + EXPECT_EQUAL(pc.count("c"), 1u); + 
EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("d"); + + EXPECT_EQUAL(pc.numKeys(), 3u); + EXPECT_EQUAL(pc.numValues(), 6u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 2u); + EXPECT_EQUAL(pc.count("c"), 1u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("c"); + + EXPECT_EQUAL(pc.numKeys(), 2u); + EXPECT_EQUAL(pc.numValues(), 5u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 2u); + EXPECT_EQUAL(pc.count("c"), 0u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("b"); + + EXPECT_EQUAL(pc.numKeys(), 1u); + EXPECT_EQUAL(pc.numValues(), 3u); + EXPECT_EQUAL(pc.count("a"), 3u); + EXPECT_EQUAL(pc.count("b"), 0u); + EXPECT_EQUAL(pc.count("c"), 0u); + EXPECT_EQUAL(pc.count("d"), 0u); + + p.remove("a"); + + EXPECT_EQUAL(pc.numKeys(), 0u); + EXPECT_EQUAL(pc.numValues(), 0u); + EXPECT_EQUAL(pc.count("a"), 0u); + EXPECT_EQUAL(pc.count("b"), 0u); + EXPECT_EQUAL(pc.count("c"), 0u); + EXPECT_EQUAL(pc.count("d"), 0u); + } + { // lookup / import / visit / compare / hash + Properties p; + + p.add("x", "x1"); + p.add("a.x", "x2"); + p.add("a.b.x", "x3"); + p.add("a.b.c.x", "x4"); + + p.add("list", "e1").add("list", "e2").add("list", "e3"); + + EXPECT_EQUAL(p.numKeys(), 5u); + EXPECT_EQUAL(p.numValues(), 7u); + + EXPECT_EQUAL(p.lookup("x").found(), true); + EXPECT_EQUAL(p.lookup("a.x").found(), true); + EXPECT_EQUAL(p.lookup("a.b.x").found(), true); + EXPECT_EQUAL(p.lookup("a.b.c.x").found(), true); + EXPECT_EQUAL(p.lookup("list").found(), true); + EXPECT_EQUAL(p.lookup("y").found(), false); + + EXPECT_EQUAL(p.lookup("x").get(), Property::Value("x1")); + EXPECT_EQUAL(p.lookup("a.x").get(), Property::Value("x2")); + EXPECT_EQUAL(p.lookup("a.b.x").get(), Property::Value("x3")); + EXPECT_EQUAL(p.lookup("a.b.c.x").get(), Property::Value("x4")); + EXPECT_EQUAL(p.lookup("list").get(), Property::Value("e1")); + EXPECT_EQUAL(p.lookup("y").get(), Property::Value("")); + + EXPECT_EQUAL(p.lookup("x").get(), Property::Value("x1")); + 
EXPECT_EQUAL(p.lookup("a", "x").get(), Property::Value("x2")); + EXPECT_EQUAL(p.lookup("a", "b", "x").get(), Property::Value("x3")); + EXPECT_EQUAL(p.lookup("a", "b", "c", "x").get(), Property::Value("x4")); + + EXPECT_EQUAL(p.lookup("x").get("fallback"), Property::Value("x1")); + EXPECT_EQUAL(p.lookup("y").get("fallback"), Property::Value("fallback")); + + EXPECT_EQUAL(p.lookup("y").size(), 0u); + EXPECT_EQUAL(p.lookup("x").size(), 1u); + EXPECT_EQUAL(p.lookup("list").size(), 3u); + EXPECT_EQUAL(p.lookup("list").getAt(0), Property::Value("e1")); + EXPECT_EQUAL(p.lookup("list").getAt(1), Property::Value("e2")); + EXPECT_EQUAL(p.lookup("list").getAt(2), Property::Value("e3")); + EXPECT_EQUAL(p.lookup("list").getAt(3), Property::Value("")); + + Properties p2; + + p2.add("x", "new_x"); + p2.add("y", "y1"); + p2.add("list", "foo").add("list", "bar"); + + EXPECT_EQUAL(p2.numKeys(), 3u); + EXPECT_EQUAL(p2.numValues(), 4u); + + p.import(p2); + + EXPECT_EQUAL(p.numKeys(), 6u); + EXPECT_EQUAL(p.numValues(), 7u); + + EXPECT_EQUAL(p.lookup("y").size(), 1u); + EXPECT_EQUAL(p.lookup("y").get(), Property::Value("y1")); + + EXPECT_EQUAL(p.lookup("x").size(), 1u); + EXPECT_EQUAL(p.lookup("x").get(), Property::Value("new_x")); + + EXPECT_EQUAL(p.lookup("z").size(), 0u); + + EXPECT_EQUAL(p.lookup("a", "x").size(), 1u); + EXPECT_EQUAL(p.lookup("a", "x").get(), Property::Value("x2")); + + EXPECT_EQUAL(p.lookup("list").size(), 2u); + EXPECT_EQUAL(p.lookup("list").getAt(0), Property::Value("foo")); + EXPECT_EQUAL(p.lookup("list").getAt(1), Property::Value("bar")); + EXPECT_EQUAL(p.lookup("list").getAt(2), Property::Value("")); + + Properties p3; + + EXPECT_TRUE(!(p == p2)); + EXPECT_TRUE(!(p == p3)); + EXPECT_TRUE(!(p2 == p)); + EXPECT_TRUE(!(p3 == p)); + EXPECT_TRUE(!(p2 == p3)); + EXPECT_TRUE(!(p3 == p2)); + + CopyVisitor cv(p3); + p.visitProperties(cv); + + EXPECT_EQUAL(p3.numKeys(), 6u); + EXPECT_EQUAL(p3.numValues(), 7u); + + EXPECT_TRUE(p == p3); + EXPECT_TRUE(p3 == p); + 
EXPECT_EQUAL(p.hashCode(), p3.hashCode()); + + p.clear(); + EXPECT_EQUAL(p.numKeys(), 0u); + EXPECT_EQUAL(p.numValues(), 0u); + EXPECT_TRUE(!(p == p3)); + EXPECT_TRUE(!(p3 == p)); + + // visiting the cleared properties must copy nothing into a fresh target + Properties p4; + CopyVisitor cv2(p4); + p.visitProperties(cv2); // use cv2 (targets p4); cv would write into p3 instead + EXPECT_EQUAL(p4.numKeys(), 0u); + EXPECT_EQUAL(p4.numValues(), 0u); + EXPECT_TRUE(p == p4); + EXPECT_TRUE(p4 == p); + EXPECT_EQUAL(p.hashCode(), p4.hashCode()); + } + + { // test index properties known by the framework + { // vespa.rank.firstphase + EXPECT_EQUAL(rank::FirstPhase::NAME, vespalib::string("vespa.rank.firstphase")); + EXPECT_EQUAL(rank::FirstPhase::DEFAULT_VALUE, vespalib::string("nativeRank")); + Properties p; + EXPECT_EQUAL(rank::FirstPhase::lookup(p), vespalib::string("nativeRank")); + p.add("vespa.rank.firstphase", "specialrank"); + EXPECT_EQUAL(rank::FirstPhase::lookup(p), vespalib::string("specialrank")); + } + { // vespa.rank.secondphase + EXPECT_EQUAL(rank::SecondPhase::NAME, vespalib::string("vespa.rank.secondphase")); + EXPECT_EQUAL(rank::SecondPhase::DEFAULT_VALUE, vespalib::string("")); + Properties p; + EXPECT_EQUAL(rank::SecondPhase::lookup(p), vespalib::string("")); + p.add("vespa.rank.secondphase", "specialrank"); + EXPECT_EQUAL(rank::SecondPhase::lookup(p), vespalib::string("specialrank")); + } + { // vespa.dump.feature + EXPECT_EQUAL(dump::Feature::NAME, vespalib::string("vespa.dump.feature")); + EXPECT_EQUAL(dump::Feature::DEFAULT_VALUE.size(), 0u); + Properties p; + EXPECT_EQUAL(dump::Feature::lookup(p).size(), 0u); + p.add("vespa.dump.feature", "foo"); + p.add("vespa.dump.feature", "bar"); + std::vector<vespalib::string> a = dump::Feature::lookup(p); + ASSERT_TRUE(a.size() == 2); + EXPECT_EQUAL(a[0], vespalib::string("foo")); + EXPECT_EQUAL(a[1], vespalib::string("bar")); + } + { // vespa.dump.ignoredefaultfeatures + EXPECT_EQUAL(dump::IgnoreDefaultFeatures::NAME, vespalib::string("vespa.dump.ignoredefaultfeatures")); + EXPECT_EQUAL(dump::IgnoreDefaultFeatures::DEFAULT_VALUE, "false"); + 
Properties p; + EXPECT_TRUE(!dump::IgnoreDefaultFeatures::check(p)); + p.add("vespa.dump.ignoredefaultfeatures", "true"); + EXPECT_TRUE(dump::IgnoreDefaultFeatures::check(p)); + } + { // vespa.matching.termwise_limit + EXPECT_EQUAL(matching::TermwiseLimit::NAME, vespalib::string("vespa.matching.termwise_limit")); + EXPECT_EQUAL(matching::TermwiseLimit::DEFAULT_VALUE, 1.0); + Properties p; + EXPECT_EQUAL(matching::TermwiseLimit::lookup(p), 1.0); + p.add("vespa.matching.termwise_limit", "0.05"); + EXPECT_EQUAL(matching::TermwiseLimit::lookup(p), 0.05); + } + { // vespa.matching.numthreadspersearch + EXPECT_EQUAL(matching::NumThreadsPerSearch::NAME, vespalib::string("vespa.matching.numthreadspersearch")); + EXPECT_EQUAL(matching::NumThreadsPerSearch::DEFAULT_VALUE, std::numeric_limits<uint32_t>::max()); + Properties p; + EXPECT_EQUAL(matching::NumThreadsPerSearch::lookup(p), std::numeric_limits<uint32_t>::max()); + p.add("vespa.matching.numthreadspersearch", "50"); + EXPECT_EQUAL(matching::NumThreadsPerSearch::lookup(p), 50u); + } + { // vespa.matching.numsearchpartitions + EXPECT_EQUAL(matching::NumSearchPartitions::NAME, vespalib::string("vespa.matching.numsearchpartitions")); + EXPECT_EQUAL(matching::NumSearchPartitions::DEFAULT_VALUE, 1u); + Properties p; + EXPECT_EQUAL(matching::NumSearchPartitions::lookup(p), 1u); + p.add("vespa.matching.numsearchpartitions", "50"); + EXPECT_EQUAL(matching::NumSearchPartitions::lookup(p), 50u); + } + { // vespa.matchphase.degradation.attribute + EXPECT_EQUAL(matchphase::DegradationAttribute::NAME, vespalib::string("vespa.matchphase.degradation.attribute")); + EXPECT_EQUAL(matchphase::DegradationAttribute::DEFAULT_VALUE, ""); + Properties p; + EXPECT_EQUAL(matchphase::DegradationAttribute::lookup(p), ""); + p.add("vespa.matchphase.degradation.attribute", "foobar"); + EXPECT_EQUAL(matchphase::DegradationAttribute::lookup(p), "foobar"); + } + { // vespa.matchphase.degradation.ascendingorder + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::NAME, 
vespalib::string("vespa.matchphase.degradation.ascendingorder")); + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::DEFAULT_VALUE, false); + Properties p; + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::lookup(p), false); + p.add("vespa.matchphase.degradation.ascendingorder", "true"); + EXPECT_EQUAL(matchphase::DegradationAscendingOrder::lookup(p), true); + } + { // vespa.matchphase.degradation.maxhits + EXPECT_EQUAL(matchphase::DegradationMaxHits::NAME, vespalib::string("vespa.matchphase.degradation.maxhits")); + EXPECT_EQUAL(matchphase::DegradationMaxHits::DEFAULT_VALUE, 0u); + Properties p; + EXPECT_EQUAL(matchphase::DegradationMaxHits::lookup(p), 0u); + p.add("vespa.matchphase.degradation.maxhits", "123789"); + EXPECT_EQUAL(matchphase::DegradationMaxHits::lookup(p), 123789u); + } + { // vespa.matchphase.degradation.samplepercentage + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::NAME, vespalib::string("vespa.matchphase.degradation.samplepercentage")); + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::DEFAULT_VALUE, 0.2); + Properties p; + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::lookup(p), 0.2); + p.add("vespa.matchphase.degradation.samplepercentage", "0.9"); + EXPECT_EQUAL(matchphase::DegradationSamplePercentage::lookup(p), 0.9); + } + { // vespa.matchphase.degradation.maxfiltercoverage + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::NAME, vespalib::string("vespa.matchphase.degradation.maxfiltercoverage")); + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::DEFAULT_VALUE, 1.0); + Properties p; + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::lookup(p), 1.0); + p.add("vespa.matchphase.degradation.maxfiltercoverage", "0.076"); + EXPECT_EQUAL(matchphase::DegradationMaxFilterCoverage::lookup(p), 0.076); + } + { // vespa.matchphase.degradation.postfiltermultiplier + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::NAME, vespalib::string("vespa.matchphase.degradation.postfiltermultiplier")); + 
EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::DEFAULT_VALUE, 1.0); + Properties p; + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::lookup(p), 1.0); + p.add("vespa.matchphase.degradation.postfiltermultiplier", "0.9"); + EXPECT_EQUAL(matchphase::DegradationPostFilterMultiplier::lookup(p), 0.9); + } + { // vespa.matchphase.diversity.attribute + EXPECT_EQUAL(matchphase::DiversityAttribute::NAME, vespalib::string("vespa.matchphase.diversity.attribute")); + EXPECT_EQUAL(matchphase::DiversityAttribute::DEFAULT_VALUE, ""); + Properties p; + EXPECT_EQUAL(matchphase::DiversityAttribute::lookup(p), ""); + p.add("vespa.matchphase.diversity.attribute", "foobar"); + EXPECT_EQUAL(matchphase::DiversityAttribute::lookup(p), "foobar"); + } + { // vespa.matchphase.diversity.mingroups + EXPECT_EQUAL(matchphase::DiversityMinGroups::NAME, vespalib::string("vespa.matchphase.diversity.mingroups")); + EXPECT_EQUAL(matchphase::DiversityMinGroups::DEFAULT_VALUE, 1u); + Properties p; + EXPECT_EQUAL(matchphase::DiversityMinGroups::lookup(p), 1u); + p.add("vespa.matchphase.diversity.mingroups", "5"); + EXPECT_EQUAL(matchphase::DiversityMinGroups::lookup(p), 5u); + } + { // vespa.hitcollector.heapsize + EXPECT_EQUAL(hitcollector::HeapSize::NAME, vespalib::string("vespa.hitcollector.heapsize")); + EXPECT_EQUAL(hitcollector::HeapSize::DEFAULT_VALUE, 100u); + Properties p; + EXPECT_EQUAL(hitcollector::HeapSize::lookup(p), 100u); + p.add("vespa.hitcollector.heapsize", "50"); + EXPECT_EQUAL(hitcollector::HeapSize::lookup(p), 50u); + } + { // vespa.hitcollector.arraysize + EXPECT_EQUAL(hitcollector::ArraySize::NAME, vespalib::string("vespa.hitcollector.arraysize")); + EXPECT_EQUAL(hitcollector::ArraySize::DEFAULT_VALUE, 10000u); + Properties p; + EXPECT_EQUAL(hitcollector::ArraySize::lookup(p), 10000u); + p.add("vespa.hitcollector.arraysize", "50"); + EXPECT_EQUAL(hitcollector::ArraySize::lookup(p), 50u); + } + { // vespa.hitcollector.estimatepoint + 
EXPECT_EQUAL(hitcollector::EstimatePoint::NAME, vespalib::string("vespa.hitcollector.estimatepoint")); + EXPECT_EQUAL(hitcollector::EstimatePoint::DEFAULT_VALUE, 0xffffffffu); + Properties p; + EXPECT_EQUAL(hitcollector::EstimatePoint::lookup(p), 0xffffffffu); + p.add("vespa.hitcollector.estimatepoint", "50"); + EXPECT_EQUAL(hitcollector::EstimatePoint::lookup(p), 50u); + } + { // vespa.hitcollector.estimatelimit + EXPECT_EQUAL(hitcollector::EstimateLimit::NAME, vespalib::string("vespa.hitcollector.estimatelimit")); + EXPECT_EQUAL(hitcollector::EstimateLimit::DEFAULT_VALUE, 0xffffffffu); + Properties p; + EXPECT_EQUAL(hitcollector::EstimateLimit::lookup(p), 0xffffffffu); + p.add("vespa.hitcollector.estimatelimit", "50"); + EXPECT_EQUAL(hitcollector::EstimateLimit::lookup(p), 50u); + } + { // vespa.hitcollector.rankscoredroplimit + EXPECT_EQUAL(hitcollector::RankScoreDropLimit::NAME, vespalib::string("vespa.hitcollector.rankscoredroplimit")); + search::feature_t got1 = hitcollector::RankScoreDropLimit::DEFAULT_VALUE; + EXPECT_TRUE(got1 != got1); + Properties p; + search::feature_t got2= hitcollector::RankScoreDropLimit::lookup(p); + EXPECT_TRUE(got2 != got2); + p.add("vespa.hitcollector.rankscoredroplimit", "-123456789.12345"); + EXPECT_EQUAL(hitcollector::RankScoreDropLimit::lookup(p), -123456789.12345); + p.clear().add("vespa.hitcollector.rankscoredroplimit", "123456789.12345"); + EXPECT_EQUAL(hitcollector::RankScoreDropLimit::lookup(p), 123456789.12345); + } + { // vespa.fieldweight. + EXPECT_EQUAL(FieldWeight::BASE_NAME, vespalib::string("vespa.fieldweight.")); + EXPECT_EQUAL(FieldWeight::DEFAULT_VALUE, 100u); + Properties p; + EXPECT_EQUAL(FieldWeight::lookup(p, "foo"), 100u); + p.add("vespa.fieldweight.foo", "200"); + EXPECT_EQUAL(FieldWeight::lookup(p, "foo"), 200u); + } + { // vespa.isfilterfield. 
+ EXPECT_EQUAL(IsFilterField::BASE_NAME, "vespa.isfilterfield."); + EXPECT_EQUAL(IsFilterField::DEFAULT_VALUE, "false"); + Properties p; + EXPECT_TRUE(!IsFilterField::check(p, "foo")); + p.add("vespa.isfilterfield.foo", "true"); + EXPECT_TRUE(IsFilterField::check(p, "foo")); + EXPECT_TRUE(!IsFilterField::check(p, "bar")); + IsFilterField::set(p, "bar"); + EXPECT_TRUE(IsFilterField::check(p, "bar")); + } + } +} + +TEST("test attribute type properties") +{ + Properties p; + p.add("vespa.type.attribute.foo", "tensor(x[10])"); + EXPECT_EQUAL("tensor(x[10])", type::Attribute::lookup(p, "foo")); + EXPECT_EQUAL("", type::Attribute::lookup(p, "bar")); +} + +TEST("test query feature type properties") +{ + Properties p; + p.add("vespa.type.query.foo", "tensor(x[10])"); + EXPECT_EQUAL("tensor(x[10])", type::QueryFeature::lookup(p, "foo")); + EXPECT_EQUAL("", type::QueryFeature::lookup(p, "bar")); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/rank_program/.gitignore b/searchlib/src/tests/fef/rank_program/.gitignore new file mode 100644 index 00000000000..b86a29e139f --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/.gitignore @@ -0,0 +1 @@ +searchlib_rank_program_test_app diff --git a/searchlib/src/tests/fef/rank_program/CMakeLists.txt b/searchlib/src/tests/fef/rank_program/CMakeLists.txt new file mode 100644 index 00000000000..12d971a9421 --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_rank_program_test_app + SOURCES + rank_program_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_rank_program_test_app COMMAND searchlib_rank_program_test_app) diff --git a/searchlib/src/tests/fef/rank_program/FILES b/searchlib/src/tests/fef/rank_program/FILES new file mode 100644 index 00000000000..bf6e4665a68 --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/FILES @@ -0,0 +1 @@ +rank_program_test.cpp diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp new file mode 100644 index 00000000000..baf665c58e8 --- /dev/null +++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp @@ -0,0 +1,172 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/features/valuefeature.h> +#include <vespa/searchlib/fef/blueprintfactory.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/plugin/sum.h> +#include <vespa/searchlib/fef/rank_program.h> + +using namespace search::fef; +using namespace search::fef::test; +using namespace search::features; + +struct ImpureValueExecutor : FeatureExecutor { + double value; + ImpureValueExecutor(double value_in) : value(value_in) {} + bool isPure() override { return false; } + void execute(search::fef::MatchData &md) override { *md.resolveFeature(outputs()[0]) = value; } +}; + +struct ImpureValueBlueprint : Blueprint { + double value; + ImpureValueBlueprint() : Blueprint("ivalue"), value(31212.0) {} + void visitDumpFeatures(const IIndexEnvironment &, IDumpFeatureVisitor &) const override {} + Blueprint::UP createInstance() const override { return Blueprint::UP(new ImpureValueBlueprint()); } 
+ bool setup(const IIndexEnvironment &, const std::vector<vespalib::string> &params) override { + ASSERT_EQUAL(1u, params.size()); + value = strtod(params[0].c_str(), nullptr); + describeOutput("out", "the impure value"); + return true; + } + FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const override { + return FeatureExecutor::LP(new ImpureValueExecutor(value)); + } +}; + +struct MySetup { + BlueprintFactory factory; + IndexEnvironment indexEnv; + BlueprintResolver::SP resolver; + Properties overrides; + RankProgram program; + MySetup() : factory(), indexEnv(), resolver(new BlueprintResolver(factory, indexEnv)), + overrides(), program(resolver) + { + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + factory.addPrototype(Blueprint::SP(new ImpureValueBlueprint())); + factory.addPrototype(Blueprint::SP(new SumBlueprint())); + } + MySetup &add(const vespalib::string &feature) { + resolver->addSeed(feature); + return *this; + } + MySetup &override(const vespalib::string &feature, double value) { + overrides.add(feature, vespalib::make_string("%g", value)); + return *this; + } + MySetup &compile() { + ASSERT_TRUE(resolver->compile()); + MatchDataLayout mdl; + QueryEnvironment queryEnv(&indexEnv); + program.setup(mdl, queryEnv, overrides); + return *this; + } + MySetup &run() { + program.run(1); + return *this; + } + double get() { + std::vector<vespalib::string> names; + std::vector<FeatureHandle> handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(1u, names.size()); + EXPECT_EQUAL(names.size(), handles.size()); + return *program.match_data().resolveFeature(handles[0]); + } + double get(const vespalib::string &feature) { + std::vector<vespalib::string> names; + std::vector<FeatureHandle> handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(names.size(), handles.size()); + for (size_t i = 0; i < names.size(); ++i) { + if (names[i] == feature) { + return *program.match_data().resolveFeature(handles[i]); + } + } +
return 31212.0; + } + std::map<vespalib::string, double> all() { + std::map<vespalib::string, double> result; + std::vector<vespalib::string> names; + std::vector<FeatureHandle> handles; + program.get_seed_handles(names, handles); + EXPECT_EQUAL(names.size(), handles.size()); + for (size_t i = 0; i < names.size(); ++i) { + result[names[i]] = *program.match_data().resolveFeature(handles[i]); + } + return result; + } +}; + +TEST_F("require that match data docid is set by run", MySetup()) { + f1.compile(); + EXPECT_NOT_EQUAL(1u, f1.program.match_data().getDocId()); + f1.run(); + EXPECT_EQUAL(1u, f1.program.match_data().getDocId()); +} + +TEST_F("require that simple program works", MySetup()) { + EXPECT_EQUAL(15.0, f1.add("mysum(value(10),ivalue(5))").compile().run().get()); + EXPECT_EQUAL(3u, f1.program.num_executors()); + EXPECT_EQUAL(2u, f1.program.program_size()); +} + +TEST_F("require that const features are calculated during setup", MySetup()) { + f1.add("mysum(value(10),value(5))").compile(); + EXPECT_EQUAL(15.0, f1.get()); + EXPECT_EQUAL(3u, f1.program.num_executors()); + EXPECT_EQUAL(0u, f1.program.program_size()); +} + +TEST_F("require that non-const features are calculated during run", MySetup()) { + f1.add("mysum(ivalue(10),ivalue(5))").compile(); + EXPECT_EQUAL(0.0, f1.get()); + f1.run(); + EXPECT_EQUAL(15.0, f1.get()); + EXPECT_EQUAL(3u, f1.program.num_executors()); + EXPECT_EQUAL(3u, f1.program.program_size()); +} + +TEST_F("require that a single program can calculate multiple output features", MySetup()) { + f1.add("value(1)").add("ivalue(2)").add("ivalue(3)"); + f1.add("mysum(value(1),value(2),ivalue(3))"); + f1.compile().run(); + EXPECT_EQUAL(5u, f1.program.num_executors()); + EXPECT_EQUAL(3u, f1.program.program_size()); + EXPECT_EQUAL(5u, f1.program.match_data().getNumFeatures()); + auto result = f1.all(); + EXPECT_EQUAL(4u, result.size()); + EXPECT_EQUAL(1.0, result["value(1)"]); + EXPECT_EQUAL(2.0, result["ivalue(2)"]); + EXPECT_EQUAL(3.0, 
result["ivalue(3)"]); + EXPECT_EQUAL(6.0, result["mysum(value(1),value(2),ivalue(3))"]); +} + +TEST_F("require that a single executor can produce multiple features", MySetup()) { + f1.add("mysum(value(1,2,3).0,value(1,2,3).1,value(1,2,3).2)"); + EXPECT_EQUAL(6.0, f1.compile().run().get()); + EXPECT_EQUAL(2u, f1.program.num_executors()); + EXPECT_EQUAL(0u, f1.program.program_size()); + EXPECT_EQUAL(4u, f1.program.match_data().getNumFeatures()); +} + +TEST_F("require that feature values can be overridden", MySetup()) { + f1.add("value(1)").add("ivalue(2)").add("ivalue(3)"); + f1.add("mysum(value(1),value(2),ivalue(3))"); + f1.override("value(2)", 20.0).override("ivalue(3)", 30.0); + f1.compile().run(); + EXPECT_EQUAL(5u, f1.program.num_executors()); + EXPECT_EQUAL(3u, f1.program.program_size()); + EXPECT_EQUAL(5u, f1.program.match_data().getNumFeatures()); + auto result = f1.all(); + EXPECT_EQUAL(4u, result.size()); + EXPECT_EQUAL(1.0, result["value(1)"]); + EXPECT_EQUAL(2.0, result["ivalue(2)"]); + EXPECT_EQUAL(30.0, result["ivalue(3)"]); + EXPECT_EQUAL(51.0, result["mysum(value(1),value(2),ivalue(3))"]); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/resolver/.gitignore b/searchlib/src/tests/fef/resolver/.gitignore new file mode 100644 index 00000000000..57114e69298 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_resolver_test_app diff --git a/searchlib/src/tests/fef/resolver/CMakeLists.txt b/searchlib/src/tests/fef/resolver/CMakeLists.txt new file mode 100644 index 00000000000..835a50fd6fb --- /dev/null +++ b/searchlib/src/tests/fef/resolver/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_resolver_test_app + SOURCES + resolver_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_resolver_test_app COMMAND searchlib_resolver_test_app) diff --git a/searchlib/src/tests/fef/resolver/DESC b/searchlib/src/tests/fef/resolver/DESC new file mode 100644 index 00000000000..7d3262ab110 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/DESC @@ -0,0 +1 @@ +resolver test. Take a look at resolver_test.cpp for details. diff --git a/searchlib/src/tests/fef/resolver/FILES b/searchlib/src/tests/fef/resolver/FILES new file mode 100644 index 00000000000..c40c0663848 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/FILES @@ -0,0 +1 @@ +resolver_test.cpp diff --git a/searchlib/src/tests/fef/resolver/resolver_test.cpp b/searchlib/src/tests/fef/resolver/resolver_test.cpp new file mode 100644 index 00000000000..3d791f886e1 --- /dev/null +++ b/searchlib/src/tests/fef/resolver/resolver_test.cpp @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("resolver_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> + +namespace search { +namespace fef { + +class BaseBlueprint : public Blueprint { +public: + BaseBlueprint() : Blueprint("base") { } + virtual void visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const {} + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new BaseBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, + const ParameterList & params) { + (void) indexEnv; (void) params; + describeOutput("foo", "foo"); + describeOutput("bar", "bar"); + describeOutput("baz", "baz"); + return true; + } + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const { + return FeatureExecutor::LP(NULL); + } +}; + +class CombineBlueprint : public Blueprint { +public: + CombineBlueprint() : Blueprint("combine") { } + virtual void visitDumpFeatures(const IIndexEnvironment &, + IDumpFeatureVisitor &) const {} + virtual Blueprint::UP createInstance() const { return Blueprint::UP(new CombineBlueprint()); } + virtual bool setup(const IIndexEnvironment & indexEnv, + const ParameterList & params) { + (void) indexEnv; (void) params; + defineInput("base.foo"); + defineInput("base.bar"); + defineInput("base.baz"); + describeOutput("out", "out"); + return true; + } + virtual FeatureExecutor::LP createExecutor(const IQueryEnvironment &) const { + return FeatureExecutor::LP(NULL); + } +}; + +class Test : public vespalib::TestApp { +private: + BlueprintFactory _factory; + void requireThatWeGetUniqueBlueprints(); +public: + Test(); + int Main(); +}; + +Test::Test() : + _factory() +{ + _factory.addPrototype(Blueprint::SP(new BaseBlueprint())); + _factory.addPrototype(Blueprint::SP(new CombineBlueprint())); +} + +void +Test::requireThatWeGetUniqueBlueprints() +{ + test::IndexEnvironment ienv; 
+ BlueprintResolver::SP res(new BlueprintResolver(_factory, ienv)); + res->addSeed("combine"); + EXPECT_TRUE(res->compile()); + const BlueprintResolver::ExecutorSpecList & spec = res->getExecutorSpecs(); + EXPECT_EQUAL(2u, spec.size()); + EXPECT_TRUE(dynamic_cast<BaseBlueprint *>(spec[0].blueprint.get()) != NULL); + EXPECT_TRUE(dynamic_cast<CombineBlueprint *>(spec[1].blueprint.get()) != NULL); +} + +int +Test::Main() +{ + TEST_INIT("resolver_test"); + + requireThatWeGetUniqueBlueprints(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::Test); diff --git a/searchlib/src/tests/fef/table/.gitignore b/searchlib/src/tests/fef/table/.gitignore new file mode 100644 index 00000000000..b89a30490e0 --- /dev/null +++ b/searchlib/src/tests/fef/table/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +table_test +searchlib_table_test_app diff --git a/searchlib/src/tests/fef/table/CMakeLists.txt b/searchlib/src/tests/fef/table/CMakeLists.txt new file mode 100644 index 00000000000..ca61eb7c365 --- /dev/null +++ b/searchlib/src/tests/fef/table/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_table_test_app + SOURCES + table_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_table_test_app COMMAND searchlib_table_test_app) diff --git a/searchlib/src/tests/fef/table/DESC b/searchlib/src/tests/fef/table/DESC new file mode 100644 index 00000000000..65834ed1305 --- /dev/null +++ b/searchlib/src/tests/fef/table/DESC @@ -0,0 +1 @@ +table test. Take a look at table.cpp for details. 
diff --git a/searchlib/src/tests/fef/table/FILES b/searchlib/src/tests/fef/table/FILES new file mode 100644 index 00000000000..40be726aeb8 --- /dev/null +++ b/searchlib/src/tests/fef/table/FILES @@ -0,0 +1 @@ +table.cpp diff --git a/searchlib/src/tests/fef/table/table_test.cpp b/searchlib/src/tests/fef/table/table_test.cpp new file mode 100644 index 00000000000..2d05e0c7310 --- /dev/null +++ b/searchlib/src/tests/fef/table/table_test.cpp @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("tablemanager_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <fstream> +#include <limits> +#include <iostream> +#include <vespa/searchlib/fef/filetablefactory.h> +#include <vespa/searchlib/fef/functiontablefactory.h> +#include <vespa/searchlib/fef/table.h> +#include <vespa/searchlib/fef/tablemanager.h> + +namespace search { +namespace fef { + +class TableTest : public vespalib::TestApp +{ +private: + bool assertTable(const Table & act, const Table & exp); + bool assertCreateTable(const ITableFactory & tf, const vespalib::string & name, const Table & exp); + void testTable(); + void testFileTableFactory(); + void testFunctionTableFactory(); + void testTableManager(); + +public: + int Main(); +}; + +bool +TableTest::assertTable(const Table & act, const Table & exp) +{ + if (!EXPECT_EQUAL(act.size(), exp.size())) return false; + for (size_t i = 0; i < act.size(); ++i) { + if (!EXPECT_APPROX(act[i], exp[i], 0.01)) return false; + } + return true; +} + +bool +TableTest::assertCreateTable(const ITableFactory & tf, const vespalib::string & name, const Table & exp) +{ + Table::SP t = tf.createTable(name); + if (!EXPECT_TRUE(t.get() != NULL)) return false; + return assertTable(*t, exp); +} + +void +TableTest::testTable() +{ + Table t; + EXPECT_EQUAL(t.size(), 0u); + EXPECT_EQUAL(t.max(), 
-std::numeric_limits<double>::max()); + t.add(1).add(2); + EXPECT_EQUAL(t.size(), 2u); + EXPECT_EQUAL(t.max(), 2); + EXPECT_EQUAL(t[0], 1); + EXPECT_EQUAL(t[1], 2); + t.add(10); + EXPECT_EQUAL(t.size(), 3u); + EXPECT_EQUAL(t.max(), 10); + EXPECT_EQUAL(t[2], 10); + t.add(5); + EXPECT_EQUAL(t.size(), 4u); + EXPECT_EQUAL(t.max(), 10); + EXPECT_EQUAL(t[3], 5); +} + +void +TableTest::testFileTableFactory() +{ + { + FileTableFactory ftf("tables1"); + EXPECT_TRUE(assertCreateTable(ftf, "a", Table().add(1.5).add(2.25).add(3))); + EXPECT_TRUE(ftf.createTable("b").get() == NULL); + } + { + FileTableFactory ftf("tables1/"); + EXPECT_TRUE(ftf.createTable("a").get() != NULL); + } +} + +void +TableTest::testFunctionTableFactory() +{ + FunctionTableFactory ftf(2); + EXPECT_TRUE(assertCreateTable(ftf, "expdecay(400,12)", + Table().add(400).add(368.02))); + EXPECT_TRUE(assertCreateTable(ftf, "loggrowth(1000,5000,1)", + Table().add(5000).add(5693.15))); + EXPECT_TRUE(assertCreateTable(ftf, "linear(10,100)", + Table().add(100).add(110))); + // specify table size + EXPECT_TRUE(assertCreateTable(ftf, "expdecay(400,12,3)", + Table().add(400).add(368.02).add(338.60))); + EXPECT_TRUE(assertCreateTable(ftf, "loggrowth(1000,5000,1,3)", + Table().add(5000).add(5693.15).add(6098.61))); + EXPECT_TRUE(assertCreateTable(ftf, "linear(10,100,3)", + Table().add(100).add(110).add(120))); + EXPECT_TRUE(ftf.createTable("expdecay()").get() == NULL); + EXPECT_TRUE(ftf.createTable("expdecay(10)").get() == NULL); + EXPECT_TRUE(ftf.createTable("loggrowth()").get() == NULL); + EXPECT_TRUE(ftf.createTable("linear()").get() == NULL); + EXPECT_TRUE(ftf.createTable("none").get() == NULL); + EXPECT_TRUE(ftf.createTable("none(").get() == NULL); + EXPECT_TRUE(ftf.createTable("none)").get() == NULL); + EXPECT_TRUE(ftf.createTable("none)(").get() == NULL); +} + +void +TableTest::testTableManager() +{ + { + TableManager tm; + tm.addFactory(ITableFactory::SP(new FileTableFactory("tables1"))); + 
tm.addFactory(ITableFactory::SP(new FileTableFactory("tables2"))); + + { + const Table * t = tm.getTable("a"); // from tables1 + ASSERT_TRUE(t != NULL); + EXPECT_TRUE(assertTable(*t, Table().add(1.5).add(2.25).add(3))); + EXPECT_TRUE(t == tm.getTable("a")); + } + { + const Table * t = tm.getTable("b"); // from tables2 + ASSERT_TRUE(t != NULL); + EXPECT_TRUE(assertTable(*t, Table().add(40).add(50).add(60))); + EXPECT_TRUE(t == tm.getTable("b")); + } + { + EXPECT_TRUE(tm.getTable("c") == NULL); + EXPECT_TRUE(tm.getTable("c") == NULL); + } + } + { + TableManager tm; + ASSERT_TRUE(tm.getTable("a") == NULL); + } +} + +int +TableTest::Main() +{ + TEST_INIT("table_test"); + + testTable(); + testFileTableFactory(); + testFunctionTableFactory(); + testTableManager(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::fef::TableTest); diff --git a/searchlib/src/tests/fef/table/tables1/a b/searchlib/src/tests/fef/table/tables1/a new file mode 100644 index 00000000000..c46f4d59a71 --- /dev/null +++ b/searchlib/src/tests/fef/table/tables1/a @@ -0,0 +1,3 @@ +1.5 +2.25 +3 diff --git a/searchlib/src/tests/fef/table/tables2/a b/searchlib/src/tests/fef/table/tables2/a new file mode 100644 index 00000000000..300ed6fcd17 --- /dev/null +++ b/searchlib/src/tests/fef/table/tables2/a @@ -0,0 +1,3 @@ +10 +20 +30 diff --git a/searchlib/src/tests/fef/table/tables2/b b/searchlib/src/tests/fef/table/tables2/b new file mode 100644 index 00000000000..6f98b52f55f --- /dev/null +++ b/searchlib/src/tests/fef/table/tables2/b @@ -0,0 +1,3 @@ +40 +50 +60 diff --git a/searchlib/src/tests/fef/termfieldmodel/.gitignore b/searchlib/src/tests/fef/termfieldmodel/.gitignore new file mode 100644 index 00000000000..0f860efa14a --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_termfieldmodel_test_app diff --git a/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt b/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt new file mode 
100644 index 00000000000..c8a678c11bb --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_termfieldmodel_test_app + SOURCES + termfieldmodel_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_termfieldmodel_test_app COMMAND searchlib_termfieldmodel_test_app) diff --git a/searchlib/src/tests/fef/termfieldmodel/DESC b/searchlib/src/tests/fef/termfieldmodel/DESC new file mode 100644 index 00000000000..2c8df5a8aab --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/DESC @@ -0,0 +1 @@ +termfieldmodel test. Take a look at termfieldmodel_test.cpp for details. diff --git a/searchlib/src/tests/fef/termfieldmodel/FILES b/searchlib/src/tests/fef/termfieldmodel/FILES new file mode 100644 index 00000000000..b5440335bc6 --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/FILES @@ -0,0 +1 @@ +termfieldmodel_test.cpp diff --git a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp new file mode 100644 index 00000000000..26a02d38adf --- /dev/null +++ b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp @@ -0,0 +1,209 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("termfieldmodel_test"); +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/queryeval/searchiterator.h> + +#include <algorithm> + +using namespace search::fef; + +struct State { + SimpleTermData term; + MatchData::UP md; + TermFieldMatchData *f3; + TermFieldMatchData *f5; + TermFieldMatchData *f7; + TermFieldMatchDataArray array; + + State() : term(), md(), f3(0), f5(0), f7(0), array() {} + + void setArray(TermFieldMatchDataArray value) { + array = value; + } +}; + +void testInvalidId() { + const TermFieldMatchData empty; + using search::queryeval::SearchIterator; + + EXPECT_EQUAL(TermFieldMatchData::invalidId(), empty.getDocId()); + EXPECT_TRUE(TermFieldMatchData::invalidId() < (SearchIterator::beginId() + 1 ) || + TermFieldMatchData::invalidId() > (search::endDocId - 1)); +} + +void testSetup(State &state) { + MatchDataLayout layout; + + state.term.addField(3); // docfreq = 1 + state.term.addField(7); // docfreq = 2 + state.term.addField(5); // docfreq = 3 + + typedef search::fef::ITermFieldRangeAdapter FRA; + typedef search::fef::SimpleTermFieldRangeAdapter SFR; + + // lookup terms + { + int i = 1; + for (SFR iter(state.term); iter.valid(); iter.next()) { + iter.get().setDocFreq(0.25 * i++); + } + } + + // reserve handles + { + for (SFR iter(state.term); iter.valid(); iter.next()) { + iter.get().setHandle(layout.allocTermField(iter.get().getFieldId())); + } + } + + state.md = layout.createMatchData(); + + // init match data + { + for (FRA iter(state.term); iter.valid(); iter.next()) { + const ITermFieldData& tfd = iter.get(); + + TermFieldHandle handle = tfd.getHandle(); + TermFieldMatchData *data = state.md->resolveTermField(handle); + switch (tfd.getFieldId()) { + case 3: + state.f3 = data; + break; + case 5: + state.f5 = data; + break; + case 7: + state.f7 = data; + break; + default: + EXPECT_TRUE(false); + } + } + 
EXPECT_EQUAL(3u, state.f3->getFieldId()); + EXPECT_EQUAL(5u, state.f5->getFieldId()); + EXPECT_EQUAL(7u, state.f7->getFieldId()); + } + + // test that we can setup array + EXPECT_EQUAL(false, state.array.valid()); + state.setArray(TermFieldMatchDataArray().add(state.f3).add(state.f5).add(state.f7)); + EXPECT_EQUAL(true, state.array.valid()); +} + +void testGenerate(State &state) { + // verify array + EXPECT_EQUAL(3u, state.array.size()); + EXPECT_EQUAL(state.f3, state.array[0]); + EXPECT_EQUAL(state.f5, state.array[1]); + EXPECT_EQUAL(state.f7, state.array[2]); + + // stale unpacked data + state.f5->reset(5); + EXPECT_EQUAL(5u, state.f5->getDocId()); + { + TermFieldMatchDataPosition pos; + pos.setPosition(3); + pos.setElementId(0); + pos.setElementLen(10); + state.f5->appendPosition(pos); + EXPECT_EQUAL(1u, state.f5->getIterator().size()); + EXPECT_EQUAL(10u, state.f5->getIterator().getFieldLength()); + } + state.f5->reset(6); + EXPECT_EQUAL(6u, state.f5->getDocId()); + EXPECT_EQUAL(FieldPositionsIterator::UNKNOWN_LENGTH, + state.f5->getIterator().getFieldLength()); + EXPECT_EQUAL(0u, state.f5->getIterator().size()); + + + // fresh unpacked data + state.md->setDocId(10); + state.f3->reset(10); + { + TermFieldMatchDataPosition pos; + pos.setPosition(3); + pos.setElementId(0); + pos.setElementLen(10); + EXPECT_EQUAL(FieldPositionsIterator::UNKNOWN_LENGTH, + state.f3->getIterator().getFieldLength()); + state.f3->appendPosition(pos); + EXPECT_EQUAL(10u, state.f3->getIterator().getFieldLength()); + } + { + TermFieldMatchDataPosition pos; + pos.setPosition(15); + pos.setElementId(1); + pos.setElementLen(20); + state.f3->appendPosition(pos); + EXPECT_EQUAL(20u, state.f3->getIterator().getFieldLength()); + } + { + TermFieldMatchDataPosition pos; + pos.setPosition(1); + pos.setElementId(2); + pos.setElementLen(5); + state.f3->appendPosition(pos); + EXPECT_EQUAL(20u, state.f3->getIterator().getFieldLength()); + } + + // raw score + state.f7->setRawScore(10, 5.0); +} + +void 
testAnalyze(State &state) { + EXPECT_EQUAL(state.md->getDocId(), state.f3->getDocId()); + EXPECT_NOT_EQUAL(state.md->getDocId(), state.f5->getDocId()); + EXPECT_EQUAL(state.md->getDocId(), state.f7->getDocId()); + + FieldPositionsIterator it = state.f3->getIterator(); + EXPECT_EQUAL(20u, it.getFieldLength()); + EXPECT_EQUAL(3u, it.size()); + EXPECT_TRUE(it.valid()); + EXPECT_EQUAL(3u, it.getPosition()); + EXPECT_EQUAL(0u, it.getElementId()); + EXPECT_EQUAL(10u, it.getElementLen()); + it.next(); + EXPECT_TRUE(it.valid()); + EXPECT_EQUAL(15u, it.getPosition()); + EXPECT_EQUAL(1u, it.getElementId()); + EXPECT_EQUAL(20u, it.getElementLen()); + it.next(); + EXPECT_TRUE(it.valid()); + EXPECT_EQUAL(1u, it.getPosition()); + EXPECT_EQUAL(2u, it.getElementId()); + EXPECT_EQUAL(5u, it.getElementLen()); + it.next(); + EXPECT_TRUE(!it.valid()); + + EXPECT_EQUAL(0.0, state.f3->getRawScore()); + EXPECT_EQUAL(0.0, state.f5->getRawScore()); + EXPECT_EQUAL(5.0, state.f7->getRawScore()); +} + +TEST("term field model") { + State state; + testSetup(state); + testGenerate(state); + testAnalyze(state); + testInvalidId(); +} + +TEST("Access subqueries") { + State state; + testSetup(state); + state.f3->reset(10); + state.f3->setSubqueries(10, 42); + EXPECT_EQUAL(42ULL, state.f3->getSubqueries()); + state.f3->enableRawScore(); + EXPECT_EQUAL(0ULL, state.f3->getSubqueries()); + + state.f3->reset(11); + state.f3->appendPosition(TermFieldMatchDataPosition()); + state.f3->setSubqueries(11, 42); + EXPECT_EQUAL(0ULL, state.f3->getSubqueries()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/fef/termmatchdatamerger/.gitignore b/searchlib/src/tests/fef/termmatchdatamerger/.gitignore new file mode 100644 index 00000000000..64f3f4a4600 --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_termmatchdatamerger_test_app diff --git a/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt 
b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt new file mode 100644 index 00000000000..cfb6ae2611f --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_termmatchdatamerger_test_app + SOURCES + termmatchdatamerger_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_termmatchdatamerger_test_app COMMAND searchlib_termmatchdatamerger_test_app) diff --git a/searchlib/src/tests/fef/termmatchdatamerger/DESC b/searchlib/src/tests/fef/termmatchdatamerger/DESC new file mode 100644 index 00000000000..abacd50b719 --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/DESC @@ -0,0 +1 @@ +termmatchdatamerger test. Take a look at termmatchdatamerger.cpp for details. diff --git a/searchlib/src/tests/fef/termmatchdatamerger/FILES b/searchlib/src/tests/fef/termmatchdatamerger/FILES new file mode 100644 index 00000000000..709c15d91b8 --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/FILES @@ -0,0 +1 @@ +termmatchdatamerger_test.cpp diff --git a/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp new file mode 100644 index 00000000000..14b74498f2d --- /dev/null +++ b/searchlib/src/tests/fef/termmatchdatamerger/termmatchdatamerger_test.cpp @@ -0,0 +1,281 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("termmatchdatamerger_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/fef/termmatchdatamerger.h> + +using namespace search::fef; + +typedef TermMatchDataMerger::Input MDMI; +typedef TermMatchDataMerger::Inputs MDMIs; + +namespace { + +TermFieldMatchDataPosition make_pos(uint32_t pos) +{ + return TermFieldMatchDataPosition(0, pos, 1, 1000); +} + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +public: + void testMergeEmptyInput(); + void testMergeSimple(); + void testMergeMultifield(); + void testMergeDuplicates(); + void testMergeFieldLength(); + int Main(); +}; + +void +Test::testMergeEmptyInput() +{ + TermFieldMatchData out; + TermFieldMatchDataArray output; + output.add(&out); + + TermFieldMatchData in; + MDMIs input; + input.push_back(MDMI(&in, 1.0)); + + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + in.reset(docid); + merger.merge(docid); + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_TRUE(out.begin() == out.end()); +} + +void +Test::testMergeSimple() +{ + TermFieldMatchData a; + TermFieldMatchData b; + TermFieldMatchData c; + MDMIs input; + input.push_back(MDMI(&a, 0.5)); + input.push_back(MDMI(&b, 1.0)); + input.push_back(MDMI(&c, 1.5)); + + TermFieldMatchData out; + TermFieldMatchDataArray output; + output.add(&out); + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5).setMatchExactness(0.5)); + a.appendPosition(make_pos(10).setMatchExactness(3.0)); + a.appendPosition(make_pos(15).setMatchExactness(2.0)); + + b.reset(docid); + b.appendPosition(make_pos(7).setMatchExactness(0.5)); + b.appendPosition(make_pos(20).setMatchExactness(4.0)); + + c.reset(docid); + c.appendPosition(make_pos(22).setMatchExactness(0.5)); + 
c.appendPosition(make_pos(27).setMatchExactness(2.0)); + c.appendPosition(make_pos(28).setMatchExactness(5.0)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(8u, out.end() - out.begin()); + + EXPECT_EQUAL( 5u, out.begin()[0].getPosition()); + EXPECT_EQUAL( 7u, out.begin()[1].getPosition()); + EXPECT_EQUAL(10u, out.begin()[2].getPosition()); + EXPECT_EQUAL(15u, out.begin()[3].getPosition()); + EXPECT_EQUAL(20u, out.begin()[4].getPosition()); + EXPECT_EQUAL(22u, out.begin()[5].getPosition()); + EXPECT_EQUAL(27u, out.begin()[6].getPosition()); + EXPECT_EQUAL(28u, out.begin()[7].getPosition()); + + EXPECT_EQUAL(0.25, out.begin()[0].getMatchExactness()); + EXPECT_EQUAL( 0.5, out.begin()[1].getMatchExactness()); + EXPECT_EQUAL( 1.5, out.begin()[2].getMatchExactness()); + EXPECT_EQUAL( 1.0, out.begin()[3].getMatchExactness()); + EXPECT_EQUAL( 4.0, out.begin()[4].getMatchExactness()); + EXPECT_EQUAL(0.75, out.begin()[5].getMatchExactness()); + EXPECT_EQUAL( 3.0, out.begin()[6].getMatchExactness()); + EXPECT_EQUAL( 7.5, out.begin()[7].getMatchExactness()); + + // one stale input + + docid = 10; + a.reset(docid); + a.appendPosition(make_pos(5)); + a.appendPosition(make_pos(10)); + a.appendPosition(make_pos(15)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(3u, out.end() - out.begin()); + + EXPECT_EQUAL( 5u, out.begin()[0].getPosition()); + EXPECT_EQUAL(10u, out.begin()[1].getPosition()); + EXPECT_EQUAL(15u, out.begin()[2].getPosition()); + + // both inputs are stale + + docid = 15; + + merger.merge(docid); + EXPECT_NOT_EQUAL(docid, out.getDocId()); +} + + +void +Test::testMergeMultifield() +{ + TermFieldMatchData a; + TermFieldMatchData b; + TermFieldMatchData c; + MDMIs input; + a.setFieldId(1); + b.setFieldId(2); + c.setFieldId(2); + input.push_back(MDMI(&a, 1.0)); + input.push_back(MDMI(&b, 0.5)); + input.push_back(MDMI(&c, 1.5)); + + TermFieldMatchData out1; + TermFieldMatchData out2; + 
TermFieldMatchData out3; + TermFieldMatchDataArray output; + out1.setFieldId(1); + out2.setFieldId(2); + out3.setFieldId(3); + output.add(&out1).add(&out2).add(&out3); + + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5)); + a.appendPosition(make_pos(15)); + + b.reset(docid); + b.appendPosition(make_pos(7)); + b.appendPosition(make_pos(20)); + + c.reset(docid); + c.appendPosition(make_pos(5)); + c.appendPosition(make_pos(20)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out1.getDocId()); + EXPECT_EQUAL(docid, out2.getDocId()); + EXPECT_NOT_EQUAL(docid, out3.getDocId()); + + EXPECT_EQUAL(2u, out1.end() - out1.begin()); + EXPECT_EQUAL(3u, out2.end() - out2.begin()); + + EXPECT_EQUAL( 5u, out1.begin()[0].getPosition()); + EXPECT_EQUAL(15u, out1.begin()[1].getPosition()); + + EXPECT_EQUAL( 5u, out2.begin()[0].getPosition()); + EXPECT_EQUAL( 7u, out2.begin()[1].getPosition()); + EXPECT_EQUAL(20u, out2.begin()[2].getPosition()); + + EXPECT_EQUAL(1.0, out1.begin()[0].getMatchExactness()); + EXPECT_EQUAL(1.0, out1.begin()[1].getMatchExactness()); + + EXPECT_EQUAL(1.5, out2.begin()[0].getMatchExactness()); + EXPECT_EQUAL(0.5, out2.begin()[1].getMatchExactness()); + EXPECT_EQUAL(1.5, out2.begin()[2].getMatchExactness()); +} + +void +Test::testMergeDuplicates() +{ + TermFieldMatchData a; + TermFieldMatchData b; + MDMIs input; + input.push_back(MDMI(&a, 0.5)); + input.push_back(MDMI(&b, 1.5)); + + TermFieldMatchData out; + TermFieldMatchDataArray output; + output.add(&out); + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + + a.reset(docid); + a.appendPosition(make_pos(5)); + a.appendPosition(make_pos(10)); + a.appendPosition(make_pos(15)); + + b.reset(docid); + b.appendPosition(make_pos(3)); + b.appendPosition(make_pos(10)); + b.appendPosition(make_pos(15)); + b.appendPosition(make_pos(17)); + + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(5u, 
out.end() - out.begin()); + + EXPECT_EQUAL( 3u, out.begin()[0].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[0].getMatchExactness()); + EXPECT_EQUAL( 5u, out.begin()[1].getPosition()); + EXPECT_EQUAL(0.5, out.begin()[1].getMatchExactness()); + EXPECT_EQUAL(10u, out.begin()[2].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[2].getMatchExactness()); + EXPECT_EQUAL(15u, out.begin()[3].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[3].getMatchExactness()); + EXPECT_EQUAL(17u, out.begin()[4].getPosition()); + EXPECT_EQUAL(1.5, out.begin()[4].getMatchExactness()); +} + +void +Test::testMergeFieldLength() +{ + TermFieldMatchData a; + TermFieldMatchData b; + MDMIs input; + input.push_back(MDMI(&a, 1.0)); + input.push_back(MDMI(&b, 1.0)); + + TermFieldMatchData out; + TermFieldMatchDataArray output; + output.add(&out); + TermMatchDataMerger merger(input, output); + + uint32_t docid = 5; + a.reset(docid); + a.appendPosition(make_pos(1)); + b.reset(docid); + b.appendPosition(make_pos(2)); + merger.merge(docid); + + EXPECT_EQUAL(docid, out.getDocId()); + EXPECT_EQUAL(1000u, out.getIterator().getFieldLength()); +} + +int +Test::Main() +{ + TEST_INIT("termmatchdatamerger_test"); + testMergeEmptyInput(); + testMergeSimple(); + testMergeMultifield(); + testMergeDuplicates(); + testMergeFieldLength(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/fileheaderinspect/.gitignore b/searchlib/src/tests/fileheaderinspect/.gitignore new file mode 100644 index 00000000000..812991d07b5 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +fileheader.dat +fileheaderinspect_test +out +searchlib_fileheaderinspect_test_app diff --git a/searchlib/src/tests/fileheaderinspect/CMakeLists.txt b/searchlib/src/tests/fileheaderinspect/CMakeLists.txt new file mode 100644 index 00000000000..024e83bde02 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fileheaderinspect_test_app + SOURCES + fileheaderinspect.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fileheaderinspect_test_app COMMAND searchlib_fileheaderinspect_test_app) diff --git a/searchlib/src/tests/fileheaderinspect/DESC b/searchlib/src/tests/fileheaderinspect/DESC new file mode 100644 index 00000000000..ee57a2fdde3 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/DESC @@ -0,0 +1 @@ +fileheaderinspect test. Take a look at fileheaderinspect.cpp for details. diff --git a/searchlib/src/tests/fileheaderinspect/FILES b/searchlib/src/tests/fileheaderinspect/FILES new file mode 100644 index 00000000000..7c32fb811d5 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/FILES @@ -0,0 +1 @@ +fileheaderinspect.cpp diff --git a/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp b/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp new file mode 100644 index 00000000000..75ad526e2f7 --- /dev/null +++ b/searchlib/src/tests/fileheaderinspect/fileheaderinspect.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fileheaderinspect_test"); + +#include <vespa/searchlib/util/fileheadertk.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search; +using namespace vespalib; + +class Test : public vespalib::TestApp { +private: + bool writeHeader(const FileHeader &header, const vespalib::string &fileName); + vespalib::string readFile(const vespalib::string &fileName); + + void testError(); + void testEscape(); + void testDelimiter(); + void testQuiet(); + void testVerbose(); + +public: + int Main() { + TEST_INIT("fileheaderinspect_test"); + + testError(); TEST_FLUSH(); + testEscape(); TEST_FLUSH(); + testDelimiter(); TEST_FLUSH(); + testQuiet(); TEST_FLUSH(); + testVerbose(); TEST_FLUSH(); + + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + +bool +Test::writeHeader(const FileHeader &header, const vespalib::string &fileName) +{ + FastOS_File file; + if (!EXPECT_TRUE(file.OpenWriteOnlyTruncate(fileName.c_str()))) { + return false; + } + if (!EXPECT_EQUAL(header.getSize(), header.writeFile(file))) { + return false; + } + file.Close(); + return true; +} + +vespalib::string +Test::readFile(const vespalib::string &fileName) +{ + FastOS_File file; + ASSERT_TRUE(file.OpenReadOnly(fileName.c_str())); + + char buf[1024]; + uint32_t len = file.Read(buf, 1024); + EXPECT_TRUE(len != 1024); // make sure we got everything + + vespalib::string str(buf, len); + file.Close(); + return str; +} + +void +Test::testError() +{ + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect notfound.dat") != 0); +} + +void +Test::testEscape() +{ + FileHeader header; + header.putTag(FileHeader::Tag("fanart", "\fa\na\r\t")); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -q fileheader.dat > out") == 0); + EXPECT_EQUAL("fanart;string;\\fa\\na\\r\\t\n", readFile("out")); +} + +void +Test::testDelimiter() +{ + FileHeader header; + 
header.putTag(FileHeader::Tag("string", "string")); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -d i -q fileheader.dat > out") == 0); + EXPECT_EQUAL("str\\ingistr\\ingistr\\ing\n", readFile("out")); +} + +void +Test::testVerbose() +{ + FileHeader header; + FileHeaderTk::addVersionTags(header); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect fileheader.dat > out") == 0); + vespalib::string str = readFile("out"); + EXPECT_TRUE(!str.empty()); + for (uint32_t i = 0, numTags = header.getNumTags(); i < numTags; ++i) { + const FileHeader::Tag &tag = header.getTag(i); + EXPECT_TRUE(str.find(tag.getName()) != vespalib::string::npos); + + vespalib::asciistream out; + out << tag; + EXPECT_TRUE(str.find(out.str()) != vespalib::string::npos); + } +} + +void +Test::testQuiet() +{ + FileHeader header; + FileHeaderTk::addVersionTags(header); + ASSERT_TRUE(writeHeader(header, "fileheader.dat")); + EXPECT_TRUE(system("../../apps/fileheaderinspect/vespa-header-inspect -q fileheader.dat > out") == 0); + vespalib::string str = readFile("out"); + EXPECT_TRUE(!str.empty()); + for (uint32_t i = 0, numTags = header.getNumTags(); i < numTags; ++i) { + const FileHeader::Tag &tag = header.getTag(i); + size_t pos = str.find(tag.getName()); + EXPECT_TRUE(pos != vespalib::string::npos); + + vespalib::asciistream out; + out << ";" << tag; + EXPECT_TRUE(str.find(out.str(), pos) != vespalib::string::npos); + } +} diff --git a/searchlib/src/tests/fileheadertk/.gitignore b/searchlib/src/tests/fileheadertk/.gitignore new file mode 100644 index 00000000000..6aa8c365240 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +fileheadertk.dat +fileheadertk_test +versiontags.dat +searchlib_fileheadertk_test_app diff --git a/searchlib/src/tests/fileheadertk/CMakeLists.txt 
b/searchlib/src/tests/fileheadertk/CMakeLists.txt new file mode 100644 index 00000000000..bc6969fbac2 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fileheadertk_test_app + SOURCES + fileheadertk_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fileheadertk_test_app COMMAND searchlib_fileheadertk_test_app) diff --git a/searchlib/src/tests/fileheadertk/DESC b/searchlib/src/tests/fileheadertk/DESC new file mode 100644 index 00000000000..08ad9a0769d --- /dev/null +++ b/searchlib/src/tests/fileheadertk/DESC @@ -0,0 +1 @@ +Ensures that FileHeaderTk works as expected. diff --git a/searchlib/src/tests/fileheadertk/FILES b/searchlib/src/tests/fileheadertk/FILES new file mode 100644 index 00000000000..fe82bf13af7 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/FILES @@ -0,0 +1 @@ +fileheadertk_test.cpp diff --git a/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp b/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp new file mode 100644 index 00000000000..14c5d0ed6f6 --- /dev/null +++ b/searchlib/src/tests/fileheadertk/fileheadertk_test.cpp @@ -0,0 +1,47 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fileheadertk_test"); + +#include <vespa/searchlib/util/fileheadertk.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search; + +class Test : public vespalib::TestApp { +private: + void testVersionTags(); + +public: + int Main() { + TEST_INIT("fileheadertk_test"); + + testVersionTags(); TEST_FLUSH(); + + TEST_DONE(); + } +}; + +TEST_APPHOOK(Test); + +void +Test::testVersionTags() +{ + vespalib::FileHeader header; + FileHeaderTk::addVersionTags(header); + + FastOS_File file; + ASSERT_TRUE(file.OpenWriteOnlyTruncate("versiontags.dat")); + EXPECT_EQUAL(header.getSize(), header.writeFile(file)); + file.Close(); + + EXPECT_EQUAL(8u, header.getNumTags()); + EXPECT_TRUE(header.hasTag("version-arch")); + EXPECT_TRUE(header.hasTag("version-builder")); + EXPECT_TRUE(header.hasTag("version-component")); + EXPECT_TRUE(header.hasTag("version-date")); + EXPECT_TRUE(header.hasTag("version-system")); + EXPECT_TRUE(header.hasTag("version-system-rev")); + EXPECT_TRUE(header.hasTag("version-tag")); + EXPECT_TRUE(header.hasTag("version-pkg")); +} diff --git a/searchlib/src/tests/forcelink/.gitignore b/searchlib/src/tests/forcelink/.gitignore new file mode 100644 index 00000000000..c74c5915388 --- /dev/null +++ b/searchlib/src/tests/forcelink/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +forcelink_test +searchlib_forcelink_test_app diff --git a/searchlib/src/tests/forcelink/CMakeLists.txt b/searchlib/src/tests/forcelink/CMakeLists.txt new file mode 100644 index 00000000000..50e39d2d844 --- /dev/null +++ b/searchlib/src/tests/forcelink/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_forcelink_test_app + SOURCES + forcelink.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_forcelink_test_app COMMAND searchlib_forcelink_test_app) diff --git a/searchlib/src/tests/forcelink/DESC b/searchlib/src/tests/forcelink/DESC new file mode 100644 index 00000000000..c73185a8736 --- /dev/null +++ b/searchlib/src/tests/forcelink/DESC @@ -0,0 +1 @@ +forcelink test. Take a look at forcelink.cpp for details. diff --git a/searchlib/src/tests/forcelink/FILES b/searchlib/src/tests/forcelink/FILES new file mode 100644 index 00000000000..d917375ebf2 --- /dev/null +++ b/searchlib/src/tests/forcelink/FILES @@ -0,0 +1 @@ +forcelink.cpp diff --git a/searchlib/src/tests/forcelink/forcelink.cpp b/searchlib/src/tests/forcelink/forcelink.cpp new file mode 100644 index 00000000000..9f555e09480 --- /dev/null +++ b/searchlib/src/tests/forcelink/forcelink.cpp @@ -0,0 +1,18 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
#include <vespa/fastos/fastos.h>
#include <vespa/log/log.h>
LOG_SETUP("forcelink_test");
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/searchlib/expression/forcelink.hpp>
#include <vespa/searchlib/aggregation/forcelink.hpp>

TEST_SETUP(Test);

/**
 * Calls the two forcelink entry points declared by the included
 * forcelink.hpp headers. The test makes no assertions of its own; it only
 * needs to build, link and run to completion.
 * NOTE(review): presumably these functions exist to pull the expression and
 * aggregation object files into the link - confirm in forcelink.hpp.
 */
int
Test::Main()
{
    TEST_INIT("forcelink_test");
    forcelink_searchlib_expression();
    forcelink_searchlib_aggregation();
    TEST_DONE();
}
diff --git a/searchlib/src/tests/grouping/.gitignore b/searchlib/src/tests/grouping/.gitignore new file mode 100644 index 00000000000..c7654573dc5 --- /dev/null +++ b/searchlib/src/tests/grouping/.gitignore @@ -0,0 +1,11 @@ +.depend +Makefile +diff.txt +grouping_test +lhs.out +rhs.out +/grouping_benchmark +searchlib_grouping_serialization_test_app +searchlib_grouping_test_app +searchlib_hyperloglog_test_app +searchlib_sketch_test_app diff --git a/searchlib/src/tests/grouping/CMakeLists.txt b/searchlib/src/tests/grouping/CMakeLists.txt new file mode 100644 index 00000000000..ef44472edfc --- /dev/null +++ b/searchlib/src/tests/grouping/CMakeLists.txt @@ -0,0 +1,29 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_grouping_test_app + SOURCES + grouping_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_grouping_test_app COMMAND searchlib_grouping_test_app) +vespa_add_executable(searchlib_hyperloglog_test_app + SOURCES + hyperloglog_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_hyperloglog_test_app COMMAND searchlib_hyperloglog_test_app) +vespa_add_executable(searchlib_sketch_test_app + SOURCES + sketch_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sketch_test_app COMMAND searchlib_sketch_test_app) +vespa_add_executable(searchlib_grouping_serialization_test_app + SOURCES + grouping_serialization_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_grouping_serialization_test_app COMMAND searchlib_grouping_serialization_test_app) diff --git a/searchlib/src/tests/grouping/DESC b/searchlib/src/tests/grouping/DESC new file mode 100644 index 00000000000..1aa6cb37e89 --- /dev/null +++ b/searchlib/src/tests/grouping/DESC @@ -0,0 +1 @@ +grouping test. Take a look at grouping_test.cpp for details. diff --git a/searchlib/src/tests/grouping/FILES b/searchlib/src/tests/grouping/FILES new file mode 100644 index 00000000000..af7f7e71257 --- /dev/null +++ b/searchlib/src/tests/grouping/FILES @@ -0,0 +1,4 @@ +grouping_test.cpp +lhs.out +rhs.out +diff.txt diff --git a/searchlib/src/tests/grouping/grouping_serialization_test.cpp b/searchlib/src/tests/grouping/grouping_serialization_test.cpp new file mode 100644 index 00000000000..99757af8439 --- /dev/null +++ b/searchlib/src/tests/grouping/grouping_serialization_test.cpp @@ -0,0 +1,339 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for grouping_serialization.
+ +#include <vespa/log/log.h> +LOG_SETUP("grouping_serialization_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/aggregation/expressioncountaggregationresult.h> +#include <vespa/searchlib/aggregation/fs4hit.h> +#include <vespa/searchlib/aggregation/groupinglevel.h> +#include <vespa/searchlib/aggregation/hitsaggregationresult.h> +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/aggregation/vdshit.h> +#include <vespa/searchlib/common/hitrank.h> +#include <vespa/searchlib/common/identifiable.h> +#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h> +#include <vespa/searchlib/expression/floatbucketresultnode.h> +#include <vespa/searchlib/expression/getdocidnamespacespecificfunctionnode.h> +#include <vespa/searchlib/expression/getymumchecksumfunctionnode.h> +#include <vespa/searchlib/expression/integerbucketresultnode.h> +#include <vespa/vespalib/objects/nboserializer.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <fstream> + +using search::HitRank; +using vespalib::Identifiable; +using vespalib::NBOSerializer; +using vespalib::make_string; +using vespalib::nbostream; +using namespace search::aggregation; +using namespace search::expression; + +namespace { + +document::GlobalId getGlobalId(uint32_t docId) { + return document::DocumentId(vespalib::make_string("doc:test:%u", docId)) + .getGlobalId(); +} + +struct Fixture { + // Set WRITE_FILES to true to generate new expected serialization files. 
+ const bool WRITE_FILES = false; + const std::string file_path = "../../test/files/"; + std::string file_name; + std::ifstream file_stream; + + Fixture(const std::string &file_name_in) + : file_name(file_path + file_name_in), + file_stream(file_name.c_str(), + std::ifstream::in | std::ifstream::binary) { + if (WRITE_FILES) { + std::ofstream out(file_name.c_str(), + std::ofstream::out | std::ofstream::trunc | + std::ofstream::binary); + } + } + + void checkObject(const Identifiable &obj) { + if (WRITE_FILES) { + nbostream stream; + NBOSerializer serializer(stream); + serializer << obj; + std::ofstream out(file_name.c_str(), + std::ofstream::out | std::ofstream::app | + std::ofstream::binary); + uint32_t size = stream.size(); + out.write(reinterpret_cast<const char *>(&size), sizeof(size)); + out.write(stream.peek(), stream.size()); + } + + uint32_t size = 0; + file_stream.read(reinterpret_cast<char *>(&size), sizeof(size)); + nbostream stream; + for (size_t i = 0; i < size; ++i) { + char c; + file_stream.read(&c, sizeof(c)); + stream << c; + } + Identifiable::UP newObj = Identifiable::create(stream); + + if (!EXPECT_TRUE(newObj.get() != 0)) { + LOG(error, "object of class '%s' resulted in empty echo", + obj.getClass().name()); + return; + } + if (EXPECT_EQUAL(obj.asString(), newObj->asString()) + && EXPECT_TRUE(newObj->cmp(obj) == 0) + && EXPECT_TRUE(obj.cmp(*newObj) == 0)) + { + LOG(info, "object of class '%s' passed echo test : %s", + obj.getClass().name(), newObj->asString().c_str()); + } else { + LOG(error, "object of class '%s' FAILED echo test", + obj.getClass().name()); + } + } +}; + +//----------------------------------------------------------------------------- + +ExpressionNode::CP createDummyExpression() { + return AddFunctionNode().addArg(ConstantNode(Int64ResultNode(2))) + .addArg(ConstantNode(Int64ResultNode(2))); +} + +//----------------------------------------------------------------------------- + +TEST_F("testResultTypes", 
Fixture("testResultTypes")) { + f.checkObject(Int64ResultNode(7)); + f.checkObject(FloatResultNode(7.3)); + f.checkObject(StringResultNode("7.3")); + { + char tmp[7] = { (char)0xe5, (char)0xa6, (char)0x82, (char)0xe6, + (char)0x9e, (char)0x9c,0 }; + f.checkObject(StringResultNode(tmp)); + } + { + char tmp[] = { '7', '.', '4' }; + f.checkObject(RawResultNode(tmp, 3)); + } + f.checkObject(IntegerBucketResultNode()); + f.checkObject(FloatBucketResultNode()); + f.checkObject(IntegerBucketResultNode(10, 20)); + f.checkObject(FloatBucketResultNode(10.0, 20.0)); + f.checkObject(StringBucketResultNode("10.0", "20.0")); + char tmp[] = { 1, 0, 0}; + char tmp2[] = { 1, 1, 0}; + f.checkObject( + RawBucketResultNode(ResultNode::UP(new RawResultNode(tmp, 3)), + ResultNode::UP(new RawResultNode(tmp2, 3)))); + + IntegerBucketResultNodeVector iv; + iv.getVector().push_back(IntegerBucketResultNode(878, 3246823)); + f.checkObject(iv); + + FloatBucketResultNodeVector fv; + fv.getVector().push_back(FloatBucketResultNode(878, 3246823)); + f.checkObject(fv); + + StringBucketResultNodeVector sv; + sv.getVector().push_back(StringBucketResultNode("878", "3246823")); + f.checkObject(sv); + + RawBucketResultNodeVector rv; + rv.getVector().push_back( + RawBucketResultNode(ResultNode::UP(new RawResultNode(tmp, 3)), + ResultNode::UP(new RawResultNode(tmp2, 3)))); + f.checkObject(rv); +} + +TEST_F("testSpecialNodes", Fixture("testSpecialNodes")) { + f.checkObject(AttributeNode("testattribute")); + f.checkObject(DocumentFieldNode("testdocumentfield")); + { + f.checkObject(GetDocIdNamespaceSpecificFunctionNode( + ResultNode::UP(new Int64ResultNode(7)))); + } + f.checkObject(GetYMUMChecksumFunctionNode()); +} + +TEST_F("testFunctionNodes", Fixture("testFunctionNodes")) { + f.checkObject(AddFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(XorFunctionNode() + 
.addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(MultiplyFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(DivideFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(ModuloFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(MinFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(MaxFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(TimeStampFunctionNode(ConstantNode(Int64ResultNode(7)), + TimeStampFunctionNode::Hour, true)); + f.checkObject(ZCurveFunctionNode(ConstantNode(Int64ResultNode(7)), + ZCurveFunctionNode::X)); + f.checkObject(ZCurveFunctionNode(ConstantNode(Int64ResultNode(7)), + ZCurveFunctionNode::Y)); + f.checkObject(NegateFunctionNode(ConstantNode(Int64ResultNode(7)))); + f.checkObject(SortFunctionNode(ConstantNode(Int64ResultNode(7)))); + f.checkObject(NormalizeSubjectFunctionNode(ConstantNode( + StringResultNode("foo")))); + f.checkObject(ReverseFunctionNode(ConstantNode(Int64ResultNode(7)))); + f.checkObject(MD5BitFunctionNode(ConstantNode(Int64ResultNode(7)), 64)); + f.checkObject(XorBitFunctionNode(ConstantNode(Int64ResultNode(7)), 64)); + f.checkObject(CatFunctionNode() + .addArg(ConstantNode(Int64ResultNode(7))) + .addArg(ConstantNode(Int64ResultNode(8))) + .addArg(ConstantNode(Int64ResultNode(9)))); + f.checkObject(FixedWidthBucketFunctionNode()); + 
f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo"))); + f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo")) + .setWidth(Int64ResultNode(10))); + f.checkObject(FixedWidthBucketFunctionNode(AttributeNode("foo")) + .setWidth(FloatResultNode(10.0))); + f.checkObject(RangeBucketPreDefFunctionNode()); + f.checkObject(RangeBucketPreDefFunctionNode(AttributeNode("foo"))); + f.checkObject(DebugWaitFunctionNode(ConstantNode(Int64ResultNode(5)), + 3.3, false)); +} + +TEST_F("testAggregatorResults", Fixture("testAggregatorResults")) { + f.checkObject(SumAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + f.checkObject(XorAggregationResult() + .setXor(Int64ResultNode(7)) + .setExpression(AttributeNode("attributeA"))); + f.checkObject(CountAggregationResult() + .setCount(7) + .setExpression(AttributeNode("attributeA"))); + f.checkObject(MinAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + f.checkObject(MaxAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + f.checkObject(AverageAggregationResult() + .setExpression(AttributeNode("attributeA")) + .setResult(Int64ResultNode(7))); + ExpressionCountAggregationResult expression_count; + expression_count.setExpression(ConstantNode(Int64ResultNode(67))) + .aggregate(DocId(42), HitRank(21)); + f.checkObject(expression_count); +} + +TEST_F("testHitCollection", Fixture("testHitCollection")) { + f.checkObject(FS4Hit()); + f.checkObject(FS4Hit(0, 50.0).setGlobalId(getGlobalId(100))); + f.checkObject(VdsHit()); + f.checkObject(VdsHit("100", 50.0)); + f.checkObject(VdsHit("100", 50.0).setSummary("rawsummary", 10)); + f.checkObject(HitsAggregationResult()); + f.checkObject(HitsAggregationResult() + .setMaxHits(5) + .addHit(FS4Hit(0, 1.0).setGlobalId(getGlobalId(10))) + .addHit(FS4Hit(0, 2.0).setGlobalId(getGlobalId(20))) + .addHit(FS4Hit(0, 
3.0).setGlobalId(getGlobalId(30))) + .addHit(FS4Hit(0, 4.0).setGlobalId(getGlobalId(40))) + .addHit(FS4Hit(0, 5.0).setGlobalId(getGlobalId(50))) + .setExpression(ConstantNode(Int64ResultNode(5)))); + f.checkObject(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(0, 1.0).setGlobalId(getGlobalId(10)) + .setDistributionKey(100)) + .addHit(FS4Hit(0, 2.0).setGlobalId(getGlobalId(20)) + .setDistributionKey(200)) + .addHit(FS4Hit(0, 3.0).setGlobalId(getGlobalId(30)) + .setDistributionKey(300)) + .setExpression(ConstantNode(Int64ResultNode(5)))); + f.checkObject(HitsAggregationResult() + .setMaxHits(3) + .addHit(VdsHit("10", 1.0).setSummary("100", 3)) + .addHit(VdsHit("20", 2.0).setSummary("200", 3)) + .addHit(VdsHit("30", 3.0).setSummary("300", 3)) + .setExpression(ConstantNode(Int64ResultNode(5)))); +} + +TEST_F("testGroupingLevel", Fixture("testGroupingLevel")) { + f.checkObject(GroupingLevel() + .setMaxGroups(100) + .setExpression(createDummyExpression()) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression()))); +} + +TEST_F("testGroup", Fixture("testGroup")) { + f.checkObject(Group()); + f.checkObject(Group().setId(Int64ResultNode(50)) + .setRank(RawRank(10))); + f.checkObject(Group().setId(Int64ResultNode(100)) + .addChild(Group().setId(Int64ResultNode(110))) + .addChild(Group().setId(Int64ResultNode(120)) + .setRank(20.5) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression())) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression()))) + .addChild(Group().setId(Int64ResultNode(130)) + .addChild(Group().setId(Int64ResultNode(131))))); +} + +TEST_F("testGrouping", Fixture("testGrouping")) { + f.checkObject(Grouping()); + f.checkObject(Grouping() + .addLevel(GroupingLevel() + .setMaxGroups(100) + .setExpression(createDummyExpression()) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression()))) + .addLevel(GroupingLevel() + 
.setMaxGroups(10) + .setExpression(createDummyExpression()) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression())) + .addAggregationResult(SumAggregationResult() + .setExpression(createDummyExpression())))); + f.checkObject(Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("folder")) + .addAggregationResult(XorAggregationResult() + .setExpression(MD5BitFunctionNode( + AttributeNode("docid"), 64))) + .addAggregationResult(SumAggregationResult() + .setExpression(MinFunctionNode() + .addArg(AttributeNode("attribute1")) + .addArg(AttributeNode("attribute2"))) + ) + .addAggregationResult(XorAggregationResult() + .setExpression( + XorBitFunctionNode(CatFunctionNode() + .addArg(GetDocIdNamespaceSpecificFunctionNode()) + .addArg(DocumentFieldNode("folder")) + .addArg(DocumentFieldNode("flags")), 64))))); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/grouping/grouping_test.cpp b/searchlib/src/tests/grouping/grouping_test.cpp new file mode 100644 index 00000000000..f9939f0d370 --- /dev/null +++ b/searchlib/src/tests/grouping/grouping_test.cpp @@ -0,0 +1,1912 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("grouping_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/attributemanager.h> +#include <vespa/searchlib/aggregation/hitsaggregationresult.h> +#include <vespa/searchlib/aggregation/fs4hit.h> +#include <vespa/searchlib/aggregation/predicates.h> +#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h> +#include <algorithm> + +using namespace vespalib; +using namespace search; +using namespace search::aggregation; +using namespace search::attribute; +using namespace search::expression; + +//----------------------------------------------------------------------------- + +template<typename A, typename T> +class AttrBuilder +{ +private: + A *_attr; + AttributeVector::SP _attrSP; + +public: + AttrBuilder(const AttrBuilder &rhs) + : _attr(new A(rhs._attr->getName())), + _attrSP(_attr) + { + uint32_t numDocs = rhs._attr->getNumDocs(); + for (uint32_t docid = 0; docid < numDocs; ++docid) { + T val; + uint32_t res = rhs._attr->get(docid, &val, 1); + LOG_ASSERT(res == 1); + add(val); + } + } + AttrBuilder(const std::string &name) + : _attr(new A(name)), + _attrSP(_attr) + { + } + AttrBuilder& operator=(const AttrBuilder &rhs) { + AttrBuilder tmp(rhs); + std::swap(_attr, tmp._attr); + _attrSP.swap(tmp._attrSP); + return *this; + } + AttrBuilder &add(T value) { + DocId ignore; + _attr->addDoc(ignore); + _attr->add(value); + return *this; + } + AttributeVector::SP sp() const { + return _attrSP; + } +}; + +typedef AttrBuilder<SingleIntegerExtAttribute, int64_t> IntAttrBuilder; +typedef AttrBuilder<SingleFloatExtAttribute, double> FloatAttrBuilder; +typedef AttrBuilder<SingleStringExtAttribute, const char *> StringAttrBuilder; + 
+//----------------------------------------------------------------------------- + +class ResultBuilder +{ +private: + std::vector<RankedHit> _hits; + +public: + ResultBuilder() : _hits() {} + ResultBuilder &add(unsigned int docid, HitRank rank = 0) { + RankedHit hit; + hit._docId = docid; + hit._rankValue = rank; + _hits.push_back(hit); + for (uint32_t pos = (_hits.size() - 1); + pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue); + --pos) + { + std::swap(_hits[pos], _hits[pos - 1]); + } + return *this; + } + const RankedHit *hits() const { + return &_hits[0]; + } + uint32_t size() const { + return _hits.size(); + } +}; + +//----------------------------------------------------------------------------- + +class AggregationContext +{ +private: + AttributeManager _attrMan; + ResultBuilder _result; + IAttributeContext::UP _attrCtx; + + AggregationContext(const AggregationContext &); + AggregationContext &operator=(const AggregationContext &); + +public: + AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {} + ResultBuilder &result() { return _result; } + void add(AttributeVector::SP attr) { + _attrMan.add(attr); + } + void setup(Grouping &g) { + g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0)); + } +}; + +//----------------------------------------------------------------------------- + +class Test : public TestApp +{ +public: + bool testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect); + bool testPrune(const Grouping &a, const Grouping &b, + const Group &expect); + bool testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect); + void testAggregationSimple(); + void testAggregationLevels(); + void testAggregationMaxGroups(); + void testAggregationGroupOrder(); + void 
testAggregationGroupRank(); + void testAggregationGroupCapping(); + void testMergeSimpleSum(); + void testMergeLevels(); + void testMergeGroups(); + void testMergeTrees(); + void testPruneSimple(); + void testPruneComplex(); + void testPartialMerging(); + void testCount(); + void testTopN(); + void testFS4HitCollection(); + bool checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket); + bool checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt); + void testFixedWidthBuckets(); + void testThatNanIsConverted(); + void testNanSorting(); + int Main(); +private: + void testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr); + class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + CheckAttributeReferences() : _numrefs(0) { } + int _numrefs; + private: + virtual void execute(vespalib::Identifiable &obj) { + if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) { + _numrefs++; + } + } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; +}; + +//----------------------------------------------------------------------------- + +/** + * Run the given grouping request and verify that the resulting group + * tree matches the expected value. 
+ **/ +bool +Test::testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect) +{ + Grouping tmp = request; // create local copy + ctx.setup(tmp); + tmp.aggregate(ctx.result().hits(), ctx.result().size()); + tmp.cleanupAttributeReferences(); + CheckAttributeReferences attrCheck; + tmp.select(attrCheck, attrCheck); + EXPECT_EQUAL(attrCheck._numrefs, 0); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; + tmp.merge(tmpB); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +} + +/** + * Prune the given grouping request and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testPrune(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.prune(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge a given grouping request to get a partial request back. Verify that the + * partial request is correct. 
+ **/ +bool +Test::testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.mergePartial(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; // create local copy + Grouping tmpC = c; // create local copy + tmp.merge(tmpB); + tmp.merge(tmpC); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +} + +//----------------------------------------------------------------------------- + +/** + * Test collecting the sum of the values from a single attribute + * vector directly into the root node. Consider this a smoke test. 
+ **/ +void +Test::testAggregationSimple() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp()); + ctx.add(FloatAttrBuilder("float").add(3).add(7).add(15).sp()); + ctx.add(StringAttrBuilder("string").add("3").add("7").add("15").sp()); + + char strsum[3] = {-101, '5', 0}; + testAggregationSimpleSum(ctx, SumAggregationResult(), Int64ResultNode(25), FloatResultNode(25), StringResultNode(strsum)); + testAggregationSimpleSum(ctx, MinAggregationResult(), Int64ResultNode(3), FloatResultNode(3), StringResultNode("15")); + testAggregationSimpleSum(ctx, MaxAggregationResult(), Int64ResultNode(15), FloatResultNode(15), StringResultNode("7")); +} + +void Test::testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr) +{ + ExpressionNode::CP clone(aggr); + Grouping request = Grouping() + .setRoot(Group() + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int"))) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float"))) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string"))) + ); + + Group expect = Group() + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int")).setResult(ir)) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float")).setResult(fr)) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string")).setResult(sr)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +/** + * Verify that the backend aggregation will classify and collect on + * the appropriate levels, as indicated by the firstLevel and + * lastLevel parameters. 
 **/
void
Test::testAggregationLevels()
{
    // Two documents, both with attr0=10, attr1=11, attr2=12 and attr3=13.
    AggregationContext ctx;
    ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp());
    ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp());
    ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp());
    ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp());
    ctx.result().add(0).add(1);

    // Root sums attr0; the three levels group by attr1, attr2 and attr3
    // and sum attr2, attr3 and attr1 respectively.
    Grouping baseRequest = Grouping()
        .setRoot(Group()
                 .addResult(SumAggregationResult()
                            .setExpression(AttributeNode("attr0"))))
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("attr1"))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr2"))))
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("attr2"))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr3"))))
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("attr3"))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr1"))));

    // Expected tree when nothing is collected: the root result has no value set.
    Group notDone = Group()
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("attr0")));
// Hmm, do not need to prepare more than the levels needed.                   .setResult(Int64ResultNode(0)));

    // Expected tree after level 0: root sum is 20; the group with id 11
    // exists below it but its own sum is still 0.
    Group done0 = Group()
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("attr0"))
                   .setResult(Int64ResultNode(20)))
        .addChild(Group()
                  .setId(Int64ResultNode(11))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr2"))
                             .setResult(Int64ResultNode(0))));

    // Expected tree after levels 0-1: group 11 now sums to 24 and has an
    // uncollected child with id 12.
    Group done1 = Group()
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("attr0"))
                   .setResult(Int64ResultNode(20)))
        .addChild(Group()
                  .setId(Int64ResultNode(11))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr2"))
                             .setResult(Int64ResultNode(24)))
                  .addChild(Group()
                            .setId(Int64ResultNode(12))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("attr3"))
                                       .setResult(Int64ResultNode(0)))));

    // Expected tree after levels 0-2: group 12 now sums to 26 and has an
    // uncollected child with id 13.
    Group done2 = Group()
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("attr0"))
                   .setResult(Int64ResultNode(20)))
        .addChild(Group()
                  .setId(Int64ResultNode(11))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr2"))
                             .setResult(Int64ResultNode(24)))
                  .addChild(Group()
                            .setId(Int64ResultNode(12))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("attr3"))
                                       .setResult(Int64ResultNode(26)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(13))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("attr1"))
                                                 .setResult(Int64ResultNode(0))))));

    // Expected tree after all levels: the innermost group 13 sums to 22.
    Group done3 = Group()
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("attr0"))
                   .setResult(Int64ResultNode(20)))
        .addChild(Group()
                  .setId(Int64ResultNode(11))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("attr2"))
                             .setResult(Int64ResultNode(24)))
                  .addChild(Group()
                            .setId(Int64ResultNode(12))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("attr3"))
                                       .setResult(Int64ResultNode(26)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(13))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("attr1"))
                                                 .setResult(Int64ResultNode(22))))));

    // Each case below works on its own request derived from baseRequest.
    // Cases that start above level 0 are seeded with the expected output
    // of the preceding levels as their root.
    { // level 0 only
        Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0);
        EXPECT_TRUE(testAggregation(ctx, request, done0));
    }
    { // level 0 and 1
        Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(1);
        EXPECT_TRUE(testAggregation(ctx, request, done1));
    }
    { // level 0,1 and 2
        Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(2);
        EXPECT_TRUE(testAggregation(ctx, request, done2));
    }
    { // level 0,1,2 and 3
        Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(3);
        EXPECT_TRUE(testAggregation(ctx, request, done3));
    }
    { // level 1 with level 0 as input
        Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1).setRoot(done0);
        EXPECT_TRUE(testAggregation(ctx, request, done1));
    }
    { // level 2 with level 0 and 1 as input
        Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(2).setRoot(done1);
        EXPECT_TRUE(testAggregation(ctx, request, done2));
    }
    { // level 3 with level 0,1 and 2 as input
        Grouping request = baseRequest.unchain().setFirstLevel(3).setLastLevel(3).setRoot(done2);
        EXPECT_TRUE(testAggregation(ctx, request, done3));
    }
    { // level 2 and 3 with level 0 and 1 as input
        Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(3).setRoot(done1);
        EXPECT_TRUE(testAggregation(ctx, request, done3));
    }
    { // level 1 without level 0 as input
        Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1);
        EXPECT_TRUE(testAggregation(ctx, request, notDone));
    }
}

/**
 * Verify that the aggregation step does not create more groups than
 * indicated by the maxgroups parameter.
 **/
void
Test::testAggregationMaxGroups()
{
    // Three hits with distinct attribute values, i.e. three candidate groups.
    AggregationContext ctx;
    ctx.add(IntAttrBuilder("attr").add(5).add(10).add(15).sp());
    ctx.result().add(0).add(1).add(2);

    Grouping baseRequest = Grouping()
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("attr")));

    // Expected trees holding zero, one, two and three groups.
    Group empty = Group();
    Group grp1 = empty.unchain().addChild(Group().setId(Int64ResultNode(5)));
    Group grp2 = grp1.unchain().addChild(Group().setId(Int64ResultNode(10)));
    Group grp3 = grp2.unchain().addChild(Group().setId(Int64ResultNode(15)));

    { // max 0 groups
        Grouping request = baseRequest;
        request.levels()[0].setMaxGroups(0);
        EXPECT_TRUE(testAggregation(ctx, request, empty));
    }
    { // max 1 groups
        Grouping request = baseRequest;
        request.levels()[0].setMaxGroups(1);
        EXPECT_TRUE(testAggregation(ctx, request, grp1));
    }
    { // max 2 groups
        Grouping request = baseRequest;
        request.levels()[0].setMaxGroups(2);
        EXPECT_TRUE(testAggregation(ctx, request, grp2));
    }
    { // max 3 groups
        Grouping request = baseRequest;
        request.levels()[0].setMaxGroups(3);
        EXPECT_TRUE(testAggregation(ctx, request, grp3));
    }
    { // max 4 groups
        // A limit above the number of distinct values yields all three groups.
        Grouping request = baseRequest;
        request.levels()[0].setMaxGroups(4);
        EXPECT_TRUE(testAggregation(ctx, request, grp3));
    }
    { // max -1 groups
        // -1 is expected to yield all groups (presumably "no limit").
        Grouping request = baseRequest;
        request.levels()[0].setMaxGroups(-1);
        EXPECT_TRUE(testAggregation(ctx, request, grp3));
    }
}

/**
 * Verify that groups are sorted by group id
 **/
void
Test::testAggregationGroupOrder()
{
    // Attribute values are added out of order; the expected children
    // below are listed in ascending id order.
    AggregationContext ctx;
    ctx.add(IntAttrBuilder("attr").add(10).add(25).add(35).add(5).add(20).add(15).add(30).sp());
    ctx.result().add(0).add(1).add(2).add(3).add(4).add(5).add(6);

    Grouping request = Grouping()
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("attr")));

    Group expect = Group()
        .addChild(Group().setId(Int64ResultNode(5)))
        .addChild(Group().setId(Int64ResultNode(10)))
        .addChild(Group().setId(Int64ResultNode(15)))
        .addChild(Group().setId(Int64ResultNode(20)))
        .addChild(Group().setId(Int64ResultNode(25)))
        .addChild(Group().setId(Int64ResultNode(30)))
        .addChild(Group().setId(Int64ResultNode(35)));

    EXPECT_TRUE(testAggregation(ctx, request, expect));
}

/**
 * Verify that groups are tagged with the appropriate rank value.
 **/
void
Test::testAggregationGroupRank()
{
    // Three groups of three documents each. Every group contains hits
    // with ranks 5, 10 and 15, and every group is expected to end up
    // with rank 15, the highest rank among its hits.
    AggregationContext ctx;
    ctx.add(IntAttrBuilder("attr")
            .add(1).add(1).add(1)
            .add(2).add(2).add(2)
            .add(3).add(3).add(3).sp());
    ctx.result()
        .add(0, 5).add(1, 10).add(2, 15)
        .add(3, 10).add(4, 15).add(5, 5)
        .add(6, 15).add(7, 5).add(8, 10);

    Grouping request = Grouping().addLevel(
            GroupingLevel().setExpression(AttributeNode("attr")));

    Group expect = Group()
        .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(15)))
        .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(15)))
        .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(15)));

    EXPECT_TRUE(testAggregation(ctx, request, expect));
}

/**
 * Verify which groups survive when the number of groups on a level is
 * capped, both with the default ordering and with explicit order-by
 * expressions.
 **/
void
Test::testAggregationGroupCapping()
{
    // Nine documents, each in its own group; document i has attribute
    // value i+1 and rank i+1.
    AggregationContext ctx;
    ctx.add(IntAttrBuilder("attr")
            .add(1).add(2).add(3)
            .add(4).add(5).add(6)
            .add(7).add(8).add(9).sp());
    ctx.result()
        .add(0, 1).add(1, 2).add(2, 3)
        .add(3, 4).add(4, 5).add(5, 6)
        .add(6, 7).add(7, 8).add(8, 9);

    { // no cap: all nine groups are expected
        Grouping request = Grouping().addLevel(
                GroupingLevel().setExpression(AttributeNode("attr")));

        Group expect = Group()
            .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)))
            .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)))
            .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)))
            .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(4)))
            .addChild(Group().setId(Int64ResultNode(5)).setRank(RawRank(5)))
            .addChild(Group().setId(Int64ResultNode(6)).setRank(RawRank(6)))
            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));

        EXPECT_TRUE(testAggregation(ctx, request, expect));
    }
    { // cap of 3 without order-by: the three highest ranked groups are expected
        Grouping request = Grouping().addLevel(
                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")));

        Group expect = Group()
            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)))
            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)))
            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)));

        EXPECT_TRUE(testAggregation(ctx, request, expect));
    }
    { // cap of 3, ordered by the sum with flag 'false': the three largest sums are expected
        Grouping request = Grouping().
                           setFirstLevel(0).
                           setLastLevel(1).
                           addLevel(
                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
                                addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
                                addOrderBy(AggregationRefNode(0), false));

        Group expect = Group()
            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false))
            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false));

        EXPECT_TRUE(testAggregation(ctx, request, expect));
    }
    { // cap of 3, ordered by the sum with flag 'true': the three smallest sums are expected
        Grouping request = Grouping().
                           setFirstLevel(0).
                           setLastLevel(1).
                           addLevel(
                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
                                addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));

        Group expect = Group()
            .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)).addAggregationResult(SumAggregationResult(Int64ResultNode(1)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
            .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)).addAggregationResult(SumAggregationResult(Int64ResultNode(2)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true))
            .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)).addAggregationResult(SumAggregationResult(Int64ResultNode(3)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true));

        EXPECT_TRUE(testAggregation(ctx, request, expect));
    }
    { // cap of 3, ordered by the expression (sum + 3); expected order-by results are 10, 11 and 12
        AddFunctionNode *add = new AddFunctionNode();
        add->addArg(AggregationRefNode(0));
        add->appendArg(ConstantNode(Int64ResultNode(3)));
        // NOTE(review): i1 is constructed from the raw pointer and presumably takes ownership of 'add' - confirm.
        ExpressionNode::CP i1(add);
        Grouping request = Grouping().
                           setFirstLevel(0).
                           setLastLevel(1).
                           addLevel(
                GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")).
                                addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).
                                addOrderBy(i1, false));

        Group expect = Group()
            .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(10)), false))
            .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(11)), false))
            .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(12)), false));

        EXPECT_TRUE(testAggregation(ctx, request, expect));
    }

}

//-----------------------------------------------------------------------------

/**
 * Test merging the sum of the values from a single attribute vector
 * that was collected directly into the root node. Consider this a
 * smoke test.
 **/
void
Test::testMergeSimpleSum()
{
    // Two roots with sums 20 and 30 over the same expression merge to 50.
    Grouping a = Grouping()
        .setRoot(Group()
                 .setId(NullResultNode())
                 .addResult(SumAggregationResult()
                            .setExpression(AttributeNode("foo"))
                            .setResult(Int64ResultNode(20))));

    Grouping b = Grouping()
        .setRoot(Group()
                 .setId(NullResultNode())
                 .addResult(SumAggregationResult()
                            .setExpression(AttributeNode("foo"))
                            .setResult(Int64ResultNode(30))));

    Group expect = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("foo"))
                   .setResult(Int64ResultNode(50)));

    EXPECT_TRUE(testMerge(a, b, expect));
}

/**
 * Verify that frozen levels are not touched during merge.
 **/
void
Test::testMergeLevels()
{
    // Three levels grouping on c1..c3, each summing s1..s3.
    Grouping request = Grouping()
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("c1"))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))))
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("c2"))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s2"))))
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("c3"))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s3"))));

    // Inputs 'a' and 'b' are identical single-path trees with the sums
    // s0=5 (root), s1=10, s2=15 and s3=20 from top to bottom.
    Group a = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(5)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(10)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(15)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(20))))));

    Group b = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(5)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(10)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(15)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(20))))));

    // Every sum doubled: all levels take part in the merge.
    Group expect_all = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(10)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(20)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(30)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(40))))));

    // Root sum left at 5; everything below it merged.
    Group expect_0 = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(5)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(20)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(30)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(40))))));


    // s0 and s1 left untouched; s2 and s3 merged.
    Group expect_1 = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(5)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(10)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(30)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(40))))));


    // Only the innermost sum s3 merged.
    Group expect_2 = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(5)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(10)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(15)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(40))))));


    // Nothing merged: identical to the inputs.
    Group expect_3 = Group()
        .setId(NullResultNode())
        .addResult(SumAggregationResult()
                   .setExpression(AttributeNode("s0"))
                   .setResult(Int64ResultNode(5)))
        .addChild(Group()
                  .setId(Int64ResultNode(10))
                  .addResult(SumAggregationResult()
                             .setExpression(AttributeNode("s1"))
                             .setResult(Int64ResultNode(10)))
                  .addChild(Group()
                            .setId(Int64ResultNode(20))
                            .addResult(SumAggregationResult()
                                       .setExpression(AttributeNode("s2"))
                                       .setResult(Int64ResultNode(15)))
                            .addChild(Group()
                                      .setId(Int64ResultNode(30))
                                      .addResult(SumAggregationResult()
                                                 .setExpression(AttributeNode("s3"))
                                                 .setResult(Int64ResultNode(20))))));

    // Raising firstLevel step by step leaves one more level (starting
    // with the root) out of the merge in each expectation.
    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(a),
                          request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(b),
                          expect_all));
    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(a),
                          request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(b),
                          expect_0));
    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(a),
                          request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(b),
                          expect_1));
    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(a),
                          request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(b),
                          expect_2));
    EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(a),
                          request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(b),
                          expect_3));
}

/**
 * Verify that the number of groups for a level is pruned down to
 * maxGroups, that the remaining groups are the
highest ranked ones,
 * and that they are sorted by group id.
 **/
void
Test::testMergeGroups()
{
    Grouping request = Grouping()
        .addLevel(GroupingLevel()
                  .setExpression(AttributeNode("attr")));

    // The trailing numbers give the position of a group when the merged
    // groups are ordered by rank; a number in parentheses marks the
    // lower-ranked copy of a group that is present in both inputs.
    Group a = Group()
        .setId(NullResultNode())
        .addChild(Group().setId(StringResultNode("05")).setRank(RawRank(5)))
        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(5))) // (2)
        .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) // 1
        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); // 3

    Group b = Group()
        .setId(NullResultNode())
        .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) // 2
        .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) // 4
        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(10))) // (1)
        .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))); // 5

    // Top three by rank (100, 50, 30), listed by id. Groups present in
    // both inputs carry the higher of their two ranks.
    Group expect_3 = Group()
        .setId(NullResultNode())
        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));

    // Top five by rank, listed by id.
    Group expect_5 = Group()
        .setId(NullResultNode())
        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
        .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
        .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));

    // All eight distinct groups, listed by id.
    Group expect_all = Group()
        .setId(NullResultNode())
        .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10)))
        .addChild(Group().setId(StringResultNode("05")).setRank(RawRank( 5)))
        .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50)))
        .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15)))
        .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25)))
        .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100)))
        .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20)))
        .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30)));

    // Each limit is checked in both merge directions (a into b and b into a).
    request.levels()[0].setMaxGroups(3);
    EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_3));
    EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_3));
    request.levels()[0].setMaxGroups(5);
    EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_5));
    EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_5));
    request.levels()[0].setMaxGroups(-1);
    EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_all));
    EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_all));
}

/**
 * Merge two relatively complex tree structures and verify that the
 * end result is as expected.
+ **/ +void +Test::testMergeTrees() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setMaxGroups(3) + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setMaxGroups(2) + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setMaxGroups(1) + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(5)) // merged with 200 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picked up here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ); + + Group b = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picket up 
here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(5)) // merged with 300 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(5)) // merged with 100 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + 
.addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect)); +} + +void +Test::testPruneComplex() +{ + { // First level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("foo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + 
EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Second level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")))) + .addChild(Group() + .setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Grouping request = Grouping().setFirstLevel(2).setLastLevel(2); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Third level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + Group prune = 
Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group() + .setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + Grouping request = Grouping().setFirstLevel(3).setLastLevel(3); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Try pruning a grouping we don't have + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("boz0"))) + .addChild(Group().setId(StringResultNode("foo0"))) + .addChild(Group().setId(StringResultNode("goo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + 
.addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } +} + +/** + * Test partial merge of a grouping tree, where all levels up to "lastLevel" is + * merged. The last level should not contain any children groups, and only empty + * results. + **/ +void +Test::testPartialMerging() +{ + Grouping baseRequest = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + // Cached result + Group cached = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(14)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + 
.setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(22)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + + { // Merge lastlevel 0 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(0))); + + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setLastLevel(3).setRoot(cached), expected)); + } + { + // Merge existing tree. Assume we got modified data down again. 
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0)))) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + 
.setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(cached), expected)); + } +} + +/** + * Test that pruning a simple grouping tree works. + **/ +void +Test::testPruneSimple() +{ + { + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))) + .setFirstLevel(1) + .setLastLevel(1); + + Group a = Group() + .addChild(Group().setId(StringResultNode("foo"))) + .addChild(Group().setId(StringResultNode("bar"))) + .addChild(Group().setId(StringResultNode("baz"))); + + Group b = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + EXPECT_TRUE(testPrune(request.unchain().setFirstLevel(0).setRoot(a), request.unchain().setRoot(b), expect)); + } +} + +/** + * Test that simple counting works as long as we use an expression + * that we init, calculate and ignore. 
+ **/ +void +Test::testTopN() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group() + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + { + Group expect = Group() + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Group expect = Group() + .addResult(CountAggregationResult().setCount(1) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request.setTopN(1), expect)); + } + { + Grouping request2 = Grouping() + .addLevel(GroupingLevel() + .addAggregationResult(SumAggregationResult()) + .addOrderBy(AggregationRefNode(0), false)); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(0); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(1); + EXPECT_TRUE(!request2.needResort()); + request2.setTopN(100); + EXPECT_TRUE(!request2.needResort()); + } +} + +/** + * Test that simple counting works as long as we use an expression + * that we init, calculate and ignore. 
+ **/ +void +Test::testCount() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group() + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + + Group expect = Group() + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +//----------------------------------------------------------------------------- + +bool +Test::checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt) +{ + CountFS4Hits pop; + Grouping tmp = g; + tmp.setFirstLevel(first).setLastLevel(last).select(pop, pop); + return EXPECT_EQUAL(pop.getHitCount(), cnt); +} + +void +Test::testFS4HitCollection() +{ + { // aggregation + AggregationContext ctx; + ctx.result().add(30, 30.0).add(20, 20.0).add(10, 10.0).add(5, 5.0).add(25, 25.0); + + Grouping request = Grouping() + .setRoot(Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(25, 25.0)) + .addHit(FS4Hit(20, 20.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { // merging + + Grouping request = Grouping() + .setRoot(Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(10, 10.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + 
.addHit(FS4Hit(10, 10.0)) + .addHit(FS4Hit(1, 5.0)) + .addHit(FS4Hit(2, 4.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group b = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(3, 7.0)) + .addHit(FS4Hit(4, 6.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group c = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(5, 9.0)) + .addHit(FS4Hit(6, 8.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), request.unchain().setRoot(c), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(c), request.unchain().setRoot(a), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(c), request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + } + { // count hits (for external object selection) + HitsAggregationResult dummyHits = HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(1, 3.0)) + .addHit(FS4Hit(2, 2.0)) + .addHit(FS4Hit(3, 1.0)) + .sort(); + Grouping g = Grouping().setRoot(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + ) + ); + EXPECT_TRUE(checkHits(g, 0, 0, 3)); + EXPECT_TRUE(checkHits(g, 1, 1, 6)); + EXPECT_TRUE(checkHits(g, 2, 2, 6)); + EXPECT_TRUE(checkHits(g, 3, 3, 3)); + EXPECT_TRUE(checkHits(g, 4, 4, 0)); + + EXPECT_TRUE(checkHits(g, 0, 1, 9)); + EXPECT_TRUE(checkHits(g, 0, 2, 15)); + EXPECT_TRUE(checkHits(g, 0, 3, 18)); + EXPECT_TRUE(checkHits(g, 0, 4, 18)); + EXPECT_TRUE(checkHits(g, 1, 4, 15)); + EXPECT_TRUE(checkHits(g, 2, 4, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + + 
EXPECT_TRUE(checkHits(g, 1, 2, 12)); + EXPECT_TRUE(checkHits(g, 2, 3, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + EXPECT_TRUE(checkHits(g, 4, 5, 0)); + } +} + +bool +Test::checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket) +{ + AggregationContext ctx; + ctx.result().add(0); + if (value.getClass().inherits(IntegerResultNode::classId)) { + ctx.add(IntAttrBuilder("attr").add(value.getInteger()).sp()); + } else if (value.getClass().inherits(FloatResultNode::classId)) { + ctx.add(FloatAttrBuilder("attr").add(value.getFloat()).sp()); + } else { + return EXPECT_TRUE(false); + } + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(FixedWidthBucketFunctionNode(AttributeNode("attr")).setWidth(width))); + Group expect = Group().addChild(Group().setId(bucket)); + return testAggregation(ctx, request, expect); +} + +void +Test::testFixedWidthBuckets() +{ + typedef Int64ResultNode Int; + typedef FloatResultNode Float; + typedef IntegerBucketResultNode IntBucket; + typedef FloatBucketResultNode FloatBucket; + + // positive int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(0), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(5), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(9), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(10), IntBucket(10,20))); + EXPECT_TRUE(checkBucket(Int(10), Int(299), IntBucket(290,300))); + + // negative int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(-1), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-5), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-10), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-11), IntBucket(-20,-10))); + EXPECT_TRUE(checkBucket(Int(10), Int(-300), IntBucket(-300,-290))); + + // positive float buckets + EXPECT_TRUE(checkBucket(Int(10), Float(0.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(5.0), FloatBucket(0.0,10.0))); + 
EXPECT_TRUE(checkBucket(Int(10), Float(9.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(10.0), FloatBucket(10.0,20.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(299.0), FloatBucket(290.0,300.0))); + + // negative float buckets + EXPECT_TRUE(checkBucket(Int(10), Float(-1), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-5), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10.0000001), FloatBucket(-20.0,-10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-300), FloatBucket(-300.0,-290.0))); + + // non-integer bucket width + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.0), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.5), FloatBucket(0.5,1.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.4999), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.0001), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.5), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.50001), FloatBucket(-1.0,-0.5))); + + // zero-width buckets + EXPECT_TRUE(checkBucket(Int(0), Int(7), IntBucket(7,7))); + EXPECT_TRUE(checkBucket(Int(0), Float(7.5), FloatBucket(7.5,7.5))); + + // bucket wrap protection + { + int64_t x = std::numeric_limits<int64_t>::min(); + int64_t y = std::numeric_limits<int64_t>::max(); + EXPECT_TRUE(checkBucket(Int(1000), Int(x + 5), IntBucket(x, (x/1000) * 1000))); + EXPECT_TRUE(checkBucket(Int(1000), Int(y - 5), IntBucket((y/1000) * 1000, y))); + } +} + + +void +Test::testNanSorting() +{ + // Attempt at reproducing issue with segfault when setting NaN value. Not + // successful yet, so no point in running test. 
+#if 0 + double nan = sqrt(-1); + EXPECT_TRUE(isnan(nan)); + EXPECT_TRUE(nan != nan); + EXPECT_FALSE(nan < nan); + EXPECT_FALSE(nan > nan); + EXPECT_FALSE(nan < 0.2); + EXPECT_FALSE(nan > 0.2); + EXPECT_FALSE(0.2 < nan); + EXPECT_FALSE(0.2 > nan); + + FastOS_Time timer; + timer.SetNow(); + std::vector<double> groups; + while (timer.MilliSecsToNow() < 60000.0) { + std::vector<double> vec; + srand((unsigned int)timer.MilliSecs()); + size_t limit = 2345678; + size_t mod = rand() % limit; + for (size_t i = 0; i < limit; i++) { + if ((i % mod) == 0) + vec.push_back(nan); + else + vec.push_back(1.0 * rand()); + } + } + std::sort(groups.begin(), groups.end()); +#endif +} + +void +Test::testThatNanIsConverted() +{ + Group g; + double nan = sqrt(-1); + g.setRank(nan); + // Must have been changed for this to work. + ASSERT_EQUAL(g.getRank(), g.getRank()); +} + +//----------------------------------------------------------------------------- + +struct RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }}; + +//----------------------------------------------------------------------------- + +int +Test::Main() +{ + RunDiff runDiff; + (void) runDiff; + TEST_DEBUG("lhs.out", "rhs.out"); + TEST_INIT("grouping_test"); + testAggregationSimple(); + testAggregationLevels(); + testAggregationMaxGroups(); + testAggregationGroupOrder(); + testAggregationGroupRank(); + testAggregationGroupCapping(); + testMergeSimpleSum(); + testMergeLevels(); + testMergeGroups(); + testMergeTrees(); + testPruneSimple(); + testPruneComplex(); + testPartialMerging(); + testFS4HitCollection(); + testFixedWidthBuckets(); + testCount(); + testTopN(); + testThatNanIsConverted(); + testNanSorting(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/grouping/hyperloglog_test.cpp b/searchlib/src/tests/grouping/hyperloglog_test.cpp new file mode 100644 index 00000000000..15b4ae9ae39 --- /dev/null +++ b/searchlib/src/tests/grouping/hyperloglog_test.cpp @@ -0,0 +1,92 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for hyperloglog. + +#include <vespa/log/log.h> +LOG_SETUP("hyperloglog_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/grouping/hyperloglog.h> +#include <vespa/vespalib/objects/nboserializer.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> + +using vespalib::NBOSerializer; +using vespalib::nbostream; +using namespace search; + +namespace { + +TEST("require that hyperloglog changes from sparse to normal sketch") { + HyperLogLog<> hll; + for (size_t i = 0; i < 256; ++i) { + EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll.getSketch())); + EXPECT_EQUAL(1, hll.aggregate(i)); + } + EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll.getSketch())); + EXPECT_EQUAL(23, hll.aggregate(256)); + EXPECT_TRUE(dynamic_cast<const NormalSketch<> *>(&hll.getSketch())); +} + +TEST("require that hyperloglog can be (de)serialized") { + HyperLogLog<> hll; + for (size_t i = 0; i < 256; ++i) { + EXPECT_EQUAL(1, hll.aggregate(i)); + } + nbostream stream; + NBOSerializer serializer(stream); + + // Serializes with sparse sketch + hll.serialize(serializer); + HyperLogLog<> hll2; + hll2.deserialize(serializer); + EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll2.getSketch())); + EXPECT_EQUAL(hll.getSketch(), hll2.getSketch()); + + // Serializes with normal sketch. 
+ EXPECT_EQUAL(23, hll2.aggregate(256)); + hll2.serialize(serializer); + hll.deserialize(serializer); + EXPECT_TRUE(dynamic_cast<const NormalSketch<> *>(&hll.getSketch())); + EXPECT_EQUAL(hll2.getSketch(), hll.getSketch()); +} + +TEST("require that sparse hyperloglogs can be merged") { + HyperLogLog<> hll; + for (size_t i = 0; i < 100; ++i) { + EXPECT_EQUAL(1, hll.aggregate(i)); + } + HyperLogLog<> hll2; + for (size_t i = 100; i < 255; ++i) { + EXPECT_EQUAL(1, hll2.aggregate(i)); + } + hll.merge(hll2); + EXPECT_TRUE(dynamic_cast<const SparseSketch<> *>(&hll.getSketch())); + + EXPECT_EQUAL(1, hll2.aggregate(255)); + hll.merge(hll2); + EXPECT_TRUE(dynamic_cast<const NormalSketch<> *>(&hll.getSketch())); +} + +TEST("require that mixed hyperloglogs can be merged") { + HyperLogLog<> hll; + for (size_t i = 0; i < 256; ++i) { + EXPECT_EQUAL(1, hll.aggregate(i)); + } + EXPECT_EQUAL(23, hll.aggregate(256)); // normal + HyperLogLog<> hll2; + for (size_t i = 100; i < 255; ++i) { + EXPECT_EQUAL(1, hll2.aggregate(i)); // sparse + } + hll.merge(hll2); // normal + sparse + hll2.merge(hll); // sparse + normal + EXPECT_EQUAL(hll.getSketch(), hll2.getSketch()); + + EXPECT_EQUAL(23, hll2.aggregate(500)); + hll.merge(hll2); // normal + normal + EXPECT_EQUAL(hll.getSketch(), hll2.getSketch()); + EXPECT_EQUAL(0, hll.aggregate(500)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/grouping/sketch_test.cpp b/searchlib/src/tests/grouping/sketch_test.cpp new file mode 100644 index 00000000000..c6c0b144983 --- /dev/null +++ b/searchlib/src/tests/grouping/sketch_test.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for sketch. 
+ +#include <vespa/log/log.h> +LOG_SETUP("sketch_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/grouping/sketch.h> +#include <vespa/vespalib/objects/nboserializer.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/stringfmt.h> + +using vespalib::NBOSerializer; +using vespalib::nbostream; +using namespace search; +using vespalib::make_string; + +namespace { + +TEST("require that normal sketch is initialized") { + NormalSketch<> sketch; + for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + EXPECT_EQUAL(0, sketch.bucket[i]); + } +} + +template <typename NormalSketch> +void checkBucketValue(NormalSketch &sketch, size_t bucket, uint32_t value) { + EXPECT_EQUAL(value, static_cast<size_t>(sketch.bucket[bucket])); +} + +template <int BucketBits, typename HashT> +void checkCountPrefixZeros() { + TEST_STATE(make_string("BucketBits: %d, HashBits: %d", + BucketBits, int(sizeof(HashT) * 8)).c_str()); + NormalSketch<BucketBits, HashT> sketch; + const uint32_t prefix_bits = sizeof(HashT) * 8 - BucketBits; + const uint32_t hash_width = sizeof(HashT) * 8; + for (size_t i = 0; i < prefix_bits ; ++i) { + int increase = sketch.aggregate(HashT(1) << (hash_width - 1 - i)); + EXPECT_EQUAL(1, increase); // bucket increases by 1 for each call + checkBucketValue(sketch, 0, i + 1); + } + sketch.aggregate(0); + checkBucketValue(sketch, prefix_bits + 1, 0); + + checkBucketValue(sketch, HashT(1) << (BucketBits - 1), 0); + sketch.aggregate(HashT(1) << (hash_width - 1 - prefix_bits)); + checkBucketValue(sketch, 0, prefix_bits + 1); + checkBucketValue(sketch, HashT(1) << (BucketBits - 1), prefix_bits + 1); +} + +TEST("require that prefix zeros are counted.") { + checkCountPrefixZeros<10, uint32_t>(); + checkCountPrefixZeros<12, uint32_t>(); + checkCountPrefixZeros<10, uint64_t>(); + checkCountPrefixZeros<12, uint64_t>(); +} + +TEST("require that aggregate returns bucket increase") { + NormalSketch<> 
sketch; + int increase = sketch.aggregate(-1); + EXPECT_EQUAL(1, increase); + increase = sketch.aggregate(1023); + EXPECT_EQUAL(22, increase); + increase = sketch.aggregate(0); + EXPECT_EQUAL(23, increase); +} + +TEST("require that instances can be merged.") { + NormalSketch<> sketch; + sketch.aggregate(0); + NormalSketch<> sketch2; + sketch2.aggregate(-1); + sketch.merge(sketch2); + checkBucketValue(sketch, 0, 23); + checkBucketValue(sketch, 1023, 1); +} + +TEST("require that different sketch type instances can be merged.") { + NormalSketch<> sketch; + sketch.aggregate(0); + SparseSketch<> sketch2; + sketch2.aggregate(-1); + sketch.merge(sketch2); + checkBucketValue(sketch, 0, 23); + checkBucketValue(sketch, 1023, 1); +} + +TEST("require that normal sketch can be (de)serialized") { + NormalSketch<> sketch; + for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + sketch.aggregate(i | (1 << ((i % sketch.bucketBits) + + sketch.bucketBits))); + } + nbostream stream; + NBOSerializer serializer(stream); + sketch.serialize(serializer); + EXPECT_EQUAL(31u, stream.size()); + uint32_t val; + stream >> val; + EXPECT_TRUE(sketch.BUCKET_COUNT == val); + stream >> val; + EXPECT_EQUAL(23u, val); + stream.adjustReadPos(-2 * sizeof(uint32_t)); + NormalSketch<> sketch2; + sketch2.deserialize(serializer); + EXPECT_EQUAL(sketch, sketch2); +} + +TEST("require that uncompressed data in normal sketch can be deserialized") { + NormalSketch<> sketch; + nbostream stream; + NBOSerializer serializer(stream); + stream << sketch.BUCKET_COUNT; + stream << sketch.BUCKET_COUNT; + const int hash_bits = sizeof(NormalSketch<>::hash_type) * 8; + const int value_bits = hash_bits - sketch.bucketBits; + for (size_t i = 0; i < sketch.BUCKET_COUNT; ++i) { + char bucket_val = (i % value_bits) + 1; + stream << bucket_val; + sketch.aggregate(i | (1 << (hash_bits - bucket_val))); + } + NormalSketch<> sketch2; + sketch2.deserialize(serializer); + EXPECT_EQUAL(sketch, sketch2); +} + +TEST("require that sparse 
sketch can be (de)serialized") { + SparseSketch<> sketch; + const uint32_t hash_count = 10; + for (size_t hash = 0; hash < hash_count; ++hash) { + sketch.aggregate(hash); + } + nbostream stream; + NBOSerializer serializer(stream); + sketch.serialize(serializer); + EXPECT_EQUAL(4 * hash_count + 4u, stream.size()); + uint32_t val; + stream >> val; + EXPECT_EQUAL(hash_count, val); + stream.adjustReadPos(-1 * sizeof(uint32_t)); + SparseSketch<> sketch2; + sketch2.deserialize(serializer); + EXPECT_EQUAL(sketch, sketch2); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/groupingengine/.gitignore b/searchlib/src/tests/groupingengine/.gitignore new file mode 100644 index 00000000000..1eb2fc1fb29 --- /dev/null +++ b/searchlib/src/tests/groupingengine/.gitignore @@ -0,0 +1,7 @@ +/lhs.out +/rhs.out +/diff.txt +/groupingengine_benchmark +/vgcore.* +searchlib_groupingengine_test_app +searchlib_groupingengine_benchmark_app diff --git a/searchlib/src/tests/groupingengine/CMakeLists.txt b/searchlib/src/tests/groupingengine/CMakeLists.txt new file mode 100644 index 00000000000..74f4574a9a4 --- /dev/null +++ b/searchlib/src/tests/groupingengine/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_groupingengine_test_app + SOURCES + groupingengine_test.cpp + DEPENDS + searchlib +) +#vespa_add_test(NAME searchlib_groupingengine_test_app COMMAND searchlib_groupingengine_test_app) +vespa_add_executable(searchlib_groupingengine_benchmark_app + SOURCES + groupingengine_benchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_groupingengine_benchmark_app COMMAND searchlib_groupingengine_benchmark_app BENCHMARK) diff --git a/searchlib/src/tests/groupingengine/DESC b/searchlib/src/tests/groupingengine/DESC new file mode 100644 index 00000000000..1aa6cb37e89 --- /dev/null +++ b/searchlib/src/tests/groupingengine/DESC @@ -0,0 +1 @@ +grouping test. Take a look at grouping.cpp for details. diff --git a/searchlib/src/tests/groupingengine/FILES b/searchlib/src/tests/groupingengine/FILES new file mode 100644 index 00000000000..af7f7e71257 --- /dev/null +++ b/searchlib/src/tests/groupingengine/FILES @@ -0,0 +1,4 @@ +grouping.cpp +lhs.out +rhs.out +diff.txt diff --git a/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp new file mode 100644 index 00000000000..b7136741a4c --- /dev/null +++ b/searchlib/src/tests/groupingengine/groupingengine_benchmark.cpp @@ -0,0 +1,292 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/attributemanager.h> +#include <vespa/searchlib/aggregation/hitsaggregationresult.h> +#include <vespa/searchlib/aggregation/fs4hit.h> +#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h> +#include <vespa/searchlib/grouping/groupingengine.h> +#include <algorithm> +#include <vespa/vespalib/objects/objectpredicate.h> +#include <vespa/vespalib/objects/objectoperation.h> +#include <vespa/vespalib/util/rusage.h> +LOG_SETUP("grouping_benchmark"); + +using namespace vespalib; +using namespace search; +using namespace search::attribute; +using namespace search::expression; +using namespace search::aggregation; +using namespace search::grouping; + +//----------------------------------------------------------------------------- + +template<typename A, typename T> +class AttrBuilder +{ +private: + A *_attr; + AttributeVector::SP _attrSP; + +public: + AttrBuilder(const AttrBuilder &rhs) + : _attr(new A(rhs._attr->getName())), + _attrSP(_attr) + { + uint32_t numDocs = rhs._attr->getNumDocs(); + for (uint32_t docid = 0; docid < numDocs; ++docid) { + T val; + uint32_t res = rhs._attr->get(docid, &val, 1); + LOG_ASSERT(res == 1); + add(val); + } + } + AttrBuilder(const std::string &name) + : _attr(new A(name)), + _attrSP(_attr) + { + } + AttrBuilder& operator=(const AttrBuilder &rhs) { + AttrBuilder tmp(rhs); + std::swap(_attr, tmp._attr); + _attrSP.swap(tmp._attrSP); + return *this; + } + AttrBuilder &add(T value) { + DocId ignore; + _attr->addDoc(ignore); + _attr->add(value); + return *this; + } + AttributeVector::SP sp() const { + return _attrSP; + } +}; + +typedef AttrBuilder<SingleIntegerExtAttribute, int64_t> IntAttrBuilder; +typedef 
AttrBuilder<SingleFloatExtAttribute, double> FloatAttrBuilder; +typedef AttrBuilder<SingleStringExtAttribute, const char *> StringAttrBuilder; + +//----------------------------------------------------------------------------- + +class ResultBuilder +{ +private: + std::vector<RankedHit> _hits; + +public: + ResultBuilder() : _hits() {} + ResultBuilder &add(unsigned int docid, HitRank rank = 0) { + RankedHit hit; + hit._docId = docid; + hit._rankValue = rank; + _hits.push_back(hit); + for (uint32_t pos = (_hits.size() - 1); + pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue); + --pos) + { + std::swap(_hits[pos], _hits[pos - 1]); + } + return *this; + } + const RankedHit *hits() const { + return &_hits[0]; + } + uint32_t size() const { + return _hits.size(); + } +}; + +//----------------------------------------------------------------------------- + +class AggregationContext +{ +private: + AttributeManager _attrMan; + ResultBuilder _result; + IAttributeContext::UP _attrCtx; + + AggregationContext(const AggregationContext &); + AggregationContext &operator=(const AggregationContext &); + +public: + AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {} + ResultBuilder &result() { return _result; } + void add(AttributeVector::SP attr) { + _attrMan.add(attr); + } + void setup(Grouping &g) { + g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0)); + } +}; + +//----------------------------------------------------------------------------- + +class Test : public TestApp +{ +public: +private: + bool testAggregation(AggregationContext &ctx, const Grouping &request, bool useEngine); + void benchmarkIntegerSum(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups); + void benchmarkIntegerCount(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups); + class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + CheckAttributeReferences() : 
_numrefs(0) { } + int _numrefs; + private: + virtual void execute(vespalib::Identifiable &obj) { + if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) { + _numrefs++; + } + } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; + int Main(); +}; + +//----------------------------------------------------------------------------- + +/** + * Run the given grouping request and verify that the resulting group + * tree matches the expected value. + **/ +bool +Test::testAggregation(AggregationContext &ctx, const Grouping &request, bool useEngine) +{ + Grouping tmp = request; // create local copy + ctx.setup(tmp); + if (useEngine) { + GroupingEngine engine(tmp); + engine.aggregate(ctx.result().hits(), ctx.result().size()); + Group::UP result = engine.createResult(); + } else { + tmp.aggregate(ctx.result().hits(), ctx.result().size()); + } + tmp.cleanupAttributeReferences(); + CheckAttributeReferences attrCheck; + tmp.select(attrCheck, attrCheck); + EXPECT_EQUAL(attrCheck._numrefs, 0); + return true; +} + +void +Test::benchmarkIntegerSum(bool useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups) +{ + IntAttrBuilder attrB("attr0"); + for (size_t i=0; i < numDocs; i++) { + attrB.add(i); + } + AggregationContext ctx; + for(size_t i(0); i < numDocs; i++) { + ctx.result().add(i, numDocs-i); + } + ctx.add(attrB.sp()); + GroupingLevel level; + level.setExpression(AttributeNode("attr0")).setMaxGroups(maxGroups); + level.addResult(SumAggregationResult().setExpression(AttributeNode("attr0"))); + if (maxGroups >= 0) { + level.addOrderBy(AggregationRefNode(0), false); + } + Grouping baseRequest = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(level); + + for (size_t i(0); i < numQueries; i++) { + testAggregation(ctx, baseRequest, useEngine); + } +} + +void +Test::benchmarkIntegerCount(bool 
useEngine, size_t numDocs, size_t numQueries, int64_t maxGroups) +{ + IntAttrBuilder attrB("attr0"); + for (size_t i=0; i < numDocs; i++) { + attrB.add(i); + } + AggregationContext ctx; + for(size_t i(0); i < numDocs; i++) { + ctx.result().add(i); + } + ctx.add(attrB.sp()); + GroupingLevel level; + level.setExpression(AttributeNode("attr0")).setMaxGroups(maxGroups); + level.addResult(CountAggregationResult().setExpression(AttributeNode("attr0"))); + if (maxGroups >= 0) { + level.addOrderBy(AggregationRefNode(0), false); + } + Grouping baseRequest = Grouping() + .setFirstLevel(0) + .setLastLevel(1) + .setRoot(Group() + .addResult(CountAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(level); + + for (size_t i(0); i < numQueries; i++) { + testAggregation(ctx, baseRequest, useEngine); + } +} + +int +Test::Main() +{ + size_t numDocs = 1000000; + size_t numQueries = 1000; + int64_t maxGroups = -1; + bool useEngine = true; + vespalib::string idType = "int"; + vespalib::string aggrType = "sum"; + if (_argc > 1) { + useEngine = (strcmp(_argv[1], "tree") != 0); + } + if (_argc > 2) { + idType = _argv[2]; + } + if (_argc > 3) { + aggrType = _argv[3]; + } + if (_argc > 4) { + numDocs = strtol(_argv[4], NULL, 0); + } + if (_argc > 5) { + numQueries = strtol(_argv[5], NULL, 0); + } + if (_argc > 6) { + maxGroups = strtol(_argv[6], NULL, 0); + } + TEST_INIT("grouping_benchmark"); + LOG(info, "sizeof(Group) = %ld", sizeof(Group)); + LOG(info, "sizeof(ResultNode::CP) = %ld", sizeof(ResultNode::CP)); + LOG(info, "sizeof(RawRank) = %ld", sizeof(RawRank)); + LOG(info, "sizeof(SumAggregationResult) = %ld", sizeof(SumAggregationResult)); + LOG(info, "sizeof(CountAggregationResult) = %ld", sizeof(CountAggregationResult)); + LOG(info, "sizeof(Int64ResultNode) = %ld", sizeof(Int64ResultNode)); + + LOG(info, "sizeof(Group::ExpressionVector) = %ld", sizeof(Group::ExpressionVector)); + fastos::TimeStamp start(fastos::ClockSystem::now()); + if (idType == "int") { + if 
(aggrType == "sum") { + benchmarkIntegerSum(useEngine, numDocs, numQueries, maxGroups); + } else if (aggrType == "count") { + benchmarkIntegerCount(useEngine, numDocs, numQueries, maxGroups); + } else { + ASSERT_TRUE(false); + } + } else { + ASSERT_TRUE(false); + } + LOG(info, "rusage = {\n%s\n}", vespalib::RUsage::createSelf(start).toString().c_str()); + ASSERT_EQUAL(0, kill(0, SIGPROF)); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/groupingengine/groupingengine_test.cpp b/searchlib/src/tests/groupingengine/groupingengine_test.cpp new file mode 100644 index 00000000000..ab371cc3dcc --- /dev/null +++ b/searchlib/src/tests/groupingengine/groupingengine_test.cpp @@ -0,0 +1,1985 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("groupingengine_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/attribute/attributemanager.h> +#include <vespa/searchlib/aggregation/hitsaggregationresult.h> +#include <vespa/searchlib/aggregation/fs4hit.h> +#include <vespa/searchlib/aggregation/predicates.h> +#include <vespa/searchlib/expression/fixedwidthbucketfunctionnode.h> +#include <vespa/searchlib/grouping/groupingengine.h> +#include <algorithm> + +using namespace vespalib; +using namespace search; +using namespace search::attribute; +using namespace search::expression; +using namespace search::aggregation; +using namespace search::grouping; + +//----------------------------------------------------------------------------- + +template<typename A, typename T> +class AttrBuilder +{ +private: + A *_attr; + AttributeVector::SP _attrSP; + +public: + AttrBuilder(const AttrBuilder &rhs) + : _attr(new A(rhs._attr->getName())), 
+ _attrSP(_attr) + { + uint32_t numDocs = rhs._attr->getNumDocs(); + for (uint32_t docid = 0; docid < numDocs; ++docid) { + T val; + uint32_t res = rhs._attr->get(docid, &val, 1); + LOG_ASSERT(res == 1); + add(val); + } + } + AttrBuilder(const std::string &name) + : _attr(new A(name)), + _attrSP(_attr) + { + } + AttrBuilder& operator=(const AttrBuilder &rhs) { + AttrBuilder tmp(rhs); + std::swap(_attr, tmp._attr); + _attrSP.swap(tmp._attrSP); + return *this; + } + AttrBuilder &add(T value) { + DocId ignore; + _attr->addDoc(ignore); + _attr->add(value); + return *this; + } + AttributeVector::SP sp() const { + return _attrSP; + } +}; + +typedef AttrBuilder<SingleIntegerExtAttribute, int64_t> IntAttrBuilder; +typedef AttrBuilder<SingleFloatExtAttribute, double> FloatAttrBuilder; +typedef AttrBuilder<SingleStringExtAttribute, const char *> StringAttrBuilder; + +//----------------------------------------------------------------------------- + +class ResultBuilder +{ +private: + std::vector<RankedHit> _hits; + +public: + ResultBuilder() : _hits() {} + ResultBuilder &add(unsigned int docid, HitRank rank = 0) { + RankedHit hit; + hit._docId = docid; + hit._rankValue = rank; + _hits.push_back(hit); + for (uint32_t pos = (_hits.size() - 1); + pos > 0 && (_hits[pos]._rankValue > _hits[pos - 1]._rankValue); + --pos) + { + std::swap(_hits[pos], _hits[pos - 1]); + } + return *this; + } + const RankedHit *hits() const { + return &_hits[0]; + } + uint32_t size() const { + return _hits.size(); + } +}; + +//----------------------------------------------------------------------------- + +class AggregationContext +{ +private: + AttributeManager _attrMan; + ResultBuilder _result; + IAttributeContext::UP _attrCtx; + + AggregationContext(const AggregationContext &); + AggregationContext &operator=(const AggregationContext &); + +public: + AggregationContext() : _attrMan(), _result(), _attrCtx(_attrMan.createContext()) {} + ResultBuilder &result() { return _result; } + void 
add(AttributeVector::SP attr) { + _attrMan.add(attr); + } + void setup(Grouping &g) { + g.configureStaticStuff(ConfigureStaticParams(_attrCtx.get(), 0)); + } +}; + +//----------------------------------------------------------------------------- + +class Test : public TestApp +{ +public: + bool testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, + const Group &expect); + bool testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect); + bool testPrune(const Grouping &a, const Grouping &b, + const Group &expect); + bool testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect); + void testAggregationSimple(); + void testAggregationLevels(); + void testAggregationMaxGroups(); + void testAggregationGroupOrder(); + void testAggregationGroupRank(); + void testAggregationGroupCapping(); + void testMergeSimpleSum(); + void testMergeLevels(); + void testMergeGroups(); + void testMergeTrees(); + void testPruneSimple(); + void testPruneComplex(); + void testPartialMerging(); + void testCount(); + void testTopN(); + void testFS4HitCollection(); + bool checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket); + bool checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt); + void testFixedWidthBuckets(); + void testThatNanIsConverted(); + void testNanSorting(); + void testGroupingEngineFromRequest(); + int Main(); +private: + bool verifyEqual(const Group & a, const Group & b); + void testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr); + class CheckAttributeReferences : public vespalib::ObjectOperation, public vespalib::ObjectPredicate + { + public: + CheckAttributeReferences() : _numrefs(0) { } + int _numrefs; + private: + virtual void execute(vespalib::Identifiable 
&obj) { + if (static_cast<AttributeNode &>(obj).getAttribute() != NULL) { + _numrefs++; + } + } + virtual bool check(const vespalib::Identifiable &obj) const { return obj.inherits(AttributeNode::classId); } + }; +}; + +//----------------------------------------------------------------------------- + +/** + * Run the given grouping request and verify that the resulting group + * tree matches the expected value. + **/ +bool +Test::testAggregation(AggregationContext &ctx, + const Grouping &request, + const Group &expect) +{ + Grouping tmp = request; // create local copy + ctx.setup(tmp); + GroupingEngine engine(tmp); + verifyEqual(*engine.createResult(), tmp.getRoot()); + engine.aggregate(ctx.result().hits(), ctx.result().size()); + tmp.cleanupAttributeReferences(); + CheckAttributeReferences attrCheck; + tmp.select(attrCheck, attrCheck); + Group::UP result = engine.createResult(); + EXPECT_EQUAL(attrCheck._numrefs, 0); + return verifyEqual(*result, expect); +} + +bool Test::verifyEqual(const Group & a, const Group & b) +{ + bool ok = EXPECT_EQUAL(a.asString(), b.asString()); + if (!ok) { + std::cerr << a.asString() << std::endl << b.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; +#if 0 + tmp.merge(tmpB); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +#else + GroupingEngine eA(tmp); + GroupingEngine eB(tmpB); + verifyEqual(*eA.createResult(), a.getRoot()); + verifyEqual(*eB.createResult(), b.getRoot()); + eA.merge(eB); + return verifyEqual(*eA.createResult(), expect); +#endif +} + +/** + * Prune the given grouping request and verify that the resulting + * group tree matches the expected value. 
+ **/ +bool +Test::testPrune(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.prune(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge a given grouping request to get a partial request back. Verify that the + * partial request is correct. + **/ +bool +Test::testPartialMerge(const Grouping &a, const Grouping &b, + const Group &expect) +{ + Grouping tmp = a; // create local copy + tmp.mergePartial(b); + bool ok = EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); + if (!ok) { + std::cerr << tmp.getRoot().asString() << std::endl << expect.asString() << std::endl; + } + return ok; +} + +/** + * Merge the given grouping requests and verify that the resulting + * group tree matches the expected value. + **/ +bool +Test::testMerge(const Grouping &a, const Grouping &b, const Grouping &c, + const Group &expect) +{ + Grouping tmp = a; // create local copy + Grouping tmpB = b; // create local copy + Grouping tmpC = c; // create local copy + tmp.merge(tmpB); + tmp.merge(tmpC); + tmp.postMerge(); + tmp.sortById(); + return EXPECT_EQUAL(tmp.getRoot().asString(), expect.asString()); +} + +//----------------------------------------------------------------------------- + +/** + * Test collecting the sum of the values from a single attribute + * vector directly into the root node. Consider this a smoke test. 
+ **/ +void +Test::testAggregationSimple() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("int").add(3).add(7).add(15).sp()); + ctx.add(FloatAttrBuilder("float").add(3).add(7).add(15).sp()); + ctx.add(StringAttrBuilder("string").add("3").add("7").add("15").sp()); + + char strsum[3] = {-101, '5', 0}; + testAggregationSimpleSum(ctx, SumAggregationResult(), Int64ResultNode(25), FloatResultNode(25), StringResultNode(strsum)); + testAggregationSimpleSum(ctx, MinAggregationResult(), Int64ResultNode(3), FloatResultNode(3), StringResultNode("15")); + testAggregationSimpleSum(ctx, MaxAggregationResult(), Int64ResultNode(15), FloatResultNode(15), StringResultNode("7")); +} + +void Test::testAggregationSimpleSum(AggregationContext & ctx, const AggregationResult & aggr, const ResultNode & ir, const ResultNode & fr, const ResultNode & sr) +{ + ExpressionNode::CP clone(aggr); + Grouping request = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int"))) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float"))) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string"))) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("int")).setResult(ir)) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("float")).setResult(fr)) + .addResult(static_cast<AggregationResult &>(*clone).setExpression(AttributeNode("string")).setResult(sr)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +/** + * Verify that the backend aggregation will classify and collect on + * the appropriate levels, as indicated by the firstLevel and + * lastLevel parameters. 
+ **/ +void +Test::testAggregationLevels() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp()); + ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp()); + ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp()); + ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp()); + ctx.result().add(0).add(1); + + Grouping baseRequest = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")))); + + Group notDone = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0"))); +// Hmm, do not need to prepare more than the levels needed. 
.setResult(Int64ResultNode(0))); + + Group done0 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(0)))); + + Group done1 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(0))))); + + Group done2 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(26))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")) + .setResult(Int64ResultNode(0)))))); + + Group done3 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(11)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")) + .setResult(Int64ResultNode(24))) + .addChild(Group() + .setId(Int64ResultNode(12)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")) + .setResult(Int64ResultNode(26))) + 
.addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")) + .setResult(Int64ResultNode(22)))))); + + { // level 0 only + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + EXPECT_TRUE(testAggregation(ctx, request, done0)); + } + { // level 0 and 1 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(1); + EXPECT_TRUE(testAggregation(ctx, request, done1)); + } + { // level 0,1 and 2 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(2); + EXPECT_TRUE(testAggregation(ctx, request, done2)); + } + { // level 0,1,2 and 3 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 1 with level 0 as input + Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1).setRoot(done0); + EXPECT_TRUE(testAggregation(ctx, request, done1)); + } + { // level 2 with level 0 and 1 as input + Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(2).setRoot(done1); + EXPECT_TRUE(testAggregation(ctx, request, done2)); + } + { // level 3 with level 0,1 and 2 as input + Grouping request = baseRequest.unchain().setFirstLevel(3).setLastLevel(3).setRoot(done2); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } + { // level 2 and 3 with level 0 and 1 as input + Grouping request = baseRequest.unchain().setFirstLevel(2).setLastLevel(3).setRoot(done1); + EXPECT_TRUE(testAggregation(ctx, request, done3)); + } +#if 0 + { // level 1 without level 0 as input + Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + EXPECT_TRUE(testAggregation(ctx, request, notDone)); + } +#else + //#warning "Test has been temporarily disabled" +#endif +} + +/** + * Verify that the aggregation step does not create more groups than + * indicated by the maxgroups parameter. 
+ **/ +void +Test::testAggregationMaxGroups() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr").add(5).add(10).add(15).sp()); + ctx.result().add(0).add(1).add(2); + + Grouping baseRequest = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))); + + Group empty = Group().setId(NullResultNode()); + Group grp1 = empty.unchain().addChild(Group().setId(Int64ResultNode(5))); + Group grp2 = grp1.unchain().addChild(Group().setId(Int64ResultNode(10))); + Group grp3 = grp2.unchain().addChild(Group().setId(Int64ResultNode(15))); + + { // max 0 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(0); + EXPECT_TRUE(testAggregation(ctx, request, empty)); + } + { // max 1 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(1); + EXPECT_TRUE(testAggregation(ctx, request, grp1)); + } + { // max 2 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(2); + EXPECT_TRUE(testAggregation(ctx, request, grp2)); + } + { // max 3 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(3); + EXPECT_TRUE(testAggregation(ctx, request, grp3)); + } + { // max 4 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(4); + EXPECT_TRUE(testAggregation(ctx, request, grp3)); + } + { // max -1 groups + Grouping request = baseRequest; + request.levels()[0].setMaxGroups(-1); + EXPECT_TRUE(testAggregation(ctx, request, grp3)); + } +} + +/** + * Verify that groups are sorted by group id + **/ +void +Test::testAggregationGroupOrder() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr").add(10).add(25).add(35).add(5).add(20).add(15).add(30).sp()); + ctx.result().add(0).add(1).add(2).add(3).add(4).add(5).add(6); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))); + + Group expect = Group() + .setId(NullResultNode()) 
+ .addChild(Group().setId(Int64ResultNode(5))) + .addChild(Group().setId(Int64ResultNode(10))) + .addChild(Group().setId(Int64ResultNode(15))) + .addChild(Group().setId(Int64ResultNode(20))) + .addChild(Group().setId(Int64ResultNode(25))) + .addChild(Group().setId(Int64ResultNode(30))) + .addChild(Group().setId(Int64ResultNode(35))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +/** + * Verify that groups are tagged with the appropriate rank value. + **/ +void +Test::testAggregationGroupRank() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr") + .add(1).add(1).add(1) + .add(2).add(2).add(2) + .add(3).add(3).add(3).sp()); + ctx.result() + .add(0, 5).add(1, 10).add(2, 15) + .add(3, 10).add(4, 15).add(5, 5) + .add(6, 15).add(7, 5).add(8, 10); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel().setExpression(AttributeNode("attr"))); + + Group expect = Group() + .setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(15))) + .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(15))) + .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(15))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +void +Test::testAggregationGroupCapping() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr") + .add(1).add(2).add(3) + .add(4).add(5).add(6) + .add(7).add(8).add(9).sp()); + ctx.result() + .add(0, 1).add(1, 2).add(2, 3) + .add(3, 4).add(4, 5).add(5, 6) + .add(6, 7).add(7, 8).add(8, 9); + + { + Grouping request = Grouping().setRoot(Group().setId(NullResultNode())).addLevel( + GroupingLevel().setExpression(AttributeNode("attr"))); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1))) + .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2))) + .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3))) + 
.addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(4))) + .addChild(Group().setId(Int64ResultNode(5)).setRank(RawRank(5))) + .addChild(Group().setId(Int64ResultNode(6)).setRank(RawRank(6))) + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7))) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Grouping request = Grouping().setRoot(Group().setId(NullResultNode())).addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr"))); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7))) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Grouping request = Grouping(). + setRoot(Group().setId(NullResultNode())). + setFirstLevel(0). + setLastLevel(1). + addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")). + addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))). 
+ addOrderBy(AggregationRefNode(0), false)); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false)) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false)) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), false)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Grouping request = Grouping(). + setRoot(Group().setId(NullResultNode())). + setFirstLevel(0). + setLastLevel(1). + addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")). + addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)); + + Group expect = Group().setId(NullResultNode()) + .addChild(Group().setId(Int64ResultNode(1)).setRank(RawRank(1)).addAggregationResult(SumAggregationResult(Int64ResultNode(1)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)) + .addChild(Group().setId(Int64ResultNode(2)).setRank(RawRank(2)).addAggregationResult(SumAggregationResult(Int64ResultNode(2)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)) + .addChild(Group().setId(Int64ResultNode(3)).setRank(RawRank(3)).addAggregationResult(SumAggregationResult(Int64ResultNode(3)).setExpression(AttributeNode("attr"))).addOrderBy(AggregationRefNode(0), true)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + AddFunctionNode *add = new AddFunctionNode(); + add->addArg(AggregationRefNode(0)); + add->appendArg(ConstantNode(Int64ResultNode(3))); + 
ExpressionNode::CP i1(add); + Grouping request = Grouping(). + setFirstLevel(0). + setLastLevel(1). + addLevel( + GroupingLevel().setMaxGroups(3).setExpression(AttributeNode("attr")). + addAggregationResult(SumAggregationResult().setExpression(AttributeNode("attr"))). + addOrderBy(i1, false)); + + Group expect = Group() + .addChild(Group().setId(Int64ResultNode(7)).setRank(RawRank(7)).addAggregationResult(SumAggregationResult(Int64ResultNode(7)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(10)), false)) + .addChild(Group().setId(Int64ResultNode(8)).setRank(RawRank(8)).addAggregationResult(SumAggregationResult(Int64ResultNode(8)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(11)), false)) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(9)).addAggregationResult(SumAggregationResult(Int64ResultNode(9)).setExpression(AttributeNode("attr"))).addOrderBy(AddFunctionNode().appendArg(AggregationRefNode(0)).appendArg(ConstantNode(Int64ResultNode(3))).setResult(Int64ResultNode(12)), false)); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + +} + +//----------------------------------------------------------------------------- + +/** + * Test merging the sum of the values from a single attribute vector + * that was collected directly into the root node. Consider this a + * smoke test. 
+ **/ +void +Test::testMergeSimpleSum() +{ + Grouping a = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("foo")) + .setResult(Int64ResultNode(20)))); + + Grouping b = Grouping() + .setRoot(Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("foo")) + .setResult(Int64ResultNode(30)))); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("foo")) + .setResult(Int64ResultNode(50))); + + EXPECT_TRUE(testMerge(a, b, expect)); +} + +/** + * Verify that frozen levels are not touched during merge. + **/ +void +Test::testMergeLevels() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(20)))))); + + Group b = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + 
.addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(20)))))); + + Group expect_all = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(30))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + Group expect_0 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(20))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(30))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + + Group expect_1 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + 
.addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(30))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + + Group expect_2 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(40)))))); + + + Group expect_3 = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(5))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(15))) + .addChild(Group() + .setId(Int64ResultNode(30)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(20)))))); + + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(a), + request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(b), + expect_all)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(a), + 
request.unchain().setFirstLevel(1).setLastLevel(3).setRoot(b), + expect_0)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(a), + request.unchain().setFirstLevel(2).setLastLevel(5).setRoot(b), + expect_1)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(a), + request.unchain().setFirstLevel(3).setLastLevel(5).setRoot(b), + expect_2)); + EXPECT_TRUE(testMerge(request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(a), + request.unchain().setFirstLevel(4).setLastLevel(4).setRoot(b), + expect_3)); +} + +/** + * Verify that the number of groups for a level is pruned down to + * maxGroups, that the remaining groups are the highest ranked ones, + * and that they are sorted by group id. + **/ +void +Test::testMergeGroups() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))); + + Group a = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("05")).setRank(RawRank(5))) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(5))) // (2) + .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) // 1 + .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); // 3 + + Group b = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10))) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) // 2 + .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) // 4 + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(10))) // (1) + .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))); // 5 + + Group expect_3 = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) + 
.addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); + + Group expect_5 = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) + .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) + .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))) + .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); + + Group expect_all = Group() + .setId(NullResultNode()) + .addChild(Group().setId(StringResultNode("00")).setRank(RawRank(10))) + .addChild(Group().setId(StringResultNode("05")).setRank(RawRank( 5))) + .addChild(Group().setId(StringResultNode("10")).setRank(RawRank(50))) + .addChild(Group().setId(StringResultNode("15")).setRank(RawRank(15))) + .addChild(Group().setId(StringResultNode("20")).setRank(RawRank(25))) + .addChild(Group().setId(StringResultNode("40")).setRank(RawRank(100))) + .addChild(Group().setId(StringResultNode("45")).setRank(RawRank(20))) + .addChild(Group().setId(StringResultNode("50")).setRank(RawRank(30))); + + request.levels()[0].setMaxGroups(3); + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_3)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_3)); + request.levels()[0].setMaxGroups(5); + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_5)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_5)); + request.levels()[0].setMaxGroups(-1); + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect_all)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect_all)); +} + +/** + * Merge two relatively complex tree structures and verify that the + * end result is as expected. 
+ **/ +void +Test::testMergeTrees() +{ + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setMaxGroups(3) + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setMaxGroups(2) + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setMaxGroups(1) + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + Group a = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(5)) // merged with 200 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picked up here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ); + + Group b = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(4)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group().setId(Int64ResultNode(9)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + // dummy child would be picket up 
here + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(14)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(5)) // merged with 300 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(5)) // merged with 100 rank node + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(19)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group().setId(Int64ResultNode(24)).setRank(RawRank(10))) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(500)) + 
.addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(200)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(15)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(100)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(20)) + .setRank(RawRank(500)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(300)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + 
.addChild(Group() + .setId(Int64ResultNode(25)) + .setRank(RawRank(400)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(a), expect)); +} + +void +Test::testPruneComplex() +{ + { // First level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("foo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + 
EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Second level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")))) + .addChild(Group() + .setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + + Grouping request = Grouping().setFirstLevel(2).setLastLevel(2); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Third level + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo01")))); + Group prune = 
Group() + .addChild(Group() + .setId(StringResultNode("bar0")) + .addChild(Group() + .setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002"))))); + Grouping request = Grouping().setFirstLevel(3).setLastLevel(3); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } + { // Try pruning a grouping we don't have + Group baseTree = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + .addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("baz0")) + .addChild(Group().setId(StringResultNode("baz00")) + .addChild(Group().setId(StringResultNode("baz000"))) + .addChild(Group().setId(StringResultNode("baz001"))))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + + Group prune = Group() + .addChild(Group().setId(StringResultNode("bar0"))) + .addChild(Group().setId(StringResultNode("boz0"))) + .addChild(Group().setId(StringResultNode("foo0"))) + .addChild(Group().setId(StringResultNode("goo0"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("bar0")) + .addChild(Group().setId(StringResultNode("bar00")) + .addChild(Group().setId(StringResultNode("bar000"))) + .addChild(Group().setId(StringResultNode("bar001"))) + 
.addChild(Group().setId(StringResultNode("bar002")))) + .addChild(Group().setId(StringResultNode("bar01")))) + .addChild(Group().setId(StringResultNode("foo0")) + .addChild(Group().setId(StringResultNode("foo00"))) + .addChild(Group().setId(StringResultNode("foo01")))); + Grouping request = Grouping().setFirstLevel(1).setLastLevel(1); + Grouping baseRequest = Grouping().setFirstLevel(0).setLastLevel(3); + EXPECT_TRUE(testPrune(baseRequest.unchain().setRoot(baseTree), request.unchain().setRoot(prune), expect)); + } +} + +/** + * Test partial merge of a grouping tree, where all levels up to "lastLevel" is + * merged. The last level should not contain any children groups, and only empty + * results. + **/ +void +Test::testPartialMerging() +{ + Grouping baseRequest = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("c3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")))); + + // Cached result + Group cached = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(14)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + 
.setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + .setId(Int64ResultNode(22)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s3")) + .setResult(Int64ResultNode(100))) + ) + ) + ); + + + { // Merge lastlevel 0 + Grouping request = baseRequest.unchain().setFirstLevel(0).setLastLevel(0); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(0))); + + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(110))) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setLastLevel(3).setRoot(cached), expected)); + } + { + // Merge existing tree. Assume we got modified data down again. 
+ Grouping request = baseRequest.unchain().setFirstLevel(1).setLastLevel(1); + Group incoming = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0)))) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + Group expected = Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s0")) + .setResult(Int64ResultNode(200))) + .addChild(Group() + .setId(Int64ResultNode(3)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(5)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(10))) + .addChild(Group() + .setId(Int64ResultNode(13)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(7)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ) + .addChild(Group() + .setId(Int64ResultNode(10)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(100))) + .addChild(Group() + 
.setId(Int64ResultNode(15)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s2")) + .setResult(Int64ResultNode(0))) + ) + ) + .addChild(Group() + .setId(Int64ResultNode(33)) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("s1")) + .setResult(Int64ResultNode(0))) + ); + EXPECT_TRUE(testPartialMerge(request.unchain().setRoot(incoming), request.unchain().setFirstLevel(0).setLastLevel(3).setRoot(cached), expected)); + } +} + +/** + * Test that pruning a simple grouping tree works. + **/ +void +Test::testPruneSimple() +{ + { + Grouping request = Grouping() + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr"))) + .setFirstLevel(1) + .setLastLevel(1); + + Group a = Group() + .addChild(Group().setId(StringResultNode("foo"))) + .addChild(Group().setId(StringResultNode("bar"))) + .addChild(Group().setId(StringResultNode("baz"))); + + Group b = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + Group expect = Group() + .addChild(Group().setId(StringResultNode("foo"))); + + EXPECT_TRUE(testPrune(request.unchain().setFirstLevel(0).setRoot(a), request.unchain().setRoot(b), expect)); + } +} + +/** + * Test that setTopN() limits how many hits are aggregated, and that + * needResort() is true only while no positive topN is set.
+ **/ +void +Test::testTopN() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode()) + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + { + Group expect = Group().setId(NullResultNode()) + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { + Group expect = Group().setId(NullResultNode()) + .addResult(CountAggregationResult().setCount(1) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request.setTopN(1), expect)); + } + { + Grouping request2 = Grouping() + .setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .addAggregationResult(SumAggregationResult()) + .addOrderBy(AggregationRefNode(0), false)); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(0); + EXPECT_TRUE(request2.needResort()); + request2.setTopN(1); + EXPECT_TRUE(!request2.needResort()); + request2.setTopN(100); + EXPECT_TRUE(!request2.needResort()); + } +} + +/** + * Test that simple counting works as long as we use an expression + * that we init, calculate and ignore. 
+ **/ +void +Test::testCount() +{ + AggregationContext ctx; + ctx.result().add(0).add(1).add(2); + ctx.add(IntAttrBuilder("foo").add(3).add(7).add(15).sp()); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode()) + .addResult(CountAggregationResult() + .setExpression(ConstantNode(Int64ResultNode(0))) + ) + ); + + Group expect = Group().setId(NullResultNode()) + .addResult(CountAggregationResult().setCount(3) + .setExpression(ConstantNode(Int64ResultNode(0))) + ); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); +} + +//----------------------------------------------------------------------------- + +bool +Test::checkHits(const Grouping &g, uint32_t first, uint32_t last, uint32_t cnt) +{ + CountFS4Hits pop; + Grouping tmp = g; + tmp.setFirstLevel(first).setLastLevel(last).select(pop, pop); + return EXPECT_EQUAL(pop.getHitCount(), cnt); +} + +void +Test::testFS4HitCollection() +{ + { // aggregation + AggregationContext ctx; + ctx.result().add(30, 30.0).add(20, 20.0).add(10, 10.0).add(5, 5.0).add(25, 25.0); + + Grouping request = Grouping() + .setRoot(Group().setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group().setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(25, 25.0)) + .addHit(FS4Hit(20, 20.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testAggregation(ctx, request, expect)); + } + { // merging + + Grouping request = Grouping() + .setRoot(Group() + .addResult(HitsAggregationResult() + .setMaxHits(3) + .setExpression(ConstantNode(Int64ResultNode(0)))) + ); + + Group expect = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(10, 10.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group a = 
Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(10, 10.0)) + .addHit(FS4Hit(1, 5.0)) + .addHit(FS4Hit(2, 4.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group b = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(20, 20.0)) + .addHit(FS4Hit(3, 7.0)) + .addHit(FS4Hit(4, 6.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + Group c = Group() + .setId(NullResultNode()) + .addResult(HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(30, 30.0)) + .addHit(FS4Hit(5, 9.0)) + .addHit(FS4Hit(6, 8.0)) + .sort() + .setExpression(ConstantNode(Int64ResultNode(0)))); + + EXPECT_TRUE(testMerge(request.unchain().setRoot(a), request.unchain().setRoot(b), request.unchain().setRoot(c), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(b), request.unchain().setRoot(c), request.unchain().setRoot(a), expect)); + EXPECT_TRUE(testMerge(request.unchain().setRoot(c), request.unchain().setRoot(a), request.unchain().setRoot(b), expect)); + } + { // count hits (for external object selection) + HitsAggregationResult dummyHits = HitsAggregationResult() + .setMaxHits(3) + .addHit(FS4Hit(1, 3.0)) + .addHit(FS4Hit(2, 2.0)) + .addHit(FS4Hit(3, 1.0)) + .sort(); + Grouping g = Grouping().setRoot(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits) + .addChild(Group().addResult(dummyHits)) + ) + ) + ); + EXPECT_TRUE(checkHits(g, 0, 0, 3)); + EXPECT_TRUE(checkHits(g, 1, 1, 6)); + EXPECT_TRUE(checkHits(g, 2, 2, 6)); + EXPECT_TRUE(checkHits(g, 3, 3, 3)); + EXPECT_TRUE(checkHits(g, 4, 4, 0)); + + EXPECT_TRUE(checkHits(g, 0, 1, 9)); + EXPECT_TRUE(checkHits(g, 0, 2, 15)); + EXPECT_TRUE(checkHits(g, 0, 3, 18)); + EXPECT_TRUE(checkHits(g, 0, 4, 18)); + EXPECT_TRUE(checkHits(g, 1, 4, 15)); + 
EXPECT_TRUE(checkHits(g, 2, 4, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + + EXPECT_TRUE(checkHits(g, 1, 2, 12)); + EXPECT_TRUE(checkHits(g, 2, 3, 9)); + EXPECT_TRUE(checkHits(g, 3, 4, 3)); + EXPECT_TRUE(checkHits(g, 4, 5, 0)); + } +} + +bool +Test::checkBucket(const NumericResultNode &width, const NumericResultNode &value, const BucketResultNode &bucket) +{ + AggregationContext ctx; + ctx.result().add(0); + if (value.getClass().inherits(IntegerResultNode::classId)) { + ctx.add(IntAttrBuilder("attr").add(value.getInteger()).sp()); + } else if (value.getClass().inherits(FloatResultNode::classId)) { + ctx.add(FloatAttrBuilder("attr").add(value.getFloat()).sp()); + } else { + return EXPECT_TRUE(false); + } + Grouping request = Grouping().setRoot(Group().setId(NullResultNode())) + .addLevel(GroupingLevel() + .setExpression(FixedWidthBucketFunctionNode(AttributeNode("attr")).setWidth(width))); + Group expect = Group().setId(NullResultNode()).addChild(Group().setId(bucket)); + return testAggregation(ctx, request, expect); +} + +void +Test::testFixedWidthBuckets() +{ + typedef Int64ResultNode Int; + typedef FloatResultNode Float; + typedef IntegerBucketResultNode IntBucket; + typedef FloatBucketResultNode FloatBucket; + + // positive int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(0), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(5), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(9), IntBucket(0,10))); + EXPECT_TRUE(checkBucket(Int(10), Int(10), IntBucket(10,20))); + EXPECT_TRUE(checkBucket(Int(10), Int(299), IntBucket(290,300))); + + // negative int buckets + EXPECT_TRUE(checkBucket(Int(10), Int(-1), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-5), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-10), IntBucket(-10,0))); + EXPECT_TRUE(checkBucket(Int(10), Int(-11), IntBucket(-20,-10))); + EXPECT_TRUE(checkBucket(Int(10), Int(-300), IntBucket(-300,-290))); + + // positive float buckets + EXPECT_TRUE(checkBucket(Int(10), 
Float(0.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(5.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(9.0), FloatBucket(0.0,10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(10.0), FloatBucket(10.0,20.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(299.0), FloatBucket(290.0,300.0))); + + // negative float buckets + EXPECT_TRUE(checkBucket(Int(10), Float(-1), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-5), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10), FloatBucket(-10.0,0.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-10.0000001), FloatBucket(-20.0,-10.0))); + EXPECT_TRUE(checkBucket(Int(10), Float(-300), FloatBucket(-300.0,-290.0))); + + // non-integer bucket width + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.0), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.5), FloatBucket(0.5,1.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(0.4999), FloatBucket(0.0,0.5))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.0001), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.5), FloatBucket(-0.5,0.0))); + EXPECT_TRUE(checkBucket(Float(0.5), Float(-0.50001), FloatBucket(-1.0,-0.5))); + + // zero-width buckets + EXPECT_TRUE(checkBucket(Int(0), Int(7), IntBucket(7,7))); + EXPECT_TRUE(checkBucket(Int(0), Float(7.5), FloatBucket(7.5,7.5))); + + // bucket wrap protection + { + int64_t x = std::numeric_limits<int64_t>::min(); + int64_t y = std::numeric_limits<int64_t>::max(); + EXPECT_TRUE(checkBucket(Int(1000), Int(x + 5), IntBucket(x, (x/1000) * 1000))); + EXPECT_TRUE(checkBucket(Int(1000), Int(y - 5), IntBucket((y/1000) * 1000, y))); + } +} + + +void +Test::testNanSorting() +{ + // Attempt at reproducing issue with segfault when setting NaN value. Not + // successful yet, so no point in running test. 
+#if 0 + double nan = sqrt(-1); + EXPECT_TRUE(isnan(nan)); + EXPECT_TRUE(nan != nan); + EXPECT_FALSE(nan < nan); + EXPECT_FALSE(nan > nan); + EXPECT_FALSE(nan < 0.2); + EXPECT_FALSE(nan > 0.2); + EXPECT_FALSE(0.2 < nan); + EXPECT_FALSE(0.2 > nan); + + FastOS_Time timer; + timer.SetNow(); + std::vector<double> groups; + while (timer.MilliSecsToNow() < 60000.0) { + std::vector<double> vec; + srand((unsigned int)timer.MilliSecs()); + size_t limit = 2345678; + size_t mod = rand() % limit; + for (size_t i = 0; i < limit; i++) { + if ((i % mod) == 0) + vec.push_back(nan); + else + vec.push_back(1.0 * rand()); + } + } + std::sort(groups.begin(), groups.end()); +#endif +} + +void +Test::testThatNanIsConverted() +{ + Group g; + double nan = sqrt(-1); + g.setRank(nan); + // Must have been changed for this to work. + ASSERT_EQUAL(g.getRank(), g.getRank()); +} + +void +Test::testGroupingEngineFromRequest() +{ + AggregationContext ctx; + ctx.add(IntAttrBuilder("attr0").add(10).add(10).sp()); + ctx.add(IntAttrBuilder("attr1").add(11).add(11).sp()); + ctx.add(IntAttrBuilder("attr2").add(12).add(12).sp()); + ctx.add(IntAttrBuilder("attr3").add(13).add(13).sp()); + ctx.result().add(0).add(1); + Grouping baseRequest = Grouping() + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr0")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr1")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr2")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr2")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr3")))) + .addLevel(GroupingLevel() + .setExpression(AttributeNode("attr3")) + .addResult(SumAggregationResult() + .setExpression(AttributeNode("attr1")))); + ctx.setup(baseRequest); + GroupingEngine engine(baseRequest.setFirstLevel(0).setLastLevel(2)); + EXPECT_EQUAL(4u, engine.getEngines().size()); +} + 
+//----------------------------------------------------------------------------- + +struct RunDiff { ~RunDiff() { system("diff -u lhs.out rhs.out > diff.txt"); }}; + +//----------------------------------------------------------------------------- + +int +Test::Main() +{ + RunDiff runDiff; + (void) runDiff; + TEST_DEBUG("lhs.out", "rhs.out"); + TEST_INIT("groupingengine_test"); + testGroupingEngineFromRequest(); + testAggregationSimple(); + testAggregationLevels(); + testAggregationMaxGroups(); + testAggregationGroupOrder(); + testAggregationGroupRank(); + testAggregationGroupCapping(); +#if 0 + testMergeSimpleSum(); + testMergeLevels(); + testMergeGroups(); + testMergeTrees(); + testPruneSimple(); + testPruneComplex(); + testPartialMerging(); +#endif + testFS4HitCollection(); + testFixedWidthBuckets(); + testCount(); + testTopN(); + testThatNanIsConverted(); + testNanSorting(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/hitcollector/.gitignore b/searchlib/src/tests/hitcollector/.gitignore new file mode 100644 index 00000000000..a4313eb2184 --- /dev/null +++ b/searchlib/src/tests/hitcollector/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +hitcollector_test +searchlib_hitcollector_test_app diff --git a/searchlib/src/tests/hitcollector/CMakeLists.txt b/searchlib/src/tests/hitcollector/CMakeLists.txt new file mode 100644 index 00000000000..c2b130b2890 --- /dev/null +++ b/searchlib/src/tests/hitcollector/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_hitcollector_test_app + SOURCES + hitcollector_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_hitcollector_test_app COMMAND searchlib_hitcollector_test_app) diff --git a/searchlib/src/tests/hitcollector/DESC b/searchlib/src/tests/hitcollector/DESC new file mode 100644 index 00000000000..a8751d4a1fe --- /dev/null +++ b/searchlib/src/tests/hitcollector/DESC @@ -0,0 +1 @@ +hitcollector test. Take a look at hitcollector.cpp for details. diff --git a/searchlib/src/tests/hitcollector/FILES b/searchlib/src/tests/hitcollector/FILES new file mode 100644 index 00000000000..88a0d4ba4b3 --- /dev/null +++ b/searchlib/src/tests/hitcollector/FILES @@ -0,0 +1 @@ +hitcollector.cpp diff --git a/searchlib/src/tests/hitcollector/hitcollector_test.cpp b/searchlib/src/tests/hitcollector/hitcollector_test.cpp new file mode 100644 index 00000000000..ec7c74913af --- /dev/null +++ b/searchlib/src/tests/hitcollector/hitcollector_test.cpp @@ -0,0 +1,493 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("hitcollector_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <iostream> + +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/queryeval/hitcollector.h> +#include <vespa/searchlib/queryeval/scores.h> + +using namespace search; +using namespace search::fef; +using namespace search::queryeval; + +typedef std::map<uint32_t, feature_t> ScoreMap; + +struct BasicScorer : public HitCollector::DocumentScorer +{ + feature_t _scoreDelta; + BasicScorer(feature_t scoreDelta) : _scoreDelta(scoreDelta) {} + virtual feature_t score(uint32_t docId) { + return docId + _scoreDelta; + } +}; + +struct PredefinedScorer : public HitCollector::DocumentScorer +{ + ScoreMap _scores; + PredefinedScorer(const ScoreMap &scores) : _scores(scores) {} + virtual feature_t score(uint32_t docId) { + feature_t retval = 0.0; + auto itr = _scores.find(docId); + if (itr != _scores.end()) { + retval = itr->second; + } + return retval; + } +}; + +void checkResult(const ResultSet & rs, const std::vector<RankedHit> & exp) +{ + if (exp.size() > 0) { + const RankedHit * rh = rs.getArray(); + ASSERT_TRUE(rh != NULL); + ASSERT_EQUAL(rs.getArrayUsed(), exp.size()); + + for (uint32_t i = 0; i < exp.size(); ++i) { +#if 0 + std::cout << " rh[" << i << "]._docId = " << rh[i]._docId << std::endl; + std::cout << "exp[" << i << "]._docId = " << exp[i]._docId << std::endl; + std::cout << " rh[" << i << "]._rankValue = " << rh[i]._rankValue << std::endl; + std::cout << "exp[" << i << "]._rankValue = " << exp[i]._rankValue << std::endl; +#endif + EXPECT_EQUAL(rh[i]._docId, exp[i]._docId); + EXPECT_EQUAL(rh[i]._rankValue, exp[i]._rankValue); + } + } else { + ASSERT_TRUE(rs.getArray() == NULL); + } +} + +void checkResult(ResultSet & rs, BitVector * exp) +{ + if (exp != NULL) { + BitVector * bv = rs.getBitOverflow(); + ASSERT_TRUE(bv != NULL); + bv->invalidateCachedCount(); + 
exp->invalidateCachedCount(); + LOG(info, "bv.hits: %u, exp.hits: %u", bv->countTrueBits(), exp->countTrueBits()); + ASSERT_TRUE(bv->countTrueBits() == exp->countTrueBits()); + EXPECT_TRUE(*bv == *exp); + } else { + ASSERT_TRUE(rs.getBitOverflow() == NULL); + } +} + +void testAddHit(uint32_t numDocs, uint32_t maxHitsSize, uint32_t maxHeapSize) +{ + + LOG(info, "testAddHit: no hits"); + { // no hits + HitCollector hc(numDocs, maxHitsSize, maxHeapSize); + std::vector<RankedHit> expRh; + + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), NULL)); + } + + LOG(info, "testAddHit: only ranked hits"); + { // only ranked hits + HitCollector hc(numDocs, maxHitsSize, maxHeapSize); + std::vector<RankedHit> expRh; + + for (uint32_t i = 0; i < maxHitsSize; ++i) { + hc.addHit(i, i + 100); + + // build expected result set as we go along + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), NULL)); + } + + LOG(info, "testAddHit: both ranked hits and bit vector hits"); + { // both ranked hits and bit vector hits + HitCollector hc(numDocs, maxHitsSize, maxHeapSize); + std::vector<RankedHit> expRh; + BitVector::UP expBv(BitVector::create(numDocs)); + + for (uint32_t i = 0; i < numDocs; ++i) { + hc.addHit(i, i + 100); + + // build expected result set as we go along + expBv->setBit(i); + if (i >= (numDocs - maxHitsSize)) { + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + } + + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), expBv.get())); + } +} + +TEST("testAddHit") { + TEST_DO(testAddHit(30, 10, 5)); + TEST_DO(testAddHit(30, 10, 0)); + TEST_DO(testAddHit(400, 10, 5)); // 400/32 = 12 which is bigger than 10. 
+ TEST_DO(testAddHit(400, 10, 0)); +} + +struct Fixture { + HitCollector hc; + BitVector::UP expBv; + BasicScorer scorer; + + Fixture() + : hc(20, 10, 5), expBv(BitVector::create(20)), scorer(200) + { + } + virtual ~Fixture() {} + virtual HitRank calculateScore(uint32_t) { return 0; } + void addHits() { + for (uint32_t i = 0; i < 20; ++i) { + hc.addHit(i, calculateScore(i)); + expBv->setBit(i); + } + } + size_t reRank() { + return hc.reRank(scorer); + } + size_t reRank(size_t count) { + return hc.reRank(scorer, count); + } +}; + +struct AscendingScoreFixture : Fixture { + AscendingScoreFixture() : Fixture() {} + virtual HitRank calculateScore(uint32_t i) { + return i + 100; + } +}; + +struct DescendingScoreFixture : Fixture { + DescendingScoreFixture() : Fixture() {} + virtual HitRank calculateScore(uint32_t i) { + return 100 - i; + } +}; + +TEST_F("testReRank - empty", Fixture) { + EXPECT_EQUAL(0u, f.reRank()); +} + +TEST_F("testReRank - ascending", AscendingScoreFixture) +{ + f.addHits(); + EXPECT_EQUAL(5u, f.reRank()); + + std::vector<RankedHit> expRh; + for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best + expRh.push_back(RankedHit(i, f.calculateScore(i))); + if (i >= 15) { // hits from heap (5 last) + expRh.back()._rankValue = i + 200; // after reranking + } + } + EXPECT_EQUAL(expRh.size(), 10u); + + std::unique_ptr<ResultSet> rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), f.expBv.get())); +} + +TEST_F("testReRank - descending", DescendingScoreFixture) +{ + f.addHits(); + EXPECT_EQUAL(5u, f.reRank()); + + std::vector<RankedHit> expRh; + for (uint32_t i = 0; i < 10; ++i) { // 10 first are the best + expRh.push_back(RankedHit(i, f.calculateScore(i))); + if (i < 5) { // hits from heap (5 first) + expRh.back()._rankValue = i + 200; // after reranking + } + } + EXPECT_EQUAL(expRh.size(), 10u); + + std::unique_ptr<ResultSet> rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + 
TEST_DO(checkResult(*rs.get(), f.expBv.get())); +} + +TEST_F("testReRank - partial", AscendingScoreFixture) +{ + f.addHits(); + EXPECT_EQUAL(3u, f.reRank(3)); + + std::vector<RankedHit> expRh; + for (uint32_t i = 10; i < 20; ++i) { // 10 last are the best + expRh.push_back(RankedHit(i, f.calculateScore(i))); + if (i >= 17) { // hits from heap (3 last) + expRh.back()._rankValue = i + 200; // after reranking + } + } + EXPECT_EQUAL(expRh.size(), 10u); + + std::unique_ptr<ResultSet> rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), f.expBv.get())); +} + +TEST_F("require that scores for 2nd phase candidates can be retrieved", DescendingScoreFixture) +{ + f.addHits(); + std::vector<feature_t> scores = f.hc.getSortedHeapScores(); + ASSERT_EQUAL(5u, scores.size()); + EXPECT_EQUAL(100, scores[0]); + EXPECT_EQUAL(99, scores[1]); + EXPECT_EQUAL(98, scores[2]); + EXPECT_EQUAL(97, scores[3]); + EXPECT_EQUAL(96, scores[4]); +} + +TEST("require that score ranges can be read and set.") { + std::pair<Scores, Scores> ranges = + std::make_pair(Scores(1.0, 2.0), Scores(3.0, 4.0)); + HitCollector hc(20, 10, 5); + hc.setRanges(ranges); + EXPECT_EQUAL(ranges.first.low, hc.getRanges().first.low); + EXPECT_EQUAL(ranges.first.high, hc.getRanges().first.high); + EXPECT_EQUAL(ranges.second.low, hc.getRanges().second.low); + EXPECT_EQUAL(ranges.second.high, hc.getRanges().second.high); +} + +TEST("testNoHitsToReRank") { + uint32_t numDocs = 20; + uint32_t maxHitsSize = 10; + + LOG(info, "testNoMDHeap: test it"); + { + HitCollector hc(numDocs, maxHitsSize, 0); + std::vector<RankedHit> expRh; + + for (uint32_t i = 0; i < maxHitsSize; ++i) { + hc.addHit(i, i + 100); + + // build expected result set as we go along + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), NULL)); 
+ } +} + +void testScaling(const std::vector<feature_t> &initScores, + const ScoreMap &finalScores, + const std::vector<RankedHit> &expected) +{ + HitCollector hc(5, 5, 2); + + // first phase ranking + for (uint32_t i = 0; i < 5; ++i) { + hc.addHit(i, initScores[i]); + } + + PredefinedScorer scorer(finalScores); + // perform second phase ranking + EXPECT_EQUAL(2u, hc.reRank(scorer)); + + // check results + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expected)); +} + +TEST("testScaling") { + std::vector<feature_t> initScores(5); + initScores[0] = 1000; + initScores[1] = 2000; + initScores[2] = 3000; + initScores[3] = 4000; + initScores[4] = 5000; + + // expected final rank scores + std::vector<RankedHit> exp(5); + for (uint32_t i = 0; i < 5; ++i) { + exp[i]._docId = i; + } + + { // scale down and adjust down + exp[0]._rankValue = 0; // scaled + exp[1]._rankValue = 100; // scaled + exp[2]._rankValue = 200; // scaled + exp[3]._rankValue = 300; // from heap + exp[4]._rankValue = 400; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 300; + finalScores[4] = 400; + + testScaling(initScores, finalScores, exp); + } + { // scale down and adjust up + exp[0]._rankValue = 200; // scaled + exp[1]._rankValue = 300; // scaled + exp[2]._rankValue = 400; // scaled + exp[3]._rankValue = 500; // from heap + exp[4]._rankValue = 600; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 500; + finalScores[4] = 600; + + testScaling(initScores, finalScores, exp); + } + { // scale up and adjust down + + exp[0]._rankValue = -500; // scaled (-500) + exp[1]._rankValue = 750; // scaled + exp[2]._rankValue = 2000; // scaled + exp[3]._rankValue = 3250; // from heap + exp[4]._rankValue = 4500; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 3250; + finalScores[4] = 4500; + + testScaling(initScores, finalScores, exp); + } + { // minimal 
scale (second phase range = 0 (4 - 4) -> 1) + exp[0]._rankValue = 1; // scaled + exp[1]._rankValue = 2; // scaled + exp[2]._rankValue = 3; // scaled + exp[3]._rankValue = 4; // from heap + exp[4]._rankValue = 4; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 4; + finalScores[4] = 4; + + testScaling(initScores, finalScores, exp); + } + { // minimal scale (first phase range = 0 (4000 - 4000) -> 1) + std::vector<feature_t> is(initScores); + is[4] = 4000; + exp[0]._rankValue = -299600; // scaled + exp[1]._rankValue = -199600; // scaled + exp[2]._rankValue = -99600; // scaled + exp[3]._rankValue = 400; // from heap + exp[4]._rankValue = 500; // from heap + + // second phase ranking scores + ScoreMap finalScores; + finalScores[3] = 400; + finalScores[4] = 500; + + testScaling(is, finalScores, exp); + } +} + +TEST("testOnlyBitVector") { + uint32_t numDocs = 20; + LOG(info, "testOnlyBitVector: test it"); + { + HitCollector hc(numDocs, 0, 0); + BitVector::UP expBv(BitVector::create(numDocs)); + + for (uint32_t i = 0; i < numDocs; i += 2) { + hc.addHit(i, i + 100); + // build expected result set as we go along + expBv->setBit(i); + } + + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + std::vector<RankedHit> expRh; + TEST_DO(checkResult(*rs.get(), expRh)); // no ranked hits + TEST_DO(checkResult(*rs.get(), expBv.get())); // only bit vector + } +} + +struct MergeResultSetFixture { + const uint32_t numDocs; + const uint32_t maxHitsSize; + const uint32_t maxHeapSize; + HitCollector hc; + MergeResultSetFixture() + : numDocs(100), maxHitsSize(80), maxHeapSize(30), hc(numDocs * 32, maxHitsSize, maxHeapSize) + {} +}; + +TEST_F("require that result set is merged correctly with first phase ranking", + MergeResultSetFixture) +{ + std::vector<RankedHit> expRh; + for (uint32_t i = 0; i < f.numDocs; ++i) { + f.hc.addHit(i, i + 1000); + + // build expected result set + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + // only the 
maxHitsSize best hits get a score + expRh.back()._rankValue = (i < f.numDocs - f.maxHitsSize) ? 0 : i + 1000; + } + std::unique_ptr<ResultSet> rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); +} + +void +addExpectedHitForMergeTest(const MergeResultSetFixture &f, std::vector<RankedHit> &expRh, uint32_t docId) +{ + expRh.push_back(RankedHit()); + expRh.back()._docId = docId; + if (docId < f.numDocs - f.maxHitsSize) { // only the maxHitsSize best hits get a score + expRh.back()._rankValue = 0; + } else if (docId < f.numDocs - f.maxHeapSize) { // only first phase ranking + expRh.back()._rankValue = docId + 500; // adjusted with - 500 + } else { // second phase ranking on the maxHeapSize best hits + expRh.back()._rankValue = docId + 500; + } +} + +TEST_F("require that result set is merged correctly with second phase ranking (document scorer)", + MergeResultSetFixture) +{ + // with second phase ranking that triggers rescoring / scaling + BasicScorer scorer(500); // second phase ranking setting score to docId + 500 + std::vector<RankedHit> expRh; + for (uint32_t i = 0; i < f.numDocs; ++i) { + f.hc.addHit(i, i + 1000); + addExpectedHitForMergeTest(f, expRh, i); + } + EXPECT_EQUAL(f.maxHeapSize, f.hc.reRank(scorer)); + std::unique_ptr<ResultSet> rs = f.hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); +} + +TEST("require that hits can be added out of order") { + HitCollector hc(1000, 100, 10); + std::vector<RankedHit> expRh; + // produce expected result in normal order + for (uint32_t i = 0; i < 5; ++i) { + expRh.push_back(RankedHit()); + expRh.back()._docId = i; + expRh.back()._rankValue = i + 100; + } + // add results in reverse order + for (uint32_t i = 5; i-- > 0; ) { + hc.addHit(i, i + 100); + } + std::unique_ptr<ResultSet> rs = hc.getResultSet(); + TEST_DO(checkResult(*rs.get(), expRh)); + TEST_DO(checkResult(*rs.get(), nullptr)); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/index/docbuilder/.gitignore
b/searchlib/src/tests/index/docbuilder/.gitignore new file mode 100644 index 00000000000..999644fce87 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/.gitignore @@ -0,0 +1,5 @@ +*_test +.depend +Makefile +docbuilder_test +searchlib_docbuilder_test_app diff --git a/searchlib/src/tests/index/docbuilder/CMakeLists.txt b/searchlib/src/tests/index/docbuilder/CMakeLists.txt new file mode 100644 index 00000000000..de382bcc2fe --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_docbuilder_test_app + SOURCES + docbuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_docbuilder_test_app COMMAND searchlib_docbuilder_test_app) diff --git a/searchlib/src/tests/index/docbuilder/DESC b/searchlib/src/tests/index/docbuilder/DESC new file mode 100644 index 00000000000..514903f9988 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/DESC @@ -0,0 +1 @@ +ildocbuilder test. Take a look at ildocbuilder.cpp for details. diff --git a/searchlib/src/tests/index/docbuilder/FILES b/searchlib/src/tests/index/docbuilder/FILES new file mode 100644 index 00000000000..4d90f226fb4 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/FILES @@ -0,0 +1 @@ +ildocbuilder.cpp diff --git a/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp new file mode 100644 index 00000000000..06599834ab5 --- /dev/null +++ b/searchlib/src/tests/index/docbuilder/docbuilder_test.cpp @@ -0,0 +1,531 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + +/* $Id$ + * + * Copyright (C) 2011 Yahoo! 
Technologies Norway AS + * + * All Rights Reserved + * + */ + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("docbuilder_test"); +#include <boost/algorithm/string/classification.hpp> +#include <boost/algorithm/string/split.hpp> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/vespalib/encoding/base64.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/document/repo/fixedtyperepo.h> + +using namespace document; + +namespace search { +namespace index { + +namespace +{ +std::string empty; +} + +namespace linguistics +{ +const vespalib::string SPANTREE_NAME("linguistics"); +} + +class Test : public vespalib::TestApp { +private: + void testBuilder(); +public: + int Main(); +}; + +void +Test::testBuilder() +{ + Schema s; + s.addIndexField(Schema::IndexField("ia", Schema::STRING)); + s.addIndexField(Schema::IndexField("ib", Schema::STRING, Schema::ARRAY)); + s.addIndexField(Schema::IndexField("ic", Schema::STRING, Schema::WEIGHTEDSET)); + s.addUriIndexFields(Schema::IndexField("iu", Schema::STRING)); + s.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + s.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("aa", Schema::INT32)); + s.addAttributeField(Schema::AttributeField("ab", Schema::FLOAT)); + s.addAttributeField(Schema::AttributeField("ac", Schema::STRING)); + s.addAttributeField(Schema::AttributeField("ad", Schema::INT32, Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("ae", Schema::FLOAT, Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("af", Schema::STRING, Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("ag", Schema::INT32, Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("ah", Schema::FLOAT, Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("ai", Schema::STRING, Schema::WEIGHTEDSET)); + 
s.addAttributeField(Schema::AttributeField("asp1", + Schema::INT32)); + s.addAttributeField(Schema::AttributeField("asp2", + Schema::INT64)); + s.addAttributeField(Schema::AttributeField("aap1", + Schema::INT32, + Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("aap2", + Schema::INT64, + Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("awp1", + Schema::INT32, + Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("awp2", + Schema::INT64, + Schema::WEIGHTEDSET)); + + s.addSummaryField(Schema::SummaryField("sa", Schema::INT8)); + s.addSummaryField(Schema::SummaryField("sb", Schema::INT16)); + s.addSummaryField(Schema::SummaryField("sc", Schema::INT32)); + s.addSummaryField(Schema::SummaryField("sd", Schema::INT64)); + s.addSummaryField(Schema::SummaryField("se", Schema::FLOAT)); + s.addSummaryField(Schema::SummaryField("sf", Schema::DOUBLE)); + s.addSummaryField(Schema::SummaryField("sg", Schema::STRING)); + s.addSummaryField(Schema::SummaryField("sh", Schema::RAW)); + s.addSummaryField(Schema::SummaryField("si", Schema::RAW, + Schema::ARRAY)); + s.addSummaryField(Schema::SummaryField("sj", Schema::RAW, + Schema::WEIGHTEDSET)); + + DocBuilder b(s); + Document::UP doc; + std::vector<std::string> lines; + std::vector<std::string>::const_iterator itr; + std::string xml; + + { // empty + doc = b.startDocument("doc::0").endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::0\"/>", *itr++); + EXPECT_EQUAL("", *itr++); + EXPECT_TRUE(itr == lines.end()); + } + { // all fields set + std::vector<char> binaryBlob; + binaryBlob.push_back('\0'); + binaryBlob.push_back('\2'); + binaryBlob.push_back('\1'); + std::string raw1s("Single Raw Element"); + std::string raw1a0("Array Raw Element 0"); + std::string raw1a1("Array Raw Element 1"); + std::string raw1w0("Weighted Set Raw Element 0"); + std::string 
raw1w1("Weighted Set Raw Element 1"); + raw1s += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1a0 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1a1 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1w0 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + raw1w1 += std::string(&binaryBlob[0], + &binaryBlob[0] + binaryBlob.size()); + b.startDocument("doc::1"); + b.startIndexField("ia").addStr("foo").addStr("bar").addStr("baz").addTermAnnotation("altbaz").endField(); + b.startIndexField("ib").startElement().addStr("foo").endElement(). + startElement(1).addStr("bar").addStr("baz").endElement().endField(); + b. startIndexField("ic"). + startElement(20).addStr("bar").addStr("baz").endElement(). + startElement().addStr("foo").endElement(). + endField(); + b.startIndexField("iu"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + b.startIndexField("iau"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). 
+ endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + b.startIndexField("iwu"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). 
+ endField(); + b.startAttributeField("aa").addInt(2147483647).endField(); + b.startAttributeField("ab").addFloat(1234.56).endField(); + b.startAttributeField("ac").addStr("foo baz").endField(); + b.startAttributeField("ad").startElement().addInt(10).endElement().endField(); + b.startAttributeField("ae").startElement().addFloat(10.5).endElement().endField(); + b.startAttributeField("af").startElement().addStr("foo").endElement().endField(); + b.startAttributeField("ag").startElement(2).addInt(20).endElement().endField(); + b.startAttributeField("ah").startElement(3).addFloat(20.5).endElement().endField(); + b.startAttributeField("ai").startElement(4).addStr("bar").endElement().endField(); + b.startAttributeField("asp1").addInt(1001).endField(); + b.startAttributeField("asp2").addPosition(1002, 1003).endField(); + b.startAttributeField("aap1"). + startElement().addInt(1004).endElement(). + startElement().addInt(1005).endElement(). + endField(); + b.startAttributeField("aap2"). + startElement().addPosition(1006, 1007).endElement(). + startElement().addPosition(1008, 1009).endElement(). + endField(); + b.startAttributeField("awp1"). + startElement(41).addInt(1010).endElement(). + startElement(42).addInt(1011).endElement(). + endField(); + b.startAttributeField("awp2"). + startElement(43).addPosition(1012, 1013).endElement(). + startElement(44).addPosition(1014, 1015).endElement(). + endField(); + b.startSummaryField("sa").addInt(127).endField(); + b.startSummaryField("sb").addInt(32767).endField(); + b.startSummaryField("sc").addInt(2147483647).endField(); + b.startSummaryField("sd").addInt(2147483648).endField(); + b.startSummaryField("se").addFloat(1234.56).endField(); + b.startSummaryField("sf").addFloat(9876.54).endField(); + b.startSummaryField("sg").addStr("foo bar").endField(); + b.startSummaryField("sh"). + addRaw(raw1s.c_str(), raw1s.size()). + endField(); + b.startSummaryField("si"). + startElement(). + addRaw(raw1a0.c_str(), raw1a0.size()). + endElement(). 
+ startElement(). + addRaw(raw1a1.c_str(), raw1a1.size()). + endElement(). + endField(); + b.startSummaryField("sj"). + startElement(46). + addRaw(raw1w1.c_str(), raw1w1.size()). + endElement(). + startElement(45). + addRaw(raw1w0.c_str(), raw1w0.size()). + endElement(). + endField(); + doc = b.endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::1\">", *itr++); + EXPECT_EQUAL("<ia>foo bar baz</ia>", *itr++); + EXPECT_EQUAL("<ib>", *itr++); + EXPECT_EQUAL("<item>foo</item>", *itr++); + EXPECT_EQUAL("<item>bar baz</item>", *itr++); + EXPECT_EQUAL("</ib>", *itr++); + EXPECT_EQUAL("<ic>", *itr++); + EXPECT_EQUAL("<item weight=\"20\">bar baz</item>", *itr++); + EXPECT_EQUAL("<item weight=\"1\">foo</item>", *itr++); + EXPECT_EQUAL("</ic>", *itr++); + EXPECT_EQUAL("<iu>", *itr++); + EXPECT_EQUAL("<all>http://www.yahoo.com:81/fluke?ab=2#4</all>", *itr++); + EXPECT_EQUAL("<host>www.yahoo.com</host>", *itr++); + EXPECT_EQUAL("<scheme>http</scheme>", *itr++); + EXPECT_EQUAL("<path>/fluke</path>", *itr++); + EXPECT_EQUAL("<port>81</port>", *itr++); + EXPECT_EQUAL("<query>ab=2</query>", *itr++); + EXPECT_EQUAL("<fragment>4</fragment>", *itr++); + EXPECT_EQUAL("</iu>", *itr++); + EXPECT_EQUAL("<iau>", *itr++); + EXPECT_EQUAL("<item>", *itr++); + EXPECT_EQUAL("<all>http://www.yahoo.com:82/fluke?ab=2#8</all>", *itr++); + EXPECT_EQUAL("<host>www.yahoo.com</host>", *itr++); + EXPECT_EQUAL("<scheme>http</scheme>", *itr++); + EXPECT_EQUAL("<path>/fluke</path>", *itr++); + EXPECT_EQUAL("<port>82</port>", *itr++); + EXPECT_EQUAL("<query>ab=2</query>", *itr++); + EXPECT_EQUAL("<fragment>8</fragment>", *itr++); + EXPECT_EQUAL("</item>", *itr++); + EXPECT_EQUAL("<item>", *itr++); + EXPECT_EQUAL("<all>http://www.flickr.com:82/fluke?ab=2#9</all>", *itr++); + EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); + EXPECT_EQUAL("<scheme>http</scheme>", 
*itr++); + EXPECT_EQUAL("<path>/fluke</path>", *itr++); + EXPECT_EQUAL("<port>82</port>", *itr++); + EXPECT_EQUAL("<query>ab=2</query>", *itr++); + EXPECT_EQUAL("<fragment>9</fragment>", *itr++); + EXPECT_EQUAL("</item>", *itr++); + EXPECT_EQUAL("</iau>", *itr++); + EXPECT_EQUAL("<iwu>", *itr++); + EXPECT_EQUAL("<item weight=\"4\">", *itr++); + EXPECT_EQUAL("<all>http://www.yahoo.com:83/fluke?ab=2#12</all>", *itr++); + EXPECT_EQUAL("<host>www.yahoo.com</host>", *itr++); + EXPECT_EQUAL("<scheme>http</scheme>", *itr++); + EXPECT_EQUAL("<path>/fluke</path>", *itr++); + EXPECT_EQUAL("<port>83</port>", *itr++); + EXPECT_EQUAL("<query>ab=2</query>", *itr++); + EXPECT_EQUAL("<fragment>12</fragment>", *itr++); + EXPECT_EQUAL("</item>", *itr++); + EXPECT_EQUAL("<item weight=\"7\">", *itr++); + EXPECT_EQUAL("<all>http://www.flickr.com:85/fluke?ab=2#13</all>", *itr++); + EXPECT_EQUAL("<host>www.flickr.com</host>", *itr++); + EXPECT_EQUAL("<scheme>http</scheme>", *itr++); + EXPECT_EQUAL("<path>/fluke</path>", *itr++); + EXPECT_EQUAL("<port>85</port>", *itr++); + EXPECT_EQUAL("<query>ab=2</query>", *itr++); + EXPECT_EQUAL("<fragment>13</fragment>", *itr++); + EXPECT_EQUAL("</item>", *itr++); + EXPECT_EQUAL("</iwu>", *itr++); + EXPECT_EQUAL("<aa>2147483647</aa>", *itr++); + EXPECT_EQUAL("<ab>1234.56</ab>", *itr++); + EXPECT_EQUAL("<ac>foo baz</ac>", *itr++); + EXPECT_EQUAL("<ad>", *itr++); + EXPECT_EQUAL("<item>10</item>", *itr++); + EXPECT_EQUAL("</ad>", *itr++); + EXPECT_EQUAL("<ae>", *itr++); + EXPECT_EQUAL("<item>10.5</item>", *itr++); + EXPECT_EQUAL("</ae>", *itr++); + EXPECT_EQUAL("<af>", *itr++); + EXPECT_EQUAL("<item>foo</item>", *itr++); + EXPECT_EQUAL("</af>", *itr++); + EXPECT_EQUAL("<ag>", *itr++); + EXPECT_EQUAL("<item weight=\"2\">20</item>", *itr++); + EXPECT_EQUAL("</ag>", *itr++); + EXPECT_EQUAL("<ah>", *itr++); + EXPECT_EQUAL("<item weight=\"3\">20.5</item>", *itr++); + EXPECT_EQUAL("</ah>", *itr++); + EXPECT_EQUAL("<ai>", *itr++); + EXPECT_EQUAL("<item 
weight=\"4\">bar</item>", *itr++); + EXPECT_EQUAL("</ai>", *itr++); + EXPECT_EQUAL("<asp1>1001</asp1>", *itr++); + EXPECT_EQUAL("<asp2>1047758</asp2>", *itr++); + EXPECT_EQUAL("<aap1>", *itr++); + EXPECT_EQUAL("<item>1004</item>", *itr++); + EXPECT_EQUAL("<item>1005</item>", *itr++); + EXPECT_EQUAL("</aap1>", *itr++); + EXPECT_EQUAL("<aap2>", *itr++); + EXPECT_EQUAL("<item>1047806</item>", *itr++); + EXPECT_EQUAL("<item>1048322</item>", *itr++); + EXPECT_EQUAL("</aap2>", *itr++); + EXPECT_EQUAL("<awp1>", *itr++); + EXPECT_EQUAL("<item weight=\"41\">1010</item>", *itr++); + EXPECT_EQUAL("<item weight=\"42\">1011</item>", *itr++); + EXPECT_EQUAL("</awp1>", *itr++); + EXPECT_EQUAL("<awp2>", *itr++); + EXPECT_EQUAL("<item weight=\"43\">1048370</item>", *itr++); + EXPECT_EQUAL("<item weight=\"44\">1048382</item>", *itr++); + EXPECT_EQUAL("</awp2>", *itr++); + EXPECT_EQUAL("<sa>127</sa>", *itr++); + EXPECT_EQUAL("<sb>32767</sb>", *itr++); + EXPECT_EQUAL("<sc>2147483647</sc>", *itr++); + EXPECT_EQUAL("<sd>2147483648</sd>", *itr++); + EXPECT_EQUAL("<se>1234.56</se>", *itr++); + EXPECT_EQUAL("<sf>9876.54</sf>", *itr++); + EXPECT_EQUAL("<sg>foo bar</sg>", *itr++); + EXPECT_EQUAL(empty + "<sh binaryencoding=\"base64\">" + + vespalib::Base64::encode(raw1s) + + "</sh>", *itr++); + EXPECT_EQUAL("<si>", *itr++); + EXPECT_EQUAL(empty + "<item binaryencoding=\"base64\">" + + vespalib::Base64::encode(raw1a0) + + "</item>", *itr++); + EXPECT_EQUAL(empty + "<item binaryencoding=\"base64\">" + + vespalib::Base64::encode(raw1a1) + + "</item>", *itr++); + EXPECT_EQUAL("</si>", *itr++); + EXPECT_EQUAL("<sj>", *itr++); + EXPECT_EQUAL(empty +"<item weight=\"46\" binaryencoding=\"base64\">" + + vespalib::Base64::encode(raw1w1) + + "</item>", *itr++); + EXPECT_EQUAL(empty + "<item weight=\"45\" binaryencoding=\"base64\">" + + vespalib::Base64::encode(raw1w0) + + "</item>", *itr++); + EXPECT_EQUAL("</sj>", *itr++); + EXPECT_EQUAL("</document>", *itr++); + EXPECT_TRUE(itr == lines.end()); +#if 
1 + std::cout << "onedoc xml start -----" << std::endl << + xml << std::endl << + "-------" << std::endl; + std::cout << "onedoc toString start ----" << std::endl << + doc->toString(true) << std::endl << + "-------" << std::endl; +#endif + } + { // create one more to see that everything is cleared + b.startDocument("doc::2"); + b.startIndexField("ia").addStr("yes").endField(); + b.startAttributeField("aa").addInt(20).endField(); + b.startSummaryField("sa").addInt(10).endField(); + doc = b.endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::2\">", *itr++); + EXPECT_EQUAL("<ia>yes</ia>", *itr++); + EXPECT_EQUAL("<aa>20</aa>", *itr++); + EXPECT_EQUAL("<sa>10</sa>", *itr++); + EXPECT_EQUAL("</document>", *itr++); + EXPECT_TRUE(itr == lines.end()); + } + { // create field with cjk chars + b.startDocument("doc::3"); + b.startIndexField("ia"). + addStr("我就是那个"). + setAutoSpace(false). + addStr("大灰狼"). + setAutoSpace(true). 
+ endField(); + doc = b.endDocument(); + xml = doc->toXml(""); + boost::split(lines, xml, boost::is_any_of("\n")); + itr = lines.begin(); + EXPECT_EQUAL("<document documenttype=\"searchdocument\" documentid=\"doc::3\">", *itr++); + EXPECT_EQUAL("<ia>我就是那个大灰狼</ia>", *itr++); + EXPECT_EQUAL("</document>", *itr++); + EXPECT_TRUE(itr == lines.end()); + const FieldValue::UP iaval = doc->getValue("ia"); + ASSERT_TRUE(iaval.get() != NULL); + const StringFieldValue *iasval = dynamic_cast<const StringFieldValue *> + (iaval.get()); + ASSERT_TRUE(iasval != NULL); + StringFieldValue::SpanTrees trees = iasval->getSpanTrees(); + const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); + ASSERT_TRUE(tree != NULL); + std::vector<Span> spans; + std::vector<Span> expSpans; + for (SpanTree::const_iterator i = tree->begin(), ie = tree->end(); + i != ie; ++i) { + Annotation &ann = const_cast<Annotation &>(*i); + const Span *span = dynamic_cast<const Span *>(ann.getSpanNode()); + if (span == NULL) + continue; + spans.push_back(*span); + } + expSpans.push_back(Span(0, 15)); + expSpans.push_back(Span(0, 15)); + expSpans.push_back(Span(15, 9)); + expSpans.push_back(Span(15, 9)); + ASSERT_TRUE(expSpans == spans); +#if 1 + std::cout << "onedoc xml start -----" << std::endl << + xml << std::endl << + "-------" << std::endl; + std::cout << "onedoc toString start ----" << std::endl << + doc->toString(true) << std::endl << + "-------" << std::endl; +#endif + } +} + +int +Test::Main() +{ + TEST_INIT("docbuilder_test"); + + testBuilder(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::index::Test); + diff --git a/searchlib/src/tests/index/doctypebuilder/.gitignore b/searchlib/src/tests/index/doctypebuilder/.gitignore new file mode 100644 index 00000000000..f15be1efcfe --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/.gitignore @@ -0,0 +1,5 @@ +*_test +.depend +Makefile +doctypebuilder_test +searchlib_doctypebuilder_test_app diff --git 
a/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt new file mode 100644 index 00000000000..51fb59421f9 --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_doctypebuilder_test_app + SOURCES + doctypebuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_doctypebuilder_test_app COMMAND searchlib_doctypebuilder_test_app) diff --git a/searchlib/src/tests/index/doctypebuilder/DESC b/searchlib/src/tests/index/doctypebuilder/DESC new file mode 100644 index 00000000000..a199241a331 --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/DESC @@ -0,0 +1 @@ +doctypebuilder test. Take a look at doctypebuilder.cpp for details. diff --git a/searchlib/src/tests/index/doctypebuilder/FILES b/searchlib/src/tests/index/doctypebuilder/FILES new file mode 100644 index 00000000000..9f261ca9a9a --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/FILES @@ -0,0 +1 @@ +doctypebuilder.cpp diff --git a/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp new file mode 100644 index 00000000000..3980700fa6b --- /dev/null +++ b/searchlib/src/tests/index/doctypebuilder/doctypebuilder_test.cpp @@ -0,0 +1,88 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("doctypebuilder_test"); +#include <boost/algorithm/string/classification.hpp> +#include <boost/algorithm/string/split.hpp> +#include <vespa/document/repo/documenttyperepo.h> +#include <vespa/searchlib/index/doctypebuilder.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace document; + +namespace search { +namespace index { + +TEST("testSearchDocType") { + Schema s; + s.addIndexField(Schema::IndexField("ia", Schema::STRING)); + s.addIndexField(Schema::IndexField("ib", Schema::STRING, Schema::ARRAY)); + s.addIndexField(Schema::IndexField("ic", Schema::STRING, Schema::WEIGHTEDSET)); + s.addUriIndexFields(Schema::IndexField("iu", Schema::STRING)); + s.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + s.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + s.addAttributeField(Schema::AttributeField("aa", Schema::INT32)); + s.addAttributeField(Schema::AttributeField("spos", + Schema::INT64)); + s.addAttributeField(Schema::AttributeField("apos", + Schema::INT64, + Schema::ARRAY)); + s.addAttributeField(Schema::AttributeField("wpos", + Schema::INT64, + Schema::WEIGHTEDSET)); + s.addSummaryField(Schema::SummaryField("sa", Schema::STRING)); + + DocTypeBuilder docTypeBuilder(s); + document::DocumenttypesConfig config = docTypeBuilder.makeConfig(); + DocumentTypeRepo repo(config); + const DocumentType *docType = repo.getDocumentType("searchdocument"); + ASSERT_TRUE(docType); + EXPECT_EQUAL(11u, docType->getFieldCount()); + + EXPECT_EQUAL("String", docType->getField("ia").getDataType().getName()); + EXPECT_EQUAL("Array<String>", + docType->getField("ib").getDataType().getName()); + EXPECT_EQUAL("WeightedSet<String>", + docType->getField("ic").getDataType().getName()); + EXPECT_EQUAL("url", docType->getField("iu").getDataType().getName()); + EXPECT_EQUAL("Array<url>", + docType->getField("iau").getDataType().getName()); + 
EXPECT_EQUAL("WeightedSet<url>", + docType->getField("iwu").getDataType().getName()); + + EXPECT_EQUAL("Int", docType->getField("aa").getDataType().getName()); + EXPECT_EQUAL("Long", docType->getField("spos").getDataType().getName()); + EXPECT_EQUAL("Array<Long>", + docType->getField("apos").getDataType().getName()); + EXPECT_EQUAL("WeightedSet<Long>", + docType->getField("wpos").getDataType().getName()); + EXPECT_EQUAL("String", docType->getField("sa").getDataType().getName()); +} + +TEST("require that multiple fields can have the same type") { + Schema s; + s.addIndexField(Schema::IndexField("array1", Schema::STRING, + Schema::ARRAY)); + s.addIndexField(Schema::IndexField("array2", Schema::STRING, + Schema::ARRAY)); + DocTypeBuilder docTypeBuilder(s); + document::DocumenttypesConfig config = docTypeBuilder.makeConfig(); + DocumentTypeRepo repo(config); + const DocumentType *docType = repo.getDocumentType("searchdocument"); + ASSERT_TRUE(docType); + EXPECT_EQUAL(2u, docType->getFieldCount()); + + EXPECT_EQUAL("Array<String>", + docType->getField("array1").getDataType().getName()); + EXPECT_EQUAL("Array<String>", + docType->getField("array2").getDataType().getName()); +} + +} // namespace index +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/indexmetainfo/.gitignore b/searchlib/src/tests/indexmetainfo/.gitignore new file mode 100644 index 00000000000..ddc0b5f4582 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +indexmetainfo_test +test-save.txt +searchlib_indexmetainfo_test_app diff --git a/searchlib/src/tests/indexmetainfo/CMakeLists.txt b/searchlib/src/tests/indexmetainfo/CMakeLists.txt new file mode 100644 index 00000000000..607ab7b7e5b --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_indexmetainfo_test_app + SOURCES + indexmetainfo_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_indexmetainfo_test_app COMMAND searchlib_indexmetainfo_test_app) diff --git a/searchlib/src/tests/indexmetainfo/DESC b/searchlib/src/tests/indexmetainfo/DESC new file mode 100644 index 00000000000..ee312b5fcdc --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/DESC @@ -0,0 +1,2 @@ +Test the API class used to access the 'meta-info.txt' file used to +hold meta information for an index. diff --git a/searchlib/src/tests/indexmetainfo/FILES b/searchlib/src/tests/indexmetainfo/FILES new file mode 100644 index 00000000000..8a96f5f3311 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/FILES @@ -0,0 +1 @@ +indexmetainfo.cpp diff --git a/searchlib/src/tests/indexmetainfo/bogus1.txt b/searchlib/src/tests/indexmetainfo/bogus1.txt new file mode 100644 index 00000000000..6d412ad302e --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus1.txt @@ -0,0 +1 @@ +noAssign diff --git a/searchlib/src/tests/indexmetainfo/bogus10.txt b/searchlib/src/tests/indexmetainfo/bogus10.txt new file mode 100644 index 00000000000..e4f500cf897 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus10.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot.0.valid=false +snapshot.0.syncToken=bogus +snapshot.0.dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/bogus2.txt b/searchlib/src/tests/indexmetainfo/bogus2.txt new file mode 100644 index 00000000000..9895913aece --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus2.txt @@ -0,0 +1 @@ +=noKey diff --git a/searchlib/src/tests/indexmetainfo/bogus3.txt b/searchlib/src/tests/indexmetainfo/bogus3.txt new file mode 100644 index 00000000000..73c7da9da74 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus3.txt @@ -0,0 +1 @@ +unknownKey=magicValue diff --git a/searchlib/src/tests/indexmetainfo/bogus4.txt b/searchlib/src/tests/indexmetainfo/bogus4.txt new file mode 100644 index 
00000000000..d841e7509ca --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus4.txt @@ -0,0 +1 @@ +nextSnapshotId=illegalNumber diff --git a/searchlib/src/tests/indexmetainfo/bogus5.txt b/searchlib/src/tests/indexmetainfo/bogus5.txt new file mode 100644 index 00000000000..08c64d393ba --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus5.txt @@ -0,0 +1,7 @@ +nextSnapshotId=128 +snapshot.1.valid=true +snapshot.1.syncToken=50 +snapshot.1.dirName=foo +snapshot.0.valid=false +snapshot.0.syncToken=100 +snapshot.0.dirName=bar diff --git a/searchlib/src/tests/indexmetainfo/bogus6.txt b/searchlib/src/tests/indexmetainfo/bogus6.txt new file mode 100644 index 00000000000..5506704db80 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus6.txt @@ -0,0 +1,7 @@ +nextSnapshotId=128 +snapshot.0.valid=true +snapshot.0.syncToken=50 +snapshot.0.dirName=foo +snapshot.2.valid=false +snapshot.2.syncToken=100 +snapshot.2.dirName=bar diff --git a/searchlib/src/tests/indexmetainfo/bogus7.txt b/searchlib/src/tests/indexmetainfo/bogus7.txt new file mode 100644 index 00000000000..efbc17b40b6 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus7.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot..valid=true +snapshot..syncToken=50 +snapshot..dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/bogus8.txt b/searchlib/src/tests/indexmetainfo/bogus8.txt new file mode 100644 index 00000000000..e359ca68f12 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus8.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot.x.valid=true +snapshot.x.syncToken=50 +snapshot.x.dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/bogus9.txt b/searchlib/src/tests/indexmetainfo/bogus9.txt new file mode 100644 index 00000000000..5dd606d8942 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/bogus9.txt @@ -0,0 +1,4 @@ +nextSnapshotId=128 +snapshot.0.valid=xyz +snapshot.0.syncToken=50 +snapshot.0.dirName=foo diff --git a/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp 
b/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp new file mode 100644 index 00000000000..e7dc828c9e5 --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/indexmetainfo_test.cpp @@ -0,0 +1,127 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("indexmetainfo_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/indexmetainfo.h> + +using search::IndexMetaInfo; + +typedef IndexMetaInfo::Snapshot Snap; + +TEST_SETUP(Test) + +int +Test::Main() +{ + TEST_INIT("indexmetainfo_test"); + { // load pregenerated file + IndexMetaInfo info(""); + EXPECT_TRUE(info.load()); + ASSERT_TRUE(info.snapshots().size() == 4); + EXPECT_TRUE(info.snapshots()[0].valid); + EXPECT_TRUE(info.snapshots()[0].syncToken == 50); + EXPECT_TRUE(info.snapshots()[0].dirName == "foo"); + EXPECT_TRUE(!info.snapshots()[1].valid); + EXPECT_TRUE(info.snapshots()[1].syncToken == 100); + EXPECT_TRUE(info.snapshots()[1].dirName == "bar"); + EXPECT_TRUE(info.snapshots()[2].valid); + EXPECT_TRUE(info.snapshots()[2].syncToken == 200); + EXPECT_TRUE(info.snapshots()[2].dirName == "baz"); + EXPECT_TRUE(!info.snapshots()[3].valid); + EXPECT_TRUE(info.snapshots()[3].syncToken == 500); + EXPECT_TRUE(info.snapshots()[3].dirName == "last"); + { + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(s.valid); + EXPECT_TRUE(s.syncToken == 200); + EXPECT_TRUE(s.dirName == "baz"); + } + { + Snap s = info.getSnapshot(100); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 100); + EXPECT_TRUE(s.dirName == "bar"); + } + { + Snap s = info.getSnapshot(666); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 0); + EXPECT_TRUE(s.dirName == ""); + } + { + EXPECT_TRUE(info.invalidateSnapshot(200)); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(s.valid); + EXPECT_TRUE(s.syncToken == 50); + EXPECT_TRUE(s.dirName == "foo"); + } + { + 
EXPECT_TRUE(info.invalidateSnapshot(50)); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 0); + EXPECT_TRUE(s.dirName == ""); + } + { + EXPECT_TRUE(info.validateSnapshot(500)); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(s.valid); + EXPECT_TRUE(s.syncToken == 500); + EXPECT_TRUE(s.dirName == "last"); + } + { + EXPECT_TRUE(!info.invalidateSnapshot(666)); + EXPECT_TRUE(!info.validateSnapshot(666)); + } + { + info.clear(); + EXPECT_TRUE(info.snapshots().size() == 0); + Snap s = info.getBestSnapshot(); + EXPECT_TRUE(!s.valid); + EXPECT_TRUE(s.syncToken == 0); + EXPECT_TRUE(s.dirName == ""); + } + } + { // load file that does not exist + IndexMetaInfo info("."); + EXPECT_TRUE(!info.load("file-not-present.txt")); + } + { // load files with errors should fail + IndexMetaInfo info("."); + EXPECT_TRUE(!info.load("bogus1.txt")); + EXPECT_TRUE(!info.load("bogus2.txt")); + EXPECT_TRUE(!info.load("bogus3.txt")); + EXPECT_TRUE(!info.load("bogus4.txt")); + EXPECT_TRUE(!info.load("bogus5.txt")); + EXPECT_TRUE(!info.load("bogus6.txt")); + EXPECT_TRUE(!info.load("bogus7.txt")); + EXPECT_TRUE(!info.load("bogus8.txt")); + EXPECT_TRUE(!info.load("bogus9.txt")); + EXPECT_TRUE(!info.load("bogus10.txt")); + } + { // save/load/save/load/save/load test + std::string file("test-save.txt"); + IndexMetaInfo a("."); + IndexMetaInfo b("."); + EXPECT_TRUE(a.addSnapshot(Snap(true, 50, "foo"))); + EXPECT_TRUE(a.addSnapshot(Snap(false, 100, "bar"))); + EXPECT_TRUE(!a.addSnapshot(Snap(false, 100, "bar"))); + EXPECT_TRUE(a.save(file)); + EXPECT_TRUE(b.load(file)); + ASSERT_TRUE(b.snapshots().size() == 2); + EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo")); + EXPECT_TRUE(b.snapshots()[1] == Snap(false, 100, "bar")); + EXPECT_TRUE(a.save(file)); + EXPECT_TRUE(b.load(file)); + ASSERT_TRUE(b.snapshots().size() == 2); + EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo")); + EXPECT_TRUE(b.snapshots()[1] == Snap(false, 100, "bar")); + 
a.removeSnapshot(100); + EXPECT_TRUE(a.save(file)); + EXPECT_TRUE(b.load(file)); + ASSERT_TRUE(b.snapshots().size() == 1); + EXPECT_TRUE(b.snapshots()[0] == Snap(true, 50, "foo")); + } + TEST_DONE(); +} diff --git a/searchlib/src/tests/indexmetainfo/meta-info.txt b/searchlib/src/tests/indexmetainfo/meta-info.txt new file mode 100644 index 00000000000..20182f5786c --- /dev/null +++ b/searchlib/src/tests/indexmetainfo/meta-info.txt @@ -0,0 +1,12 @@ +snapshot.0.valid=true +snapshot.0.syncToken=50 +snapshot.0.dirName=foo +snapshot.1.valid=true +snapshot.1.syncToken=200 +snapshot.1.dirName=baz +snapshot.2.valid=false +snapshot.2.syncToken=100 +snapshot.2.dirName=bar +snapshot.3.valid=false +snapshot.3.syncToken=500 +snapshot.3.dirName=last diff --git a/searchlib/src/tests/ld-library-path/.gitignore b/searchlib/src/tests/ld-library-path/.gitignore new file mode 100644 index 00000000000..5f02ecfc8f8 --- /dev/null +++ b/searchlib/src/tests/ld-library-path/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +ld-library-path_test +searchlib_ld-library-path_test_app diff --git a/searchlib/src/tests/ld-library-path/CMakeLists.txt b/searchlib/src/tests/ld-library-path/CMakeLists.txt new file mode 100644 index 00000000000..47e1372ffc6 --- /dev/null +++ b/searchlib/src/tests/ld-library-path/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_ld-library-path_test_app + SOURCES + ld-library-path.cpp + DEPENDS +) +vespa_add_test(NAME searchlib_ld-library-path_test_app COMMAND searchlib_ld-library-path_test_app) diff --git a/searchlib/src/tests/ld-library-path/ld-library-path.cpp b/searchlib/src/tests/ld-library-path/ld-library-path.cpp new file mode 100644 index 00000000000..c9a429b3b35 --- /dev/null +++ b/searchlib/src/tests/ld-library-path/ld-library-path.cpp @@ -0,0 +1,12 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(""); + +int +main(int, char **) +{ + LOG(info, "LD_LIBRARY_PATH='%s'", getenv("LD_LIBRARY_PATH")); + return 0; +} diff --git a/searchlib/src/tests/memoryindex/btree/.gitignore b/searchlib/src/tests/memoryindex/btree/.gitignore new file mode 100644 index 00000000000..94440affa90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +btree_test +frozenbtree_test +searchlib_btree_test_app +searchlib_frozenbtree_test_app diff --git a/searchlib/src/tests/memoryindex/btree/CMakeLists.txt b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt new file mode 100644 index 00000000000..8b523030cab --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_btree_test_app + SOURCES + btree_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_btree_test_app COMMAND searchlib_btree_test_app) +vespa_add_executable(searchlib_frozenbtree_test_app + SOURCES + frozenbtree_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_frozenbtree_test_app COMMAND searchlib_frozenbtree_test_app) diff --git a/searchlib/src/tests/memoryindex/btree/DESC b/searchlib/src/tests/memoryindex/btree/DESC new file mode 100644 index 00000000000..02739da7527 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/DESC @@ -0,0 +1 @@ +btree test. Take a look at btree_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/btree/FILES b/searchlib/src/tests/memoryindex/btree/FILES new file mode 100644 index 00000000000..e63a2f68eb4 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/FILES @@ -0,0 +1 @@ +btree_test.cpp diff --git a/searchlib/src/tests/memoryindex/btree/btree_test.cpp b/searchlib/src/tests/memoryindex/btree/btree_test.cpp new file mode 100644 index 00000000000..5fb6761ba57 --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/btree_test.cpp @@ -0,0 +1,1282 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("btree_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <string> +#include <vespa/searchlib/btree/btreeroot.h> +#include <vespa/searchlib/btree/btreebuilder.h> +#include <vespa/searchlib/btree/btreenodeallocator.h> +#include <vespa/searchlib/btree/btree.h> +#include <vespa/searchlib/btree/btreestore.h> +#include <vespa/searchlib/util/rand48.h> + +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreebuilder.hpp> +#include <vespa/searchlib/btree/btree.hpp> +#include <vespa/searchlib/btree/btreestore.hpp> + +using vespalib::GenerationHandler; + +namespace search { +namespace btree { + +namespace { + +template <typename T> +std::string +toStr(const T & v) +{ + std::stringstream ss; + ss << v; + return ss.str(); +} + +} + +typedef BTreeTraits<4, 4, 31, false> MyTraits; + +#define KEYWRAP + +#ifdef KEYWRAP + +// Force use of functor to compare keys. 
+class WrapInt +{ +public: + int _val; + WrapInt(int val) : _val(val) {} + WrapInt(void) : _val(0) {} + bool operator==(const WrapInt & rhs) const { return _val == rhs._val; } +}; + +std::ostream & +operator<<(std::ostream &s, const WrapInt &i) +{ + s << i._val; + return s; +} + +typedef WrapInt MyKey; +class MyComp +{ +public: + bool + operator()(const WrapInt &a, const WrapInt &b) const + { + return a._val < b._val; + } +}; + +#define UNWRAP(key) (key._val) +#else +typedef int MyKey; +typedef std::less<int> MyComp; +#define UNWRAP(key) (key) +#endif + +typedef BTree<MyKey, std::string, + btree::NoAggregated, + MyComp, MyTraits> MyTree; +typedef BTreeStore<MyKey, std::string, + btree::NoAggregated, + MyComp, MyTraits> MyTreeStore; +typedef MyTree::Builder MyTreeBuilder; +typedef MyTree::LeafNodeType MyLeafNode; +typedef MyTree::InternalNodeType MyInternalNode; +typedef MyTree::NodeAllocatorType MyNodeAllocator; +typedef std::pair<MyKey, std::string> LeafPair; +typedef MyTreeStore::KeyDataType MyKeyData; +typedef MyTreeStore::KeyDataTypeRefPair MyKeyDataRefPair; + +typedef BTree<int, BTreeNoLeafData, btree::NoAggregated> SetTreeB; + +typedef BTreeTraits<16, 16, 10, false> LSeekTraits; +typedef BTree<int, BTreeNoLeafData, btree::NoAggregated, + std::less<int>, LSeekTraits> SetTreeL; + +struct LeafPairLess { + bool operator()(const LeafPair & lhs, const LeafPair & rhs) const { + return UNWRAP(lhs.first) < UNWRAP(rhs.first); + } +}; + +template <typename ManagerType> +void +cleanup(GenerationHandler & g, ManagerType & m) +{ + m.freeze(); + m.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + m.trimHoldLists(g.getFirstUsedGeneration()); +} + +template <typename ManagerType, typename NodeType> +void +cleanup(GenerationHandler & g, + ManagerType & m, + BTreeNode::Ref n1Ref, NodeType * n1, + BTreeNode::Ref n2Ref = BTreeNode::Ref(), NodeType * n2 = NULL) +{ + assert(ManagerType::isValidRef(n1Ref)); + m.holdNode(n1Ref, n1); + if (n2 != NULL) { + 
assert(ManagerType::isValidRef(n2Ref)); + m.holdNode(n2Ref, n2); + } else { + assert(!ManagerType::isValidRef(n2Ref)); + } + cleanup(g, m); +} + +class Test : public vespalib::TestApp { +private: + template <typename LeafNodeType> + bool assertLeafNode(const std::string & exp, const LeafNodeType & n); + bool assertSeek(int skey, int ekey, const MyTree & tree); + bool assertSeek(int skey, int ekey, MyTree::Iterator & itr); + bool assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act); + + void + buildSubTree(const std::vector<LeafPair> &sub, + size_t numEntries); + + void requireThatNodeInsertWorks(); + void requireThatNodeSplitInsertWorks(); + void requireThatNodeStealWorks(); + void requireThatNodeRemoveWorks(); + void requireThatNodeLowerBoundWorks(); + void requireThatWeCanInsertAndRemoveFromTree(); + void requireThatSortedTreeInsertWorks(); + void requireThatCornerCaseTreeFindWorks(); + void requireThatBasicTreeIteratorWorks(); + void requireThatTreeIteratorSeekWorks(); + void requireThatTreeIteratorAssignWorks(); + void requireThatMemoryUsageIsCalculated(); + template <typename TreeType> + void requireThatLowerBoundWorksT(); + void requireThatLowerBoundWorks(); + template <typename TreeType> + void requireThatUpperBoundWorksT(); + void requireThatUpperBoundWorks(); + void requireThatUpdateOfKeyWorks(); + + void + requireThatSmallNodesWorks(); + + void + requireThatApplyWorks(); + + void + requireThatIteratorDistanceWorks(int numEntries); + + void + requireThatIteratorDistanceWorks(); +public: + int Main(); +}; + +template <typename LeafNodeType> +bool +Test::assertLeafNode(const std::string & exp, const LeafNodeType & n) +{ + std::stringstream ss; + ss << "["; + for (uint32_t i = 0; i < n.validSlots(); ++i) { + if (i > 0) ss << ","; + ss << n.getKey(i) << ":" << n.getData(i); + } + ss << "]"; + if (!EXPECT_EQUAL(exp, ss.str())) return false; + return true; +} + +bool +Test::assertSeek(int skey, int ekey, const MyTree & tree) +{ + MyTree::Iterator 
itr = tree.begin(); + return assertSeek(skey, ekey, itr); +} + +bool +Test::assertSeek(int skey, int ekey, MyTree::Iterator & itr) +{ + MyTree::Iterator bseekItr = itr; + MyTree::Iterator lseekItr = itr; + bseekItr.binarySeek(skey); + lseekItr.linearSeek(skey); + if (!EXPECT_EQUAL(ekey, UNWRAP(bseekItr.getKey()))) return false; + if (!EXPECT_EQUAL(ekey, UNWRAP(lseekItr.getKey()))) return false; + itr = bseekItr; + return true; +} + +bool +Test::assertMemoryUsage(const MemoryUsage & exp, const MemoryUsage & act) +{ + if (!EXPECT_EQUAL(exp.allocatedBytes(), act.allocatedBytes())) return false; + if (!EXPECT_EQUAL(exp.usedBytes(), act.usedBytes())) return false; + if (!EXPECT_EQUAL(exp.deadBytes(), act.deadBytes())) return false; + if (!EXPECT_EQUAL(exp.allocatedBytesOnHold(), act.allocatedBytesOnHold())) return false; + return true; +} + +void +Test::requireThatNodeInsertWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = m.allocLeafNode(); + MyLeafNode *n = nPair.second; + EXPECT_TRUE(n->isLeaf()); + EXPECT_EQUAL(0u, n->validSlots()); + n->insert(0, 20, "b"); + EXPECT_TRUE(!n->isFull()); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[20:b]", *n)); + n->insert(0, 10, "a"); + EXPECT_TRUE(!n->isFull()); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[10:a,20:b]", *n)); + EXPECT_EQUAL(20, UNWRAP(n->getLastKey())); + EXPECT_EQUAL("b", n->getLastData()); + n->insert(2, 30, "c"); + EXPECT_TRUE(!n->isFull()); + n->insert(3, 40, "d"); + EXPECT_TRUE(n->isFull()); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[10:a,20:b,30:c,40:d]", *n)); + cleanup(g, m, nPair.first, n); +} + +MyLeafNode::RefPair +getLeafNode(MyNodeAllocator &allocator) +{ + MyLeafNode::RefPair nPair = allocator.allocLeafNode(); + MyLeafNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 3, "c"); + n->insert(2, 5, "e"); + n->insert(3, 7, "g"); + return nPair; +} + +void 
+Test::requireThatNodeSplitInsertWorks() +{ + { // new entry in current node + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 2, 4, "d"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,4:d]", *n)); + EXPECT_TRUE(assertLeafNode("[5:e,7:g]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } + { // new entry in split node + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 3, 6, "f"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n)); + EXPECT_TRUE(assertLeafNode("[6:f,7:g]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } + { // new entry at end + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + MyLeafNode::RefPair sPair = m.allocLeafNode(); + MyLeafNode *s = sPair.second; + n->splitInsert(s, 4, 8, "h"); + EXPECT_TRUE(assertLeafNode("[1:a,3:c,5:e]", *n)); + EXPECT_TRUE(assertLeafNode("[7:g,8:h]", *s)); + cleanup(g, m, nPair.first, n, sPair.first, s); + } +} + +struct BTreeStealTraits +{ + static const size_t LEAF_SLOTS = 6; + static const size_t INTERNAL_SLOTS = 6; +}; + +void +Test::requireThatNodeStealWorks() +{ + typedef BTreeLeafNode<int, std::string, + btree::NoAggregated, 6> MyStealNode; + typedef BTreeNodeAllocator<int, std::string, + btree::NoAggregated, + BTreeStealTraits::INTERNAL_SLOTS, BTreeStealTraits::LEAF_SLOTS> + MyStealManager; + { // steal all from left + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 4, "d"); + n->insert(1, 5, "e"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode 
*v = vPair.second; + v->insert(0, 1, "a"); + v->insert(1, 2, "b"); + v->insert(2, 3, "c"); + n->stealAllFromLeftNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal all from right + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 2, "b"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 3, "c"); + v->insert(1, 4, "d"); + v->insert(2, 5, "e"); + n->stealAllFromRightNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c,4:d,5:e]", *n)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal some from left + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 5, "e"); + n->insert(1, 6, "f"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 1, "a"); + v->insert(1, 2, "b"); + v->insert(2, 3, "c"); + v->insert(3, 4, "d"); + n->stealSomeFromLeftNode(v); + EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(v->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *n)); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *v)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } + { // steal some from right + GenerationHandler g; + MyStealManager m; + MyStealNode::RefPair nPair = m.allocLeafNode(); + MyStealNode *n = nPair.second; + n->insert(0, 1, "a"); + n->insert(1, 2, "b"); + EXPECT_TRUE(!n->isAtLeastHalfFull()); + MyStealNode::RefPair vPair = m.allocLeafNode(); + MyStealNode *v = vPair.second; + v->insert(0, 3, "c"); + v->insert(1, 4, "d"); + v->insert(2, 5, "e"); + v->insert(3, 6, "f"); + n->stealSomeFromRightNode(v); + 
EXPECT_TRUE(n->isAtLeastHalfFull()); + EXPECT_TRUE(v->isAtLeastHalfFull()); + EXPECT_TRUE(assertLeafNode("[1:a,2:b,3:c]", *n)); + EXPECT_TRUE(assertLeafNode("[4:d,5:e,6:f]", *v)); + cleanup(g, m, nPair.first, n, vPair.first, v); + } +} + +void +Test::requireThatNodeRemoveWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + n->remove(1); + EXPECT_TRUE(assertLeafNode("[1:a,5:e,7:g]", *n)); + cleanup(g, m, nPair.first, n); +} + +void +Test::requireThatNodeLowerBoundWorks() +{ + GenerationHandler g; + MyNodeAllocator m; + MyLeafNode::RefPair nPair = getLeafNode(m); + MyLeafNode *n = nPair.second; + EXPECT_EQUAL(1u, n->lower_bound(3, MyComp())); + EXPECT_FALSE(MyComp()(3, n->getKey(1u))); + EXPECT_EQUAL(0u, n->lower_bound(0, MyComp())); + EXPECT_TRUE(MyComp()(0, n->getKey(0u))); + EXPECT_EQUAL(1u, n->lower_bound(2, MyComp())); + EXPECT_TRUE(MyComp()(2, n->getKey(1u))); + EXPECT_EQUAL(3u, n->lower_bound(6, MyComp())); + EXPECT_TRUE(MyComp()(6, n->getKey(3u))); + EXPECT_EQUAL(4u, n->lower_bound(8, MyComp())); + cleanup(g, m, nPair.first, n); +} + +void +generateData(std::vector<LeafPair> & data, size_t numEntries) +{ + data.reserve(numEntries); + Rand48 rnd; + rnd.srand48(10); + for (size_t i = 0; i < numEntries; ++i) { + int num = rnd.lrand48() % 10000000; + std::string str = toStr(num); + data.push_back(std::make_pair(num, str)); + } +} + + +void +Test::buildSubTree(const std::vector<LeafPair> &sub, + size_t numEntries) +{ + GenerationHandler g; + MyTree tree; + MyTreeBuilder builder(tree.getAllocator()); + + std::vector<LeafPair> sorted(sub.begin(), sub.begin() + numEntries); + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(sorted[i].first); + const std::string & str = sorted[i].second; + builder.insert(num, str); + } + tree.assign(builder); + assert(numEntries == tree.size()); + assert(tree.isValid()); + 
EXPECT_EQUAL(numEntries, tree.size()); + EXPECT_TRUE(tree.isValid()); + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr = itr; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + ++itr; + } + EXPECT_TRUE(!itr.valid()); + ritr = itr; + EXPECT_TRUE(!ritr.valid()); + --ritr; + for (size_t i = 0; i < numEntries; ++i) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].first, ritr.getKey()); + EXPECT_EQUAL(sorted[numEntries - 1 - i].second, ritr.getData()); + --ritr; + } + EXPECT_TRUE(!ritr.valid()); +} + +void +Test::requireThatWeCanInsertAndRemoveFromTree() +{ + GenerationHandler g; + MyTree tree; + std::vector<LeafPair> exp; + std::vector<LeafPair> sorted; + size_t numEntries = 1000; + generateData(exp, numEntries); + sorted = exp; + std::sort(sorted.begin(), sorted.end(), LeafPairLess()); + // insert entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + const std::string & str = exp[i].second; + EXPECT_TRUE(!tree.find(num).valid()); + //LOG(info, "insert[%zu](%d, %s)", i, num, str.c_str()); + EXPECT_TRUE(tree.insert(num, str)); + EXPECT_TRUE(!tree.insert(num, str)); + for (size_t j = 0; j <= i; ++j) { + //LOG(info, "find[%zu](%d)", j, exp[j].first._val); + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(i + 1u, tree.size()); + 
EXPECT_TRUE(tree.isValid()); + buildSubTree(exp, i + 1); + } + //std::cout << "tree: " << tree.toString() << std::endl; + + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itre = itr; + MyTree::Iterator itre2; + MyTree::Iterator ritr = itr; + while (itre.valid()) + ++itre; + if (numEntries > 0) { + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(numEntries, ritr.position()); + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_EQUAL(numEntries - 1, ritr.position()); + } else { + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + --ritr; + EXPECT_TRUE(!ritr.valid()); + EXPECT_EQUAL(0u, ritr.position()); + } + MyTree::Iterator pitr = itr; + for (size_t i = 0; i < numEntries; ++i) { + ssize_t si = i; + ssize_t sileft = numEntries - i; + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(i, itr.position()); + EXPECT_EQUAL(sileft, itre - itr); + EXPECT_EQUAL(-sileft, itr - itre); + EXPECT_EQUAL(sileft, itre2 - itr); + EXPECT_EQUAL(-sileft, itr - itre2); + EXPECT_EQUAL(si, itr - tree.begin()); + EXPECT_EQUAL(-si, tree.begin() - itr); + EXPECT_EQUAL(i != 0, itr - pitr); + EXPECT_EQUAL(-(i != 0), pitr - itr); + EXPECT_EQUAL(sorted[i].first, itr.getKey()); + EXPECT_EQUAL(sorted[i].second, itr.getData()); + pitr = itr; + ++itr; + ritr = itr; + --ritr; + EXPECT_TRUE(ritr.valid()); + EXPECT_TRUE(ritr == pitr); + } + EXPECT_TRUE(!itr.valid()); + EXPECT_EQUAL(numEntries, itr.position()); + ssize_t sNumEntries = numEntries; + EXPECT_EQUAL(sNumEntries, itr - tree.begin()); + EXPECT_EQUAL(-sNumEntries, tree.begin() - itr); + EXPECT_EQUAL(1, itr - pitr); + EXPECT_EQUAL(-1, pitr - itr); + } + // compact full tree by calling incremental compaction methods in a loop + { + MyTree::NodeAllocatorType &manager = tree.getAllocator(); + std::vector<uint32_t> toHold = manager.startCompact(); + MyTree::Iterator itr = tree.begin(); + tree.setRoot(itr.moveFirstLeafNode(tree.getRoot())); + while (itr.valid()) { + 
// LOG(info, "Leaf moved to %d", UNWRAP(itr.getKey())); + itr.moveNextLeafNode(); + } + manager.finishCompact(toHold); + manager.freeze(); + manager.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + manager.trimHoldLists(g.getFirstUsedGeneration()); + } + // remove entries + for (size_t i = 0; i < numEntries; ++i) { + int num = UNWRAP(exp[i].first); + //LOG(info, "remove[%zu](%d)", i, num); + //std::cout << "tree: " << tree.toString() << std::endl; + EXPECT_TRUE(tree.remove(num)); + EXPECT_TRUE(!tree.find(num).valid()); + EXPECT_TRUE(!tree.remove(num)); + EXPECT_TRUE(tree.isValid()); + for (size_t j = i + 1; j < numEntries; ++j) { + MyTree::Iterator itr = tree.find(exp[j].first); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(exp[j].first, itr.getKey()); + EXPECT_EQUAL(exp[j].second, itr.getData()); + } + EXPECT_EQUAL(numEntries - 1 - i, tree.size()); + } +} + +void +Test::requireThatSortedTreeInsertWorks() +{ + { + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + EXPECT_TRUE(tree.insert(i, toStr(i))); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toStr(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + } + } + { + GenerationHandler g; + MyTree tree; + for (int i = 1000; i > 0; --i) { + EXPECT_TRUE(tree.insert(i, toStr(i))); + MyTree::Iterator itr = tree.find(i); + EXPECT_TRUE(itr.valid()); + EXPECT_EQUAL(toStr(i), itr.getData()); + EXPECT_TRUE(tree.isValid()); + } + } +} + +void +Test::requireThatCornerCaseTreeFindWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 1; i < 100; ++i) { + tree.insert(i, toStr(i)); + } + EXPECT_TRUE(!tree.find(0).valid()); // lower than lowest + EXPECT_TRUE(!tree.find(1000).valid()); // higher than highest +} + +void +Test::requireThatBasicTreeIteratorWorks() +{ + GenerationHandler g; + MyTree tree; + EXPECT_TRUE(!tree.begin().valid()); + std::vector<LeafPair> exp; + size_t numEntries = 1000; + generateData(exp, numEntries); + for (size_t i = 0; i 
< numEntries; ++i) { + tree.insert(exp[i].first, exp[i].second); + } + std::sort(exp.begin(), exp.end(), LeafPairLess()); + size_t ei = 0; + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator ritr; + EXPECT_EQUAL(1000u, itr.size()); + for (; itr.valid(); ++itr) { + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(itr.getKey())); + EXPECT_EQUAL(exp[ei].second, itr.getData()); + ei++; + ritr = itr; + } + EXPECT_EQUAL(numEntries, ei); + for (; ritr.valid(); --ritr) { + --ei; + //LOG(info, "itr(%d, %s)", itr.getKey(), itr.getData().c_str()); + EXPECT_EQUAL(UNWRAP(exp[ei].first), UNWRAP(ritr.getKey())); + EXPECT_EQUAL(exp[ei].second, ritr.getData()); + } +} + +void +Test::requireThatTreeIteratorSeekWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 40; i += 2) { + tree.insert(i, toStr(i)); + } + //std::cout << tree.toString() << std::endl; + EXPECT_TRUE(assertSeek(2, 2, tree)); // next key + EXPECT_TRUE(assertSeek(10, 10, tree)); // skip to existing + EXPECT_TRUE(assertSeek(26, 26, tree)); // skip to existing + EXPECT_TRUE(assertSeek(11, 12, tree)); // skip to non-existing + EXPECT_TRUE(assertSeek(23, 24, tree)); // skip to non-existing + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(4, 4, itr)); + EXPECT_TRUE(assertSeek(14, 14, itr)); + EXPECT_TRUE(assertSeek(18, 18, itr)); + EXPECT_TRUE(assertSeek(36, 36, itr)); + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(3, 4, itr)); + EXPECT_TRUE(assertSeek(13, 14, itr)); + EXPECT_TRUE(assertSeek(17, 18, itr)); + EXPECT_TRUE(assertSeek(35, 36, itr)); + } + { + MyTree::Iterator itr = tree.begin(); + MyTree::Iterator itr2 = tree.begin(); + itr.binarySeek(40); // outside + itr2.linearSeek(40); // outside + EXPECT_TRUE(!itr.valid()); + EXPECT_TRUE(!itr2.valid()); + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(8, 8, itr)); + for (int i = 10; i < 40; i += 2) { + ++itr; + 
EXPECT_EQUAL(i, UNWRAP(itr.getKey())); + } + } + { + MyTree::Iterator itr = tree.begin(); + EXPECT_TRUE(assertSeek(26, 26, itr)); + for (int i = 28; i < 40; i += 2) { + ++itr; + EXPECT_EQUAL(i, UNWRAP(itr.getKey())); + } + } + GenerationHandler g2; + MyTree tree2; // only leaf node + tree2.insert(0, "0"); + tree2.insert(2, "2"); + tree2.insert(4, "4"); + EXPECT_TRUE(assertSeek(1, 2, tree2)); + EXPECT_TRUE(assertSeek(2, 2, tree2)); + { + MyTree::Iterator itr = tree2.begin(); + MyTree::Iterator itr2 = tree2.begin(); + itr.binarySeek(5); // outside + itr2.linearSeek(5); // outside + EXPECT_TRUE(!itr.valid()); + EXPECT_TRUE(!itr2.valid()); + } +} + +void +Test::requireThatTreeIteratorAssignWorks() +{ + GenerationHandler g; + MyTree tree; + for (int i = 0; i < 1000; ++i) { + tree.insert(i, toStr(i)); + } + for (int i = 0; i < 1000; ++i) { + MyTree::Iterator itr = tree.find(i); + MyTree::Iterator itr2 = itr; + EXPECT_TRUE(itr == itr2); + int expNum = i; + for (; itr2.valid(); ++itr2) { + EXPECT_EQUAL(expNum++, UNWRAP(itr2.getKey())); + } + EXPECT_EQUAL(1000, expNum); + } +} + +void +Test::requireThatMemoryUsageIsCalculated() +{ + typedef BTreeNodeAllocator<int32_t, int8_t, + btree::NoAggregated, + MyTraits::INTERNAL_SLOTS, MyTraits::LEAF_SLOTS> NodeAllocator; + typedef NodeAllocator::InternalNodeType INode; + typedef NodeAllocator::LeafNodeType LNode; + typedef NodeAllocator::InternalNodeTypeRefPair IRef; + typedef NodeAllocator::LeafNodeTypeRefPair LRef; + LOG(info, "sizeof(BTreeNode)=%zu, sizeof(INode)=%zu, sizeof(LNode)=%zu", + sizeof(BTreeNode), sizeof(INode), sizeof(LNode)); + EXPECT_GREATER(sizeof(INode), sizeof(LNode)); + GenerationHandler gh; + gh.incGeneration(); + NodeAllocator tm; + MemoryUsage mu; + const uint32_t initialInternalNodes = 128u; + const uint32_t initialLeafNodes = 128u; + mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes); + mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes); + mu.incUsedBytes(sizeof(INode)); + 
mu.incDeadBytes(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // add internal node + IRef ir = tm.allocInternalNode(1); + mu.incUsedBytes(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // add leaf node + LRef lr = tm.allocLeafNode(); + mu.incUsedBytes(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // move nodes to hold list + tm.freeze(); // mark allocated nodes as frozen so we can hold them later on + tm.holdNode(ir.first, ir.second); + mu.incAllocatedBytesOnHold(sizeof(INode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + tm.holdNode(lr.first, lr.second); + mu.incAllocatedBytesOnHold(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); + + // trim hold lists + tm.transferHoldLists(gh.getCurrentGeneration()); + gh.incGeneration(); + tm.trimHoldLists(gh.getFirstUsedGeneration()); + mu = MemoryUsage(); + mu.incAllocatedBytes(sizeof(INode) * initialInternalNodes); + mu.incAllocatedBytes(sizeof(LNode) * initialLeafNodes); + mu.incUsedBytes(sizeof(INode) * 2); + mu.incDeadBytes(sizeof(INode) * 2); + mu.incUsedBytes(sizeof(LNode)); + mu.incDeadBytes(sizeof(LNode)); + EXPECT_TRUE(assertMemoryUsage(mu, tm.getMemoryUsage())); +} + +template <typename TreeType> +void +Test::requireThatLowerBoundWorksT() +{ + GenerationHandler g; + TreeType t; + EXPECT_TRUE(t.insert(10, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(20, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(30, BTreeNoLeafData())); + EXPECT_EQUAL(10, t.lowerBound(9).getKey()); + EXPECT_EQUAL(20, t.lowerBound(20).getKey()); + EXPECT_EQUAL(30, t.lowerBound(21).getKey()); + EXPECT_EQUAL(30, t.lowerBound(30).getKey()); + EXPECT_TRUE(!t.lowerBound(31).valid()); + for (int i = 40; i < 1000; i+=10) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData())); + } + for (int i = 9; i < 990; i+=10) { + EXPECT_EQUAL(i + 1, t.lowerBound(i).getKey()); + EXPECT_EQUAL(i + 1, t.lowerBound(i + 1).getKey()); + } + 
EXPECT_TRUE(!t.lowerBound(991).valid()); +} + +void +Test::requireThatLowerBoundWorks() +{ + requireThatLowerBoundWorksT<SetTreeB>(); + requireThatLowerBoundWorksT<SetTreeL>(); +} + +template <typename TreeType> +void +Test::requireThatUpperBoundWorksT() +{ + GenerationHandler g; + TreeType t; + EXPECT_TRUE(t.insert(10, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(20, BTreeNoLeafData())); + EXPECT_TRUE(t.insert(30, BTreeNoLeafData())); + EXPECT_EQUAL(10, t.upperBound(9).getKey()); + EXPECT_EQUAL(30, t.upperBound(20).getKey()); + EXPECT_EQUAL(30, t.upperBound(21).getKey()); + EXPECT_TRUE(!t.upperBound(30).valid()); + for (int i = 40; i < 1000; i+=10) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData())); + } + for (int i = 9; i < 980; i+=10) { + EXPECT_EQUAL(i + 1, t.upperBound(i).getKey()); + EXPECT_EQUAL(i + 11, t.upperBound(i + 1).getKey()); + } + EXPECT_TRUE(!t.upperBound(990).valid()); +} + +void +Test::requireThatUpperBoundWorks() +{ + requireThatUpperBoundWorksT<SetTreeB>(); + requireThatUpperBoundWorksT<SetTreeL>(); +} + +struct UpdKeyComp { + int _remainder; + mutable size_t _numErrors; + UpdKeyComp(int remainder) : _remainder(remainder), _numErrors(0) {} + bool operator() (const int & lhs, const int & rhs) const { + if (lhs % 2 != _remainder) ++_numErrors; + if (rhs % 2 != _remainder) ++_numErrors; + return lhs < rhs; + } +}; + +void +Test::requireThatUpdateOfKeyWorks() +{ + typedef BTree<int, BTreeNoLeafData, + btree::NoAggregated, + UpdKeyComp &> UpdKeyTree; + typedef UpdKeyTree::Iterator UpdKeyTreeIterator; + GenerationHandler g; + UpdKeyTree t; + UpdKeyComp cmp1(0); + for (int i = 0; i < 1000; i+=2) { + EXPECT_TRUE(t.insert(i, BTreeNoLeafData(), cmp1)); + } + EXPECT_EQUAL(0u, cmp1._numErrors); + for (int i = 0; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp1); + itr.writeKey(i + 1); + } + UpdKeyComp cmp2(1); + for (int i = 1; i < 1000; i+=2) { + UpdKeyTreeIterator itr = t.find(i, cmp2); + EXPECT_TRUE(itr.valid()); + } + EXPECT_EQUAL(0u, 
cmp2._numErrors); +} + + +void +Test::requireThatSmallNodesWorks(void) +{ + typedef BTreeStore<MyKey, std::string, btree::NoAggregated, MyComp, + BTreeDefaultTraits> TreeStore; + GenerationHandler g; + TreeStore s; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 40, "fourty")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 20, "twenty")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 60, "sixty")); + EXPECT_TRUE(!s.insert(root, 60, "sixty.not")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + EXPECT_TRUE(s.insert(root, 50, "fifty")); + EXPECT_TRUE(!s.insert(root, 50, "fifty.not")); + EXPECT_TRUE(!s.insert(root, 60, "sixty.not")); + EXPECT_TRUE(!s.insert(root, 20, "twenty.not")); + EXPECT_TRUE(!s.insert(root, 40, "fourty.not")); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + for (uint32_t i = 0; i < 100; ++i) { + EXPECT_TRUE(s.insert(root, 1000 + i, "big")); + if (i > 0) { + EXPECT_TRUE(!s.insert(root, 1000 + i - 1, "big")); + } + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + } + EXPECT_TRUE(s.remove(root, 40)); + EXPECT_TRUE(!s.remove(root, 40)); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + EXPECT_TRUE(s.remove(root, 20)); + EXPECT_TRUE(!s.remove(root, 20)); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + EXPECT_TRUE(s.remove(root, 50)); + EXPECT_TRUE(!s.remove(root, 50)); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + for (uint32_t i = 0; i < 100; ++i) { + 
EXPECT_TRUE(s.remove(root, 1000 + i)); + if (i > 0) { + EXPECT_TRUE(!s.remove(root, 1000 + i - 1)); + } + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + + +void +Test::requireThatApplyWorks(void) +{ + typedef BTreeStore<MyKey, std::string, btree::NoAggregated, MyComp, + BTreeDefaultTraits> TreeStore; + typedef TreeStore::KeyType KeyType; + typedef TreeStore::KeyDataType KeyDataType; + GenerationHandler g; + TreeStore s; + std::vector<KeyDataType> additions; + std::vector<KeyType> removals; + + EntryRef root; + EXPECT_EQUAL(0u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(40, "fourty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(20, "twenty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(2u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(60, "sixty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(3u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(50, "fifty")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(4u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + for 
(uint32_t i = 0; i < 100; ++i) { + additions.clear(); + removals.clear(); + additions.push_back(KeyDataType(1000 + i, "big")); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(5u + i, s.size(root)); + EXPECT_EQUAL(5u + i <= 8u, s.isSmallArray(root)); + } + + additions.clear(); + removals.clear(); + removals.push_back(40); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(103u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + removals.push_back(20); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(102u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + removals.push_back(50); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(101u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + for (uint32_t i = 0; i < 100; ++i) { + additions.clear(); + removals.clear(); + removals.push_back(1000 +i); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(100 - i, s.size(root)); + EXPECT_EQUAL(100 - i <= 8u, s.isSmallArray(root)); + } + EXPECT_EQUAL(1u, s.size(root)); + EXPECT_TRUE(s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + for (uint32_t i = 0; i < 20; ++i) + additions.push_back(KeyDataType(1000 + i, "big")); + removals.push_back(60); + removals.push_back(1002); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(20u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + 
removals.size()); + EXPECT_EQUAL(19u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + additions.clear(); + removals.clear(); + for (uint32_t i = 0; i < 20; ++i) + additions.push_back(KeyDataType(1100 + i, "big")); + for (uint32_t i = 0; i < 10; ++i) + removals.push_back(1000 + i); + s.apply(root, &additions[0], &additions[0] + additions.size(), + &removals[0], &removals[0] + removals.size()); + EXPECT_EQUAL(30u, s.size(root)); + EXPECT_TRUE(!s.isSmallArray(root)); + + s.clear(root); + s.clearBuilder(); + s.freeze(); + s.transferHoldLists(g.getCurrentGeneration()); + g.incGeneration(); + s.trimHoldLists(g.getFirstUsedGeneration()); +} + +class MyTreeTestIterator : public MyTree::Iterator +{ +public: + MyTreeTestIterator(const MyTree::Iterator &rhs) + : MyTree::Iterator(rhs) + { + } + + int + getPathSize(void) const + { + return _pathSize; + } +}; + + +void +Test::requireThatIteratorDistanceWorks(int numEntries) +{ + GenerationHandler g; + MyTree tree; + typedef MyTree::Iterator Iterator; + for (int i = 0; i < numEntries; ++i) { + tree.insert(i, toStr(i)); + } + MyTreeTestIterator tit = tree.begin(); + LOG(info, + "numEntries=%d, iterator pathSize=%d", + numEntries, tit.getPathSize()); + Iterator it = tree.begin(); + for (int i = 0; i <= numEntries; ++i) { + Iterator iit = tree.lowerBound(i); + Iterator iitn = tree.lowerBound(i + 1); + Iterator iitu = tree.upperBound(i); + Iterator iitls = tree.begin(); + Iterator iitbs = tree.begin(); + Iterator iitlsp = tree.begin(); + Iterator iitbsp = tree.begin(); + Iterator iitlb(tree.getRoot(), tree.getAllocator()); + iitlb.lower_bound(i); + Iterator iitlb2(BTreeNode::Ref(), tree.getAllocator()); + iitlb2.lower_bound(tree.getRoot(), i); + if (i > 0) { + iitls.linearSeek(i); + iitbs.binarySeek(i); + ++it; + } + iitlsp.linearSeekPast(i); + iitbsp.binarySeekPast(i); + Iterator iitlsp2 = iitls; + Iterator iitbsp2 = iitbs; + Iterator iitnr = i < numEntries ? 
iitn : tree.begin(); + --iitnr; + if (i < numEntries) { + iitlsp2.linearSeekPast(i); + iitbsp2.binarySeekPast(i); + } + EXPECT_EQUAL(i, static_cast<int>(iit.position())); + EXPECT_EQUAL(i < numEntries, iit.valid()); + EXPECT_TRUE(iit.identical(it)); + EXPECT_TRUE(iit.identical(iitls)); + EXPECT_TRUE(iit.identical(iitbs)); + EXPECT_TRUE(iit.identical(iitnr)); + EXPECT_TRUE(iit.identical(iitlb)); + EXPECT_TRUE(iit.identical(iitlb2)); + EXPECT_TRUE(iitn.identical(iitu)); + EXPECT_TRUE(iitn.identical(iitlsp)); + EXPECT_TRUE(iitn.identical(iitbsp)); + EXPECT_TRUE(iitn.identical(iitlsp2)); + EXPECT_TRUE(iitn.identical(iitbsp2)); + if (i < numEntries) { + EXPECT_EQUAL(i + 1, static_cast<int>(iitn.position())); + EXPECT_EQUAL(i + 1 < numEntries, iitn.valid()); + } + for (int j = 0; j <= numEntries; ++j) { + Iterator jit = tree.lowerBound(j); + EXPECT_EQUAL(j, static_cast<int>(jit.position())); + EXPECT_EQUAL(j < numEntries, jit.valid()); + EXPECT_EQUAL(i - j, iit - jit); + EXPECT_EQUAL(j - i, jit - iit); + + Iterator jit2 = jit; + jit2.setupEnd(); + EXPECT_EQUAL(numEntries - j, jit2 - jit); + EXPECT_EQUAL(numEntries - i, jit2 - iit); + EXPECT_EQUAL(j - numEntries, jit - jit2); + EXPECT_EQUAL(i - numEntries, iit - jit2); + } + } +} + + +void +Test::requireThatIteratorDistanceWorks() +{ + requireThatIteratorDistanceWorks(1); + requireThatIteratorDistanceWorks(3); + requireThatIteratorDistanceWorks(8); + requireThatIteratorDistanceWorks(20); + requireThatIteratorDistanceWorks(100); + requireThatIteratorDistanceWorks(400); +} + + +int +Test::Main() +{ + TEST_INIT("btree_test"); + + requireThatNodeInsertWorks(); + requireThatNodeSplitInsertWorks(); + requireThatNodeStealWorks(); + requireThatNodeRemoveWorks(); + requireThatNodeLowerBoundWorks(); + requireThatWeCanInsertAndRemoveFromTree(); + requireThatSortedTreeInsertWorks(); + requireThatCornerCaseTreeFindWorks(); + requireThatBasicTreeIteratorWorks(); + requireThatTreeIteratorSeekWorks(); + 
requireThatTreeIteratorAssignWorks(); + requireThatMemoryUsageIsCalculated(); + requireThatLowerBoundWorks(); + requireThatUpperBoundWorks(); + requireThatUpdateOfKeyWorks(); + requireThatSmallNodesWorks(); + requireThatApplyWorks(); + requireThatIteratorDistanceWorks(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); diff --git a/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp new file mode 100644 index 00000000000..817d024c60f --- /dev/null +++ b/searchlib/src/tests/memoryindex/btree/frozenbtree_test.cpp @@ -0,0 +1,513 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("frozenbtree_test"); +#define DEBUG_FROZENBTREE +#define LOG_FROZENBTREEXX +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/btree/btreeroot.h> +#include <vespa/searchlib/btree/btreenodeallocator.h> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <algorithm> +#include <limits> +#include <map> + +using search::btree::BTreeRoot; +using search::btree::BTreeNode; +using search::btree::BTreeInternalNode; +using search::btree::BTreeLeafNode; +using search::btree::BTreeDefaultTraits; +using vespalib::GenerationHandler; + +namespace search { + + +class FrozenBTreeTest : public vespalib::TestApp +{ +public: + typedef int KeyType; +private: + std::vector<KeyType> _randomValues; + std::vector<KeyType> _sortedRandomValues; + +public: + typedef int DataType; + typedef BTreeRoot<KeyType, DataType, + btree::NoAggregated, + std::less<KeyType>, + BTreeDefaultTraits> Tree; + typedef Tree::NodeAllocatorType 
NodeAllocator; + typedef Tree::InternalNodeType InternalNodeType; + typedef Tree::LeafNodeType LeafNodeType; + typedef Tree::Iterator Iterator; + typedef Tree::ConstIterator ConstIterator; +private: + GenerationHandler *_generationHandler; + NodeAllocator *_allocator; + Tree *_tree; + + Rand48 _randomGenerator; + + void + allocTree(void); + + void + freeTree(bool verbose); + + void + fillRandomValues(unsigned int count); + + void + insertRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + removeRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + lookupRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + lookupGoneRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + lookupFrozenRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values); + + void + sortRandomValues(void); + + void + traverseTreeIterator(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &sorted, + bool frozen); + + void + printSubEnumTree(BTreeNode::Ref node, + NodeAllocator &allocator, + int indent) const; + + void + printEnumTree(const Tree *tree, + NodeAllocator &allocator); + + static const char * + frozenName(bool frozen) + { + return frozen ? 
"frozen" : "thawed"; + } +public: + FrozenBTreeTest(void) + : vespalib::TestApp(), + _randomValues(), + _sortedRandomValues(), + _generationHandler(NULL), + _allocator(NULL), + _tree(NULL), + _randomGenerator() + { + } + + int Main(void); +}; + + + +void +FrozenBTreeTest::allocTree(void) +{ + assert(_generationHandler == NULL); + assert(_allocator == NULL); + assert(_tree == NULL); + _generationHandler = new GenerationHandler; + _allocator = new NodeAllocator(); + _tree = new Tree; +} + + +void +FrozenBTreeTest::freeTree(bool verbose) +{ +#if 0 + LOG(info, + "freeTree before clear: %" PRIu64 " (%" PRIu64 " held)" + ", %" PRIu32 " leaves", + static_cast<uint64_t>(_intTree->getUsedMemory()), + static_cast<uint64_t>(_intTree->getHeldMemory()), + _intTree->validLeaves()); + _intTree->clear(); + LOG(info, + "freeTree before unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intTree->getUsedMemory()), + static_cast<uint64_t>(_intTree->getHeldMemory())); + _intTree->dropFrozen(); + _intTree->removeOldGenerations(_intTree->getGeneration() + 1); + LOG(info, + "freeTree after unhold: %" PRIu64 " (%" PRIu64 " held)", + static_cast<uint64_t>(_intTree->getUsedMemory()), + static_cast<uint64_t>(_intTree->getHeldMemory())); + if (verbose) + LOG(info, + "%d+%d leftover tree nodes", + _intTree->getNumInternalNodes(), + _intTree->getNumLeafNodes()); + EXPECT_TRUE(_intTree->getNumInternalNodes() == 0 && + _intTree->getNumLeafNodes() == 0); + delete _intTree; + _intTree = NULL; + delete _intKeyStore; + _intKeyStore = NULL; +#endif + (void) verbose; + _tree->clear(*_allocator); + _allocator->freeze(); + _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + _generationHandler->incGeneration(); + _allocator->trimHoldLists(_generationHandler->getFirstUsedGeneration()); + delete _tree; + _tree = NULL; + delete _allocator; + _allocator = NULL; + delete _generationHandler; + _generationHandler = NULL; +} + + +void 
+FrozenBTreeTest::fillRandomValues(unsigned int count) +{ + unsigned int i; + + LOG(info, + "Filling %u random values", count); + _randomValues.clear(); + _randomValues.reserve(count); + _randomGenerator.srand48(42); + for (i = 0; i <count; i++) + _randomValues.push_back(_randomGenerator.lrand48()); + + EXPECT_TRUE(_randomValues.size() == count); +} + + +void +FrozenBTreeTest:: +insertRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "insertRandomValues start"); + for (; i != ie; ++i) { +#ifdef LOG_FROZENBTREE + LOG(info, "Try lookup %d before insert", *i); +#endif + p = tree.find(*i, allocator); + if (!p.valid()) { + DataType val = *i + 42; + if (tree.insert(*i, val, allocator)) + p = tree.find(*i, allocator); + } + ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42); +#ifdef DEBUG_FROZENBTREEX + printEnumTree(&tree); +#endif + } + ASSERT_TRUE(tree.isValid(allocator)); + ASSERT_TRUE(tree.isValidFrozen(allocator)); + LOG(info, "insertRandomValues done"); +} + + +void +FrozenBTreeTest:: +removeRandomValues(Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> & values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "removeRandomValues start"); + for (; i != ie; ++i) { +#ifdef LOG_FROZENBTREE + LOG(info, "Try lookup %d before remove", *i); +#endif + p = tree.find(*i, allocator); + if (p.valid()) { + if (tree.remove(*i, allocator)) + p = tree.find(*i, allocator); + } + ASSERT_TRUE(!p.valid()); +#ifdef DEBUG_FROZENBTREEX + tree.printTree(); +#endif + } + ASSERT_TRUE(tree.isValid(allocator)); + ASSERT_TRUE(tree.isValidFrozen(allocator)); + LOG(info, "removeRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupRandomValues(const Tree &tree, + 
NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "lookupRandomValues start"); + for (; i != ie; ++i) { + p = tree.find(*i, allocator); + ASSERT_TRUE(p.valid() && p.getKey() == *i); + } + LOG(info, "lookupRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupGoneRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + Iterator p; + + LOG(info, "lookupGoneRandomValues start"); + for (; i != ie; ++i) { + p = tree.find(*i, allocator); + ASSERT_TRUE(!p.valid()); + } + LOG(info, "lookupGoneRandomValues done"); +} + + +void +FrozenBTreeTest:: +lookupFrozenRandomValues(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &values) +{ + std::vector<KeyType>::const_iterator i(values.begin()); + std::vector<KeyType>::const_iterator ie(values.end()); + ConstIterator p; + + LOG(info, "lookupFrozenRandomValues start"); + for (; i != ie; ++i) { + p = tree.getFrozenView(allocator).find(*i, std::less<int>()); + ASSERT_TRUE(p.valid() && p.getKey() == *i && p.getData() == *i + 42); + } + LOG(info, "lookupFrozenRandomValues done"); +} + + +void +FrozenBTreeTest::sortRandomValues(void) +{ + std::vector<KeyType>::iterator i; + std::vector<KeyType>::iterator ie; + uint32_t okcnt; + int prevVal; + std::vector<KeyType> sorted; + + LOG(info, "sortRandomValues start"); + sorted = _randomValues; + std::sort(sorted.begin(), sorted.end()); + _sortedRandomValues.clear(); + _sortedRandomValues.reserve(sorted.size()); + + okcnt = 0; + prevVal = 0; + ie = sorted.end(); + for (i = sorted.begin(); i != ie; ++i) { + if (i == _sortedRandomValues.begin() || *i > prevVal) { + okcnt++; + _sortedRandomValues.push_back(*i); + } else if (*i == prevVal) + okcnt++; + 
else + abort(); + prevVal = *i; + } + EXPECT_TRUE(okcnt == sorted.size()); + LOG(info, "sortRandomValues done"); +} + + +void +FrozenBTreeTest:: +traverseTreeIterator(const Tree &tree, + NodeAllocator &allocator, + const std::vector<KeyType> &sorted, + bool frozen) +{ + LOG(info, + "traverseTreeIterator %s start", + frozenName(frozen)); + + std::vector<KeyType>::const_iterator i; + + i = sorted.begin(); + if (frozen) { + ConstIterator ai; + ai = tree.getFrozenView(allocator).begin(); + for (;ai.valid(); ++ai, ++i) + { + ASSERT_TRUE(ai.getKey() == *i); + } + } else { + Iterator ai; + ai = tree.begin(allocator); + for (;ai.valid(); ++ai, ++i) + { + ASSERT_TRUE(ai.getKey() == *i); + } + } + + + ASSERT_TRUE(i == sorted.end()); + + LOG(info, + "traverseTreeIterator %s done", + frozenName(frozen)); +} + + +void +FrozenBTreeTest:: +printSubEnumTree(BTreeNode::Ref node, + NodeAllocator &allocator, + int indent) const +{ + // typedef BTreeNode Node; + typedef LeafNodeType LeafNode; + typedef InternalNodeType InternalNode; + BTreeNode::Ref subNode; + unsigned int i; + + if (allocator.isLeafRef(node)) { + const LeafNode *lnode = allocator.mapLeafRef(node); + printf("%*s LeafNode %s valid=%d\n", + indent, "", + lnode->getFrozen() ? "frozen" : "thawed", + lnode->validSlots()); + for (i = 0; i < lnode->validSlots(); i++) { + + KeyType k = lnode->getKey(i); + DataType d = lnode->getData(i); + printf("leaf value %3d %d %d\n", + (int) i, + (int) k, + (int) d); + } + return; + } + const InternalNode *inode = allocator.mapInternalRef(node); + printf("%*s IntermediteNode %s valid=%d\n", + indent, "", + inode->getFrozen() ? 
"frozen" : "thawed", + inode->validSlots()); + for (i = 0; i < inode->validSlots(); i++) { + subNode = inode->getChild(i); + assert(subNode != BTreeNode::Ref()); + printSubEnumTree(subNode, allocator, indent + 4); + } +} + + +void +FrozenBTreeTest::printEnumTree(const Tree *tree, + NodeAllocator &allocator) +{ + printf("Tree Dump start\n"); + if (!NodeAllocator::isValidRef(tree->getRoot())) { + printf("EMPTY\n"); + } else { + printSubEnumTree(tree->getRoot(), allocator, 0); + } + printf("Tree Dump done\n"); +} + + + +int +FrozenBTreeTest::Main() +{ + TEST_INIT("frozenbtree_test"); + + fillRandomValues(1000); + sortRandomValues(); + + allocTree(); + insertRandomValues(*_tree, *_allocator, _randomValues); + lookupRandomValues(*_tree, *_allocator, _randomValues); + _allocator->freeze(); + _allocator->transferHoldLists(_generationHandler->getCurrentGeneration()); + lookupFrozenRandomValues(*_tree, *_allocator, _randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + removeRandomValues(*_tree, *_allocator, _randomValues); + lookupGoneRandomValues(*_tree, *_allocator, _randomValues); + lookupFrozenRandomValues(*_tree, *_allocator,_randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + true); + insertRandomValues(*_tree, *_allocator, _randomValues); + freeTree(true); + + fillRandomValues(1000000); + sortRandomValues(); + + allocTree(); + insertRandomValues(*_tree, *_allocator, _randomValues); + traverseTreeIterator(*_tree, + *_allocator, + _sortedRandomValues, + false); + freeTree(false); + + TEST_DONE(); +} + +} + +TEST_APPHOOK(search::FrozenBTreeTest); diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore 
b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore new file mode 100644 index 00000000000..3ad290f1731 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/.gitignore @@ -0,0 +1 @@ +searchlib_compact_document_words_store_test_app diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt new file mode 100644 index 00000000000..666639f20ba --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_compact_document_words_store_test_app + SOURCES + compact_document_words_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_compact_document_words_store_test_app COMMAND searchlib_compact_document_words_store_test_app) diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/DESC b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC new file mode 100644 index 00000000000..ee9c4b346a2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/DESC @@ -0,0 +1 @@ +compact_document_words_store test. Take a look at compact_document_words_store_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/FILES b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES new file mode 100644 index 00000000000..fb2fb1d637b --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/FILES @@ -0,0 +1 @@ +compact_document_words_store_test.cpp diff --git a/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp new file mode 100644 index 00000000000..2a3bffb2fe6 --- /dev/null +++ b/searchlib/src/tests/memoryindex/compact_document_words_store/compact_document_words_store_test.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".memoryindex.compact_document_words_store_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/btree/entryref.h> +#include <vespa/searchlib/memoryindex/compact_document_words_store.h> +#include <vespa/vespalib/stllike/string.h> +#include <iostream> +#include <map> + +using namespace search; +using namespace search::btree; +using namespace search::memoryindex; + +typedef CompactDocumentWordsStore::Builder Builder; +typedef CompactDocumentWordsStore::Iterator Iterator; +typedef Builder::WordRefVector WordRefVector; + +const EntryRef w1(1); +const EntryRef w2(2); +const EntryRef w3(3); +const EntryRef w4(4); +const uint32_t d1(111); +const uint32_t d2(222); +const uint32_t d3(333); +const uint32_t d4(444); + +WordRefVector +build(Iterator itr) +{ + WordRefVector words; + for (; itr.valid(); ++itr) { + words.push_back(itr.wordRef()); + } + return words; +} + +vespalib::string +toStr(Iterator itr) +{ + WordRefVector words = build(itr); + std::ostringstream oss; + oss << "["; + bool firstWord = true; + for (auto word : words) { + 
if (!firstWord) oss << ","; + oss << word.ref(); + firstWord = false; + } + oss << "]"; + return oss.str(); +} + +struct SingleFixture +{ + CompactDocumentWordsStore _store; + SingleFixture() : _store() { + _store.insert(Builder(d1).insert(w1).insert(w2).insert(w3)); + } +}; + +struct MultiFixture +{ + CompactDocumentWordsStore _store; + MultiFixture() : _store() { + _store.insert(Builder(d1).insert(w1)); + _store.insert(Builder(d2).insert(w2)); + _store.insert(Builder(d3).insert(w3)); + } +}; + + +TEST_F("require that fields and words can be added for a document", SingleFixture) +{ + EXPECT_EQUAL("[1,2,3]", toStr(f._store.get(d1))); +} + +TEST_F("require that multiple documents can be added", MultiFixture) +{ + EXPECT_EQUAL("[1]", toStr(f._store.get(d1))); + EXPECT_EQUAL("[2]", toStr(f._store.get(d2))); + EXPECT_EQUAL("[3]", toStr(f._store.get(d3))); + EXPECT_FALSE(f._store.get(d4).valid()); +} + +TEST_F("require that documents can be removed", MultiFixture) +{ + f._store.remove(d2); + EXPECT_TRUE(f._store.get(d1).valid()); + EXPECT_FALSE(f._store.get(d2).valid()); + EXPECT_TRUE(f._store.get(d3).valid()); +} + +TEST_F("require that documents can be removed and re-inserted", MultiFixture) +{ + f._store.remove(d2); + f._store.insert(Builder(d2).insert(w4)); + EXPECT_EQUAL("[4]", toStr(f._store.get(d2))); +} + +TEST("require that a lot of words can be inserted, retrieved and removed") +{ + CompactDocumentWordsStore store; + for (uint32_t docId = 0; docId < 50; ++docId) { + Builder b(docId); + for (uint32_t wordRef = 0; wordRef < 20000; ++wordRef) { + b.insert(wordRef); + } + store.insert(b); + MemoryUsage usage = store.getMemoryUsage(); + std::cout << "memory usage (insert): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl; + } + for (uint32_t docId = 0; docId < 50; ++docId) { + WordRefVector words = build(store.get(docId)); + EXPECT_EQUAL(20000u, words.size()); + uint32_t wordRef = 0; + for (auto word : words) { 
+ EXPECT_EQUAL(wordRef++, word.ref()); + } + store.remove(docId); + MemoryUsage usage = store.getMemoryUsage(); + std::cout << "memory usage (remove): docId=" << docId << ", alloc=" << usage.allocatedBytes() << ", used=" << usage.usedBytes() << std::endl; + } +} + +TEST("require that initial memory usage is reported") +{ + CompactDocumentWordsStore store; + CompactDocumentWordsStore::DocumentWordsMap docs; + CompactDocumentWordsStore::Store internalStore; + MemoryUsage initExp; + initExp.incAllocatedBytes(docs.getMemoryConsumption()); + initExp.incUsedBytes(docs.getMemoryUsed()); + initExp.merge(internalStore.getMemoryUsage()); + MemoryUsage init = store.getMemoryUsage(); + EXPECT_EQUAL(initExp.allocatedBytes(), init.allocatedBytes()); + EXPECT_EQUAL(initExp.usedBytes(), init.usedBytes()); + EXPECT_GREATER(init.allocatedBytes(), init.usedBytes()); + EXPECT_GREATER(init.allocatedBytes(), 0u); + EXPECT_GREATER(init.usedBytes(), 0u); +} + +TEST("require that memory usage is updated after insert") +{ + CompactDocumentWordsStore store; + MemoryUsage init = store.getMemoryUsage(); + + store.insert(Builder(d1).insert(w1)); + MemoryUsage after = store.getMemoryUsage(); + EXPECT_GREATER_EQUAL(after.allocatedBytes(), init.allocatedBytes()); + EXPECT_GREATER(after.usedBytes(), init.usedBytes()); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } + diff --git a/searchlib/src/tests/memoryindex/datastore/.gitignore b/searchlib/src/tests/memoryindex/datastore/.gitignore new file mode 100644 index 00000000000..98f4acc70a8 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +datastore_test +featurestore_test +wordstore_test +searchlib_datastore_test_app +searchlib_featurestore_test_app +searchlib_wordstore_test_app diff --git a/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt new file mode 100644 index 00000000000..da45288fe5e --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/datastore/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_datastore_test_app + SOURCES + datastore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_datastore_test_app COMMAND searchlib_datastore_test_app) +vespa_add_executable(searchlib_featurestore_test_app + SOURCES + featurestore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_featurestore_test_app COMMAND searchlib_featurestore_test_app) +vespa_add_executable(searchlib_wordstore_test_app + SOURCES + wordstore_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_wordstore_test_app COMMAND searchlib_wordstore_test_app) diff --git a/searchlib/src/tests/memoryindex/datastore/DESC b/searchlib/src/tests/memoryindex/datastore/DESC new file mode 100644 index 00000000000..56725396b65 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/DESC @@ -0,0 +1 @@ +datastore test. Take a look at datastore_test.cpp and wordstore_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/datastore/FILES b/searchlib/src/tests/memoryindex/datastore/FILES new file mode 100644 index 00000000000..6cbbaf6a328 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/FILES @@ -0,0 +1,2 @@ +datastore_test.cpp +wordstore_test.cpp diff --git a/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp new file mode 100644 index 00000000000..be55dd7ee1e --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/datastore_test.cpp @@ -0,0 +1,432 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("datastore_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/btree/datastore.h> +#include <vespa/searchlib/btree/datastore.hpp> + +namespace search { +namespace btree { + +class MyStore : public DataStore<int, EntryRefT<3, 2> > { +private: + typedef DataStore<int, EntryRefT<3, 2> > ParentType; + using ParentType::_buffers; + using ParentType::_states; + using ParentType::_activeBufferIds; +public: + MyStore() {} + + void + holdBuffer(uint32_t bufferId) + { + ParentType::holdBuffer(bufferId); + } + + void + holdElem(EntryRef ref, uint64_t len) + { + ParentType::holdElem(ref, len); + } + + void + transferHoldLists(generation_t generation) + { + ParentType::transferHoldLists(generation); + } + + void trimElemHoldList(generation_t usedGen) { + ParentType::trimElemHoldList(usedGen); + } + void incDead(EntryRef ref, uint64_t dead) { + ParentType::incDead(ref, dead); + } + void ensureBufferCapacity(size_t sizeNeeded) { + ParentType::ensureBufferCapacity(0, sizeNeeded); + } + void enableFreeLists() { + ParentType::enableFreeLists(); + } + + void + switchActiveBuffer(void) + { + ParentType::switchActiveBuffer(0, 0u); + } + std::vector<void *> & buffers() { return _buffers; } + std::vector<BufferState> &statesVec() { return _states; } + size_t activeBufferId() const { return _activeBufferIds[0]; } +}; + +typedef MyStore::RefType MyRef; + +class Test : public vespalib::TestApp { +private: + bool assertMemStats(const DataStoreBase::MemStats & exp, + const DataStoreBase::MemStats & act); + void requireThatEntryRefIsWorking(); + void requireThatAlignedEntryRefIsWorking(); + void requireThatEntriesCanBeAddedAndRetrieved(); + void requireThatAddEntryTriggersChangeOfBuffer(); + void requireThatWeCanHoldAndTrimBuffers(); + void requireThatWeCanHoldAndTrimElements(); + void requireThatWeCanUseFreeLists(); + void requireThatMemoryStatsAreCalculated(); + void 
requireThatMemoryUsageIsCalculated(); + + void + requireThatWecanDisableElemHoldList(void); +public: + int Main(); +}; + +bool +Test::assertMemStats(const DataStoreBase::MemStats & exp, + const DataStoreBase::MemStats & act) +{ + if (!EXPECT_EQUAL(exp._allocElems, act._allocElems)) return false; + if (!EXPECT_EQUAL(exp._usedElems, act._usedElems)) return false; + if (!EXPECT_EQUAL(exp._deadElems, act._deadElems)) return false; + if (!EXPECT_EQUAL(exp._holdElems, act._holdElems)) return false; + if (!EXPECT_EQUAL(exp._freeBuffers, act._freeBuffers)) return false; + if (!EXPECT_EQUAL(exp._activeBuffers, act._activeBuffers)) return false; + if (!EXPECT_EQUAL(exp._holdBuffers, act._holdBuffers)) return false; + return true; +} + +void +Test::requireThatEntryRefIsWorking() +{ + typedef EntryRefT<22> MyRefType; + EXPECT_EQUAL(4194304u, MyRefType::offsetSize()); + EXPECT_EQUAL(1024u, MyRefType::numBuffers()); + { + MyRefType r(0, 0); + EXPECT_EQUAL(0u, r.offset()); + EXPECT_EQUAL(0u, r.bufferId()); + } + { + MyRefType r(237, 13); + EXPECT_EQUAL(237u, r.offset()); + EXPECT_EQUAL(13u, r.bufferId()); + } + { + MyRefType r(4194303, 1023); + EXPECT_EQUAL(4194303u, r.offset()); + EXPECT_EQUAL(1023u, r.bufferId()); + } + { + MyRefType r1(6498, 76); + MyRefType r2(r1); + EXPECT_EQUAL(r1.offset(), r2.offset()); + EXPECT_EQUAL(r1.bufferId(), r2.bufferId()); + } +} + +void +Test::requireThatAlignedEntryRefIsWorking() +{ + typedef AlignedEntryRefT<22, 2> MyRefType; // 4 byte alignement + EXPECT_EQUAL(4 * 4194304u, MyRefType::offsetSize()); + EXPECT_EQUAL(1024u, MyRefType::numBuffers()); + EXPECT_EQUAL(0u, MyRefType::align(0)); + EXPECT_EQUAL(4u, MyRefType::align(1)); + EXPECT_EQUAL(4u, MyRefType::align(2)); + EXPECT_EQUAL(4u, MyRefType::align(3)); + EXPECT_EQUAL(4u, MyRefType::align(4)); + EXPECT_EQUAL(8u, MyRefType::align(5)); + { + MyRefType r(0, 0); + EXPECT_EQUAL(0u, r.offset()); + EXPECT_EQUAL(0u, r.bufferId()); + } + { + MyRefType r(237, 13); + 
EXPECT_EQUAL(MyRefType::align(237), r.offset()); + EXPECT_EQUAL(13u, r.bufferId()); + } + { + MyRefType r(MyRefType::offsetSize() - 4, 1023); + EXPECT_EQUAL(MyRefType::align(MyRefType::offsetSize() - 4), r.offset()); + EXPECT_EQUAL(1023u, r.bufferId()); + } +} + +void +Test::requireThatEntriesCanBeAddedAndRetrieved() +{ + typedef DataStore<int> IntStore; + IntStore ds; + EntryRef r1 = ds.addEntry(10); + EntryRef r2 = ds.addEntry(20); + EntryRef r3 = ds.addEntry(30); + EXPECT_EQUAL(1u, IntStore::RefType(r1).offset()); + EXPECT_EQUAL(2u, IntStore::RefType(r2).offset()); + EXPECT_EQUAL(3u, IntStore::RefType(r3).offset()); + EXPECT_EQUAL(0u, IntStore::RefType(r1).bufferId()); + EXPECT_EQUAL(0u, IntStore::RefType(r2).bufferId()); + EXPECT_EQUAL(0u, IntStore::RefType(r3).bufferId()); + EXPECT_EQUAL(10, ds.getEntry(r1)); + EXPECT_EQUAL(20, ds.getEntry(r2)); + EXPECT_EQUAL(30, ds.getEntry(r3)); +} + +void +Test::requireThatAddEntryTriggersChangeOfBuffer() +{ + typedef DataStore<uint64_t, EntryRefT<10, 10> > Store; + Store s; + uint64_t num = 0; + uint32_t lastId = 0; + uint64_t lastNum = 0; + for (;;++num) { + EntryRef r = s.addEntry(num); + EXPECT_EQUAL(num, s.getEntry(r)); + uint32_t bufferId = Store::RefType(r).bufferId(); + if (bufferId > lastId) { + LOG(info, "Changed to bufferId %u after %" PRIu64 " nums", bufferId, num); + EXPECT_EQUAL(Store::RefType::offsetSize() - (lastId == 0), + num - lastNum); + lastId = bufferId; + lastNum = num; + } + if (bufferId == 2) { + break; + } + } + EXPECT_EQUAL(Store::RefType::offsetSize() * 2 - 1, num); + LOG(info, "Added %" PRIu64 " nums in 2 buffers", num); +} + +void +Test::requireThatWeCanHoldAndTrimBuffers() +{ + MyStore s; + EXPECT_EQUAL(0u, MyRef(s.addEntry(1)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(1u, s.activeBufferId()); + s.holdBuffer(0); // hold last buffer + s.transferHoldLists(10); + + EXPECT_EQUAL(1u, MyRef(s.addEntry(2)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(2u, s.activeBufferId()); + 
s.holdBuffer(1); // hold last buffer + s.transferHoldLists(20); + + EXPECT_EQUAL(2u, MyRef(s.addEntry(3)).bufferId()); + s.switchActiveBuffer(); + EXPECT_EQUAL(3u, s.activeBufferId()); + s.holdBuffer(2); // hold last buffer + s.transferHoldLists(30); + + EXPECT_EQUAL(3u, MyRef(s.addEntry(4)).bufferId()); + s.holdBuffer(3); // hold current buffer + s.transferHoldLists(40); + + EXPECT_TRUE(s.statesVec()[0].size() != 0); + EXPECT_TRUE(s.statesVec()[1].size() != 0); + EXPECT_TRUE(s.statesVec()[2].size() != 0); + EXPECT_TRUE(s.statesVec()[3].size() != 0); + s.trimHoldLists(11); + EXPECT_TRUE(s.statesVec()[0].size() == 0); + EXPECT_TRUE(s.statesVec()[1].size() != 0); + EXPECT_TRUE(s.statesVec()[2].size() != 0); + EXPECT_TRUE(s.statesVec()[3].size() != 0); + + s.switchActiveBuffer(); + EXPECT_EQUAL(0u, s.activeBufferId()); + EXPECT_EQUAL(0u, MyRef(s.addEntry(5)).bufferId()); + s.trimHoldLists(41); + EXPECT_TRUE(s.statesVec()[0].size() != 0); + EXPECT_TRUE(s.statesVec()[1].size() == 0); + EXPECT_TRUE(s.statesVec()[2].size() == 0); + EXPECT_TRUE(s.statesVec()[3].size() == 0); +} + +void +Test::requireThatWeCanHoldAndTrimElements() +{ + MyStore s; + MyRef r1 = s.addEntry(1); + s.holdElem(r1, 1); + s.transferHoldLists(10); + MyRef r2 = s.addEntry(2); + s.holdElem(r2, 1); + s.transferHoldLists(20); + MyRef r3 = s.addEntry(3); + s.holdElem(r3, 1); + s.transferHoldLists(30); + EXPECT_EQUAL(1, s.getEntry(r1)); + EXPECT_EQUAL(2, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + s.trimElemHoldList(11); + EXPECT_EQUAL(0, s.getEntry(r1)); + EXPECT_EQUAL(2, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + s.trimElemHoldList(31); + EXPECT_EQUAL(0, s.getEntry(r1)); + EXPECT_EQUAL(0, s.getEntry(r2)); + EXPECT_EQUAL(0, s.getEntry(r3)); +} + +void +Test::requireThatWeCanUseFreeLists() +{ + MyStore s; + s.enableFreeLists(); + MyRef r1 = s.addEntry2(1); + s.holdElem(r1, 1); + s.transferHoldLists(10); + MyRef r2 = s.addEntry2(2); + s.holdElem(r2, 1); + s.transferHoldLists(20); + 
s.trimElemHoldList(11); + MyRef r3 = s.addEntry2(3); // reuse r1 + EXPECT_EQUAL(r1.offset(), r3.offset()); + EXPECT_EQUAL(r1.bufferId(), r3.bufferId()); + MyRef r4 = s.addEntry2(4); + EXPECT_EQUAL(r2.offset() + 1, r4.offset()); + s.trimElemHoldList(21); + MyRef r5 = s.addEntry2(5); // reuse r2 + EXPECT_EQUAL(r2.offset(), r5.offset()); + EXPECT_EQUAL(r2.bufferId(), r5.bufferId()); + MyRef r6 = s.addEntry2(6); + EXPECT_EQUAL(r4.offset() + 1, r6.offset()); + EXPECT_EQUAL(3, s.getEntry(r1)); + EXPECT_EQUAL(5, s.getEntry(r2)); + EXPECT_EQUAL(3, s.getEntry(r3)); + EXPECT_EQUAL(4, s.getEntry(r4)); + EXPECT_EQUAL(5, s.getEntry(r5)); + EXPECT_EQUAL(6, s.getEntry(r6)); +} + +void +Test::requireThatMemoryStatsAreCalculated() +{ + MyStore s; + DataStoreBase::MemStats m; + m._allocElems = MyRef::offsetSize(); + m._usedElems = 1; // ref = 0 is reserved + m._deadElems = 1; // ref = 0 is reserved + m._holdElems = 0; + m._activeBuffers = 1; + m._freeBuffers = MyRef::numBuffers() - 1; + m._holdBuffers = 0; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // add entry + MyRef r = s.addEntry(10); + m._usedElems++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // inc dead + s.incDead(r, 1); + m._deadElems++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // hold buffer + s.addEntry(20); + s.addEntry(30); + s.holdBuffer(r.bufferId()); + s.transferHoldLists(100); + m._usedElems += 2; + m._holdElems += 2; // used - dead + m._activeBuffers--; + m._holdBuffers++; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); + + // new active buffer + s.switchActiveBuffer(); + s.addEntry(40); + m._allocElems += MyRef::offsetSize(); + m._usedElems++; + m._activeBuffers++; + m._freeBuffers--; + + // trim hold buffer + s.trimHoldLists(101); + m._allocElems -= MyRef::offsetSize(); + m._usedElems = 1; + m._deadElems = 0; + m._holdElems = 0; + m._freeBuffers = MyRef::numBuffers() - 1; + m._holdBuffers = 0; + EXPECT_TRUE(assertMemStats(m, s.getMemStats())); +} + +void 
+Test::requireThatMemoryUsageIsCalculated() +{ + MyStore s; + MyRef r = s.addEntry(10); + s.addEntry(20); + s.addEntry(30); + s.addEntry(40); + s.incDead(r, 1); + s.holdBuffer(r.bufferId()); + s.transferHoldLists(100); + MemoryUsage m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(5 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(2 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(3 * sizeof(int), m.allocatedBytesOnHold()); + s.trimHoldLists(101); +} + + +void +Test::requireThatWecanDisableElemHoldList(void) +{ + MyStore s; + MyRef r1 = s.addEntry(10); + MyRef r2 = s.addEntry(20); + MyRef r3 = s.addEntry(30); + (void) r3; + MemoryUsage m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(0 * sizeof(int), m.allocatedBytesOnHold()); + s.holdElem(r1, 1); + m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold()); + s.disableElemHoldList(); + s.holdElem(r2, 1); + m = s.getMemoryUsage(); + EXPECT_EQUAL(MyRef::offsetSize() * sizeof(int), m.allocatedBytes()); + EXPECT_EQUAL(4 * sizeof(int), m.usedBytes()); + EXPECT_EQUAL(2 * sizeof(int), m.deadBytes()); + EXPECT_EQUAL(1 * sizeof(int), m.allocatedBytesOnHold()); + s.transferHoldLists(100); + s.trimHoldLists(101); +} + +int +Test::Main() +{ + TEST_INIT("datastore_test"); + + requireThatEntryRefIsWorking(); + requireThatAlignedEntryRefIsWorking(); + requireThatEntriesCanBeAddedAndRetrieved(); + requireThatAddEntryTriggersChangeOfBuffer(); + requireThatWeCanHoldAndTrimBuffers(); + requireThatWeCanHoldAndTrimElements(); + requireThatWeCanUseFreeLists(); + requireThatMemoryStatsAreCalculated(); + requireThatMemoryUsageIsCalculated(); + 
requireThatWecanDisableElemHoldList(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::btree::Test); + diff --git a/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp new file mode 100644 index 00000000000..87d32c90b78 --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/featurestore_test.cpp @@ -0,0 +1,245 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("featurestore_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/memoryindex/featurestore.h> + +using namespace search::btree; +using namespace search::index; + +namespace search +{ + + +namespace memoryindex +{ + + +class Test : public vespalib::TestApp +{ +private: + Schema _schema; + + const Schema & + getSchema(void) const + { + return _schema; + } + + bool + assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act); + + void + requireThatFeaturesCanBeAddedAndRetrieved(void); + + void + requireThatNextWordsAreWorking(void); + void + requireThatAddFeaturesTriggersChangeOfBuffer(void); + +public: + Test(void); + + int + Main(void); +}; + + +bool +Test::assertFeatures(const DocIdAndFeatures &exp, + const DocIdAndFeatures &act) +{ + // docid is not encoded as part of features + if (!EXPECT_EQUAL(exp._elements.size(), + act._elements.size())) + return false; + for (size_t i = 0; i < exp._elements.size(); ++i) { + if (!EXPECT_EQUAL(exp._elements[i]._elementId, + act._elements[i]._elementId)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._numOccs, + act._elements[i]._numOccs)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._weight, act._elements[i]._weight)) + return false; + if (!EXPECT_EQUAL(exp._elements[i]._elementLen, + act._elements[i]._elementLen)) + return false; + } + if (!EXPECT_EQUAL(exp._wordPositions.size(), 
act._wordPositions.size())) + return false; + for (size_t i = 0; i < exp._wordPositions.size(); ++i) { + if (!EXPECT_EQUAL(exp._wordPositions[i]._wordPos, + act._wordPositions[i]._wordPos)) return false; + } + return true; +} + + +DocIdAndFeatures +getFeatures(uint32_t numOccs, + int32_t weight, + uint32_t elemLen) +{ + DocIdAndFeatures f; + f._docId = 0; + f._elements.push_back(WordDocElementFeatures(0)); + f._elements.back().setNumOccs(numOccs); + f._elements.back().setWeight(weight); + f._elements.back().setElementLen(elemLen); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } + return f; +} + + +void +Test::requireThatFeaturesCanBeAddedAndRetrieved(void) +{ + FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair<EntryRef, uint64_t> r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatNextWordsAreWorking(void) +{ + 
FeatureStore fs(getSchema()); + DocIdAndFeatures act; + EntryRef r1; + EntryRef r2; + std::pair<EntryRef, uint64_t> r; + { + DocIdAndFeatures f = getFeatures(2, 4, 8); + r = fs.addFeatures(0, f); + r1 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_EQUAL(FeatureStore::RefType::align(1u), + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r1).offset(), + FeatureStore::RefType(r1).bufferId()); + fs.getFeatures(0, r1, act); + // weight not encoded for single value + EXPECT_TRUE(assertFeatures(getFeatures(2, 1, 8), act)); + } + { + DocIdAndFeatures f = getFeatures(4, 8, 16); + r = fs.addFeatures(1, f); + r2 = r.first; + EXPECT_TRUE(r.second > 0); + EXPECT_TRUE(FeatureStore::RefType(r2).offset() > + FeatureStore::RefType(r1).offset()); + EXPECT_EQUAL(0u, FeatureStore::RefType(r1).bufferId()); + LOG(info, + "bits(%" PRIu64 "), ref.offset(%" PRIu64 "), ref.bufferId(%u)", + r.second, + FeatureStore::RefType(r2).offset(), + FeatureStore::RefType(r2).bufferId()); + fs.getFeatures(1, r2, act); + EXPECT_TRUE(assertFeatures(f, act)); + } +} + + +void +Test::requireThatAddFeaturesTriggersChangeOfBuffer(void) +{ + FeatureStore fs(getSchema()); + size_t cnt = 1; + DocIdAndFeatures act; + uint32_t lastId = 0; + for (;;++cnt) { + uint32_t numOccs = (cnt % 100) + 1; + DocIdAndFeatures f = getFeatures(numOccs, 1, numOccs + 1); + std::pair<EntryRef, uint64_t> r = fs.addFeatures(0, f); + fs.getFeatures(0, r.first, act); + EXPECT_TRUE(assertFeatures(f, act)); + uint32_t bufferId = FeatureStore::RefType(r.first).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu feature sets", + bufferId, cnt); + lastId = bufferId; + } + if (bufferId == 1) { + break; + } + } + EXPECT_EQUAL(1u, lastId); + LOG(info, "Added %zu feature sets in 1 buffer", cnt); +} + + +Test::Test() + : _schema() +{ + 
_schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", + Schema::STRING, + Schema::WEIGHTEDSET)); +} + + +int +Test::Main() +{ + TEST_INIT("featurestore_test"); + + requireThatFeaturesCanBeAddedAndRetrieved(); + requireThatNextWordsAreWorking(); + requireThatAddFeaturesTriggersChangeOfBuffer(); + + TEST_DONE(); +} + + +} + + +} + + +TEST_APPHOOK(search::memoryindex::Test); diff --git a/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp new file mode 100644 index 00000000000..825992b3b4f --- /dev/null +++ b/searchlib/src/tests/memoryindex/datastore/wordstore_test.cpp @@ -0,0 +1,104 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("wordstore_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/memoryindex/wordstore.h> + +using namespace search::btree; + +namespace search { +namespace memoryindex { + +class Test : public vespalib::TestApp { +private: + void requireThatWordsCanBeAddedAndRetrieved(); + void requireThatAddWordTriggersChangeOfBuffer(); +public: + int Main(); +}; + +void +Test::requireThatWordsCanBeAddedAndRetrieved() +{ + std::string w1 = "require"; + std::string w2 = "that"; + std::string w3 = "words"; + WordStore ws; + EntryRef r1 = ws.addWord(w1); + EntryRef r2 = ws.addWord(w2); + EntryRef r3 = ws.addWord(w3); + uint32_t invp = WordStore::RefType::align(1); // Reserved as invalid + uint32_t w1s = w1.size() + 1; + uint32_t w1p = WordStore::RefType::pad(w1s); + uint32_t w2s = w2.size() + 1; + uint32_t w2p = WordStore::RefType::pad(w2s); + EXPECT_EQUAL(invp, WordStore::RefType(r1).offset()); + EXPECT_EQUAL(invp + w1s + w1p, WordStore::RefType(r2).offset()); + EXPECT_EQUAL(invp + w1s + w1p + w2s + w2p, WordStore::RefType(r3).offset()); + EXPECT_EQUAL(0u, 
WordStore::RefType(r1).bufferId()); + EXPECT_EQUAL(0u, WordStore::RefType(r2).bufferId()); + EXPECT_EQUAL(0u, WordStore::RefType(r3).bufferId()); + EXPECT_EQUAL(std::string("require"), ws.getWord(r1)); + EXPECT_EQUAL(std::string("that"), ws.getWord(r2)); + EXPECT_EQUAL(std::string("words"), ws.getWord(r3)); +} + +void +Test::requireThatAddWordTriggersChangeOfBuffer() +{ + WordStore ws; + size_t word = 0; + uint32_t lastId = 0; + size_t lastWord = 0; + char wordStr[10]; + size_t entrySize = WordStore::RefType::align(6 + 1); + size_t initBufferSpace = 1024u * WordStore::RefType::align(1); + size_t bufferSpace = initBufferSpace; + size_t bufferWords = (bufferSpace - WordStore::RefType::align(1)) / + entrySize; + size_t usedSpace = 0; + size_t sumBufferWords = 0; + for (;;++word) { + sprintf(wordStr, "%6zu", word); + // all words uses 12 bytes (include padding) + EntryRef r = ws.addWord(std::string(wordStr)); + EXPECT_EQUAL(std::string(wordStr), ws.getWord(r)); + uint32_t bufferId = WordStore::RefType(r).bufferId(); + if (bufferId > lastId) { + LOG(info, + "Changed to bufferId %u after %zu words", + bufferId, word); + EXPECT_EQUAL(bufferWords, word - lastWord); + lastId = bufferId; + lastWord = word; + usedSpace += bufferWords * entrySize; + sumBufferWords += bufferWords; + bufferSpace = usedSpace + initBufferSpace; + bufferWords = bufferSpace / entrySize; + } + if (bufferId == 4) { + break; + } + } + // each buffer can have offsetSize / 12 words + EXPECT_EQUAL(sumBufferWords, word); + LOG(info, "Added %zu words in 4 buffers", word); +} + +int +Test::Main() +{ + TEST_INIT("wordstore_test"); + + requireThatWordsCanBeAddedAndRetrieved(); + requireThatAddWordTriggersChangeOfBuffer(); + + TEST_DONE(); +} + +} +} + +TEST_APPHOOK(search::memoryindex::Test); + diff --git a/searchlib/src/tests/memoryindex/dictionary/.gitignore b/searchlib/src/tests/memoryindex/dictionary/.gitignore new file mode 100644 index 00000000000..d404d7d7063 --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/dictionary/.gitignore @@ -0,0 +1,6 @@ +.depend +Makefile +dictionary_test +dump +/urldump +searchlib_dictionary_test_app diff --git a/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt new file mode 100644 index 00000000000..9520b37d267 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_dictionary_test_app + SOURCES + dictionary_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_dictionary_test_app COMMAND searchlib_dictionary_test_app) diff --git a/searchlib/src/tests/memoryindex/dictionary/DESC b/searchlib/src/tests/memoryindex/dictionary/DESC new file mode 100644 index 00000000000..ff559f42641 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/DESC @@ -0,0 +1 @@ +dictionary test. Take a look at dictionary_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/dictionary/FILES b/searchlib/src/tests/memoryindex/dictionary/FILES new file mode 100644 index 00000000000..1f3a8ebef87 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/FILES @@ -0,0 +1 @@ +dictionary_test.cpp diff --git a/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp new file mode 100644 index 00000000000..ef8383b23c7 --- /dev/null +++ b/searchlib/src/tests/memoryindex/dictionary/dictionary_test.cpp @@ -0,0 +1,1528 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + +/* $Id$ + * + * Copyright (C) 2011 Yahoo! 
Technologies Norway AS + * + * All Rights Reserved + * + */ + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/searchlib/diskindex/checkpointfile.h> +#include <vespa/searchlib/diskindex/fusion.h> +#include <vespa/searchlib/diskindex/indexbuilder.h> +#include <vespa/searchlib/diskindex/zcposoccrandread.h> +#include <vespa/searchlib/fef/fieldpositionsiterator.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/searchlib/index/indexbuilder.h> +#include <vespa/searchlib/index/schemautil.h> +#include <vespa/searchlib/btree/btreeroot.hpp> +#include <vespa/searchlib/btree/btreeiterator.hpp> +#include <vespa/searchlib/btree/btreenodeallocator.hpp> +#include <vespa/searchlib/btree/btreenode.hpp> +#include <vespa/searchlib/btree/btreenodestore.hpp> +#include <vespa/searchlib/memoryindex/dictionary.h> +#include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/searchlib/memoryindex/document_remover.h> +#include <vespa/searchlib/memoryindex/featurestore.h> +#include <vespa/searchlib/memoryindex/postingiterator.h> +#include <vespa/searchlib/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/vespalib/testkit/testapp.h> + +LOG_SETUP("dictionary_test"); + +namespace search +{ + +using namespace btree; +using namespace fef; +using namespace index; +using queryeval::SearchIterator; +using document::Document; +using diskindex::CheckPointFile; +using vespalib::GenerationHandler; +using test::InitRangeVerifier; + +namespace memoryindex +{ + +typedef Dictionary::PostingList PostingList; +typedef PostingList::Iterator PostingItr; +typedef 
PostingList::ConstIterator PostingConstItr; + +class MyBuilder : public IndexBuilder { +private: + std::stringstream _ss; + bool _insideWord; + bool _insideField; + bool _insideDoc; + bool _insideElem; + bool _firstWord; + bool _firstField; + bool _firstDoc; + bool _firstElem; + bool _firstPos; +public: + + MyBuilder(const Schema &schema) + : IndexBuilder(schema), + _ss(), + _insideWord(false), + _insideField(false), + _insideDoc(false), + _insideElem(false), + _firstWord(true), + _firstField(true), + _firstDoc(true), + _firstElem(true), + _firstPos(true) + { + } + + virtual void + startWord(const vespalib::stringref &word) + { + assert(_insideField); + assert(!_insideWord); + if (!_firstWord) + _ss << ","; + _ss << "w=" << word << "["; + _firstDoc = true; + _insideWord = true; + } + + virtual void + endWord(void) + { + assert(_insideWord); + assert(!_insideDoc); + _ss << "]"; + _firstWord = false; + _insideWord = false; + } + + virtual void + startField(uint32_t fieldId) + { + assert(!_insideField); + if (!_firstField) _ss << ","; + _ss << "f=" << fieldId << "["; + _firstWord = true; + _insideField = true; + } + + virtual void + endField() + { + assert(_insideField); + assert(!_insideWord); + _ss << "]"; + _firstField = false; + _insideField = false; + } + + virtual void + startDocument(uint32_t docId) + { + assert(_insideWord); + assert(!_insideDoc); + if (!_firstDoc) _ss << ","; + _ss << "d=" << docId << "["; + _firstElem = true; + _insideDoc = true; + } + + virtual void + endDocument(void) + { + assert(_insideDoc); + assert(!_insideElem); + _ss << "]"; + _firstDoc = false; + _insideDoc = false; + } + + virtual void + startElement(uint32_t elementId, + int32_t weight, + uint32_t elementLen) + { + assert(_insideDoc); + assert(!_insideElem); + if (!_firstElem) + _ss << ","; + _ss << "e=" << elementId << + ",w=" << weight << ",l=" << elementLen << "["; + _firstPos = true; + _insideElem = true; + } + + virtual void + endElement(void) + { + assert(_insideElem); + _ss 
<< "]"; + _firstElem = false; + _insideElem = false; + } + + virtual void + addOcc(const WordDocElementWordPosFeatures &features) + { + assert(_insideElem); + if (!_firstPos) _ss << ","; + _ss << features.getWordPos(); + _firstPos = false; + } + + std::string + toStr(void) const + { + return _ss.str(); + } +}; + +std::string +toString(FieldPositionsIterator posItr, + bool hasElements = false, + bool hasWeights = false) +{ + std::stringstream ss; + ss << "{"; + ss << posItr.getFieldLength() << ":"; + bool first = true; + for (; posItr.valid(); posItr.next()) { + if (!first) ss << ","; + ss << posItr.getPosition(); + first = false; + if (hasElements) { + ss << "[e=" << posItr.getElementId(); + if (hasWeights) + ss << ",w=" << posItr.getElementWeight(); + ss << ",l=" << posItr.getElementLen() << "]"; + } + } + ss << "}"; + return ss.str(); +} + +bool +assertPostingList(const std::string &exp, + PostingConstItr itr, + const FeatureStore *store = NULL) +{ + std::stringstream ss; + FeatureStore::DecodeContextCooked decoder(NULL); + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + ss << "["; + for (size_t i = 0; itr.valid(); ++itr, ++i) { + if (i > 0) ss << ","; + uint32_t docId = itr.getKey(); + ss << docId; + if (store != NULL) { // consider features as well + EntryRef ref(itr.getData()); + store->setupForField(0, decoder); + store->setupForUnpackFeatures(ref, decoder); + decoder.unpackFeatures(matchData, docId); + ss << toString(tfmd.getIterator()); + } + } + ss << "]"; + return EXPECT_EQUAL(exp, ss.str()); +} + +bool +assertPostingList(std::vector<uint32_t> &exp, PostingConstItr itr) +{ + std::stringstream ss; + ss << "["; + for (size_t i = 0; i < exp.size(); ++i) { + if (i > 0) ss << ","; + ss << exp[i]; + } + ss << "]"; + return assertPostingList(ss.str(), itr); +} + + +namespace +{ + +/** + * MockDictionary is a simple mockup of memory index, used to verify + * that we get correct posting lists from real memory index. 
+ */ +class MockDictionary +{ + std::map<std::pair<vespalib::string, uint32_t>, std::set<uint32_t>> _dict; + vespalib::string _word; + uint32_t _fieldId; + +public: + void + setNextWord(const vespalib::string &word) + { + _word = word; + } + + void + setNextField(uint32_t fieldId) + { + _fieldId = fieldId; + } + + void + add(uint32_t docId) + { + _dict[std::make_pair(_word, _fieldId)].insert(docId); + } + + void + remove(uint32_t docId) + { + _dict[std::make_pair(_word, _fieldId)].erase(docId); + } + + std::vector<uint32_t> + find(const vespalib::string &word, uint32_t fieldId) + { + std::vector<uint32_t> res; + for (auto docId : _dict[std::make_pair(word, fieldId)] ) { + res.push_back(docId); + } + return res; + } + + auto begin() + { + return _dict.begin(); + } + + auto end() + { + return _dict.end(); + } +}; + + +/** + * MockWordStoreScan is a helper class to ensure that previous word is + * still stored safely in memory, to satisfy OrderedDocumentInserter + * needs. + */ +class MockWordStoreScan +{ + vespalib::string _word0; + vespalib::string _word1; + vespalib::string *_prevWord; + vespalib::string *_word; + +public: + MockWordStoreScan() + : _word0(), + _word1(), + _prevWord(&_word0), + _word(&_word1) + { + } + + const vespalib::string & + getWord() const + { + return *_word; + } + + const vespalib::string & + setWord(const vespalib::string &word) + { + std::swap(_prevWord, _word); + *_word = word; + return *_word; + } +}; + +/** + * MyInserter performs insertions on both a mockup version of memory index + * and a real memory index. Mockup version is used to calculate expected + * answers. 
+ */ +class MyInserter +{ + MockWordStoreScan _wordStoreScan; + MockDictionary _mock; + Dictionary _d; + DocIdAndPosOccFeatures _features; + IOrderedDocumentInserter *_documentInserter; + +public: + MyInserter(const Schema &schema) + : _wordStoreScan(), + _mock(), + _d(schema), + _features(), + _documentInserter(nullptr) + { + _features.addNextOcc(0, 0, 1, 1); + } + + void + setNextWord(const vespalib::string &word) + { + const vespalib::string &w = _wordStoreScan.setWord(word); + _documentInserter->setNextWord(w); + _mock.setNextWord(w); + } + + void + setNextField(uint32_t fieldId) + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + } + _documentInserter = &_d.getFieldIndex(fieldId)->getInserter(); + _documentInserter->rewind(); + _mock.setNextField(fieldId); + } + + void + add(uint32_t docId) + { + _documentInserter->add(docId, _features); + _mock.add(docId); + } + + void + remove(uint32_t docId) + { + _documentInserter->remove(docId); + _mock.remove(docId); + } + + bool + assertPosting(const vespalib::string &word, + uint32_t fieldId) + { + std::vector<uint32_t> exp = _mock.find(word, fieldId); + PostingConstItr itr = _d.find(word, fieldId); + return EXPECT_TRUE(assertPostingList(exp, itr)); + } + + bool + assertPostings() + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + } + for (auto wfp : _mock) { + auto &wf = wfp.first; + auto &word = wf.first; + auto fieldId = wf.second; + if (!EXPECT_TRUE(assertPosting(word, fieldId))) { + return false; + } + } + return true; + } + + void + rewind() + { + if (_documentInserter != nullptr) { + _documentInserter->flush(); + _documentInserter = nullptr; + } + } + + uint32_t + getNumUniqueWords() + { + return _d.getNumUniqueWords(); + } + + Dictionary &getDict() { return _d; } +}; + +void +myremove(uint32_t docId, DocumentInverter &inv, Dictionary &d, + ISequencedTaskExecutor &invertThreads) +{ + inv.removeDocument(docId); + invertThreads.sync(); + inv.pushDocuments(d, 
std::shared_ptr<IDestructorCallback>()); +} + + +class WrapInserter +{ + OrderedDocumentInserter &_inserter; +public: + WrapInserter(Dictionary &d, uint32_t fieldId) + : _inserter(d.getFieldIndex(fieldId)->getInserter()) + { + } + + WrapInserter &word(const vespalib::stringref &word_) + { + _inserter.setNextWord(word_); + return *this; + } + + WrapInserter &add(uint32_t docId, const index::DocIdAndFeatures &features) + { + _inserter.add(docId, features); + return *this; + } + + WrapInserter &add(uint32_t docId) + { + DocIdAndPosOccFeatures features; + features.addNextOcc(0, 0, 1, 1); + return add(docId, features); + } + + WrapInserter &remove(uint32_t docId) + { + _inserter.remove(docId); + return *this; + } + + WrapInserter &flush() + { + _inserter.flush(); + return *this; + } + + WrapInserter &rewind() + { + _inserter.rewind(); + return *this; + } + + btree::EntryRef + getWordRef() + { + return _inserter.getWordRef(); + } +}; + + +class MyDrainRemoves : IDocumentRemoveListener +{ + DocumentRemover &_remover; +public: + virtual void remove(const vespalib::stringref, uint32_t) override { } + + MyDrainRemoves(Dictionary &d, uint32_t fieldId) + : _remover(d.getFieldIndex(fieldId)->getDocumentRemover()) + { + } + + void drain(uint32_t docId) + { + _remover.remove(docId, *this); + } +}; + +void +myPushDocument(DocumentInverter &inv, Dictionary &d) +{ + inv.pushDocuments(d, std::shared_ptr<IDestructorCallback>()); +} + + +const FeatureStore * +featureStorePtr(const Dictionary &d, uint32_t fieldId) +{ + return &d.getFieldIndex(fieldId)->getFeatureStore(); +} + +const FeatureStore & +featureStoreRef(const Dictionary &d, uint32_t fieldId) +{ + return d.getFieldIndex(fieldId)->getFeatureStore(); +} + + +DataStoreBase::MemStats +getFeatureStoreMemStats(const Dictionary &d) +{ + DataStoreBase::MemStats res; + uint32_t numFields = d.getNumFields(); + for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { + DataStoreBase::MemStats stats = + 
d.getFieldIndex(fieldId)->getFeatureStore().getMemStats(); + res += stats; + } + return res; +} + + +void myCommit(Dictionary &d, ISequencedTaskExecutor &pushThreads) +{ + uint32_t fieldId = 0; + for (auto &fieldIndex : d.getFieldIndexes()) { + pushThreads.execute(fieldId, + [fieldIndex(fieldIndex.get())]() + { fieldIndex->commit(); }); + ++fieldId; + } + pushThreads.sync(); +} + + +void +myCompactFeatures(Dictionary &d, ISequencedTaskExecutor &pushThreads) +{ + uint32_t fieldId = 0; + for (auto &fieldIndex : d.getFieldIndexes()) { + pushThreads.execute(fieldId, + [fieldIndex(fieldIndex.get())]() + { fieldIndex->compactFeatures(); }); + ++fieldId; + } +} + +} + + +struct Fixture +{ + Schema _schema; + Fixture() : _schema() { + _schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + _schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + _schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + } + const Schema & getSchema() const { return _schema; } +}; + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + Dictionary d(f.getSchema()); + SequencedTaskExecutor pushThreads(2); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + EXPECT_EQUAL(0u, d.getNumUniqueWords()); + WrapInserter(d, 0).word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + myCommit(d, pushThreads); + EXPECT_TRUE(assertPostingList("[10]", d.findFrozen("a", 0))); + EXPECT_EQUAL(1u, d.getNumUniqueWords()); +} + +TEST_F("requireThatAppendInsertWorks", Fixture) +{ + Dictionary d(f.getSchema()); + SequencedTaskExecutor pushThreads(2); + WrapInserter(d, 0).word("a").add(10).flush().rewind(). 
+ word("a").add(5).flush(); + EXPECT_TRUE(assertPostingList("[5,10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + WrapInserter(d, 0).rewind().word("a").add(20).flush(); + EXPECT_TRUE(assertPostingList("[5,10,20]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[]", d.findFrozen("a", 0))); + myCommit(d, pushThreads); + EXPECT_TRUE(assertPostingList("[5,10,20]", d.findFrozen("a", 0))); +} + +TEST_F("requireThatMultiplePostingListsCanExist", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(10).word("b").add(11).add(15).flush(); + WrapInserter(d, 1).word("a").add(5).word("b").add(12).flush(); + EXPECT_EQUAL(4u, d.getNumUniqueWords()); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[5]", d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[11,15]", d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[12]", d.find("b", 1))); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 2))); + EXPECT_TRUE(assertPostingList("[]", d.find("c", 0))); +} + +TEST_F("requireThatRemoveWorks", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + WrapInserter(d, 0).add(10).add(20).add(30).flush(); + EXPECT_TRUE(assertPostingList("[10,20,30]", d.find("a", 0))); + WrapInserter(d, 0).rewind().word("a").remove(10).flush(); + EXPECT_TRUE(assertPostingList("[20,30]", d.find("a", 0))); + WrapInserter(d, 0).remove(20).flush(); + EXPECT_TRUE(assertPostingList("[30]", d.find("a", 0))); + WrapInserter(d, 0).remove(30).flush(); + EXPECT_TRUE(assertPostingList("[]", d.find("a", 0))); + EXPECT_EQUAL(1u, d.getNumUniqueWords()); + MyDrainRemoves(d, 0).drain(10); + WrapInserter(d, 0).rewind().word("a").add(10).flush(); + EXPECT_TRUE(assertPostingList("[10]", d.find("a", 0))); +} + +TEST_F("requireThatMultipleInsertAndRemoveWorks", Fixture) +{ + MyInserter inserter(f.getSchema()); + uint32_t numFields 
= 4; + for (uint32_t fi = 0; fi < numFields; ++fi) { + inserter.setNextField(fi); + for (char w = 'a'; w <= 'z'; ++w) { + std::string word(&w, 1); + inserter.setNextWord(word); + for (uint32_t di = 0; di < (uint32_t) w; ++di) { // insert + inserter.add(di * 3); + } + EXPECT_EQUAL((w - 'a' + 1u) + ('z' - 'a' +1u) * fi, + inserter.getNumUniqueWords()); + } + } + EXPECT_TRUE(inserter.assertPostings()); + inserter.rewind(); + for (uint32_t fi = 0; fi < numFields; ++fi) { + MyDrainRemoves drainRemoves(inserter.getDict(), fi); + for (uint32_t di = 0; di < 'z' * 2 + 1; ++di) { + drainRemoves.drain(di); + } + } + for (uint32_t fi = 0; fi < numFields; ++fi) { + inserter.setNextField(fi); + for (char w = 'a'; w <= 'z'; ++w) { + std::string word(&w, 1); + inserter.setNextWord(word); + for (uint32_t di = 0; di < (uint32_t) w; ++di) { + // remove half of the docs + if ((di % 2) == 0) { + inserter.remove(di * 2); + } else { + inserter.add(di * 2 + 1); + } + } + } + } + EXPECT_TRUE(inserter.assertPostings()); +} + +void +addElement(DocIdAndFeatures &f, + uint32_t elemLen, + uint32_t numOccs, + int32_t weight = 1) +{ + f._elements.push_back(WordDocElementFeatures(f._elements.size())); + f._elements.back().setElementLen(elemLen); + f._elements.back().setWeight(weight); + f._elements.back().setNumOccs(numOccs); + for (uint32_t i = 0; i < numOccs; ++i) { + f._wordPositions.push_back(WordDocElementWordPosFeatures(i)); + } +} + +DocIdAndFeatures +getFeatures(uint32_t elemLen, uint32_t numOccs, int32_t weight = 1) +{ + DocIdAndFeatures f; + addElement(f, elemLen, numOccs, weight); + return f; +} + +TEST_F("requireThatFeaturesAreInPostingLists", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(1, getFeatures(4, 2)).flush(); + EXPECT_TRUE(assertPostingList("[1{4:0,1}]", + d.find("a", 0), + featureStorePtr(d, 0))); + WrapInserter(d, 0).word("b").add(2, getFeatures(5, 1)). 
+ add(3, getFeatures(6, 2)).flush(); + EXPECT_TRUE(assertPostingList("[2{5:0},3{6:0,1}]", + d.find("b", 0), + featureStorePtr(d, 0))); + WrapInserter(d, 1).word("c").add(4, getFeatures(7, 2)).flush(); + EXPECT_TRUE(assertPostingList("[4{7:0,1}]", + d.find("c", 1), + featureStorePtr(d, 1))); +} + +TEST_F("require that initRange conforms", Fixture) { + Dictionary d(f.getSchema()); + InitRangeVerifier ir; + WrapInserter inserter(d, 0); + inserter.word("a"); + for (uint32_t docId : ir.getExpectedDocIds()) { + inserter.add(docId); + } + inserter.flush(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + PostingIterator itr(d.find("a", 0), featureStoreRef(d, 0), 0, matchData); + ir.verify(itr); +} + +TEST_F("requireThatPostingIteratorIsWorking", Fixture) +{ + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(10, getFeatures(4, 1)). + add(20, getFeatures(5, 2)). + add(30, getFeatures(6, 1)). + add(40, getFeatures(7, 2)).flush(); + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + PostingIterator itr(d.find("not", 0), + featureStoreRef(d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(d.find("a", 0), + featureStoreRef(d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(itr.seek(40)); + EXPECT_EQUAL(40u, itr.getDocId()); + itr.unpack(40); + EXPECT_EQUAL("{7:0,1}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(41)); + EXPECT_TRUE(itr.isAtEnd()); + } +} + +TEST_F("requireThatDumpingToIndexBuilderIsWorking", Fixture) +{ + { + MyBuilder b(f.getSchema()); + WordDocElementWordPosFeatures wpf; + b.startField(4); + b.startWord("a"); + b.startDocument(2); + 
b.startElement(0, 10, 20); + wpf.setWordPos(1); + b.addOcc(wpf); + wpf.setWordPos(3); + b.addOcc(wpf); + b.endElement(); + b.endDocument(); + b.endWord(); + b.endField(); + EXPECT_EQUAL("f=4[w=a[d=2[e=0,w=10,l=20[1,3]]]]", b.toStr()); + } + { + Dictionary d(f.getSchema()); + MyBuilder b(f.getSchema()); + DocIdAndFeatures df; + WrapInserter(d, 1).word("a").add(5, getFeatures(2, 1)). + add(7, getFeatures(3, 2)). + word("b").add(5, getFeatures(12, 2)).flush(); + + df = getFeatures(4, 1); + addElement(df, 5, 2); + WrapInserter(d, 2).word("a").add(5, df); + df = getFeatures(6, 1); + addElement(df, 7, 2); + WrapInserter(d, 2).add(7, df).flush(); + + df = getFeatures(8, 1, 12); + addElement(df, 9, 2, 13); + WrapInserter(d, 3).word("a").add(5, df); + df = getFeatures(10, 1, 14); + addElement(df, 11, 2, 15); + WrapInserter(d, 3).add(7, df).flush(); + + d.dump(b); + + EXPECT_EQUAL("f=0[]," + "f=1[w=a[d=5[e=0,w=1,l=2[0]],d=7[e=0,w=1,l=3[0,1]]]," + "w=b[d=5[e=0,w=1,l=12[0,1]]]]," + "f=2[w=a[d=5[e=0,w=1,l=4[0],e=1,w=1,l=5[0,1]]," + "d=7[e=0,w=1,l=6[0],e=1,w=1,l=7[0,1]]]]," + "f=3[w=a[d=5[e=0,w=12,l=8[0],e=1,w=13,l=9[0,1]]," + "d=7[e=0,w=14,l=10[0],e=1,w=15,l=11[0,1]]]]", + b.toStr()); + } + { // test word with no docs + Dictionary d(f.getSchema()); + WrapInserter(d, 0).word("a").add(2, getFeatures(2, 1)). + word("b").add(4, getFeatures(4, 1)).flush().rewind(). 
+ word("a").remove(2).flush(); + { + MyBuilder b(f.getSchema()); + d.dump(b); + EXPECT_EQUAL("f=0[w=b[d=4[e=0,w=1,l=4[0]]]],f=1[],f=2[],f=3[]", + b.toStr()); + } + { + search::diskindex::IndexBuilder b(f.getSchema()); + b.setPrefix("dump"); + TuneFileIndexing tuneFileIndexing; + DummyFileHeaderContext fileHeaderContext; + b.open(5, 2, tuneFileIndexing, fileHeaderContext); + d.dump(b); + b.close(); + } + } +} + + +template <typename FixtureBase> +class DictionaryFixture : public FixtureBase +{ +public: + using FixtureBase::getSchema; + Dictionary _d; + DocBuilder _b; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + DocumentInverter _inv; + + DictionaryFixture() + : FixtureBase(), + _d(getSchema()), + _b(getSchema()), + _invertThreads(2), + _pushThreads(2), + _inv(getSchema(), _invertThreads, _pushThreads) + { + } +}; + + +TEST_F("requireThatInversionIsWorking", DictionaryFixture<Fixture>) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::20"); + f._b.startIndexField("f0"). + addStr("a").addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(20, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::30"); + f._b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + addStr("e").addStr("f"). + endField(); + f._b.startIndexField("f1"). + addStr("\nw2").addStr("w").addStr("x"). + addStr("\nw3").addStr("y").addStr("z"). + endField(); + f._b.startIndexField("f2"). + startElement(4). + addStr("w").addStr("x"). + endElement(). + startElement(5). + addStr("y").addStr("z"). + endElement(). 
+ endField(); + f._b.startIndexField("f3"). + startElement(6). + addStr("w").addStr("x"). + endElement(). + startElement(7). + addStr("y").addStr("z"). + endElement(). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(30, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::40"); + f._b.startIndexField("f0"). + addStr("a").addStr("a").addStr("b").addStr("c").addStr("a"). + addStr("e").addStr("f"). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(40, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::999"); + f._b.startIndexField("f0"). + addStr("this").addStr("is").addStr("_a_").addStr("test"). + addStr("for").addStr("insertion").addStr("speed").addStr("with"). + addStr("more").addStr("than").addStr("just").addStr("__a__"). + addStr("few").addStr("words").addStr("present").addStr("in"). + addStr("some").addStr("of").addStr("the").addStr("fields"). + endField(); + f._b.startIndexField("f1"). + addStr("the").addStr("other").addStr("field").addStr("also"). + addStr("has").addStr("some").addStr("content"). + endField(); + f._b.startIndexField("f2"). + startElement(1). + addStr("strange").addStr("things").addStr("here"). + addStr("has").addStr("some").addStr("content"). + endElement(). + endField(); + f._b.startIndexField("f3"). + startElement(3). + addStr("not").addStr("a").addStr("weighty").addStr("argument"). + endElement(). 
+ endField(); + doc = f._b.endDocument(); + for (uint32_t docId = 10000; docId < 20000; ++docId) { + f._inv.invertDocument(docId, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + } + + f._pushThreads.sync(); + DataStoreBase::MemStats beforeStats = getFeatureStoreMemStats(f._d); + LOG(info, + "Before feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + beforeStats._allocElems, + beforeStats._usedElems, + beforeStats._deadElems, + beforeStats._holdElems, + beforeStats._freeBuffers, + beforeStats._activeBuffers, + beforeStats._holdBuffers); + myCompactFeatures(f._d, f._pushThreads); + std::vector<std::unique_ptr<GenerationHandler::Guard>> guards; + for (auto &fieldIndex : f._d.getFieldIndexes()) { + guards.push_back(std::make_unique<GenerationHandler::Guard> + (fieldIndex->takeGenerationGuard())); + } + myCommit(f._d, f._pushThreads); + DataStoreBase::MemStats duringStats = getFeatureStoreMemStats(f._d); + LOG(info, + "During feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + duringStats._allocElems, + duringStats._usedElems, + duringStats._deadElems, + duringStats._holdElems, + duringStats._freeBuffers, + duringStats._activeBuffers, + duringStats._holdBuffers); + guards.clear(); + myCommit(f._d, f._pushThreads); + DataStoreBase::MemStats afterStats = getFeatureStoreMemStats(f._d); + LOG(info, + "After feature compaction: allocElems=%" PRIu64 ", usedElems=%" PRIu64 + ", deadElems=%" PRIu64 ", holdElems=%" PRIu64 + ", freeBuffers=%" PRIu32 ", activeBuffers=%" PRIu32 + ", holdBuffers=%" PRIu32, + afterStats._allocElems, + afterStats._usedElems, + afterStats._deadElems, + afterStats._holdElems, + afterStats._freeBuffers, + 
afterStats._activeBuffers, + afterStats._holdBuffers); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + PostingIterator itr(f._d.findFrozen("not", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("a", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{4:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(itr.seek(40)); + EXPECT_EQUAL(40u, itr.getDocId()); + itr.unpack(40); + EXPECT_EQUAL("{7:0,1,4}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(41)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("x", 0), featureStoreRef(f._d, 0), + 0, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + PostingIterator itr(f._d.findFrozen("x", 1), featureStoreRef(f._d, 1), + 1, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{6:2[e=0,w=1,l=6]}", + toString(tfmd.getIterator(), true, true)); + } + { + PostingIterator itr(f._d.findFrozen("x", 2), featureStoreRef(f._d, 2), + 2, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + // weight is hardcoded to 1 for new style il doc array field + EXPECT_EQUAL("{2:1[e=0,w=1,l=2]}", + toString(tfmd.getIterator(), true, true)); + } + { + PostingIterator itr(f._d.findFrozen("x", 3), featureStoreRef(f._d, 3), + 3, matchData); + itr.initFullRange(); + EXPECT_EQUAL(30u, itr.getDocId()); + itr.unpack(30); + EXPECT_EQUAL("{2:1[e=0,w=6,l=2]}", + toString(tfmd.getIterator(), true, true)); + } +} + +TEST_F("requireThatInverterHandlesRemoveViaDocumentRemover", + DictionaryFixture<Fixture>) +{ + Document::UP doc; + + 
f._b.startDocument("doc::1"); + f._b.startIndexField("f0").addStr("a").addStr("b").endField(); + f._b.startIndexField("f1").addStr("a").addStr("c").endField(); + Document::UP doc1 = f._b.endDocument(); + f._inv.invertDocument(1, *doc1.get()); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + f._b.startDocument("doc::2"); + f._b.startIndexField("f0").addStr("b").addStr("c").endField(); + Document::UP doc2 = f._b.endDocument(); + f._inv.invertDocument(2, *doc2.get()); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + f._pushThreads.sync(); + + EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[1,2]", f._d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); + EXPECT_TRUE(assertPostingList("[1]", f._d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[1]", f._d.find("c", 1))); + + myremove(1, f._inv, f._d, f._invertThreads); + f._pushThreads.sync(); + + EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("b", 0))); + EXPECT_TRUE(assertPostingList("[2]", f._d.find("c", 0))); + EXPECT_TRUE(assertPostingList("[]", f._d.find("a", 1))); + EXPECT_TRUE(assertPostingList("[]", f._d.find("c", 1))); +} + +class UriFixture +{ +public: + Schema _schema; + UriFixture() + : _schema() + { + _schema.addUriIndexFields(Schema::IndexField("iu", + Schema::STRING)); + _schema.addUriIndexFields(Schema::IndexField("iau", + Schema::STRING, + Schema::ARRAY)); + _schema.addUriIndexFields(Schema::IndexField("iwu", + Schema::STRING, + Schema::WEIGHTEDSET)); + } + const Schema & getSchema() const { return _schema; } +}; + + +TEST_F("requireThatUriIndexingIsWorking", DictionaryFixture<UriFixture>) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("iu"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). 
+ addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + f._b.startIndexField("iau"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + f._b.startIndexField("iwu"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). 
+ addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). + endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + + f._pushThreads.sync(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); + PostingIterator itr(f._d.findFrozen("not", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iu"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iau"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + 
fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2[e=0,l=9]}", + toString(tfmd.getIterator(), true, false)); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("iwu"); + PostingIterator itr(f._d.findFrozen("yahoo", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{9:2[e=0,w=4,l=9]}", + toString(tfmd.getIterator(), true, true)); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + search::diskindex::IndexBuilder dib(f.getSchema()); + dib.setPrefix("urldump"); + TuneFileIndexing tuneFileIndexing; + DummyFileHeaderContext fileHeaderContext; + dib.open(11, f._d.getNumUniqueWords(), tuneFileIndexing, + fileHeaderContext); + f._d.dump(dib); + dib.close(); + } +} + + +class SingleFieldFixture +{ +public: + Schema _schema; + SingleFieldFixture() + : _schema() + { + _schema.addIndexField(Schema::IndexField("i", Schema::STRING)); + } + const Schema & getSchema() const { return _schema; } +}; + +TEST_F("requireThatCjkIndexingIsWorking", DictionaryFixture<SingleFieldFixture>) +{ + Document::UP doc; + + f._b.startDocument("doc::10"); + f._b.startIndexField("i"). + addStr("我就是那个"). + setAutoSpace(false). + addStr("大灰狼"). + setAutoSpace(true). 
+ endField(); + doc = f._b.endDocument(); + f._inv.invertDocument(10, *doc); + f._invertThreads.sync(); + myPushDocument(f._inv, f._d); + + f._pushThreads.sync(); + + TermFieldMatchData tfmd; + TermFieldMatchDataArray matchData; + matchData.add(&tfmd); + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("not", fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("我就" + "是那个", + fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{2:0}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } + { + uint32_t fieldId = f.getSchema().getIndexFieldId("i"); + PostingIterator itr(f._d.findFrozen("大灰" + "狼", + fieldId), + featureStoreRef(f._d, fieldId), + fieldId, matchData); + itr.initFullRange(); + EXPECT_EQUAL(10u, itr.getDocId()); + itr.unpack(10); + EXPECT_EQUAL("{2:1}", toString(tfmd.getIterator())); + EXPECT_TRUE(!itr.seek(25)); + EXPECT_TRUE(itr.isAtEnd()); + } +} + +void +insertAndAssertTuple(const vespalib::string &word, uint32_t fieldId, uint32_t docId, + Dictionary &dict) +{ + EntryRef wordRef = WrapInserter(dict, fieldId).rewind().word(word). 
+ add(docId).flush().getWordRef(); + EXPECT_EQUAL(word, + dict.getFieldIndex(fieldId)->getWordStore().getWord(wordRef)); + MyDrainRemoves(dict, fieldId).drain(docId); +} + +TEST_F("require that insert tells which word ref that was inserted", Fixture) +{ + Dictionary d(f.getSchema()); + insertAndAssertTuple("a", 1, 11, d); + insertAndAssertTuple("b", 1, 11, d); + insertAndAssertTuple("a", 2, 11, d); + + insertAndAssertTuple("a", 1, 22, d); + insertAndAssertTuple("b", 2, 22, d); + insertAndAssertTuple("c", 2, 22, d); +} + +struct RemoverFixture : public Fixture +{ + Dictionary _d; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + + RemoverFixture() + : + Fixture(), + _d(getSchema()), + _invertThreads(2), + _pushThreads(2) + { + } + void assertPostingLists(const vespalib::string &e1, + const vespalib::string &e2, + const vespalib::string &e3) { + EXPECT_TRUE(assertPostingList(e1, _d.find("a", 1))); + EXPECT_TRUE(assertPostingList(e2, _d.find("a", 2))); + EXPECT_TRUE(assertPostingList(e3, _d.find("b", 1))); + } + void remove(uint32_t docId) { + DocumentInverter inv(getSchema(), _invertThreads, _pushThreads); + myremove(docId, inv, _d, _invertThreads); + _pushThreads.sync(); + EXPECT_FALSE(_d.getFieldIndex(0u)->getDocumentRemover(). + getStore().get(docId).valid()); + } +}; + +TEST_F("require that document remover can remove several documents", RemoverFixture) +{ + WrapInserter(f._d, 1).word("a").add(11).add(13).add(15). 
+ word("b").add(11).add(15).flush(); + WrapInserter(f._d, 2).word("a").add(11).add(13).flush(); + f.assertPostingLists("[11,13,15]", "[11,13]", "[11,15]"); + + f.remove(13); + f.assertPostingLists("[11,15]", "[11]", "[11,15]"); + + f.remove(11); + f.assertPostingLists("[15]", "[]", "[15]"); + + f.remove(15); + f.assertPostingLists("[]", "[]", "[]"); +} + +TEST_F("require that removal of non-existing document does not do anything", RemoverFixture) +{ + WrapInserter(f._d, 1).word("a").add(11).word("b").add(11).flush(); + WrapInserter(f._d, 2).word("a").add(11).flush(); + f.assertPostingLists("[11]", "[11]", "[11]"); + f.remove(13); + f.assertPostingLists("[11]", "[11]", "[11]"); +} + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/document_remover/.gitignore b/searchlib/src/tests/memoryindex/document_remover/.gitignore new file mode 100644 index 00000000000..2126f9147bd --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/.gitignore @@ -0,0 +1 @@ +searchlib_document_remover_test_app diff --git a/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt new file mode 100644 index 00000000000..e918d0400b2 --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_document_remover_test_app + SOURCES + document_remover_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_remover_test_app COMMAND searchlib_document_remover_test_app) diff --git a/searchlib/src/tests/memoryindex/document_remover/DESC b/searchlib/src/tests/memoryindex/document_remover/DESC new file mode 100644 index 00000000000..7fe35ab896f --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/DESC @@ -0,0 +1 @@ +document remover test. Take a look at document_remover_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/document_remover/FILES b/searchlib/src/tests/memoryindex/document_remover/FILES new file mode 100644 index 00000000000..9b7cb9a8cfa --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/FILES @@ -0,0 +1 @@ +document_remover_test.cpp diff --git a/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp new file mode 100644 index 00000000000..8c6751adbeb --- /dev/null +++ b/searchlib/src/tests/memoryindex/document_remover/document_remover_test.cpp @@ -0,0 +1,144 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("document_remover_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/memoryindex/document_remover.h> +#include <vespa/searchlib/memoryindex/wordstore.h> +#include <vespa/searchlib/memoryindex/i_document_remove_listener.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <map> + +using namespace search; +using namespace search::memoryindex; + +struct WordFieldPair +{ + vespalib::string _word; + uint32_t _fieldId; + WordFieldPair(const vespalib::stringref &word, uint32_t fieldId) + : _word(word), _fieldId(fieldId) + {} + bool operator<(const WordFieldPair &rhs) { + if (_word != rhs._word) { + return _word < rhs._word; + } + return _fieldId < rhs._fieldId; + } +}; + +typedef std::vector<WordFieldPair> WordFieldVector; + +std::ostream & +operator<<(std::ostream &os, const WordFieldPair &val) +{ + os << "{" << val._word << "," << val._fieldId << "}"; + return os; +} + +struct MockRemoveListener : public IDocumentRemoveListener +{ + WordFieldVector _words; + uint32_t _expDocId; + uint32_t _fieldId; + virtual void remove(const vespalib::stringref word, uint32_t docId) override { + EXPECT_EQUAL(_expDocId, docId); + _words.emplace_back(word, _fieldId); + } + void reset(uint32_t expDocId) { + _words.clear(); + _expDocId = expDocId; + } + vespalib::string getWords() { + std::sort(_words.begin(), _words.end()); + std::ostringstream oss; + oss << _words; + return oss.str(); + } + void setFieldId(uint32_t fieldId) { _fieldId = fieldId; } +}; + +struct Fixture +{ + MockRemoveListener _listener; + std::vector<std::unique_ptr<WordStore>> _wordStores; + std::vector<std::map<vespalib::string, btree::EntryRef>> _wordToRefMaps; + std::vector<std::unique_ptr<DocumentRemover>> _removers; + Fixture() + : _listener(), + _wordStores(), + _wordToRefMaps(), + _removers() + { + uint32_t numFields = 4; + for (uint32_t fieldId = 0; fieldId < numFields; ++fieldId) { + 
_wordStores.push_back(std::make_unique<WordStore>()); + _removers.push_back(std::make_unique<DocumentRemover> + (*_wordStores.back())); + } + _wordToRefMaps.resize(numFields); + } + btree::EntryRef getWordRef(const vespalib::string &word, uint32_t fieldId) { + auto &wordToRefMap = _wordToRefMaps[fieldId]; + WordStore &wordStore = *_wordStores[fieldId]; + auto itr = wordToRefMap.find(word); + if (itr == wordToRefMap.end()) { + btree::EntryRef ref = wordStore.addWord(word); + wordToRefMap[word] = ref; + return ref; + } + return itr->second; + } + Fixture &insert(const vespalib::string &word, uint32_t fieldId, uint32_t docId) { + assert(fieldId < _wordStores.size()); + _removers[fieldId]->insert(getWordRef(word, fieldId), docId); + return *this; + } + void flush() { + for (auto &remover : _removers) { + remover->flush(); + } + } + vespalib::string remove(uint32_t docId) { + _listener.reset(docId); + uint32_t fieldId = 0; + for (auto &remover : _removers) { + _listener.setFieldId(fieldId); + remover->remove(docId, _listener); + ++fieldId; + } + return _listener.getWords(); + } +}; + +TEST_F("require that {word,fieldId} pairs for multiple doc ids can be inserted", Fixture) +{ + f.insert("a", 1, 10).insert("a", 1, 20).insert("a", 1, 30); + f.insert("a", 2, 10).insert("a", 2, 20); + f.insert("b", 1, 20).insert("b", 1, 30); + f.insert("b", 2, 10).insert("b", 2, 30); + f.insert("c", 1, 10); + f.insert("c", 2, 20); + f.insert("c", 3, 30); + f.flush(); + + EXPECT_EQUAL("[{a,1},{a,2},{b,2},{c,1}]", f.remove(10)); + EXPECT_EQUAL("[{a,1},{a,2},{b,1},{c,2}]", f.remove(20)); + EXPECT_EQUAL("[{a,1},{b,1},{b,2},{c,3}]", f.remove(30)); +} + +TEST_F("require that we can insert after flush", Fixture) +{ + f.insert("a", 1, 10).insert("b", 1, 10); + f.flush(); + f.insert("b", 1, 20).insert("b", 2, 20); + f.flush(); + + EXPECT_EQUAL("[{a,1},{b,1}]", f.remove(10)); + EXPECT_EQUAL("[{b,1},{b,2}]", f.remove(20)); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/memoryindex/documentinverter/.gitignore b/searchlib/src/tests/memoryindex/documentinverter/.gitignore new file mode 100644 index 00000000000..1e9666b2d63 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/.gitignore @@ -0,0 +1 @@ +searchlib_documentinverter_test_app diff --git a/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt new file mode 100644 index 00000000000..85a77fad361 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_documentinverter_test_app + SOURCES + documentinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_documentinverter_test_app COMMAND searchlib_documentinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/documentinverter/DESC b/searchlib/src/tests/memoryindex/documentinverter/DESC new file mode 100644 index 00000000000..5dc610c2a24 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/DESC @@ -0,0 +1 @@ +Document inverter test. Take a look at documentinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/documentinverter/FILES b/searchlib/src/tests/memoryindex/documentinverter/FILES new file mode 100644 index 00000000000..c54817b9df1 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/FILES @@ -0,0 +1 @@ +documentinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp new file mode 100644 index 00000000000..d3ad1f54e95 --- /dev/null +++ b/searchlib/src/tests/memoryindex/documentinverter/documentinverter_test.cpp @@ -0,0 +1,294 @@ +// Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("documentinverter_test"); +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/memoryindex/documentinverter.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/vespalib/testkit/testapp.h> + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::Schema; + +namespace memoryindex +{ + + +namespace +{ + + +Document::UP +makeDoc10(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc11(DocBuilder &b) +{ + b.startDocument("doc::11"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("e").addStr("f"). + endField(); + b.startIndexField("f1"). + addStr("a").addStr("g"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc12(DocBuilder &b) +{ + b.startDocument("doc::12"); + b.startIndexField("f0"). + addStr("h").addStr("doc12"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc13(DocBuilder &b) +{ + b.startDocument("doc::13"); + b.startIndexField("f0"). + addStr("i").addStr("doc13"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc14(DocBuilder &b) +{ + b.startDocument("doc::14"); + b.startIndexField("f0"). + addStr("j").addStr("doc14"). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc15(DocBuilder &b) +{ + b.startDocument("doc::15"); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + SequencedTaskExecutor _invertThreads; + SequencedTaskExecutor _pushThreads; + DocumentInverter _inv; + test::OrderedDocumentInserter _inserter; + + static Schema + makeSchema() + { + Schema schema; + schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + return schema; + } + + Fixture() + : _schema(makeSchema()), + _b(_schema), + _invertThreads(2), + _pushThreads(2), + _inv(_schema, _invertThreads, _pushThreads), + _inserter() + { + } + + void + pushDocuments() + { + _invertThreads.sync(); + uint32_t fieldId = 0; + for (auto &inverter : _inv.getInverters()) { + _inserter.setFieldId(fieldId); + inverter->pushDocuments(_inserter); + ++fieldId; + } + _pushThreads.sync(); + } +}; + + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatMultipleDocsWork", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f._inv.invertDocument(11, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,a=11," + "w=b,a=10,a=11," + "w=c,a=10,w=d,a=10," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); +} + + +TEST_F("requireThatRemoveWorks", Fixture) +{ + f._inv.getInverter(0)->remove("b", 10); + f._inv.getInverter(0)->remove("a", 10); + f._inv.getInverter(0)->remove("b", 11); + f._inv.getInverter(2)->remove("c", 12); + f._inv.getInverter(1)->remove("a", 10); + f.pushDocuments(); + 
EXPECT_EQUAL("f=0,w=a,r=10," + "w=b,r=10,r=11," + "f=1,w=a,r=10," + "f=2,w=c,r=12", + f._inserter.toStr()); +} + + +TEST_F("requireThatReputWorks", Fixture) +{ + f._inv.invertDocument(10, *makeDoc10(f._b)); + f._inv.invertDocument(10, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=e,a=10," + "w=f,a=10," + "f=1,w=a,a=10," + "w=g,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAbortPendingDocWorks", Fixture) +{ + Document::UP doc10 = makeDoc10(f._b); + Document::UP doc11 = makeDoc11(f._b); + Document::UP doc12 = makeDoc12(f._b); + Document::UP doc13 = makeDoc13(f._b); + Document::UP doc14 = makeDoc14(f._b); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.removeDocument(10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=11," + "w=b,a=11," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.invertDocument(12, *doc12); + f._inv.invertDocument(13, *doc13); + f._inv.invertDocument(14, *doc14); + f._inv.removeDocument(11); + f._inv.removeDocument(13); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10," + "w=doc12,a=12," + "w=doc14,a=14," + "w=h,a=12," + "w=j,a=14", + f._inserter.toStr()); + + f._inv.invertDocument(10, *doc10); + f._inv.invertDocument(11, *doc11); + f._inv.invertDocument(12, *doc12); + f._inv.invertDocument(13, *doc13); + f._inv.invertDocument(14, *doc14); + f._inv.removeDocument(11); + f._inv.removeDocument(12); + f._inv.removeDocument(13); + f._inv.removeDocument(14); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); + + +} + + +TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture) +{ + f._inv.getInverter(0)->remove("a", 11); + f._inv.getInverter(0)->remove("c", 9); + 
f._inv.getInverter(0)->remove("d", 10); + f._inv.getInverter(0)->remove("z", 12); + f._inv.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,r=11," + "w=b,a=10," + "w=c,r=9,a=10," + "w=d,r=10,a=10," + "w=z,r=12", + f._inserter.toStr()); +} + + +TEST_F("require that empty document can be inverted", Fixture) +{ + f._inv.invertDocument(15, *makeDoc15(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/fieldinverter/.gitignore b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore new file mode 100644 index 00000000000..482663dd92e --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_fieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..9d81ebbb57c --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fieldinverter_test_app + SOURCES + fieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_fieldinverter_test_app COMMAND searchlib_fieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/fieldinverter/DESC b/searchlib/src/tests/memoryindex/fieldinverter/DESC new file mode 100644 index 00000000000..a40890fdc3d --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/DESC @@ -0,0 +1 @@ +Field inverter test. Take a look at fieldinverter_test.cpp for details. 
diff --git a/searchlib/src/tests/memoryindex/fieldinverter/FILES b/searchlib/src/tests/memoryindex/fieldinverter/FILES new file mode 100644 index 00000000000..892febd1c50 --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/FILES @@ -0,0 +1 @@ +fieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp new file mode 100644 index 00000000000..6216ba9eb3c --- /dev/null +++ b/searchlib/src/tests/memoryindex/fieldinverter/fieldinverter_test.cpp @@ -0,0 +1,338 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +/* -*- mode: C++; coding: utf-8; -*- */ + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fieldinverter_test"); +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/document/repo/fixedtyperepo.h> + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::Schema; + +namespace memoryindex +{ + + +namespace +{ + + +Document::UP +makeDoc10(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("c").addStr("d"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc11(DocBuilder &b) +{ + b.startDocument("doc::11"); + b.startIndexField("f0"). + addStr("a").addStr("b").addStr("e").addStr("f"). + endField(); + b.startIndexField("f1"). + addStr("a").addStr("g"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc12(DocBuilder &b) +{ + b.startDocument("doc::12"); + b.startIndexField("f0"). + addStr("h").addStr("doc12"). 
+ endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc13(DocBuilder &b) +{ + b.startDocument("doc::13"); + b.startIndexField("f0"). + addStr("i").addStr("doc13"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc14(DocBuilder &b) +{ + b.startDocument("doc::14"); + b.startIndexField("f0"). + addStr("j").addStr("doc14"). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc15(DocBuilder &b) +{ + b.startDocument("doc::15"); + return b.endDocument(); +} + + +Document::UP +makeDoc16(DocBuilder &b) +{ + b.startDocument("doc::16"); + b.startIndexField("f0").addStr("foo").addStr("bar").addStr("baz"). + addTermAnnotation("altbaz").addStr("y").addTermAnnotation("alty"). + addStr("z").endField(); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + std::vector<std::unique_ptr<FieldInverter> > _inverters; + test::OrderedDocumentInserter _inserter; + + static Schema + makeSchema() + { + Schema schema; + schema.addIndexField(Schema::IndexField("f0", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING, + Schema::ARRAY)); + schema.addIndexField(Schema::IndexField("f3", Schema::STRING, + Schema::WEIGHTEDSET)); + return schema; + } + + Fixture() + : _schema(makeSchema()), + _b(_schema), + _inverters(), + _inserter() + { + for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); + ++fieldId) { + _inverters.push_back(std::make_unique<FieldInverter>(_schema, + fieldId)); + } + } + + void + invertDocument(uint32_t docId, const Document &doc) + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + vespalib::stringref fieldName = + _schema.getIndexField(fieldId).getName(); + inverter->invertField(docId, doc.getValue(fieldName)); + ++fieldId; + } + } + + void + pushDocuments() + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + _inserter.setFieldId(fieldId); + 
inverter->pushDocuments(_inserter); + ++fieldId; + } + } + + void + removeDocument(uint32_t docId) { + for (auto &inverter : _inverters) { + inverter->removeDocument(docId); + } + } +}; + + +TEST_F("requireThatFreshInsertWorks", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatMultipleDocsWork", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.invertDocument(11, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,a=11," + "w=b,a=10,a=11," + "w=c,a=10,w=d,a=10," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); +} + + +TEST_F("requireThatRemoveWorks", Fixture) +{ + f._inverters[0]->remove("b", 10); + f._inverters[0]->remove("a", 10); + f._inverters[0]->remove("b", 11); + f._inverters[2]->remove("c", 12); + f._inverters[1]->remove("a", 10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,r=10," + "w=b,r=10,r=11," + "f=1,w=a,r=10," + "f=2,w=c,r=12", + f._inserter.toStr()); +} + + +TEST_F("requireThatReputWorks", Fixture) +{ + f.invertDocument(10, *makeDoc10(f._b)); + f.invertDocument(10, *makeDoc11(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=e,a=10," + "w=f,a=10," + "f=1,w=a,a=10," + "w=g,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAbortPendingDocWorks", Fixture) +{ + Document::UP doc10 = makeDoc10(f._b); + Document::UP doc11 = makeDoc11(f._b); + Document::UP doc12 = makeDoc12(f._b); + Document::UP doc13 = makeDoc13(f._b); + Document::UP doc14 = makeDoc14(f._b); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.removeDocument(10); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=11," + "w=b,a=11," + "w=e,a=11," + "w=f,a=11," + "f=1,w=a,a=11," + "w=g,a=11", + f._inserter.toStr()); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.invertDocument(12, *doc12); + f.invertDocument(13, 
*doc13); + f.invertDocument(14, *doc14); + f.removeDocument(11); + f.removeDocument(13); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10," + "w=doc12,a=12," + "w=doc14,a=14," + "w=h,a=12," + "w=j,a=14", + f._inserter.toStr()); + + f.invertDocument(10, *doc10); + f.invertDocument(11, *doc11); + f.invertDocument(12, *doc12); + f.invertDocument(13, *doc13); + f.invertDocument(14, *doc14); + f.removeDocument(11); + f.removeDocument(12); + f.removeDocument(13); + f.removeDocument(14); + f._inserter.reset(); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10," + "w=b,a=10," + "w=c,a=10," + "w=d,a=10", + f._inserter.toStr()); + + +} + + +TEST_F("requireThatMixOfAddAndRemoveWorks", Fixture) +{ + f._inverters[0]->remove("a", 11); + f._inverters[0]->remove("c", 9); + f._inverters[0]->remove("d", 10); + f._inverters[0]->remove("z", 12); + f.invertDocument(10, *makeDoc10(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0,w=a,a=10,r=11," + "w=b,a=10," + "w=c,r=9,a=10," + "w=d,r=10,a=10," + "w=z,r=12", + f._inserter.toStr()); +} + + +TEST_F("require that empty document can be inverted", Fixture) +{ + f.invertDocument(15, *makeDoc15(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("require that multiple words at same position works", Fixture) +{ + f.invertDocument(16, *makeDoc16(f._b)); + f._inserter.setVerbose(); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=altbaz,a=16(e=0,w=1,l=5[2])," + "w=alty,a=16(e=0,w=1,l=5[3])," + "w=bar,a=16(e=0,w=1,l=5[1])," + "w=baz,a=16(e=0,w=1,l=5[2])," + "w=foo,a=16(e=0,w=1,l=5[0])," + "w=y,a=16(e=0,w=1,l=5[3])," + "w=z,a=16(e=0,w=1,l=5[4])", + f._inserter.toStr()); +} + + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/memoryindex/.gitignore b/searchlib/src/tests/memoryindex/memoryindex/.gitignore new file mode 100644 index 00000000000..174d0a494e2 --- /dev/null +++ 
b/searchlib/src/tests/memoryindex/memoryindex/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +memoryindex_test +sourceselectorwriter_test +searchlib_memoryindex_test_app diff --git a/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt new file mode 100644 index 00000000000..f25089e85bb --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_memoryindex_test_app + SOURCES + memoryindex_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_memoryindex_test_app COMMAND searchlib_memoryindex_test_app) diff --git a/searchlib/src/tests/memoryindex/memoryindex/DESC b/searchlib/src/tests/memoryindex/memoryindex/DESC new file mode 100644 index 00000000000..87b69181803 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/DESC @@ -0,0 +1 @@ +memoryindex test. Take a look at memoryindex_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/memoryindex/FILES b/searchlib/src/tests/memoryindex/memoryindex/FILES new file mode 100644 index 00000000000..4faa7668dfc --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/FILES @@ -0,0 +1 @@ +memoryindex_test.cpp diff --git a/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp new file mode 100644 index 00000000000..7d2afc151d5 --- /dev/null +++ b/searchlib/src/tests/memoryindex/memoryindex/memoryindex_test.cpp @@ -0,0 +1,438 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("memoryindex_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/memoryindex/memoryindex.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> +#include <vespa/searchlib/queryeval/fake_search.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/common/sequencedtaskexecutor.h> +#include <vespa/searchlib/common/scheduletaskcallback.h> +#include <vespa/vespalib/util/threadstackexecutor.h> + +using document::Document; +using document::FieldValue; +using search::query::Node; +using search::query::SimplePhrase; +using search::query::SimpleStringTerm; +using search::makeLambdaTask; +using search::ScheduleTaskCallback; +using namespace search::fef; +using namespace search::index; +using namespace search::memoryindex; +using namespace search::queryeval; + +//----------------------------------------------------------------------------- + +struct Setup { + Schema schema; + Setup &field(const std::string &name) { + schema.addIndexField(Schema::IndexField(name, + Schema::STRING)); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +struct Index { + Schema schema; + vespalib::ThreadStackExecutor _executor; + search::SequencedTaskExecutor _invertThreads; + search::SequencedTaskExecutor _pushThreads; + MemoryIndex index; + DocBuilder builder; + uint32_t docid; + std::string currentField; + + Index(const Setup &setup) + : schema(setup.schema), + _executor(1, 128 * 1024), + 
_invertThreads(2), + _pushThreads(2), + index(schema, _invertThreads, _pushThreads), + builder(schema), + docid(1), + currentField() + { + } + void closeField() { + if (!currentField.empty()) { + builder.endField(); + currentField.clear(); + } + } + Index &doc(uint32_t id) { + docid = id; + builder.startDocument(vespalib::make_string("doc::%u", id)); + return *this; + } + Index &field(const std::string &name) { + closeField(); + builder.startIndexField(name); + currentField = name; + return *this; + } + Index &add(const std::string &token) { + builder.addStr(token); + return *this; + } + void internalSyncCommit() { + vespalib::Gate gate; + index.commit(std::make_shared<ScheduleTaskCallback> + (_executor, + makeLambdaTask([&]() { gate.countDown(); }))); + gate.await(); + } + Document::UP commit() { + closeField(); + Document::UP d = builder.endDocument(); + index.insertDocument(docid, *d); + internalSyncCommit(); + return d; + } + Index &remove(uint32_t id) { + index.removeDocument(id); + internalSyncCommit(); + return *this; + } + +private: + Index(const Index &index); + Index &operator=(const Index &index); +}; + +//----------------------------------------------------------------------------- + +std::string toString(SearchIterator & search) +{ + std::ostringstream oss; + bool first = true; + for (search.seek(1); ! 
search.isAtEnd(); search.seek(search.getDocId() + 1)) { + if (!first) oss << ","; + oss << search.getDocId(); + first = false; + } + return oss.str(); +} + +//----------------------------------------------------------------------------- + +const std::string title("title"); +const std::string body("body"); +const std::string foo("foo"); +const std::string bar("bar"); + +//----------------------------------------------------------------------------- + +bool +verifyResult(const FakeResult &expect, + Searchable &index, + std::string fieldName, + const Node &term) +{ + uint32_t fieldId = 0; + FakeRequestContext requestContext; + + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + FieldSpec field(fieldName, fieldId, handle); + FieldSpecList fields; + fields.add(field); + + Blueprint::UP result = index.createBlueprint(requestContext, fields, term); + if (!EXPECT_TRUE(result.get() != 0)) { + return false; + } + EXPECT_EQUAL(expect.inspect().size(), result->getState().estimate().estHits); + EXPECT_EQUAL(expect.inspect().empty(), result->getState().estimate().empty); + + result->fetchPostings(true); + SearchIterator::UP search = result->createSearch(*match_data, true); + if (!EXPECT_TRUE(search.get() != 0)) { + return false; + } + TermFieldMatchData &tmd = *match_data->resolveTermField(handle); + + FakeResult actual; + search->initFullRange(); + for (search->seek(1); !search->isAtEnd(); search->seek(search->getDocId() + 1)) { + actual.doc(search->getDocId()); + search->unpack(search->getDocId()); + EXPECT_EQUAL(search->getDocId(), tmd.getDocId()); + FieldPositionsIterator p = tmd.getIterator(); + actual.len(p.getFieldLength()); + for (; p.valid(); p.next()) { + actual.pos(p.getPosition()); + } + } + return EXPECT_EQUAL(expect, actual); +} + +namespace { +SimpleStringTerm makeTerm(const std::string &term) { + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +Node::UP 
makePhrase(const std::string &term1, const std::string &term2) { + SimplePhrase * phrase = new SimplePhrase("field", 0, search::query::Weight(0)); + Node::UP node(phrase); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term1)))); + phrase->append(Node::UP(new SimpleStringTerm(makeTerm(term2)))); + return node; +} +} // namespace + +// tests basic usage; index some documents in docid order and perform +// some searches. +TEST("testIndexAndSearch") +{ + Index index(Setup().field(title).field(body)); + index.doc(1) + .field(title).add(foo).add(bar).add(foo) + .field(body).add(foo).add(foo).add(foo) + .commit(); + index.doc(2) + .field(title).add(bar).add(foo) + .field(body).add(bar).add(bar).add(bar).add(bar) + .commit(); + + // search for "foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1), + index.index, title, makeTerm(foo))); + + // search for "bar" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + index.index, title, makeTerm(bar))); + + // search for "foo" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2), + index.index, body, makeTerm(foo))); + + // search for "bar" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3), + index.index, body, makeTerm(bar))); + + // search for "bogus" in "title" + EXPECT_TRUE(verifyResult(FakeResult(), + index.index, title, makeTerm("bogus"))); + + // search for "foo" in "bogus" + EXPECT_TRUE(verifyResult(FakeResult(), + index.index, "bogus", makeTerm(foo))); + + // search for "bar foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + index.index, title, *makePhrase(bar, foo))); + +} + +// tests index update behavior; remove/update and unordered docid +// indexing. 
+TEST("require that documents can be removed and updated") +{ + Index index(Setup().field(title)); + + // add unordered + index.doc(3).field(title).add(foo).add(foo).add(foo).commit(); + Document::UP doc1 = index.doc(1).field(title).add(foo).commit(); + Document::UP doc2 = index.doc(2).field(title).add(foo).add(foo).commit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(1).pos(0) + .doc(2).len(2).pos(0).pos(1) + .doc(3).len(3).pos(0).pos(1).pos(2), + index.index, title, makeTerm(foo))); + + // remove document + index.remove(2); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(1).pos(0) + .doc(3).len(3).pos(0).pos(1).pos(2), + index.index, title, makeTerm(foo))); + + // update document + index.doc(1).field(title).add(bar).add(foo).add(foo).commit(); + + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1).pos(2) + .doc(3).len(3).pos(0).pos(1).pos(2), + index.index, title, makeTerm(foo))); +} + +// test the fake field source here, to make sure it acts similar to +// the memory index field source. 
+TEST("testFakeSearchable") +{ + Index index(Setup().field(title).field(body)); + + // setup fake field source with predefined results + FakeSearchable fakeSource; + fakeSource.addResult(title, foo, + FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1)); + fakeSource.addResult(title, bar, + FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0)); + fakeSource.addResult(body, foo, + FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2)); + fakeSource.addResult(body, bar, + FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3)); + + // search for "foo" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(2) + .doc(2).len(2).pos(1), + fakeSource, title, makeTerm(foo))); + + // search for "bar" in "title" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(1) + .doc(2).len(2).pos(0), + fakeSource, title, makeTerm(bar))); + + // search for "foo" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(1).len(3).pos(0).pos(1).pos(2), + fakeSource, body, makeTerm(foo))); + + // search for "bar" in "body" + EXPECT_TRUE(verifyResult(FakeResult() + .doc(2).len(4).pos(0).pos(1).pos(2).pos(3), + fakeSource, body, makeTerm(bar))); + + // search for "bogus" in "title" + EXPECT_TRUE(verifyResult(FakeResult(), + fakeSource, title, makeTerm("bogus"))); + + // search for foo in "bogus" + EXPECT_TRUE(verifyResult(FakeResult(), + fakeSource, "bogus", makeTerm(foo))); +} + +TEST("requireThatFrozenIndexIgnoresUpdates") +{ + Index index(Setup().field(title)); + Document::UP doc1 = index.doc(1).field(title).add(foo).add(bar).commit(); + FakeResult ffr = FakeResult().doc(1).len(2).pos(0); + EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); + EXPECT_TRUE(!index.index.isFrozen()); + index.index.freeze(); + EXPECT_TRUE(index.index.isFrozen()); + index.doc(2).field(title).add(bar).add(foo).commit(); // not added + EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); + index.remove(1); // not removed + 
EXPECT_TRUE(verifyResult(ffr, index.index, title, makeTerm(foo))); +} + +TEST("requireThatNumDocsAndDocIdLimitIsReturned") +{ + Index index(Setup().field(title)); + EXPECT_EQUAL(0u, index.index.getNumDocs()); + EXPECT_EQUAL(1u, index.index.getDocIdLimit()); + Document::UP doc1 = index.doc(1).field(title).add(foo).commit(); + EXPECT_EQUAL(1u, index.index.getNumDocs()); + EXPECT_EQUAL(2u, index.index.getDocIdLimit()); + Document::UP doc4 = index.doc(4).field(title).add(foo).commit(); + EXPECT_EQUAL(2u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + Document::UP doc2 = index.doc(2).field(title).add(foo).commit(); + EXPECT_EQUAL(3u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + // re-add doc4 + index.doc(4).field(title).add(bar).commit(); + EXPECT_EQUAL(3u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); + // remove doc2 + index.remove(2); + EXPECT_EQUAL(2u, index.index.getNumDocs()); + EXPECT_EQUAL(5u, index.index.getDocIdLimit()); +} + +TEST("requireThatWeUnderstandTheMemoryFootprint") +{ + { + Setup setup; + Index index(setup); + EXPECT_EQUAL(0u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } + { + Index index(Setup().field("f1")); + EXPECT_EQUAL(118852u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } + { + Index index(Setup().field("f1").field("f2")); + EXPECT_EQUAL(2*118852u, index.index.getStaticMemoryFootprint()); + EXPECT_EQUAL(index.index.getStaticMemoryFootprint(), index.index.getMemoryUsage().allocatedBytes()); + } +} + +TEST("requireThatNumWordsIsReturned") +{ + Index index(Setup().field(title)); + EXPECT_EQUAL(0u, index.index.getNumWords()); + index.doc(1).field(title).add(foo).commit(); + EXPECT_EQUAL(1u, index.index.getNumWords()); + 
index.doc(2).field(title).add(foo).add(bar).add(body).commit(); + EXPECT_EQUAL(3u, index.index.getNumWords()); +} + +TEST("requireThatWeCanFakeBitVector") +{ + Index index(Setup().field(title)); + index.doc(1).field(title).add(foo).commit(); + index.doc(3).field(title).add(foo).commit(); + { + uint32_t fieldId = 0; + + MatchDataLayout mdl; + FakeRequestContext requestContext; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + // filter field + FieldSpec field(title, fieldId, handle, true); + FieldSpecList fields; + fields.add(field); + + Searchable &searchable = index.index; + Blueprint::UP res = searchable.createBlueprint(requestContext, fields, makeTerm(foo)); + EXPECT_TRUE(res.get() != NULL); + + res->fetchPostings(true); + SearchIterator::UP search = res->createSearch(*match_data, true); + EXPECT_TRUE(search.get() != NULL); + EXPECT_TRUE(dynamic_cast<BooleanMatchIteratorWrapper *>(search.get()) != NULL); + search->initFullRange(); + EXPECT_EQUAL("1,3", toString(*search)); + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore new file mode 100644 index 00000000000..b2636fe5e81 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/.gitignore @@ -0,0 +1 @@ +searchlib_urlfieldinverter_test_app diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt new file mode 100644 index 00000000000..c5a0374fad9 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_urlfieldinverter_test_app + SOURCES + urlfieldinverter_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_urlfieldinverter_test_app COMMAND searchlib_urlfieldinverter_test_app) diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/DESC b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC new file mode 100644 index 00000000000..00115ada607 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/DESC @@ -0,0 +1 @@ +UrlField inverter test. Take a look at urlfieldinverter_test.cpp for details. diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/FILES b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES new file mode 100644 index 00000000000..ac08b0a3e90 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/FILES @@ -0,0 +1 @@ +urlfieldinverter_test.cpp diff --git a/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp new file mode 100644 index 00000000000..30b5883f153 --- /dev/null +++ b/searchlib/src/tests/memoryindex/urlfieldinverter/urlfieldinverter_test.cpp @@ -0,0 +1,579 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+/* -*- mode: C++; coding: utf-8; -*- */ + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("urlfieldinverter_test"); +#include <vespa/searchlib/index/docbuilder.h> +#include <vespa/searchlib/memoryindex/fieldinverter.h> +#include <vespa/searchlib/memoryindex/urlfieldinverter.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/searchlib/test/memoryindex/ordereddocumentinserter.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/document/repo/fixedtyperepo.h> + +namespace search +{ + + +using document::Document; +using index::DocBuilder; +using index::DocTypeBuilder; +using index::Schema; + +namespace memoryindex +{ + +namespace { +const vespalib::string url = "url"; +} + + +namespace +{ + +Document::UP +makeDoc10Single(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:81/fluke?ab=2#4"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("81"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("4"). + endSubField(). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc10Array(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:82/fluke?ab=2#8"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). 
+ addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("8"). + endSubField(). + endElement(). + startElement(1). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:82/fluke?ab=2#9"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("82"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("9"). + endSubField(). + endElement(). + endField(); + return b.endDocument(); +} + +Document::UP +makeDoc10WeightedSet(DocBuilder &b) +{ + b.startDocument("doc::10"); + b.startIndexField("url"). + startElement(4). + startSubField("all"). + addUrlTokenizedString("http://www.yahoo.com:83/fluke?ab=2#12"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.yahoo.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("83"). + endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + addTermAnnotation("altfluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("12"). + endSubField(). + endElement(). + startElement(7). + startSubField("all"). + addUrlTokenizedString("http://www.flickr.com:85/fluke?ab=2#13"). + endSubField(). + startSubField("scheme"). + addUrlTokenizedString("http"). + endSubField(). + startSubField("host"). + addUrlTokenizedString("www.flickr.com"). + endSubField(). + startSubField("port"). + addUrlTokenizedString("85"). 
+ endSubField(). + startSubField("path"). + addUrlTokenizedString("/fluke"). + endSubField(). + startSubField("query"). + addUrlTokenizedString("ab=2"). + endSubField(). + startSubField("fragment"). + addUrlTokenizedString("13"). + endSubField(). + endElement(). + endField(); + return b.endDocument(); +} + + +Document::UP +makeDoc10Empty(DocBuilder &b) +{ + b.startDocument("doc::10"); + return b.endDocument(); +} + +} + +struct Fixture +{ + Schema _schema; + DocBuilder _b; + std::vector<std::unique_ptr<FieldInverter> > _inverters; + std::unique_ptr<UrlFieldInverter> _urlInverter; + test::OrderedDocumentInserter _inserter; + DocTypeBuilder::SchemaIndexFields _schemaIndexFields; + + static Schema + makeSchema(Schema::CollectionType collectionType) + { + Schema schema; + schema.addUriIndexFields(Schema::IndexField("url", Schema::STRING, + collectionType)); + return schema; + } + + Fixture(Schema::CollectionType collectionType) + : _schema(makeSchema(collectionType)), + _b(_schema), + _inverters(), + _urlInverter(), + _inserter(), + _schemaIndexFields() + { + _schemaIndexFields.setup(_schema); + for (uint32_t fieldId = 0; fieldId < _schema.getNumIndexFields(); + ++fieldId) { + _inverters.push_back(std::make_unique<FieldInverter>(_schema, + fieldId)); + } + DocTypeBuilder::UriField &urlField = + _schemaIndexFields._uriFields.front(); + _urlInverter = std::make_unique<UrlFieldInverter> + (collectionType, + _inverters[urlField._all].get(), + _inverters[urlField._scheme].get(), + _inverters[urlField._host].get(), + _inverters[urlField._port].get(), + _inverters[urlField._path].get(), + _inverters[urlField._query].get(), + _inverters[urlField._fragment].get(), + _inverters[urlField._hostname].get()); + } + + void + invertDocument(uint32_t docId, const Document &doc) + { + _urlInverter->invertField(docId, doc.getValue(url)); + } + + void + pushDocuments() + { + uint32_t fieldId = 0; + for (auto &inverter : _inverters) { + _inserter.setFieldId(fieldId); + 
inverter->pushDocuments(_inserter); + ++fieldId; + } + } + + void + enableAnnotations() + { + _urlInverter->setUseAnnotations(true); + } +}; + + +TEST_F("requireThatSingleUrlFieldWorks", Fixture(Schema::SINGLE)) +{ + f.invertDocument(10, *makeDoc10Single(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=4,a=10," + "w=81,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=81,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=4,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatArrayUrlFieldWorks", Fixture(Schema::ARRAY)) +{ + f.invertDocument(10, *makeDoc10Array(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=8,a=10," + "w=82,a=10," + "w=9,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=82,a=10," + "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=8,a=10," + "w=9,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET)) +{ + f.invertDocument(10, *makeDoc10WeightedSet(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=12,a=10," + "w=13,a=10," + "w=2,a=10," + "w=83,a=10," + "w=85,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=83,a=10," + "w=85,a=10," 
+ "f=4," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=12,a=10," + "w=13,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedSingleUrlFieldWorks", Fixture(Schema::SINGLE)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Single(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=4,a=10," + "w=81,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=81,a=10," + "f=4," + "w=altfluke,a=10," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=4,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + + +TEST_F("requireThatAnnotatedArrayUrlFieldWorks", Fixture(Schema::ARRAY)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Array(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=2,a=10," + "w=8,a=10," + "w=82,a=10," + "w=9,a=10," + "w=ab,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=fluke,a=10," + "w=http,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=1," + "w=http,a=10," + "f=2," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10," + "f=3," + "w=82,a=10," + "f=4," + "w=altfluke,a=10," + "w=fluke,a=10," + "f=5," + "w=2,a=10," + "w=ab,a=10," + "f=6," + "w=8,a=10," + "w=9,a=10," + "f=7," + "w=EnDhOsT,a=10," + "w=StArThOsT,a=10," + "w=com,a=10," + "w=flickr,a=10," + "w=www,a=10," + "w=yahoo,a=10", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedWeightedSetFieldWorks", + Fixture(Schema::WEIGHTEDSET)) +{ + f.enableAnnotations(); + f._inserter.setVerbose(); + f.invertDocument(10, *makeDoc10WeightedSet(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("f=0," + "w=12,a=10(e=0,w=4,l=9[8])," + 
"w=13,a=10(e=1,w=7,l=9[8])," + "w=2,a=10(e=0,w=4,l=9[7],e=1,w=7,l=9[7])," + "w=83,a=10(e=0,w=4,l=9[4])," + "w=85,a=10(e=1,w=7,l=9[4])," + "w=ab,a=10(e=0,w=4,l=9[6],e=1,w=7,l=9[6])," + "w=com,a=10(e=0,w=4,l=9[3],e=1,w=7,l=9[3])," + "w=flickr,a=10(e=1,w=7,l=9[2])," + "w=fluke,a=10(e=0,w=4,l=9[5],e=1,w=7,l=9[5])," + "w=http,a=10(e=0,w=4,l=9[0],e=1,w=7,l=9[0])," + "w=www,a=10(e=0,w=4,l=9[1],e=1,w=7,l=9[1])," + "w=yahoo,a=10(e=0,w=4,l=9[2])," + "f=1," + "w=http,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0])," + "f=2," + "w=com,a=10(e=0,w=4,l=3[2],e=1,w=7,l=3[2])," + "w=flickr,a=10(e=1,w=7,l=3[1])," + "w=www,a=10(e=0,w=4,l=3[0],e=1,w=7,l=3[0])," + "w=yahoo,a=10(e=0,w=4,l=3[1])," + "f=3," + "w=83,a=10(e=0,w=4,l=1[0])," + "w=85,a=10(e=1,w=7,l=1[0])," + "f=4," + "w=altfluke,a=10(e=0,w=4,l=1[0])," + "w=fluke,a=10(e=0,w=4,l=1[0],e=1,w=7,l=1[0])," + "f=5," + "w=2,a=10(e=0,w=4,l=2[1],e=1,w=7,l=2[1])," + "w=ab,a=10(e=0,w=4,l=2[0],e=1,w=7,l=2[0])," + "f=6," + "w=12,a=10(e=0,w=4,l=1[0])," + "w=13,a=10(e=1,w=7,l=1[0])," + "f=7," + "w=EnDhOsT,a=10(e=0,w=4,l=5[4],e=1,w=7,l=5[4])," + "w=StArThOsT,a=10(e=0,w=4,l=5[0],e=1,w=7,l=5[0])," + "w=com,a=10(e=0,w=4,l=5[3],e=1,w=7,l=5[3])," + "w=flickr,a=10(e=1,w=7,l=5[2])," + "w=www,a=10(e=0,w=4,l=5[1],e=1,w=7,l=5[1])," + "w=yahoo,a=10(e=0,w=4,l=5[2])", + f._inserter.toStr()); +} + + +TEST_F("requireThatEmptySingleFieldWorks", Fixture(Schema::SINGLE)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatEmptyArrayFieldWorks", Fixture(Schema::ARRAY)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatEmptyWeightedSetFieldWorks", Fixture(Schema::WEIGHTEDSET)) +{ + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptySingleFieldWorks", Fixture(Schema::SINGLE)) +{ + f.enableAnnotations(); + 
f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptyArrayFieldWorks", Fixture(Schema::ARRAY)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +TEST_F("requireThatAnnotatedEmptyWeightedSetFieldWorks", + Fixture(Schema::WEIGHTEDSET)) +{ + f.enableAnnotations(); + f.invertDocument(10, *makeDoc10Empty(f._b)); + f.pushDocuments(); + EXPECT_EQUAL("", + f._inserter.toStr()); +} + +} // namespace memoryindex +} // namespace search + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/memorytub/.gitignore b/searchlib/src/tests/memorytub/.gitignore new file mode 100644 index 00000000000..d3185d605a1 --- /dev/null +++ b/searchlib/src/tests/memorytub/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +test_memorytub +searchlib_test_memorytub_app diff --git a/searchlib/src/tests/memorytub/CMakeLists.txt b/searchlib/src/tests/memorytub/CMakeLists.txt new file mode 100644 index 00000000000..a06fb4de8e2 --- /dev/null +++ b/searchlib/src/tests/memorytub/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_test_memorytub_app + SOURCES + memorytub_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_test_memorytub_app COMMAND searchlib_test_memorytub_app) diff --git a/searchlib/src/tests/memorytub/memorytub_test.cpp b/searchlib/src/tests/memorytub/memorytub_test.cpp new file mode 100644 index 00000000000..348aee2fe7f --- /dev/null +++ b/searchlib/src/tests/memorytub/memorytub_test.cpp @@ -0,0 +1,205 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("test_memorytub"); + +#include <vespa/searchlib/util/memorytub.h> +#include <vespa/vespalib/testkit/testapp.h> + +#define MEMTUB_ARRAY_ALLOC(tub, type, size) ((type *) tub->Alloc(sizeof(type) * size)) + + +enum { + SMALL_STRING = 100, + BIG_STRING = 100000, + SMALL_SMALL_ARRAY = 10, + BIG_SMALL_ARRAY = 1000 +}; + + +class Small +{ +public: + char filler[SMALL_STRING]; +}; + + +class Big +{ +public: + char filler[BIG_STRING]; +}; + + +class Test : public vespalib::TestApp +{ +private: + search::util::SmallMemoryTub _tub; + +public: + bool Overlap(char *start1, char *end1, + char *start2, char *end2); + bool InTub(char *pt, char *end); + bool NotInTub(char *pt, char *end); + int Main(); + + Test(void) + : _tub() + { + } +}; + + +bool +Test::Overlap(char *start1, char *end1, + char *start2, char *end2) +{ + if (start1 == end1) + return false; + + if (start2 == end2) + return false; + + if (start2 >= start1 && start2 < end1) + return true; + + if (end2 > start1 && end2 <= end1) + return true; + + if (start1 >= start2 && start1 < end2) + return true; + + if (end1 > start2 && end1 <= end2) + return true; + + return false; +} + + +bool +Test::InTub(char *pt, char *end) +{ + for (char *p = pt; p < end; p++) + if (!_tub.InTub(p)) + return false; + return true; +} + + +bool +Test::NotInTub(char *pt, char *end) +{ + for (char *p = pt; p < end; p++) + if (_tub.InTub(p)) + return false; + return true; +} + + +int +Test::Main() +{ + TEST_INIT("memorytub-test"); + + Small *small = NULL; + Big *big = NULL; + char *small_string = NULL; + char *big_string = NULL; + Small *small_small_array = NULL; + Small *big_small_array = NULL; + + EXPECT_TRUE(!_tub.InTub(&_tub)); + + EXPECT_TRUE(sizeof(Small) < _tub.GetAllocLimit()); + EXPECT_TRUE(sizeof(Big) > _tub.GetAllocLimit()); + EXPECT_TRUE(SMALL_STRING < _tub.GetAllocLimit()); + EXPECT_TRUE(BIG_STRING > _tub.GetAllocLimit()); + EXPECT_TRUE(sizeof(Small) * 
SMALL_SMALL_ARRAY < _tub.GetAllocLimit()); + EXPECT_TRUE(sizeof(Small) * BIG_SMALL_ARRAY > _tub.GetAllocLimit()); + + small = new (&_tub) Small(); + EXPECT_TRUE(((void *)small) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)small, (char *)(small + 1))); + + big = new (&_tub) Big(); + EXPECT_TRUE(((void *)big) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)big, (char *)(big + 1))); + + small_string = MEMTUB_ARRAY_ALLOC((&_tub), char, SMALL_STRING); + EXPECT_TRUE(((void *)small_string) != ((void *)&_tub)); + EXPECT_TRUE(InTub(small_string, small_string + SMALL_STRING)); + + big_string = MEMTUB_ARRAY_ALLOC((&_tub), char, BIG_STRING); + EXPECT_TRUE(((void *)big_string) != ((void *)&_tub)); + EXPECT_TRUE(InTub(big_string, big_string + BIG_STRING)); + + small_small_array = MEMTUB_ARRAY_ALLOC((&_tub), Small, SMALL_SMALL_ARRAY); + EXPECT_TRUE(((void *)small_small_array) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + big_small_array = MEMTUB_ARRAY_ALLOC((&_tub), Small, BIG_SMALL_ARRAY); + EXPECT_TRUE(((void *)big_small_array) != ((void *)&_tub)); + EXPECT_TRUE(InTub((char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + (char *)big, (char *)(big + 1))); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + small_string, small_string + SMALL_STRING)); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + big_string, big_string + BIG_STRING)); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap((char *)small, (char *)(small + 1), + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + small_string, small_string + SMALL_STRING)); + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + big_string, big_string 
+ BIG_STRING)); + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap((char *)big, (char *)(big + 1), + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING, + big_string, big_string + BIG_STRING)); + + EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING, + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap(small_string, small_string + SMALL_STRING, + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap(big_string, big_string + BIG_STRING, + (char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + + EXPECT_TRUE(!Overlap(big_string, big_string + BIG_STRING, + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + EXPECT_TRUE(!Overlap((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY), + (char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + + + _tub.Reset(); + EXPECT_TRUE(NotInTub((char *)small, (char *)(small + 1))); + EXPECT_TRUE(NotInTub((char *)big, (char *)(big + 1))); + EXPECT_TRUE(NotInTub(small_string, small_string + SMALL_STRING)); + EXPECT_TRUE(NotInTub(big_string, big_string + BIG_STRING)); + EXPECT_TRUE(NotInTub((char *)small_small_array, (char *)(small_small_array + SMALL_SMALL_ARRAY))); + EXPECT_TRUE(NotInTub((char *)big_small_array, (char *)(big_small_array + BIG_SMALL_ARRAY))); + TEST_DONE(); +} + +TEST_APPHOOK(Test) diff --git a/searchlib/src/tests/nativerank/.gitignore b/searchlib/src/tests/nativerank/.gitignore new file mode 100644 index 00000000000..6a3051df4e7 --- /dev/null +++ b/searchlib/src/tests/nativerank/.gitignore @@ -0,0 +1,2 @@ +/vlog3.txt +searchlib_nativerank_test_app diff --git a/searchlib/src/tests/nativerank/CMakeLists.txt 
b/searchlib/src/tests/nativerank/CMakeLists.txt new file mode 100644 index 00000000000..dc9542a4988 --- /dev/null +++ b/searchlib/src/tests/nativerank/CMakeLists.txt @@ -0,0 +1,12 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_nativerank_test_app + SOURCES + nativerank.cpp + DEPENDS + searchlib +) +vespa_add_test( + NAME searchlib_nativerank_test_app + COMMAND searchlib_nativerank_test_app + ENVIRONMENT "VESPA_LOG_TARGET=file:vlog3.txt" +) diff --git a/searchlib/src/tests/nativerank/nativerank.cpp b/searchlib/src/tests/nativerank/nativerank.cpp new file mode 100644 index 00000000000..398ca52a190 --- /dev/null +++ b/searchlib/src/tests/nativerank/nativerank.cpp @@ -0,0 +1,828 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("nativerank_test"); + +#include <vespa/searchlib/features/nativeattributematchfeature.h> +#include <vespa/searchlib/features/nativefieldmatchfeature.h> +#include <vespa/searchlib/features/nativeproximityfeature.h> +#include <vespa/searchlib/features/nativerankfeature.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/features/utils.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/functiontablefactory.h> +#include <vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/fef/test/ftlib.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using namespace search::fef; +using namespace search::fef::test; + +const double EPS = 10e-4; + +namespace search { +namespace features { + +class Test : public FtTestApp { +private: + BlueprintFactory _factory; + + struct ANAM { + int32_t attributeWeight; + search::query::Weight termWeight; + uint32_t fieldWeight; + uint32_t docId; + ANAM(int32_t aw, 
uint32_t tw = 100, uint32_t fw = 100, uint32_t id = 1) : + attributeWeight(aw), termWeight(tw), fieldWeight(fw), docId(id) {} + vespalib::string toString() const { + return vespalib::make_string("aw(%d), tw(%u), fw(%u), id(%u)", + attributeWeight, termWeight.percent(), fieldWeight, docId); + } + }; + + bool assertNativeFieldMatch(feature_t score, const vespalib::string & query, const vespalib::string & field, + const Properties & props = Properties(), uint32_t docId = 1); + bool assertNativeAttributeMatch(feature_t score, const ANAM & t1, const ANAM & t2, + const Properties & props = Properties()); + bool assertNativeProximity(feature_t score, const vespalib::string & query, const vespalib::string & field, + const Properties & props = Properties(), uint32_t docId = 1); + bool assertNativeRank(feature_t score, feature_t fieldMatchWeight, feature_t attributeMatchWeight, feature_t proximityWeight); + + void testNativeFieldMatch(); + void testNativeAttributeMatch(); + void testNativeProximity(); + void testNativeRank(); + +public: + int Main(); +}; + +void +Test::testNativeFieldMatch() +{ + { // test blueprint + NativeFieldMatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeFieldMatch")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "qux"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16))); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found + params.clear(); + + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeFieldMatch.firstOccurrenceTable", "a"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found + p.clear().add("nativeFieldMatch.occurrenceCountTable", "b"); + 
FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'b' not found + + const TableManager & tm = ft.getIndexEnv().getTableManager(); + { + p.clear(); + p.add("nativeRank.useTableNormalization", "false"); + FT_SETUP_OK(pt, params, in, out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeFieldMatchParams & pas = (dynamic_cast<NativeFieldMatchBlueprint *>(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 3); + EXPECT_TRUE(pas.vector[0].firstOccTable == tm.getTable("expdecay(8000,12.50)")); + EXPECT_TRUE(pas.vector[1].firstOccTable == tm.getTable("expdecay(8000,12.50)")); + EXPECT_TRUE(pas.vector[0].numOccTable == tm.getTable("loggrowth(1500,4000,19)")); + EXPECT_TRUE(pas.vector[1].numOccTable == tm.getTable("loggrowth(1500,4000,19)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[1].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, true); + EXPECT_EQUAL(pas.vector[2].field, false); + EXPECT_EQUAL(pas.vector[0].averageFieldLength, NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH); + EXPECT_EQUAL(pas.vector[1].averageFieldLength, NativeFieldMatchParam::NOT_DEF_FIELD_LENGTH); + EXPECT_EQUAL(pas.minFieldLength, 6u); + EXPECT_EQUAL(pas.vector[0].firstOccImportance, 0.5); + EXPECT_EQUAL(pas.vector[1].firstOccImportance, 0.5); + } + { + p.clear(); + p.add("nativeFieldMatch.firstOccurrenceTable", "linear(0,1)"); + p.add("nativeFieldMatch.firstOccurrenceTable.foo", "linear(0,2)"); + p.add("nativeFieldMatch.occurrenceCountTable", "linear(0,3)"); + p.add("nativeFieldMatch.occurrenceCountTable.baz", "linear(0,4)"); + p.add("vespa.fieldweight.foo", "200"); + p.add("vespa.fieldweight.baz", "0"); + p.add("nativeFieldMatch.averageFieldLength.foo", "400"); + p.add("nativeFieldMatch.averageFieldLength.baz", "500"); + 
p.add("nativeFieldMatch.minFieldLength", "12"); + p.add("nativeFieldMatch.firstOccurrenceImportance", "0.8"); + p.add("nativeFieldMatch.firstOccurrenceImportance.foo", "0.6"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "quux"); + ft.getIndexEnv().getFields()[4].setFilter(true); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz").add("quux"), in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeFieldMatchParams & pas = (dynamic_cast<NativeFieldMatchBlueprint *>(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 5); + EXPECT_TRUE(pas.vector[0].firstOccTable == tm.getTable("linear(0,2)")); + EXPECT_TRUE(pas.vector[3].firstOccTable == tm.getTable("linear(0,1)")); + EXPECT_TRUE(pas.vector[0].numOccTable == tm.getTable("linear(0,3)")); + EXPECT_TRUE(pas.vector[3].numOccTable == tm.getTable("linear(0,4)")); + EXPECT_APPROX(pas.vector[0].maxTableSum, 2.4, 10e-6); + EXPECT_APPROX(pas.vector[3].maxTableSum, 1.6, 10e-6); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit + EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an attribute + EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field + EXPECT_EQUAL(pas.vector[4].field, false); // filter field + EXPECT_EQUAL(pas.vector[0].averageFieldLength, 400u); + EXPECT_EQUAL(pas.vector[3].averageFieldLength, 500u); + EXPECT_EQUAL(pas.minFieldLength, 12u); + EXPECT_EQUAL(pas.vector[0].firstOccImportance, 0.6); + EXPECT_EQUAL(pas.vector[3].firstOccImportance, 0.8); + } + { + FtIndexEnvironment ie; + FT_DUMP(_factory, 
"nativeFieldMatch", ie, StringList().add("nativeFieldMatch")); + } + } + + { // test helper functions + FtFeatureTest ft(_factory, ""); + NativeFieldMatchParams p; + NativeFieldMatchParam f; + Table t; + t.add(0).add(1).add(2).add(3).add(4).add(5).add(6).add(7); + f.firstOccTable = &t; + f.numOccTable = &t; + p.vector.push_back(f); + NativeFieldMatchExecutor nfme(ft.getQueryEnv(), p); + EXPECT_EQUAL(p.minFieldLength, 6u); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 0, 4), 0); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 1, 4), 1); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 2, 4), 2); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 3, 4), 4); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 3, 6), 4); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 4, 6), 5); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 5, 6), 7); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 0, 12), 0); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 4, 12), 2); + EXPECT_EQUAL(nfme.getFirstOccBoost(0, 11, 12), 7); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 0, 4), 0); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 2, 4), 2); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 4, 4), 4); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 4, 6), 4); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 5, 6), 5); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 6, 6), 7); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 0, 12), 0); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 6, 12), 3); + EXPECT_EQUAL(nfme.getNumOccBoost(0, 12, 12), 7); + } + { // test params object + NativeFieldMatchParams p; + p.resize(1); + p.setMaxTableSums(0, 0); // test reset to 1 + EXPECT_EQUAL(p.vector[0].maxTableSum, 1); + } + + { // test executor + // 1 term + EXPECT_TRUE(assertNativeFieldMatch(55, "a", "a")); + EXPECT_TRUE(assertNativeFieldMatch(40, "a", "x x x a")); + EXPECT_TRUE(assertNativeFieldMatch(70, "a", "a a a a")); + + // 2 terms + EXPECT_TRUE(assertNativeFieldMatch(27.5, "a b", "a")); + EXPECT_TRUE(assertNativeFieldMatch(52.5, "a b", "a b")); + EXPECT_TRUE(assertNativeFieldMatch(67.5, "a b", "a b a b a b a b")); + + // 3 terms + 
EXPECT_TRUE(assertNativeFieldMatch(50, "a b c", "a b c")); + + // 4 terms + EXPECT_TRUE(assertNativeFieldMatch(47.5, "a b c d", "a b c d")); + + // change term weight + EXPECT_TRUE(assertNativeFieldMatch(45, "a b", "a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(50, "a!600 b!200", "a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(40, "a!200 b!600", "a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(55, "a!200 b!0", "a x x x b")); + + // change significance + EXPECT_TRUE(assertNativeFieldMatch(46, "a%0.4 b%0.1", "x a x x x b")); + EXPECT_TRUE(assertNativeFieldMatch(34, "a%0.1 b%0.4", "x a x x x b")); + + // change firstOccImportance + Properties p = Properties().add("nativeFieldMatch.firstOccurrenceImportance", "1"); + EXPECT_TRUE(assertNativeFieldMatch(100, "a", "a", p)); + p.clear().add("nativeFieldMatch.firstOccurrenceImportance", "0"); + EXPECT_TRUE(assertNativeFieldMatch(10, "a", "a", p)); + + // use table normalization + p.clear().add("nativeRank.useTableNormalization", "true"); + // norm factor = (100*0.5 + 60*0.5) = 80 + EXPECT_TRUE(assertNativeFieldMatch(0.6875, "a", "a", p)); // (55/80) + EXPECT_TRUE(assertNativeFieldMatch(1, "a", "a a a a a a", p)); // (80/80) + p.add("nativeFieldMatch.firstOccurrenceTable", "linear(0,0)"); + p.add("nativeFieldMatch.occurrenceCountTable", "linear(0,0)"); + EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p)); + + // use average field length + p.clear().add("nativeFieldMatch.averageFieldLength.foo", "12"); + EXPECT_TRUE(assertNativeFieldMatch(50, "a", "a", p)); // firstOccBoost: 100, numOccBoost: 0 + EXPECT_TRUE(assertNativeFieldMatch(45, "a", "x x x a", p)); // firstOccBoost: 90, numOccBoost: 0 + EXPECT_TRUE(assertNativeFieldMatch(50, "a", "x x x a a", p)); // firstOccBoost: 90, numOccBoost: 10 + + // change field weight + p.clear().add("vespa.fieldweight.foo", "0"); + EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p)); + + // change docId to give 0 hits + EXPECT_TRUE(assertNativeFieldMatch(0, "a", "a", p.clear(), 
2)); + } +} + +bool +Test::assertNativeFieldMatch(feature_t score, + const vespalib::string & query, + const vespalib::string & field, + const Properties & props, + uint32_t docId) +{ + LOG(info, "assertNativeFieldMatch(%f, '%s', '%s')", score, query.c_str(), field.c_str()); + + // Setup feature test. + vespalib::string feature = "nativeFieldMatch"; + FtFeatureTest ft(_factory, feature); + + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); + ft.getIndexEnv().getProperties().add("nativeFieldMatch.firstOccurrenceTable", + vespalib::make_string("linear(-10,100,%zu)", std::max((size_t)6, index["foo"].size()))); + ft.getIndexEnv().getProperties().add("nativeFieldMatch.occurrenceCountTable", + vespalib::make_string("linear(10,0,%zu)", std::max((size_t)6, index["foo"].size()) + 1)); + ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test + ft.getIndexEnv().getProperties().import(props); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + // Execute and compare results. 
+ if (!EXPECT_TRUE(ft.execute(score, EPS, docId))) { + return false; + } + return true; +} + +void +Test::testNativeAttributeMatch() +{ + { // test blueprint + NativeAttributeMatchBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeAttributeMatch")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "qux"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16))); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found + params.clear(); + + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeAttributeMatch.weightTable", "a"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found + +// const TableManager & tm = ft.getIndexEnv().getTableManager(); + { + p.clear(); + p.add("nativeRank.useTableNormalization", "false"); + FT_SETUP_OK(pt, params, in, out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeAttributeMatchParams & pas = (dynamic_cast<NativeAttributeMatchBlueprint *>(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 3); +// EXPECT_TRUE(pas.vector[0].weightBoostTable == tm.getTable("linear(1,0)")); +// EXPECT_TRUE(pas.vector[1].weightBoostTable == tm.getTable("linear(1,0)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[1].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, true); + EXPECT_EQUAL(pas.vector[2].field, false); + } + { + p.clear(); + p.add("nativeAttributeMatch.weightTable", "linear(0,3)"); + 
p.add("nativeAttributeMatch.weightTable.foo", "linear(0,2)"); + p.add("vespa.fieldweight.foo", "200"); + p.add("vespa.fieldweight.baz", "0"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "baz"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("baz"), in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeAttributeMatchParams & pas = (dynamic_cast<NativeAttributeMatchBlueprint *>(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 4); +// EXPECT_TRUE(pas.vector[0].weightBoostTable == tm.getTable("linear(0,2)")); +// EXPECT_TRUE(pas.vector[3].weightBoostTable == tm.getTable("linear(0,3)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 2); + EXPECT_EQUAL(pas.vector[3].maxTableSum, 3); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit + EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an index + EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field + } + + { + FtIndexEnvironment ie; + FT_DUMP(_factory, "nativeAttributeMatch", ie, StringList().add("nativeAttributeMatch")); + } + } + { // test executor + + EXPECT_TRUE(assertNativeAttributeMatch(15, ANAM(10), ANAM(10))); // basic + EXPECT_TRUE(assertNativeAttributeMatch(5, ANAM(-10), ANAM(10))); // negative weight + EXPECT_TRUE(assertNativeAttributeMatch(12.5, ANAM(10, 600), ANAM(10, 200))); // change term weights + EXPECT_TRUE(assertNativeAttributeMatch(10, ANAM(10, 600), ANAM(10, 0))); // change term weights + EXPECT_TRUE(assertNativeAttributeMatch(18, ANAM(10, 100, 200), ANAM(10, 100, 800))); // change field weights + EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(10, 100, 0), ANAM(10, 100, 0))); // change 
field weights + EXPECT_TRUE(assertNativeAttributeMatch(10, ANAM(10, 100, 100, 2), ANAM(10, 100, 100))); // change docId to give 1 hit + EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(10, 100, 100, 2), ANAM(10, 100, 100, 2))); // change docId to give 0 hits + { // use table normalization + // foo: max table value: 255 + // bar: max table value: 510 + Properties p = Properties().add("nativeRank.useTableNormalization", "true"); + EXPECT_TRUE(assertNativeAttributeMatch(0.2941, ANAM(100), ANAM(50), p)); // (100/255 + 100/510)*0.5 + EXPECT_TRUE(assertNativeAttributeMatch(1, ANAM(255), ANAM(255), p)); // (255/255 + 510/510)*0.5 + p.add("nativeAttributeMatch.weightTable.foo", "linear(0,0)"); + p.add("nativeAttributeMatch.weightTable.bar", "linear(0,0)"); + EXPECT_TRUE(assertNativeAttributeMatch(0, ANAM(100), ANAM(50), p)); + } + } +} + +bool +Test::assertNativeAttributeMatch(feature_t score, const ANAM & t1, const ANAM & t2, const Properties & props) +{ + LOG(info, "assertNativeAttributeMatch(%f, '%s', '%s')", score, t1.toString().c_str(), t2.toString().c_str()); + vespalib::string feature = "nativeAttributeMatch"; + FtFeatureTest ft(_factory, feature); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(256))); + ft.getIndexEnv().getProperties().add("nativeAttributeMatch.weightTable.foo", "linear(1,0)"); + ft.getIndexEnv().getProperties().add("nativeAttributeMatch.weightTable.bar", "linear(2,0)"); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.foo", vespalib::make_string("%u", t1.fieldWeight)); + ft.getIndexEnv().getProperties().add("vespa.fieldweight.bar", vespalib::make_string("%u", t2.fieldWeight)); + ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test + 
ft.getIndexEnv().getProperties().import(props); + if (!EXPECT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("foo") != NULL)) { // t1 + return false; + } + if (!EXPECT_TRUE(ft.getQueryEnv().getBuilder().addAttributeNode("bar") != NULL)) { // t2 + return false; + } + ft.getQueryEnv().getTerms()[0].setWeight(t1.termWeight); + ft.getQueryEnv().getTerms()[1].setWeight(t2.termWeight); + ASSERT_TRUE(ft.setup()); + + MatchDataBuilder::UP mdb = ft.createMatchDataBuilder(); + { + TermFieldMatchData *tfmd = mdb->getTermFieldMatchData(0, 0); + tfmd->reset(t1.docId); + TermFieldMatchDataPosition pos; + pos.setElementWeight(t1.attributeWeight); + tfmd->appendPosition(pos); + } + { + TermFieldMatchData *tfmd = mdb->getTermFieldMatchData(1, 1); + tfmd->reset(t2.docId); + TermFieldMatchDataPosition pos; + pos.setElementWeight(t2.attributeWeight); + tfmd->appendPosition(pos); + } + if (!EXPECT_TRUE(ft.execute(score, EPS))) { + return false; + } + return true; +} + +void +Test::testNativeProximity() +{ + { // test blueprint + NativeProximityBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeProximity")); + + FtFeatureTest ft(_factory, ""); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "qux"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(16))); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("baz")); // field 'baz' not found + params.clear(); + + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeProximity.proximityTable", "a"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'a' not found + p.clear().add("nativeProximity.reverseProximityTable", "b"); + FT_SETUP_FAIL(pt, ft.getIndexEnv(), params); // table 'b' not found + + const TableManager & tm = 
ft.getIndexEnv().getTableManager(); + { + p.clear(); + p.add("nativeRank.useTableNormalization", "false"); + FT_SETUP_OK(pt, params, in, out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeProximityParams & pas = (dynamic_cast<NativeProximityBlueprint *>(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 3); + EXPECT_TRUE(pas.vector[0].proximityTable == tm.getTable("expdecay(500,3)")); + EXPECT_TRUE(pas.vector[1].proximityTable == tm.getTable("expdecay(500,3)")); + EXPECT_TRUE(pas.vector[0].revProximityTable == tm.getTable("expdecay(400,3)")); + EXPECT_TRUE(pas.vector[1].revProximityTable == tm.getTable("expdecay(400,3)")); + EXPECT_EQUAL(pas.vector[0].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[1].maxTableSum, 1); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, true); + EXPECT_EQUAL(pas.vector[2].field, false); + EXPECT_EQUAL(pas.slidingWindow, 4u); + EXPECT_EQUAL(pas.vector[0].proximityImportance, 0.5); + EXPECT_EQUAL(pas.vector[1].proximityImportance, 0.5); + } + { + p.clear(); + p.add("nativeProximity.proximityTable", "linear(0,1)"); + p.add("nativeProximity.proximityTable.foo", "linear(0,2)"); + p.add("nativeProximity.reverseProximityTable", "linear(0,3)"); + p.add("nativeProximity.reverseProximityTable.baz", "linear(0,4)"); + p.add("vespa.fieldweight.foo", "200"); + p.add("vespa.fieldweight.baz", "0"); + p.add("nativeProximity.slidingWindowSize", "2"); + p.add("nativeProximity.proximityImportance", "0.8"); + p.add("nativeProximity.proximityImportance.foo", "0.6"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "quux"); + ft.getIndexEnv().getFields()[4].setFilter(true); + FT_SETUP_OK(pt, ft.getIndexEnv(), 
params.add("foo").add("baz"), in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeProximityParams & pas = (dynamic_cast<NativeProximityBlueprint *>(bp.get()))->getParams(); + ASSERT_TRUE(pas.vector.size() == 5); + EXPECT_TRUE(pas.vector[0].proximityTable == tm.getTable("linear(0,2)")); + EXPECT_TRUE(pas.vector[3].proximityTable == tm.getTable("linear(0,1)")); + EXPECT_TRUE(pas.vector[0].revProximityTable == tm.getTable("linear(0,3)")); + EXPECT_TRUE(pas.vector[3].revProximityTable == tm.getTable("linear(0,4)")); + EXPECT_APPROX(pas.vector[0].maxTableSum, 2.4, 10e-6); + EXPECT_APPROX(pas.vector[3].maxTableSum, 1.6, 10e-6); + EXPECT_EQUAL(pas.vector[0].fieldWeight, 200u); + EXPECT_EQUAL(pas.vector[1].fieldWeight, 100u); + EXPECT_EQUAL(pas.vector[3].fieldWeight, 0u); + EXPECT_EQUAL(pas.vector[0].field, true); + EXPECT_EQUAL(pas.vector[1].field, false); // only 'foo' and 'baz' are specified explicit + EXPECT_EQUAL(pas.vector[2].field, false); // 'qux' is an attribute + EXPECT_EQUAL(pas.vector[3].field, false); // fieldWeight == 0 -> do not consider this field + EXPECT_EQUAL(pas.vector[4].field, false); // filter field + EXPECT_EQUAL(pas.slidingWindow, 2u); + EXPECT_EQUAL(pas.vector[0].proximityImportance, 0.6); + EXPECT_EQUAL(pas.vector[3].proximityImportance, 0.8); + } + + { + FtIndexEnvironment ie; + FT_DUMP(_factory, "nativeProximity", ie, StringList().add("nativeProximity")); + } + } + + { // test NativeProximityExecutor::generateTermPairs() + QueryTermVector terms; + SimpleTermData a, b, c; + a.setWeight(search::query::Weight(100)); + a.setUniqueId(0); + b.setWeight(search::query::Weight(200)); + b.setUniqueId(1); + c.setWeight(search::query::Weight(300)); + c.setUniqueId(2); + terms.push_back(QueryTerm(&a, 0.1)); + terms.push_back(QueryTerm(&b, 0.2)); + terms.push_back(QueryTerm(&c, 0.3)); + FtFeatureTest ft(_factory, "nativeProximity"); + FtQueryEnvironment & env = 
ft.getQueryEnv(); + env.getProperties().add("vespa.term.1.connexity", "0"); + env.getProperties().add("vespa.term.1.connexity", "0.8"); + env.getProperties().add("vespa.term.2.connexity", "1"); + env.getProperties().add("vespa.term.2.connexity", "0.6"); + { + NativeProximityExecutor::FieldSetup setup(0); + NativeProximityExecutor::TermPairVector & pairs = setup.pairs; + NativeProximityExecutor::generateTermPairs(env, terms, 0, setup); + EXPECT_EQUAL(pairs.size(), 0u); + NativeProximityExecutor::generateTermPairs(env, terms, 1, setup); + EXPECT_EQUAL(pairs.size(), 0u); + NativeProximityExecutor::generateTermPairs(env, terms, 2, setup); + EXPECT_EQUAL(pairs.size(), 2u); + EXPECT_TRUE(pairs[0].first.termData() == &a); + EXPECT_TRUE(pairs[0].second.termData() == &b); + EXPECT_EQUAL(pairs[0].connectedness, 0.8); + EXPECT_TRUE(pairs[1].first.termData() == &b); + EXPECT_TRUE(pairs[1].second.termData() == &c); + EXPECT_EQUAL(pairs[1].connectedness, 0.6); + EXPECT_EQUAL(setup.divisor, 118); // (10 + 40)*0.8 + (40 + 90)*0.6 + + pairs.clear(); + setup.divisor = 0; + + NativeProximityExecutor::generateTermPairs(env, terms, 3, setup); + EXPECT_EQUAL(pairs.size(), 3u); + EXPECT_TRUE(pairs[0].first.termData() == &a); + EXPECT_TRUE(pairs[0].second.termData() == &b); + EXPECT_EQUAL(pairs[0].connectedness, 0.8); + EXPECT_TRUE(pairs[1].first.termData() == &a); + EXPECT_TRUE(pairs[1].second.termData() == &c); + EXPECT_EQUAL(pairs[1].connectedness, 0.3); + EXPECT_TRUE(pairs[2].first.termData() == &b); + EXPECT_TRUE(pairs[2].second.termData() == &c); + EXPECT_EQUAL(pairs[2].connectedness, 0.6); + EXPECT_EQUAL(setup.divisor, 148); // (10 + 40)*0.8 + (10 + 90)*0.3 + (40 + 90)*0.6 + + pairs.clear(); + setup.divisor = 0; + a.setWeight(search::query::Weight(0)); + b.setWeight(search::query::Weight(0)); + + // test that (ab) is filtered away + NativeProximityExecutor::generateTermPairs(env, terms, 2, setup); + EXPECT_EQUAL(pairs.size(), 1u); + EXPECT_TRUE(pairs[0].first.termData() == &b); + 
EXPECT_TRUE(pairs[0].second.termData() == &c); + EXPECT_EQUAL(pairs[0].connectedness, 0.6); + } + } + + { // test executor + // 1 pair (only forward) + EXPECT_TRUE(assertNativeProximity(0, "a", "a")); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a")); + EXPECT_TRUE(assertNativeProximity(5, "a b", "a b")); + EXPECT_TRUE(assertNativeProximity(1, "a b", "a x x x x b")); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a x x x x x b")); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a x x x x x x b")); + EXPECT_TRUE(assertNativeProximity(5, "a b", "a x x a x a a b")); + EXPECT_TRUE(assertNativeProximity(5, "b a", "a x x a x a a b")); + + // 1 pair (both forward and backward) + EXPECT_TRUE(assertNativeProximity(10, "a b", "a b a")); + EXPECT_TRUE(assertNativeProximity(10, "b a", "a b a")); + EXPECT_TRUE(assertNativeProximity(10, "a a", "a a")); // term distance 1 + EXPECT_TRUE(assertNativeProximity(6, "a a", "a x x a")); // term distance 3 + EXPECT_TRUE(assertNativeProximity(9, "a b", "a x x x x x b x x x x a x x x b x x a x b a")); + EXPECT_TRUE(assertNativeProximity(9, "b a", "a x x x x x b x x x x a x x x b x x a x b a")); + + // 2 pairs ((ab),(bc)) + EXPECT_TRUE(assertNativeProximity(5, "a b c", "a b c")); + EXPECT_TRUE(assertNativeProximity(10, "a b c", "a b c b a")); + + // change weight + EXPECT_TRUE(assertNativeProximity(4, "a b c", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(4.2, "a!200 b c", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(3.8, "a b c!200", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(4.333, "a b c!0", "a b x x c")); // ((100+100)*5 + (100+0)*3) / 300 + EXPECT_TRUE(assertNativeProximity(5, "a b!0 c!0", "a b x x c")); // ((100+0)*5 + (0+0)*3) / 100 + EXPECT_TRUE(assertNativeProximity(0, "a!0 b!0", "a b")); + + // change significance + EXPECT_TRUE(assertNativeProximity(4.692, "a%1 b%0.1 c%0.1", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(3.308, "a%0.1 b%0.1 c%1", "a b x x c")); + + // change connectedness + 
EXPECT_TRUE(assertNativeProximity(4, "a 1:b 1:c", "a b x x c")); + EXPECT_TRUE(assertNativeProximity(3.667, "a 0.5:b 1:c", "a b x x c")); // (5*0.5 + 3*1) / (0.5 + 1) + + // change proximityImportance + Properties p = Properties().add("nativeProximity.proximityImportance", "1"); + EXPECT_TRUE(assertNativeProximity(10, "a b", "a b x x x a", p)); + p.clear().add("nativeProximity.proximityImportance", "0"); + EXPECT_TRUE(assertNativeProximity(4, "a b", "a b x x x a", p)); + + // use table normalization + p.clear().add("nativeRank.useTableNormalization", "true"); + // norm factor = (10*0.5 + 10*0.5) = 10 + EXPECT_TRUE(assertNativeProximity(0.5, "a b", "a b", p)); + EXPECT_TRUE(assertNativeProximity(0.5, "a b c", "a b c", p)); + EXPECT_TRUE(assertNativeProximity(1, "a b", "a b a", p)); + EXPECT_TRUE(assertNativeProximity(1, "a b c", "a b c b a", p)); + p.add("nativeProximity.proximityTable", "linear(0,0)"); + p.add("nativeProximity.reverseProximityTable", "linear(0,0)"); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p)); + + // change field weight + p.clear().add("vespa.fieldweight.foo", "0"); + EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p)); + + // change docId to give 0 hits + EXPECT_TRUE(assertNativeProximity(0, "a b", "a b", p.clear(), 2)); + } +} + +bool +Test::assertNativeProximity(feature_t score, + const vespalib::string & query, + const vespalib::string & field, + const Properties & props, + uint32_t docId) +{ + LOG(info, "assertNativeProximity(%f, '%s', '%s')", score, query.c_str(), field.c_str()); + + // Setup feature test. 
+ vespalib::string feature = "nativeProximity"; + FtFeatureTest ft(_factory, feature); + + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getTableManager().addFactory(ITableFactory::SP(new FunctionTableFactory(6))); + ft.getIndexEnv().getProperties().add("nativeProximity.proximityTable", "linear(-2,10)"); + ft.getIndexEnv().getProperties().add("nativeProximity.reverseProximityTable", "linear(-2,10)"); + ft.getIndexEnv().getProperties().add("nativeProximity.slidingWindowSize", "2"); + ft.getIndexEnv().getProperties().add("nativeRank.useTableNormalization", "false"); // make it easier to test + ft.getIndexEnv().getProperties().import(props); + StringVectorMap index; + index["foo"] = FtUtil::tokenize(field); + FT_SETUP(ft, FtUtil::toQuery(query), index, 1); + + // Execute and compare results. + if (!EXPECT_TRUE(ft.execute(score, EPS, docId))) { + return false; + } + return true; +} + +void +Test::testNativeRank() +{ + { // test blueprint + NativeRankBlueprint pt; + + EXPECT_TRUE(assertCreateInstance(pt, "nativeRank")); + + FtFeatureTest ft(_factory, ""); + + StringList params, in, out; + FT_SETUP_FAIL(pt, params.add("foo")); // field 'foo' not found + params.clear(); + + { + FT_SETUP_OK(pt, params, in.add("nativeFieldMatch").add("nativeProximity").add("nativeAttributeMatch"), + out.add("score")); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeRankParams & pas = (dynamic_cast<NativeRankBlueprint *>(bp.get()))->getParams(); + EXPECT_EQUAL(pas.fieldMatchWeight, 100u); + EXPECT_EQUAL(pas.attributeMatchWeight, 100u); + EXPECT_EQUAL(pas.proximityWeight, 25u); + } + { + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeRank.useTableNormalization", "false"); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeRankParams & pas = 
(dynamic_cast<NativeRankBlueprint *>(bp.get()))->getParams(); + EXPECT_EQUAL(pas.proximityWeight, 100u); + p.clear(); + } + { + Properties & p = ft.getIndexEnv().getProperties(); + p.add("nativeRank.fieldMatchWeight", "200"); + p.add("nativeRank.attributeMatchWeight", "300"); + p.add("nativeRank.proximityWeight", "400"); + FT_SETUP_OK(pt, params, in, out); + Blueprint::UP bp = pt.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setup(ft.getIndexEnv(), params); + const NativeRankParams & pas = (dynamic_cast<NativeRankBlueprint *>(bp.get()))->getParams(); + EXPECT_EQUAL(pas.fieldMatchWeight, 200u); + EXPECT_EQUAL(pas.attributeMatchWeight, 300u); + EXPECT_EQUAL(pas.proximityWeight, 400u); + } + + FT_DUMP(_factory, "nativeRank", ft.getIndexEnv(), StringList().add("nativeRank")); + + { // test optimizations when weight == 0 + Properties & p = ft.getIndexEnv().getProperties(); + p.clear(); + p.add("nativeRank.fieldMatchWeight", "0"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, + in.clear().add("value(0)").add("nativeProximity").add("nativeAttributeMatch"), out); + p.add("nativeRank.proximityWeight", "0"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, + in.clear().add("value(0)").add("value(0)").add("nativeAttributeMatch"), out); + p.add("nativeRank.attributeMatchWeight", "0"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, in.clear().add("value(0)").add("value(0)").add("value(0)"), out); + } + { // nativeRank for a subset of fields + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "foo"); + ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "bar"); + ft.getIndexEnv().getBuilder().addField(FieldType::INDEX, CollectionType::SINGLE, "baz"); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.add("foo").add("bar"), in, out); + ft.getIndexEnv().getProperties().clear(); + FT_SETUP_OK(pt, ft.getIndexEnv(), params, + 
in.clear().add("nativeFieldMatch(foo)").add("nativeProximity(foo)").add("nativeAttributeMatch(bar)"), out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("foo").add("baz"), + in.clear().add("nativeFieldMatch(foo,baz)").add("nativeProximity(foo,baz)").add("value(0)"), out); + FT_SETUP_OK(pt, ft.getIndexEnv(), params.clear().add("bar"), + in.clear().add("value(0)").add("value(0)").add("nativeAttributeMatch(bar)"), out); + } + } + + { // test executor + assertNativeRank(60, 1, 1, 1); + assertNativeRank(72, 3, 1, 1); + assertNativeRank(37.5, 0, 1, 3); + } +} + +bool +Test::assertNativeRank(feature_t score, + feature_t fieldMatchWeight, + feature_t attributeMatchWeight, + feature_t proximityWeight) +{ + LOG(info, "assertNativeRank(%f, %f, %f, %f)", score, fieldMatchWeight, attributeMatchWeight, proximityWeight); + + // Setup feature test. + vespalib::string feature = "nativeRank"; + FtFeatureTest ft(_factory, feature); + + ft.getIndexEnv().getProperties().add("nativeRank.fieldMatchWeight", + vespalib::make_string("%f", fieldMatchWeight)); + ft.getIndexEnv().getProperties().add("nativeRank.attributeMatchWeight", + vespalib::make_string("%f", attributeMatchWeight)); + ft.getIndexEnv().getProperties().add("nativeRank.proximityWeight", + vespalib::make_string("%f", proximityWeight)); + + ft.getOverrides().add("nativeFieldMatch", "90"); + ft.getOverrides().add("nativeAttributeMatch", "60"); + ft.getOverrides().add("nativeProximity", "30"); + + if (!EXPECT_TRUE(ft.setup())) { + return false; + } + + // Execute and compare results. + if (!EXPECT_TRUE(ft.execute(score, EPS))) { + return false; + } + return true; +} + + + +int +Test::Main() +{ + TEST_INIT("nativerank_test"); + + // Configure factory with all known blueprints. 
+ setup_fef_test_plugin(_factory); + setup_search_features(_factory); + + testNativeFieldMatch(); + testNativeAttributeMatch(); + testNativeProximity(); + testNativeRank(); + + TEST_DONE(); + return 0; +} + +} +} + +TEST_APPHOOK(search::features::Test); + diff --git a/searchlib/src/tests/nearsearch/.gitignore b/searchlib/src/tests/nearsearch/.gitignore new file mode 100644 index 00000000000..c6c72b1cd87 --- /dev/null +++ b/searchlib/src/tests/nearsearch/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +nearsearch_test +searchlib_nearsearch_test_app diff --git a/searchlib/src/tests/nearsearch/CMakeLists.txt b/searchlib/src/tests/nearsearch/CMakeLists.txt new file mode 100644 index 00000000000..a526a059a3d --- /dev/null +++ b/searchlib/src/tests/nearsearch/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_nearsearch_test_app + SOURCES + nearsearch_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_nearsearch_test_app COMMAND searchlib_nearsearch_test_app) diff --git a/searchlib/src/tests/nearsearch/DESC b/searchlib/src/tests/nearsearch/DESC new file mode 100644 index 00000000000..1af96b6ab4c --- /dev/null +++ b/searchlib/src/tests/nearsearch/DESC @@ -0,0 +1 @@ +nearsearch test. Take a look at nearsearch.cpp for details. diff --git a/searchlib/src/tests/nearsearch/FILES b/searchlib/src/tests/nearsearch/FILES new file mode 100644 index 00000000000..e8ff3e62114 --- /dev/null +++ b/searchlib/src/tests/nearsearch/FILES @@ -0,0 +1 @@ +nearsearch.cpp diff --git a/searchlib/src/tests/nearsearch/nearsearch_test.cpp b/searchlib/src/tests/nearsearch/nearsearch_test.cpp new file mode 100644 index 00000000000..9942bcecd4a --- /dev/null +++ b/searchlib/src/tests/nearsearch/nearsearch_test.cpp @@ -0,0 +1,247 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("nearsearch_test"); + +#include <vespa/searchlib/common/resultset.h> +#include <vespa/searchlib/queryeval/nearsearch.h> +#include <vespa/searchlib/queryeval/searchable.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <set> +#include <vespa/vespalib/testkit/testapp.h> + +//////////////////////////////////////////////////////////////////////////////// +// +// Utilities +// +//////////////////////////////////////////////////////////////////////////////// + +class UIntList : public std::set<uint32_t> { +public: + UIntList &add(uint32_t i) { + std::set<uint32_t>::insert(i); + return *this; + } +}; + +class MyTerm { +private: + std::set<uint32_t> _docs; + std::set<uint32_t> _data; + +public: + MyTerm(const std::set<uint32_t> &doc, + const std::set<uint32_t> &pos) + : _docs(doc), + _data(pos) + { + } + + search::queryeval::Blueprint::UP + make_blueprint(uint32_t fieldId, search::fef::TermFieldHandle handle) const + { + search::queryeval::FakeResult result; + for (std::set<uint32_t>::const_iterator doc = _docs.begin(); + doc != _docs.end(); ++doc) + { + result.doc(*doc); + for (std::set<uint32_t>::const_iterator pos = _data.begin(); + pos != _data.end(); ++pos) + { + result.pos(*pos); + } + } + return search::queryeval::Blueprint::UP( + new search::queryeval::FakeBlueprint( + search::queryeval::FieldSpec("<field>", fieldId, handle), + result)); + } +}; + +class MyQuery { +private: + std::vector<MyTerm*> _terms; + bool _ordered; + uint32_t _window; + +public: + MyQuery(bool ordered, uint32_t window) : + _terms(), + _ordered(ordered), + _window(window) { + // empty + } + + MyQuery &addTerm(MyTerm &term) { + _terms.push_back(&term); + return *this; + } + + uint32_t getNumTerms() const { 
+ return _terms.size(); + } + + MyTerm &getTerm(uint32_t i) { + return *_terms[i]; + } + + bool isOrdered() const { + return _ordered; + } + + uint32_t getWindow() const { + return _window; + } +}; + +//////////////////////////////////////////////////////////////////////////////// +// +// Setup +// +//////////////////////////////////////////////////////////////////////////////// + +class Test : public vespalib::TestApp { +private: + bool testNearSearch(MyQuery &query, uint32_t matchId); + +public: + int Main(); + void testBasicNear(); + void testRepeatedTerms(); +}; + +int +Test::Main() +{ + TEST_INIT("nearsearch_test"); + + testBasicNear(); TEST_FLUSH(); + testRepeatedTerms(); TEST_FLUSH(); + + TEST_DONE(); +} + +TEST_APPHOOK(Test); + +//////////////////////////////////////////////////////////////////////////////// +// +// Tests +// +//////////////////////////////////////////////////////////////////////////////// + +void +Test::testBasicNear() +{ + MyTerm foo(UIntList().add(69), + UIntList().add(6).add(11)); + for (uint32_t i = 0; i <= 1; ++i) { + TEST_STATE(vespalib::make_string("i = %u", i).c_str()); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo), 69)); + } + + MyTerm bar(UIntList().add(68).add(69).add(70), + UIntList().add(7).add(10)); + TEST_DO(testNearSearch(MyQuery(false, 0).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, 0).addTerm(foo).addTerm(bar), 0)); + for (uint32_t i = 1; i <= 2; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar), 69)); + } + + MyTerm baz(UIntList().add(69).add(70).add(71), + UIntList().add(8).add(9)); + for (uint32_t i = 0; i <= 1; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(baz).addTerm(bar), 0)); + 
TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(baz).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(foo).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(bar).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(baz).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(baz).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(foo).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(bar).addTerm(foo), 0)); + } + for (uint32_t i = 2; i <= 3; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(bar).addTerm(baz), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(baz).addTerm(bar), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(baz).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(bar).addTerm(foo).addTerm(baz), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(foo).addTerm(bar), 69)); + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(baz).addTerm(bar).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(bar).addTerm(baz), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(baz).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(baz).addTerm(foo), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(bar).addTerm(foo).addTerm(baz), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(foo).addTerm(bar), 0)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(baz).addTerm(bar).addTerm(foo), 69)); + } +} + +void 
+Test::testRepeatedTerms() +{ + MyTerm foo(UIntList().add(69), + UIntList().add(1).add(2).add(3)); + TEST_DO(testNearSearch(MyQuery(false, 0).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, 0).addTerm(foo).addTerm(foo), 0)); + for (uint32_t i = 1; i <= 2; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo), 69)); + } + + for (uint32_t i = 0; i <= 1; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo).addTerm(foo), 0)); + } + for (uint32_t i = 2; i <= 3; ++i) { + TEST_DO(testNearSearch(MyQuery(false, i).addTerm(foo).addTerm(foo).addTerm(foo), 69)); + TEST_DO(testNearSearch(MyQuery(true, i).addTerm(foo).addTerm(foo).addTerm(foo), 69)); + } +} + +bool +Test::testNearSearch(MyQuery &query, uint32_t matchId) +{ + LOG(info, "testNearSearch(%d)", matchId); + search::queryeval::IntermediateBlueprint *near_b = 0; + if (query.isOrdered()) { + near_b = new search::queryeval::ONearBlueprint(query.getWindow()); + } else { + near_b = new search::queryeval::NearBlueprint(query.getWindow()); + } + search::queryeval::Blueprint::UP bp(near_b); + search::fef::MatchDataLayout layout; + for (uint32_t i = 0; i < query.getNumTerms(); ++i) { + uint32_t fieldId = 0; + layout.allocTermField(fieldId); + near_b->addChild(query.getTerm(i).make_blueprint(fieldId, i)); + } + search::fef::MatchData::UP md(layout.createMatchData()); + + bp->fetchPostings(true); + search::queryeval::SearchIterator::UP near = bp->createSearch(*md, true); + near->initFullRange(); + bool foundMatch = false; + for (near->seek(1u); ! 
near->isAtEnd(); near->seek(near->getDocId() + 1)) { + uint32_t docId = near->getDocId(); + if (docId == matchId) { + foundMatch = true; + } else { + LOG(info, "Document %d matched unexpectedly.", docId); + return false; + } + } + if (matchId == 0) { + return EXPECT_TRUE(!foundMatch); + } else { + return EXPECT_TRUE(foundMatch); + } +} diff --git a/searchlib/src/tests/postinglistbm/.gitignore b/searchlib/src/tests/postinglistbm/.gitignore new file mode 100644 index 00000000000..ac71dde13e2 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/.gitignore @@ -0,0 +1,10 @@ +*.core +*.ilk +*.pdb +.depend +Makefile +core +core.* +postinglistbm +postinglistbm.exe +searchlib_postinglistbm_app diff --git a/searchlib/src/tests/postinglistbm/CMakeLists.txt b/searchlib/src/tests/postinglistbm/CMakeLists.txt new file mode 100644 index 00000000000..403c12da1b1 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/CMakeLists.txt @@ -0,0 +1,10 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_postinglistbm_app + SOURCES + postinglistbm.cpp + andstress.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_postinglistbm_app NO_VALGRIND COMMAND searchlib_postinglistbm_app -q -a) diff --git a/searchlib/src/tests/postinglistbm/andstress.cpp b/searchlib/src/tests/postinglistbm/andstress.cpp new file mode 100644 index 00000000000..f3fabde0d61 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/andstress.cpp @@ -0,0 +1,536 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP(".andstress"); +#include "andstress.h" +#include <vector> + +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/test/fakedata/fakeword.h> +#include <vespa/searchlib/test/fakedata/fakewordset.h> +#include <vespa/searchlib/test/fakedata/fakeposting.h> +#include <vespa/searchlib/test/fakedata/fakefilterocc.h> +#include <vespa/searchlib/test/fakedata/fakeegcompr64filterocc.h> +#include <vespa/searchlib/test/fakedata/fakezcfilterocc.h> +#include <vespa/searchlib/test/fakedata/fakezcbfilterocc.h> +#include <vespa/searchlib/test/fakedata/fpfactory.h> + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; +using namespace search::fakedata; + +namespace postinglistbm +{ + +class AndStressWorker; + +class AndStressMaster +{ +private: + AndStressMaster(const AndStressMaster &); + + AndStressMaster & + operator=(const AndStressMaster &); + + search::Rand48 &_rnd; + unsigned int _numDocs; + unsigned int _commonDocFreq; + std::vector<std::string> _postingTypes; + unsigned int _loops; + unsigned int _skipCommonPairsRate; + uint32_t _stride; + bool _unpack; + + FastOS_ThreadPool *_threadPool; + std::vector<AndStressWorker *> _workers; + unsigned int _workersDone; + + FakeWordSet &_wordSet; + + std::vector<std::vector<FakePosting::SP> > _postings; + + FastOS_Cond _taskCond; + unsigned int _taskIdx; + uint32_t _numTasks; + +public: + typedef std::pair<FakePosting *, FakePosting *> Task; +private: + std::vector<Task> _tasks; +public: + AndStressMaster(search::Rand48 &rnd, + FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector<std::string> &postingType, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); + + ~AndStressMaster(void); + + void + run(void); + + void + makePostingsHelper(FPFactory *postingFactory, + 
const std::string &postingFormat, + bool validate, bool verbose); + + void + dropPostings(void); + + void + dropTasks(void); + + void + resetTasks(void); // Prepare for rerun + + void + setupTasks(unsigned int numTasks); + + Task * + getTask(void); + + unsigned int + getNumDocs(void) const + { + return _numDocs; + } + + bool + getUnpack(void) const + { + return _unpack; + } + + double + runWorkers(const std::string &postingFormat); +}; + + +class AndStressWorker : public FastOS_Runnable +{ +private: + AndStressWorker(const AndStressWorker &); + + AndStressWorker & + operator=(const AndStressWorker &); + + AndStressMaster &_master; + unsigned int _id; +public: + AndStressWorker(AndStressMaster &master, unsigned int id); + + ~AndStressWorker(void); + + virtual void + Run(FastOS_ThreadInterface *thisThread, void *arg); +}; + + +template <class P> +FakePosting * +makePosting(FakeWord &fw) +{ + return new P(fw); +} + + +AndStressMaster::AndStressMaster(search::Rand48 &rnd, + FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector<std::string> &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack) + : _rnd(rnd), + _numDocs(numDocs), + _commonDocFreq(commonDocFreq), + _postingTypes(postingTypes), + _loops(loops), + _skipCommonPairsRate(skipCommonPairsRate), + _stride(stride), + _unpack(unpack), + _threadPool(NULL), + _workers(), + _workersDone(0), + _wordSet(wordSet), + _postings(FakeWordSet::NUM_WORDCLASSES), + _taskCond(), + _taskIdx(0), + _numTasks(numTasks), + _tasks() +{ + LOG(info, "AndStressMaster::AndStressMaster"); + + _threadPool = new FastOS_ThreadPool(128 * 1024, 400); +} + +template <class C> +static void +clearPtrVector(std::vector<C> &v) +{ + for (unsigned int i = 0; i < v.size(); ++i) + delete v[i]; + v.clear(); +} + + +AndStressMaster::~AndStressMaster(void) +{ + LOG(info, "AndStressMaster::~AndStressMaster"); + + _threadPool->Close(); + delete 
_threadPool; + _threadPool = NULL; + clearPtrVector(_workers); + dropPostings(); +} + + +void +AndStressMaster::dropPostings(void) +{ + for (unsigned int i = 0; i < _postings.size(); ++i) + _postings[i].clear(); + dropTasks(); +} + + +void +AndStressMaster::dropTasks(void) +{ + _tasks.clear(); + _taskIdx = 0; +} + + +void +AndStressMaster::resetTasks(void) +{ + _taskIdx = 0; +} + + +static void +makeSomePostings(FPFactory *postingFactory, + std::vector<FakeWord *> &w, + std::vector<FakePosting::SP> &p, + uint32_t stride, + bool validate, + bool verbose) +{ + for (unsigned int i = 0; i < w.size(); ++i) { + FakePosting::SP np(postingFactory->make(*w[i])); + if (validate) { + TermFieldMatchData md; + TermFieldMatchDataArray tfmda; + tfmda.add(&md); + + std::unique_ptr<SearchIterator> sb(np->createIterator(tfmda)); + if (np->hasWordPositions()) { + if (stride != 0) + w[i]->validate(sb.get(), tfmda, stride, verbose); + else + w[i]->validate(sb.get(), tfmda, verbose); + } else + w[i]->validate(sb.get(), verbose); + } + p.push_back(np); + } +} + +void +AndStressMaster::makePostingsHelper(FPFactory *postingFactory, + const std::string &postingFormat, + bool validate, bool verbose) +{ + FastOS_Time tv; + double before; + double after; + + tv.SetNow(); + before = tv.Secs(); + postingFactory->setup(_wordSet); + for (unsigned int i = 0; i < _wordSet._words.size(); ++i) + makeSomePostings(postingFactory, + _wordSet._words[i], _postings[i], + _stride, + validate, + verbose); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "AndStressMaster::makePostingsHelper elapsed %10.6f s for %s format", + after - before, + postingFormat.c_str()); +} + + +void +AndStressMaster::setupTasks(unsigned int numTasks) +{ + unsigned int wordclass1; + unsigned int wordclass2; + unsigned int word1idx; + unsigned int word2idx; + + for (unsigned int i = 0; i < numTasks; ++i) { + wordclass1 = _rnd.lrand48() % _postings.size(); + wordclass2 = _rnd.lrand48() % _postings.size(); + while (wordclass1 == 
FakeWordSet::COMMON_WORD && + wordclass2 == FakeWordSet::COMMON_WORD && + (_rnd.lrand48() % _skipCommonPairsRate) != 0) { + wordclass1 = _rnd.lrand48() % _postings.size(); + wordclass2 = _rnd.lrand48() % _postings.size(); + } + word1idx = _rnd.lrand48() % _postings[wordclass1].size(); + word2idx = _rnd.lrand48() % _postings[wordclass2].size(); + FakePosting::SP p1 = _postings[wordclass1][word1idx]; + FakePosting::SP p2 = _postings[wordclass2][word2idx]; + _tasks.push_back(std::make_pair(p1.get(), p2.get())); + } +} + + +AndStressMaster::Task * +AndStressMaster::getTask(void) +{ + Task *result = NULL; + _taskCond.Lock(); + if (_taskIdx < _tasks.size()) { + result = &_tasks[_taskIdx]; + ++_taskIdx; + } else { + _workersDone++; + if (_workersDone == _workers.size()) + _taskCond.Broadcast(); + } + _taskCond.Unlock(); + return result; +} + +void +AndStressMaster::run(void) +{ + LOG(info, "AndStressMaster::run"); + + std::vector<std::string>::const_iterator pti; + std::vector<std::string>::const_iterator ptie = _postingTypes.end() ; + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + std::unique_ptr<FPFactory> ff(getFPFactory(*pti, _wordSet.getSchema())); + makePostingsHelper(ff.get(), *pti, true, false); + setupTasks(_numTasks); + double totalTime = 0; + for (unsigned int loop = 0; loop < _loops; ++loop) { + totalTime += runWorkers(*pti); + resetTasks(); + } + LOG(info, "AndStressMaster::average run elapsed %10.6f s for workers %s format", + totalTime / _loops, pti->c_str()); + dropPostings(); + } + FastOS_Thread::Sleep(250); +} + + +double +AndStressMaster::runWorkers(const std::string &postingFormat) +{ + FastOS_Time tv; + double before; + double after; + + tv.SetNow(); + before = tv.Secs(); + unsigned int numWorkers = 8; + for (unsigned int i = 0; i < numWorkers; ++i) + _workers.push_back(new AndStressWorker(*this, i)); + + for (unsigned int i = 0; i < _workers.size(); ++i) + _threadPool->NewThread(_workers[i]); + _taskCond.Lock(); + while (_workersDone < 
_workers.size()) + _taskCond.Wait(); + _taskCond.Unlock(); + tv.SetNow(); + after = tv.Secs(); + LOG(info, + "AndStressMaster::run elapsed %10.6f s for workers %s format", + after - before, + postingFormat.c_str()); + clearPtrVector(_workers); + _workersDone = 0; + return after - before; +} + + +AndStressWorker::AndStressWorker(AndStressMaster &master, unsigned int id) + : _master(master), + _id(id) +{ + LOG(debug, "AndStressWorker::AndStressWorker, id=%u", id); +} + +AndStressWorker::~AndStressWorker(void) +{ + LOG(debug, "AndStressWorker::~AndStressWorker, id=%u", _id); +} + + +static int +highLevelAndPairPostingScan(SearchIterator &sb1, + SearchIterator &sb2, + uint32_t numDocs, uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb1.initFullRange(); + sb2.initFullRange(); + uint32_t docId = sb1.getDocId(); + while (docId < numDocs) { + if (sb1.seek(docId)) { + if (sb2.seek(docId)) { + ++hits; + ++docId; + } else if (docId < sb2.getDocId()) + docId = sb2.getDocId(); + else + ++docId; + } else if (docId < sb1.getDocId()) + docId= sb1.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + + +static int +highLevelAndPairPostingScanUnpack(SearchIterator &sb1, + SearchIterator &sb2, + uint32_t numDocs, + uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb1.initFullRange(); + sb2.initFullRange(); + uint32_t docId = sb1.getDocId(); + while (docId < numDocs) { + if (sb1.seek(docId)) { + if (sb2.seek(docId)) { + ++hits; + sb1.unpack(docId); + sb2.unpack(docId); + ++docId; + } else if (docId < sb2.getDocId()) + docId = sb2.getDocId(); + else + ++docId; + } else if (docId < sb1.getDocId()) + docId= sb1.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + +void +testFakePair(FakePosting &f1, FakePosting &f2, unsigned int numDocs, + bool unpack) 
+{ + TermFieldMatchData md1; + TermFieldMatchDataArray tfmda1; + tfmda1.add(&md1); + std::unique_ptr<SearchIterator> sb1(f1.createIterator(tfmda1)); + + TermFieldMatchData md2; + TermFieldMatchDataArray tfmda2; + tfmda1.add(&md2); + std::unique_ptr<SearchIterator> sb2(f2.createIterator(tfmda2)); + + int hits = 0; + uint64_t scanUnpackTime = 0; + if (unpack) + hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(), + numDocs, &scanUnpackTime); + else + hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(), + numDocs, &scanUnpackTime); +#if 0 + printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n", + f1.getName().c_str(), + f2.getName().c_str(), + hits, + scanUnpackTime); +#else + (void)hits; +#endif +} + +void +AndStressWorker::Run(FastOS_ThreadInterface *thisThread, void *arg) +{ + (void) thisThread; + (void) arg; + LOG(debug, "AndStressWorker::Run, id=%u", _id); + + bool unpack = _master.getUnpack(); + for (;;) { + AndStressMaster::Task *task = _master.getTask(); + if (task == NULL) + break; + testFakePair(*task->first, *task->second, _master.getNumDocs(), + unpack); + } +} + + +AndStress::AndStress(void) +{ + LOG(debug, "Andstress::AndStress"); +} + + +AndStress::~AndStress(void) +{ + LOG(debug, "Andstress::~AndStress"); +} + +void +AndStress::run(search::Rand48 &rnd, + FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector<std::string> &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack) +{ + LOG(debug, "Andstress::run"); + AndStressMaster master(rnd, wordSet, + numDocs, commonDocFreq, postingTypes, loops, + skipCommonPairsRate, + numTasks, + stride, + unpack); + master.run(); +} + +} diff --git a/searchlib/src/tests/postinglistbm/andstress.h b/searchlib/src/tests/postinglistbm/andstress.h new file mode 100644 index 00000000000..458866b09d5 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/andstress.h @@ -0,0 +1,43 @@ +// 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + + +// uint32_t and std::string appear in the declaration below, so include their headers here instead of relying on the includer +#include <cstdint> +#include <string> +#include <vector> +namespace search +{ +class Rand48; + +namespace fakedata +{ + +class FakeWordSet; + +} + +} + +namespace postinglistbm +{ + +// Entry point for the AND stress run; run() is implemented in andstress.cpp. +class AndStress +{ +public: + AndStress(void); + + ~AndStress(void); + + void + run(search::Rand48 &rnd, + search::fakedata::FakeWordSet &wordSet, + unsigned int numDocs, + unsigned int commonDocFreq, + const std::vector<std::string> &postingTypes, + unsigned int loops, + unsigned int skipCommonPairsRate, + uint32_t numTasks, + uint32_t stride, + bool unpack); +}; + +} // namespace postinglistbm + diff --git a/searchlib/src/tests/postinglistbm/postinglistbm.cpp b/searchlib/src/tests/postinglistbm/postinglistbm.cpp new file mode 100644 index 00000000000..fc93eb42dcd --- /dev/null +++ b/searchlib/src/tests/postinglistbm/postinglistbm.cpp @@ -0,0 +1,491 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2002-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("postinglistbm"); +#include <vespa/searchlib/common/bitvector.h> +#include <vespa/searchlib/common/resultset.h> +#include <vespa/searchlib/util/rand48.h> +#include "andstress.h" +#include <vespa/searchlib/test/fakedata/fakeword.h> +#include <vespa/searchlib/test/fakedata/fakeposting.h> +#include <vespa/searchlib/test/fakedata/fakewordset.h> +#include <vespa/searchlib/test/fakedata/fpfactory.h> +#include <vespa/searchlib/index/docidandfeatures.h> + +using search::ResultSet; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::queryeval::SearchIterator; +using search::index::Schema; +using namespace search::fakedata; + +// needed to resolve external symbol from httpd.h on AIX +void FastS_block_usr2() {} + + +namespace postinglistbm +{ + +class PostingListBM : public FastOS_Application +{ +private: + bool _verbose; + uint32_t _numDocs; + uint32_t _commonDocFreq; + uint32_t _numWordsPerClass; + std::vector<std::string> _postingTypes; + uint32_t _loops; + unsigned int _skipCommonPairsRate; + FakeWordSet _wordSet; + uint32_t _stride; + bool _unpack; +public: + search::Rand48 _rnd; + +private: + void Usage(void); + + void + badPostingType(const std::string &postingType); + + void + testFake(const std::string &postingType, + const Schema &schema, + const FakeWord &fw); +public: + PostingListBM(void); + ~PostingListBM(void); + int Main(void); +}; + + +// Prints the command line synopsis; keep it in sync with the GetOpt() option string in Main(). +void +PostingListBM::Usage(void) +{ + printf("postinglistbm " + "[-C <skipCommonPairsRate>] " + "[-T <single|array|weightedSet>] " + "[-a] " + "[-c <commonDocFreq>] " + "[-d <numDocs>] " + "[-l <numLoops>] " + "[-s <stride>] " + "[-t <postingType>] " + "[-u] " + "[-w <numWordsPerClass>] " + "[-q] " + "[-v]\n"); +} + + +void +PostingListBM::badPostingType(const std::string &postingType) +{ + printf("Bad posting list type: %s\n", postingType.c_str()); + 
printf("Supported types: "); + + std::vector<std::string> postingTypes = getPostingTypes(); + std::vector<std::string>::const_iterator pti; + std::vector<std::string>::const_iterator ptie = postingTypes.end(); + bool first = true; + + for (pti = postingTypes.begin(); pti != ptie; ++pti) { + if (first) + first = false; + else + printf(", "); + printf("%s", pti->c_str()); + } + printf("\n"); +} + + +PostingListBM::PostingListBM(void) + : _verbose(false), + _numDocs(10000000), + _commonDocFreq(50000), + _numWordsPerClass(100), + _postingTypes(), + _loops(1), + _skipCommonPairsRate(1), + _wordSet(), + _stride(0), + _unpack(false), + _rnd() +{ +} + + +PostingListBM::~PostingListBM(void) +{ +} + + +static int +highLevelSinglePostingScan(SearchIterator &sb, uint32_t numDocs, uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb.initFullRange(); + uint32_t docId = sb.getDocId(); + while (docId < numDocs) { + if (sb.seek(docId)) { + ++hits; + ++docId; + } else if (docId < sb.getDocId()) + docId= sb.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + + +static int +highLevelSinglePostingScanUnpack(SearchIterator &sb, + uint32_t numDocs, uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb.initFullRange(); + uint32_t docId = sb.getDocId(); + while (docId < numDocs) { + if (sb.seek(docId)) { + ++hits; + sb.unpack(docId); + ++docId; + } else if (docId < sb.getDocId()) + docId= sb.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + + +static int +highLevelAndPairPostingScan(SearchIterator &sb1, + SearchIterator &sb2, + uint32_t numDocs, uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + sb1.initFullRange(); + sb2.initFullRange(); + uint32_t docId = sb1.getDocId(); + while (docId < numDocs) { + if 
(sb1.seek(docId)) { + if (sb2.seek(docId)) { + ++hits; + ++docId; + } else if (docId < sb2.getDocId()) + docId = sb2.getDocId(); + else + ++docId; + } else if (docId < sb1.getDocId()) + docId= sb1.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + + +static int +highLevelAndPairPostingScanUnpack(SearchIterator &sb1, + SearchIterator &sb2, + uint32_t numDocs, + uint64_t *cycles) +{ + uint32_t hits = 0; + uint64_t before = fastos::ClockSystem::now(); + // both iterators are initialised before the first seek, as in highLevelAndPairPostingScan + sb1.initFullRange(); + sb2.initFullRange(); + uint32_t docId = sb1.getDocId(); + while (docId < numDocs) { + if (sb1.seek(docId)) { + if (sb2.seek(docId)) { + ++hits; + sb1.unpack(docId); + sb2.unpack(docId); + ++docId; + } else if (docId < sb2.getDocId()) + docId = sb2.getDocId(); + else + ++docId; + } else if (docId < sb1.getDocId()) + docId= sb1.getDocId(); + else + ++docId; + } + uint64_t after = fastos::ClockSystem::now(); + *cycles = after - before; + return hits; +} + + +void +PostingListBM::testFake(const std::string &postingType, + const Schema &schema, + const FakeWord &fw) +{ + std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema)); + std::vector<const FakeWord *> v; + v.push_back(&fw); + ff->setup(v); + FakePosting::SP f(ff->make(fw)); + + printf("%s.bitsize=%d+%d+%d+%d+%d\n", + f->getName().c_str(), + static_cast<int>(f->bitSize()), + static_cast<int>(f->l1SkipBitSize()), + static_cast<int>(f->l2SkipBitSize()), + static_cast<int>(f->l3SkipBitSize()), + static_cast<int>(f->l4SkipBitSize())); + TermFieldMatchData md; + TermFieldMatchDataArray tfmda; + tfmda.add(&md); + + std::unique_ptr<SearchIterator> sb(f->createIterator(tfmda)); + if (f->hasWordPositions()) + fw.validate(sb.get(), tfmda, _verbose); + else + fw.validate(sb.get(), _verbose); + uint64_t scanTime = 0; + uint64_t scanUnpackTime = 0; + TermFieldMatchData md2; + TermFieldMatchDataArray tfmda2; + tfmda2.add(&md2); + + std::unique_ptr<SearchIterator> 
sb2(f->createIterator(tfmda2)); + int hits1 = highLevelSinglePostingScan(*sb2.get(), fw.getDocIdLimit(), + &scanTime); + TermFieldMatchData md3; + TermFieldMatchDataArray tfmda3; + tfmda3.add(&md3); + + std::unique_ptr<SearchIterator> sb3(f->createIterator(tfmda3)); + int hits2 = highLevelSinglePostingScanUnpack(*sb3.get(), fw.getDocIdLimit(), + &scanUnpackTime); + printf("testFake '%s' hits1=%d, hits2=%d, scanTime=%" PRIu64 + ", scanUnpackTime=%" PRIu64 "\n", + f->getName().c_str(), + hits1, hits2, scanTime, scanUnpackTime); +} + + +void +testFakePair(const std::string &postingType, + const Schema &schema, + bool unpack, + const FakeWord &fw1, const FakeWord &fw2) +{ + std::unique_ptr<FPFactory> ff(getFPFactory(postingType, schema)); + std::vector<const FakeWord *> v; + v.push_back(&fw1); + v.push_back(&fw2); + ff->setup(v); + FakePosting::SP f1(ff->make(fw1)); + FakePosting::SP f2(ff->make(fw2)); + + TermFieldMatchData md1; + TermFieldMatchDataArray tfmda1; + tfmda1.add(&md1); + std::unique_ptr<SearchIterator> sb1(f1->createIterator(tfmda1)); + + // md2 belongs in tfmda2, the array handed to f2's iterator on the next line + TermFieldMatchData md2; + TermFieldMatchDataArray tfmda2; + tfmda2.add(&md2); + std::unique_ptr<SearchIterator> sb2(f2->createIterator(tfmda2)); + + int hits = 0; + uint64_t scanUnpackTime = 0; + if (unpack) + hits = highLevelAndPairPostingScanUnpack(*sb1.get(), *sb2.get(), + fw1.getDocIdLimit(), &scanUnpackTime); + else + hits = highLevelAndPairPostingScan(*sb1.get(), *sb2.get(), + fw1.getDocIdLimit(), &scanUnpackTime); + printf("Fakepair %s AND %s => %d hits, %" PRIu64 " cycles\n", + f1->getName().c_str(), + f2->getName().c_str(), + hits, + scanUnpackTime); +} + + +int +PostingListBM::Main(void) +{ + int argi; + char c; + const char *optArg; + bool doandstress; + + doandstress = false; + argi = 1; + bool hasElements = false; + bool hasElementWeights = false; + bool quick = false; + + while ((c = GetOpt("C:ac:d:l:s:t:uvw:T:q", optArg, argi)) != -1) { + switch(c) { + case 'C': + _skipCommonPairsRate = atoi(optArg); + // the rate is later used as a modulo divisor when the stress tasks are set up; reject 0 (also what atoi yields for non-numeric input) + if (_skipCommonPairsRate == 0) { + Usage(); + return 1; + } + 
break; + case 'T': + if (strcmp(optArg, "single") == 0) { + hasElements = false; + hasElementWeights = false; + } else if (strcmp(optArg, "array") == 0) { + hasElements = true; + hasElementWeights = false; + } else if (strcmp(optArg, "weightedSet") == 0) { + hasElements = true; + hasElementWeights = true; + } else { + printf("Bad collection type: %s\n", optArg); + return 1; + } + break; + case 'a': + doandstress = true; + break; + case 'c': + _commonDocFreq = atoi(optArg); + break; + case 'd': + _numDocs = atoi(optArg); + break; + case 'l': + _loops = atoi(optArg); + break; + case 's': + _stride = atoi(optArg); + break; + case 't': + do { + Schema schema; + Schema::IndexField indexField("field0", + Schema::STRING, + Schema::SINGLE); + schema.addIndexField(indexField); + std::unique_ptr<FPFactory> ff(getFPFactory(optArg, schema)); + if (ff.get() == NULL) { + badPostingType(optArg); + return 1; + } + } while (0); + _postingTypes.push_back(optArg); + break; + case 'u': + _unpack = true; + break; + case 'v': + _verbose = true; + break; + case 'w': + _numWordsPerClass = atoi(optArg); + break; + case 'q': + quick = true; + _numDocs = 36000; + _commonDocFreq = 10000; + _numWordsPerClass = 5; + break; + default: + Usage(); + return 1; + } + } + + if (_commonDocFreq > _numDocs) { + Usage(); + return 1; + } + + _wordSet.setupParams(hasElements, hasElementWeights); + + uint32_t w1dfreq = 10; + uint32_t w4dfreq = 790000; + uint32_t w5dfreq = 290000; + uint32_t w4w5od = 100000; + uint32_t numTasks = 40000; + if (quick) { + w1dfreq = 2; + w4dfreq = 19000; + w5dfreq = 5000; + w4w5od = 1000; + numTasks = 40; + } + + + FakeWord word1(_numDocs, w1dfreq, w1dfreq / 2, "word1", _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word2(_numDocs, 1000, 500, "word2", word1, 4, _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word3(_numDocs, _commonDocFreq, _commonDocFreq / 2, + "word3", word1, 10, _rnd, + _wordSet.getFieldsParams(), 
_wordSet.getPackedIndex()); + FakeWord word4(_numDocs, w4dfreq, w4dfreq / 2, + "word4", _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + FakeWord word5(_numDocs, w5dfreq, w5dfreq / 2, + "word5", word4, w4w5od, _rnd, + _wordSet.getFieldsParams(), _wordSet.getPackedIndex()); + + if (_postingTypes.empty()) + _postingTypes = getPostingTypes(); + std::vector<std::string>::const_iterator pti; + std::vector<std::string>::const_iterator ptie = _postingTypes.end() ; + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + testFake(*pti, _wordSet.getSchema(), word1); + testFake(*pti, _wordSet.getSchema(), word2); + testFake(*pti, _wordSet.getSchema(), word3); + } + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + testFakePair(*pti, _wordSet.getSchema(), false, word1, word3); + testFakePair(*pti, _wordSet.getSchema(), false, word2, word3); + } + + for (pti = _postingTypes.begin(); pti != ptie; ++pti) { + testFakePair(*pti, _wordSet.getSchema(), false, word4, word5); + } + + if (doandstress) { + _wordSet.setupWords(_rnd, _numDocs, _commonDocFreq, _numWordsPerClass); + } + if (doandstress) { + AndStress andstress; + andstress.run(_rnd, _wordSet, + _numDocs, _commonDocFreq, _postingTypes, _loops, + _skipCommonPairsRate, + numTasks, + _stride, + _unpack); + } + return 0; +} + +} // namespace postinglistbm + +int +main(int argc, char **argv) +{ + postinglistbm::PostingListBM app; + + setvbuf(stdout, NULL, _IOLBF, 32768); + app._rnd.srand48(32); + return app.Entry(argc, argv); + + return 0; +} diff --git a/searchlib/src/tests/postinglistbm/skip.txt b/searchlib/src/tests/postinglistbm/skip.txt new file mode 100644 index 00000000000..9804bce3c33 --- /dev/null +++ b/searchlib/src/tests/postinglistbm/skip.txt @@ -0,0 +1,75 @@ +B tree view: + + Leaf Nodes: segments of docid delta list + Interior Nodes: Segments of skip info lists + + Interior Nodes 1 level above leaf nodes: L1 skip info + Interior Nodes 2 level above leaf nodes: L2 skip info + +Example 
posting list, with stride 4 for L1 skip and L2 skip: + +DocIdPos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 17 18 +DocId: 1 11 21 31|41 51 61 71|81 91 101 111|121 131 141 151|161 171 181 + +(Assume continued with every 10th docid present) + +Old L1 skip info, pointing to start of leaf nodes, with first docid in +leaf node pre-decoded (i.e. containing copy of first docid entry in leaf node): + +L1Pos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 +DocId: 41 81 121 161|201 241 281 321|361 401 441 481|521 561 601 641|681 +DocIdPos: 5 9 13 17| 21 25 29 33| 37 41 45 49| 53 57 61 65| 69 + +Old L2 skip info, pointing to start of interior nodes 1 level above leaf nodes +and containing copies of previous L1 skip entry: + +L2Pos: 0 1 2 3 +DocId: 161 321 481 641 +DocIdPos: 17 33 49 65 +L1Pos: 4 8 12 16 + +Reason for change of skip info view: Avoiding null skips, simplifying code. + +Skip from docId 1 to docId 115 first skips to DocId 81 before ending +up at DocId 121. If next seek is to below 161, a null skip to docid +121 is performed since docId delta unpacking caught up with supposedly +next L1 skip docid. With L1 skip stride being N, 1/N of longer seeks +will unpack N extra docids, eating up the advantage of first docid in +leaf node being pre-decoded. + +If a seek to docId 115 is followed by a seek to docId 121, an unpack +of docId 121 and a seek to a higher docid, this causes, with the old L1 +skip info, features for docId 81, 91, 101, 111 to be decoded with the +result ignored before the features for docId 121 are decoded. For the +next seek, the null skip of DocId is also associated with a backwards +skip for features, so if the next feature to be decoded was for docId +141 then features for docId 121 will be decoded again and ignored. + +New L1 skip info, pointing to start of leaf nodes, without first docid +in leaf node pre-decoded (i.e. 
containing copy of last docid entry in +previous leaf node): + +L1Pos: 0 1 2 3| 4 5 6 7| 8 9 10 11| 12 13 14 15| 16 +DocId: 31 71 111 151|191 231 271 311|351 391 431 471|511 551 591 631|671 +DocIdPos: 4 8 12 16| 20 24 28 32| 36 40 44 48| 52 56 60 64| 68 + +New L2 skip info, pointing to start of interior nodes 1 level above leaf nodes +and containing copies of previous L1 skip entry: + +L2Pos: 0 1 2 3 +DocId: 151 311 471 631 +DocIdPos: 16 32 48 64 +L1Pos: 4 8 12 16 + +1 DocId delta is unpacked when using L1 or L2 skip, to get first docid +in leaf node. With old skip info, this wasn't needed. + +With new skip info, docid delta unpacking should never catch up with +next L1 skip docid (can become equal, but that's no longer sufficient +for triggering a skip). + +For each level upwards in skip info, one extra number is needed per element in +the skip info. + +For feature position (split docid/features), one extra number is needed per +element in the skip info. diff --git a/searchlib/src/tests/predicate/.gitignore b/searchlib/src/tests/predicate/.gitignore new file mode 100644 index 00000000000..eea4d347d05 --- /dev/null +++ b/searchlib/src/tests/predicate/.gitignore @@ -0,0 +1,13 @@ +searchlib_document_features_store_test_app +searchlib_predicate_bounds_posting_list_test_app +searchlib_predicate_index_test_app +searchlib_predicate_interval_posting_list_test_app +searchlib_predicate_interval_store_test_app +searchlib_predicate_range_term_expander_test_app +searchlib_predicate_ref_cache_test_app +searchlib_predicate_tree_analyzer_test_app +searchlib_predicate_tree_annotator_test_app +searchlib_predicate_zero_constraint_posting_list_test_app +searchlib_predicate_zstar_compressed_posting_list_test_app +searchlib_simple_index_test_app +searchlib_tree_crumbs_test_app diff --git a/searchlib/src/tests/predicate/CMakeLists.txt b/searchlib/src/tests/predicate/CMakeLists.txt new file mode 100644 index 00000000000..cd15356eeee --- /dev/null +++ 
b/searchlib/src/tests/predicate/CMakeLists.txt @@ -0,0 +1,92 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_predicate_index_test_app + SOURCES + predicate_index_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_index_test_app COMMAND searchlib_predicate_index_test_app) +vespa_add_executable(searchlib_simple_index_test_app + SOURCES + simple_index_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_simple_index_test_app COMMAND searchlib_simple_index_test_app) +vespa_add_executable(searchlib_tree_crumbs_test_app + SOURCES + tree_crumbs_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_tree_crumbs_test_app COMMAND searchlib_tree_crumbs_test_app) +vespa_add_executable(searchlib_predicate_tree_analyzer_test_app + SOURCES + predicate_tree_analyzer_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_tree_analyzer_test_app COMMAND searchlib_predicate_tree_analyzer_test_app) +vespa_add_executable(searchlib_predicate_tree_annotator_test_app + SOURCES + predicate_tree_annotator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_tree_annotator_test_app COMMAND searchlib_predicate_tree_annotator_test_app) +vespa_add_executable(searchlib_predicate_interval_store_test_app + SOURCES + predicate_interval_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_interval_store_test_app COMMAND searchlib_predicate_interval_store_test_app) +vespa_add_executable(searchlib_document_features_store_test_app + SOURCES + document_features_store_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_document_features_store_test_app COMMAND searchlib_document_features_store_test_app) +vespa_add_executable(searchlib_predicate_ref_cache_test_app + SOURCES + predicate_ref_cache_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME 
searchlib_predicate_ref_cache_test_app COMMAND searchlib_predicate_ref_cache_test_app) +vespa_add_executable(searchlib_predicate_interval_posting_list_test_app + SOURCES + predicate_interval_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_interval_posting_list_test_app COMMAND searchlib_predicate_interval_posting_list_test_app) +vespa_add_executable(searchlib_predicate_bounds_posting_list_test_app + SOURCES + predicate_bounds_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_bounds_posting_list_test_app COMMAND searchlib_predicate_bounds_posting_list_test_app) +vespa_add_executable(searchlib_predicate_zero_constraint_posting_list_test_app + SOURCES + predicate_zero_constraint_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_zero_constraint_posting_list_test_app COMMAND searchlib_predicate_zero_constraint_posting_list_test_app) +vespa_add_executable(searchlib_predicate_zstar_compressed_posting_list_test_app + SOURCES + predicate_zstar_compressed_posting_list_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_zstar_compressed_posting_list_test_app COMMAND searchlib_predicate_zstar_compressed_posting_list_test_app) +vespa_add_executable(searchlib_predicate_range_term_expander_test_app + SOURCES + predicate_range_term_expander_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_range_term_expander_test_app COMMAND searchlib_predicate_range_term_expander_test_app) diff --git a/searchlib/src/tests/predicate/OWNERS b/searchlib/src/tests/predicate/OWNERS new file mode 100644 index 00000000000..569bf1cc3a1 --- /dev/null +++ b/searchlib/src/tests/predicate/OWNERS @@ -0,0 +1 @@ +bjorncs diff --git a/searchlib/src/tests/predicate/document_features_store_test.cpp b/searchlib/src/tests/predicate/document_features_store_test.cpp new file mode 100644 index 00000000000..4baf2d03fbe --- /dev/null +++ 
b/searchlib/src/tests/predicate/document_features_store_test.cpp @@ -0,0 +1,225 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for document_features_store. + +#include <vespa/log/log.h> +LOG_SETUP("document_features_store_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/document_features_store.h> +#include <vespa/searchlib/predicate/predicate_index.h> +#include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/searchlib/predicate/predicate_hash.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <string> + +using namespace search; +using namespace search::predicate; +using std::string; + +namespace { + +const uint64_t hash1 = 0x12345678; +const uint64_t hash2 = 0x123456789a; +const uint32_t doc_id = 42; + +TEST("require that DocumentFeaturesStore can store features.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.features.push_back(hash1); + annotations.features.push_back(hash2); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(2u, features.size()); + EXPECT_EQUAL(1u, features.count(hash1)); + EXPECT_EQUAL(1u, features.count(hash2)); + + features_store.remove(doc_id); + features = features_store.get(doc_id); + EXPECT_TRUE(features.empty()); +} + +template <typename Set> +void expectHash(const string &label, const Set &set) { + TEST_STATE(label.c_str()); + uint64_t hash = PredicateHash::hash64(label); + EXPECT_EQUAL(1u, set.count(hash)); +} + +TEST("require that DocumentFeaturesStore can store ranges.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 2, 4}); + annotations.range_features.push_back({"bar", 7, 13}); + annotations.range_features.push_back({"baz", 9, 19}); + annotations.range_features.push_back({"qux", -10, 10}); + 
annotations.range_features.push_back({"quux", -39, -10}); + annotations.range_features.push_back({"corge", -9, -1}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(13u, features.size()); + expectHash("foo=0", features); + + expectHash("bar=0", features); + expectHash("bar=10", features); + + expectHash("baz=0", features); + expectHash("baz=10-19", features); + + expectHash("qux=-10", features); + expectHash("qux=-9-0", features); + expectHash("qux=10", features); + expectHash("qux=0-9", features); + + expectHash("quux=-19-10", features); + expectHash("quux=-29-20", features); + expectHash("quux=-39-30", features); + + expectHash("corge=-9-0", features); +} + +TEST("require that DocumentFeaturesStore can store large ranges.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 10, 199}); + annotations.range_features.push_back({"bar", 100, 239}); + annotations.range_features.push_back({"baz", -999, 999}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(17u, features.size()); + expectHash("foo=10-19", features); + expectHash("foo=20-29", features); + expectHash("foo=30-39", features); + expectHash("foo=40-49", features); + expectHash("foo=50-59", features); + expectHash("foo=60-69", features); + expectHash("foo=70-79", features); + expectHash("foo=80-89", features); + expectHash("foo=90-99", features); + expectHash("foo=100-199", features); + + expectHash("bar=200-209", features); + expectHash("bar=210-219", features); + expectHash("bar=220-229", features); + expectHash("bar=230-239", features); + expectHash("bar=100-199", features); + + expectHash("baz=-999-0", features); + expectHash("baz=0-999", features); +} + +TEST("require that DocumentFeaturesStore can use very large ranges.") { + DocumentFeaturesStore features_store(2); + PredicateTreeAnnotations annotations; 
+ annotations.range_features.push_back({"foo", LLONG_MIN, 39}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(4u, features.size()); + expectHash("foo=-9223372036854775808", features); + expectHash("foo=-9223372036854775807-0", features); + expectHash("foo=0-31", features); + expectHash("foo=32-39", features); +} + +TEST("require that duplicate range features are removed.") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 80, 199}); + annotations.range_features.push_back({"foo", 85, 199}); + annotations.range_features.push_back({"foo", 90, 199}); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(4u, features.size()); + expectHash("foo=80-89", features); + expectHash("foo=90-99", features); + expectHash("foo=100-199", features); + expectHash("foo=80", features); +} + +TEST("require that only unique features are returned") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + annotations.features.push_back(PredicateHash::hash64("foo=100-199")); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(1u, features.size()); + expectHash("foo=100-199", features); +} + +TEST("require that both features and ranges are removed by 'remove'") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + annotations.features.push_back(PredicateHash::hash64("foo=100-199")); + features_store.insert(annotations, doc_id); + features_store.remove(doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(0u, features.size()); +} + +TEST("require that both features and ranges counts towards memory usage") { + DocumentFeaturesStore 
features_store(10); + EXPECT_EQUAL(332u, features_store.getMemoryUsage().usedBytes()); + + PredicateTreeAnnotations annotations; + annotations.features.push_back(PredicateHash::hash64("foo=100-199")); + features_store.insert(annotations, doc_id); + EXPECT_EQUAL(340u, features_store.getMemoryUsage().usedBytes()); + + annotations.features.clear(); + annotations.range_features.push_back({"foo", 100, 199}); + features_store.insert(annotations, doc_id + 1); + EXPECT_EQUAL(436u, features_store.getMemoryUsage().usedBytes()); +} + +TEST("require that DocumentFeaturesStore can be serialized") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + annotations.features.push_back(PredicateHash::hash64("foo=bar")); + features_store.insert(annotations, doc_id); + + auto features = features_store.get(doc_id); + ASSERT_EQUAL(2u, features.size()); + expectHash("foo=bar", features); + expectHash("foo=100-199", features); + + vespalib::MMapDataBuffer buffer; + features_store.serialize(buffer); + + DocumentFeaturesStore features_store2(buffer); + features = features_store2.get(doc_id); + ASSERT_EQUAL(2u, features.size()); + expectHash("foo=bar", features); + expectHash("foo=100-199", features); +} + +TEST("require that serialization cleans up wordstore") { + DocumentFeaturesStore features_store(10); + PredicateTreeAnnotations annotations; + annotations.range_features.push_back({"foo", 100, 199}); + features_store.insert(annotations, doc_id); + EXPECT_EQUAL(428u, features_store.getMemoryUsage().usedBytes()); + annotations.range_features.push_back({"bar", 100, 199}); + features_store.insert(annotations, doc_id + 1); + EXPECT_EQUAL(720u, features_store.getMemoryUsage().usedBytes()); + features_store.remove(doc_id + 1); + EXPECT_EQUAL(672u, features_store.getMemoryUsage().usedBytes()); + + vespalib::MMapDataBuffer buffer; + features_store.serialize(buffer); + DocumentFeaturesStore 
features_store2(buffer); + EXPECT_EQUAL(428u, features_store2.getMemoryUsage().usedBytes()); +} + + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp new file mode 100644 index 00000000000..c54e6f49cc7 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_bounds_posting_list_test.cpp @@ -0,0 +1,107 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_bounds_posting_list. + +#include <vespa/log/log.h> +LOG_SETUP("predicate_bounds_posting_list_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/searchlib/predicate/predicate_bounds_posting_list.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search; +using namespace search::predicate; + +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider limit_provider; +SimpleIndexConfig config; +const uint64_t hash = 0x123; + +TEST("require that empty bounds posting list starts at 0.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + btree::EntryRef ref; + PredicateBoundsPostingList<PredicateIndex::BTreeIterator> + posting_list(index.getIntervalStore(), + index.getBoundsIndex().getBTreePostingList(ref), 42); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.next(0)); +} + +void checkNext(PredicateBoundsPostingList<PredicateIndex::BTreeIterator> &posting_list, uint32_t move_past, + uint32_t doc_id, 
uint32_t interval_count) { + std::ostringstream ost; + ost << "checkNext(posting_list, " << move_past << ", " << doc_id + << ", " << interval_count << ")"; + TEST_STATE(ost.str().c_str()); + ASSERT_TRUE(posting_list.next(move_past)); + EXPECT_EQUAL(doc_id, posting_list.getDocId()); + for (uint32_t i = 0; i < interval_count - 1; ++i) { + ASSERT_TRUE(posting_list.nextInterval()); + } + ASSERT_FALSE(posting_list.nextInterval()); +} + +TEST("require that bounds posting list checks bounds.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + const auto &bounds_index = index.getBoundsIndex(); + for (uint32_t id = 1; id < 100; ++id) { + PredicateTreeAnnotations annotations(id); + auto &vec = annotations.bounds_map[hash]; + for (uint32_t i = 0; i <= id; ++i) { + uint32_t bounds; + if (id < 30) { + bounds = 0x80000000 | i; // diff >= i + } else if (id < 60) { + bounds = 0x40000000 | i; // diff < i + } else { + bounds = (i << 16) | (i + 10); // i < diff < i+10 + } + vec.push_back(IntervalWithBounds{(i + 1) << 16 | 0xffff, bounds}); + } + index.indexDocument(id, annotations); + } + index.commit(); + auto it = bounds_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto ref = it.getData(); + + PredicateBoundsPostingList<PredicateIndex::BTreeIterator> + posting_list(index.getIntervalStore(), + bounds_index.getBTreePostingList(ref), 5); + checkNext(posting_list, 0, 1, 2); // [0..] -> [1..] + checkNext(posting_list, 1, 2, 3); // [0..] -> [2..] + checkNext(posting_list, 10, 11, 6); // [0..] -> [5..] 
+ checkNext(posting_list, 20, 21, 6); + + checkNext(posting_list, 30, 31, 26); // [..5] -> [..30] + checkNext(posting_list, 50, 51, 46); + + checkNext(posting_list, 60, 61, 6); // [0..10] -> [5..15] + + + PredicateBoundsPostingList<PredicateIndex::BTreeIterator> + posting_list2(index.getIntervalStore(), + bounds_index.getBTreePostingList(ref), 40); + checkNext(posting_list2, 0, 1, 2); + checkNext(posting_list2, 1, 2, 3); + checkNext(posting_list2, 20, 21, 22); // [0..] -> [21..] + + checkNext(posting_list2, 30, 41, 1); // skip ahead to match + checkNext(posting_list2, 35, 41, 1); + checkNext(posting_list2, 50, 51, 11); // [..40] -> [..50] + + checkNext(posting_list2, 60, 61, 10); // [31..40] -> [40..49] +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_index_test.cpp b/searchlib/src/tests/predicate/predicate_index_test.cpp new file mode 100644 index 00000000000..b22c80294d0 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_index_test.cpp @@ -0,0 +1,363 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_index. 
+ +#include <vespa/log/log.h> +LOG_SETUP("predicate_index_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/predicate_index.h> +#include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/attribute/predicate_attribute.h> + +using namespace search; +using namespace search::predicate; +using std::make_pair; +using std::pair; +using std::vector; + +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider dummy_provider; +SimpleIndexConfig simple_index_config; + +TEST("require that PredicateIndex can index empty documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); +} + +TEST("require that indexDocument don't index empty documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + PredicateTreeAnnotations annotations; + index.indexDocument(3, annotations); + index.commit(); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); +} + +TEST("require that PredicateIndex can remove empty documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); + index.removeDocument(2); + index.commit(); + EXPECT_EQUAL(0u, 
index.getZeroConstraintDocs().size()); +} + +TEST("require that indexing the same empty document multiple times is ok") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_EQUAL(0u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); + index.indexEmptyDocument(2); + index.commit(); + EXPECT_EQUAL(1u, index.getZeroConstraintDocs().size()); +} + +void indexFeature(PredicateIndex &attr, uint32_t doc_id, int min_feature, + const vector<pair<uint64_t, Interval>> &intervals, + const vector<pair<uint64_t, IntervalWithBounds>> &bounds) { + PredicateTreeAnnotations annotations(min_feature); + for (auto &p : intervals) { + annotations.interval_map[p.first] = std::vector<Interval>{{p.second}}; + annotations.features.push_back(p.first); + } + for (auto &p : bounds) { + annotations.bounds_map[p.first] = + std::vector<IntervalWithBounds>{{p.second}}; + annotations.features.push_back(p.first); + } + attr.indexDocument(doc_id, annotations); +} + +PredicateIndex::BTreeIterator +lookupPosting(const PredicateIndex &index, uint64_t hash) { + const auto &interval_index = index.getIntervalIndex(); + auto it = interval_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto entry = it.getData(); + EXPECT_TRUE(entry.valid()); + + auto posting_it = interval_index.getBTreePostingList(entry); + ASSERT_TRUE(posting_it.valid()); + return posting_it; +} + +const int min_feature = 3; +const int k = min_feature - 1; +const uint32_t doc_id = 2; +const uint64_t hash = 0x12345; +const uint64_t hash2 = 0x3456; +const Interval interval = {0x0001ffff}; +const IntervalWithBounds bounds = {0x0001ffff, 0x03}; +Interval single_buf; + +TEST("require that PredicateIndex can index document") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); 
+ indexFeature(index, doc_id, min_feature, {{hash, interval}}, {}); + index.commit(); + + auto posting_it = lookupPosting(index, hash); + EXPECT_EQUAL(doc_id, posting_it.getKey()); + uint32_t size; + const auto &interval_list = + index.getIntervalStore().get(posting_it.getData(), size, &single_buf); + ASSERT_EQUAL(1u, size); + EXPECT_EQUAL(interval, interval_list[0]); +} + +TEST("require that PredicateIndex can index document with bounds") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, {}, {{hash, bounds}}); + index.commit(); + + const auto &bounds_index = index.getBoundsIndex(); + auto it = bounds_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto entry = it.getData(); + EXPECT_TRUE(entry.valid()); + + auto posting_it = bounds_index.getBTreePostingList(entry); + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(doc_id, posting_it.getKey()); + + uint32_t size; + IntervalWithBounds single; + const auto &interval_list = + index.getIntervalStore().get(posting_it.getData(), size, &single); + ASSERT_EQUAL(1u, size); + EXPECT_EQUAL(bounds, interval_list[0]); +} + +TEST("require that PredicateIndex can index multiple documents " + "with the same feature") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, min_feature, {{hash, interval}}, {}); + } + index.commit(); + + auto posting_it = lookupPosting(index, hash); + for (uint32_t id = 1; id < 100; ++id) { + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(id, posting_it.getKey()); + uint32_t size; + const auto &interval_list = index.getIntervalStore().get( + posting_it.getData(), size, &single_buf); + ASSERT_EQUAL(1u, size); + EXPECT_EQUAL(interval, interval_list[0]); + 
++posting_it; + } + ASSERT_FALSE(posting_it.valid()); +} + +TEST("require that PredicateIndex can remove indexed documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, + {{hash, interval}}, {{hash2, bounds}}); + index.removeDocument(doc_id); + index.commit(); + auto it = index.getIntervalIndex().lookup(hash); + ASSERT_FALSE(it.valid()); + auto it2 = index.getBoundsIndex().lookup(hash2); + ASSERT_FALSE(it2.valid()); + + // Remove again. Nothing should happen. + index.removeDocument(doc_id); +} + +TEST("require that PredicateIndex can remove multiple documents") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + const auto &interval_index = index.getIntervalIndex(); + EXPECT_FALSE(interval_index.lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, min_feature, {{hash, interval}}, {}); + } + index.commit(); + for (uint32_t id = 1; id < 110; ++id) { + index.removeDocument(id); + index.commit(); + auto it = interval_index.lookup(hash); + if (id < 99) { + ASSERT_TRUE(it.valid()); + } else { + ASSERT_FALSE(it.valid()); + } + } +} + +TEST("require that PredicateIndex can remove multiple documents with " + "multiple features") { + vector<pair<uint64_t, Interval>> intervals; + vector<pair<uint64_t, IntervalWithBounds>> bounds_intervals; + for (int i = 0; i < 100; ++i) { + intervals.push_back(make_pair(hash + i, interval)); + bounds_intervals.push_back(make_pair(hash2 + i, bounds)); + } + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + const auto &interval_index = index.getIntervalIndex(); + EXPECT_FALSE(interval_index.lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, id, intervals, bounds_intervals); + } + 
index.commit(); + for (uint32_t id = 1; id < 100; ++id) { + index.removeDocument((id + 50) % 99 + 1); + index.commit(); + auto it = interval_index.lookup(hash); + if (id < 99) { + ASSERT_TRUE(it.valid()); + } else { + ASSERT_FALSE(it.valid()); + } + } +} + +// Helper function for next test. +template <typename Iterator, typename IntervalT> +void checkAllIntervals(Iterator posting_it, IntervalT expected_interval, + const PredicateIntervalStore &interval_store) { + for (uint32_t id = 1; id < 100u; ++id) { + ASSERT_TRUE(posting_it.valid()); + EXPECT_EQUAL(id, posting_it.getKey()); + btree::EntryRef ref = posting_it.getData(); + ASSERT_TRUE(ref.valid()); + uint32_t size; + IntervalT single; + const IntervalT *read_interval = + interval_store.get(ref, size, &single); + EXPECT_EQUAL(1u, size); + EXPECT_EQUAL(expected_interval, read_interval[0]); + ++posting_it; + } +} + +namespace { +struct DocIdLimitFinder : SimpleIndexDeserializeObserver<> { + uint32_t &_doc_id_limit; + DocIdLimitFinder(uint32_t &doc_id_limit) : _doc_id_limit(doc_id_limit) + { + doc_id_limit = 0u; + } + void notifyInsert(uint64_t, uint32_t doc_id, uint32_t) { + _doc_id_limit = std::max(_doc_id_limit, doc_id); + } +}; +} + +TEST("require that PredicateIndex can be (de)serialized") { + vector<pair<uint64_t, Interval>> intervals; + vector<pair<uint64_t, IntervalWithBounds>> bounds_intervals; + for (int i = 0; i < 100; ++i) { + intervals.push_back(make_pair(hash + i, interval)); + bounds_intervals.push_back(make_pair(hash2 + i, bounds)); + } + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 8); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + for (uint32_t id = 1; id < 100; ++id) { + indexFeature(index, id, id, intervals, bounds_intervals); + index.indexEmptyDocument(id + 100); + } + index.commit(); + + vespalib::MMapDataBuffer buffer; + index.serialize(buffer); + uint32_t doc_id_limit; + DocIdLimitFinder finder(doc_id_limit); + 
PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config, + buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); + const PredicateIntervalStore &interval_store = index2.getIntervalStore(); + EXPECT_EQUAL(199u, doc_id_limit); + + EXPECT_EQUAL(index.getArity(), index2.getArity()); + EXPECT_EQUAL(index.getZeroConstraintDocs().size(), + index2.getZeroConstraintDocs().size()); + { + auto it = index2.getZeroConstraintDocs().begin(); + for (uint32_t i = 1; i < 100u; ++i) { + TEST_STATE(vespalib::make_string("%d", i).c_str()); + ASSERT_TRUE(it.valid()); + EXPECT_EQUAL(i + 100, it.getKey()); + ++it; + } + EXPECT_FALSE(it.valid()); + } + + const auto &interval_index = index2.getIntervalIndex(); + const auto &bounds_index = index2.getBoundsIndex(); + for (int i = 0; i < 100; ++i) { + { + auto it = interval_index.lookup(hash + i); + ASSERT_TRUE(it.valid()); + auto posting_it = interval_index.getBTreePostingList(it.getData()); + checkAllIntervals(posting_it, interval, interval_store); + } + { + auto it = bounds_index.lookup(hash2 + i); + ASSERT_TRUE(it.valid()); + auto posting_it = bounds_index.getBTreePostingList(it.getData()); + checkAllIntervals(posting_it, bounds, interval_store); + } + } +} + +TEST("require that DocumentFeaturesStore is restored on deserialization") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + EXPECT_FALSE(index.getIntervalIndex().lookup(hash).valid()); + indexFeature(index, doc_id, min_feature, + {{hash, interval}}, {{hash2, bounds}}); + vespalib::MMapDataBuffer buffer; + index.serialize(buffer); + uint32_t doc_id_limit; + DocIdLimitFinder finder(doc_id_limit); + PredicateIndex index2(generation_handler, generation_holder, dummy_provider, simple_index_config, + buffer, finder, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION); + const auto &interval_index = index2.getIntervalIndex(); + const auto &bounds_index = index2.getBoundsIndex(); + 
EXPECT_EQUAL(doc_id, doc_id_limit); + + auto it = interval_index.lookup(hash); + EXPECT_TRUE(it.valid()); + auto it2 = bounds_index.lookup(hash2); + EXPECT_TRUE(it2.valid()); + + index2.removeDocument(doc_id); + index2.commit(); + + it = interval_index.lookup(hash); + EXPECT_FALSE(it.valid()); + it2 = bounds_index.lookup(hash2); + EXPECT_FALSE(it2.valid()); +} + +TEST("require that hold lists are attempted emptied on destruction") { + PredicateIndex index(generation_handler, generation_holder, dummy_provider, simple_index_config, 10); + indexFeature(index, doc_id, min_feature, + {{hash, interval}}, {{hash2, bounds}}); + { + auto guard = generation_handler.takeGuard(); + index.removeDocument(doc_id); + index.commit(); + } + // No assert on index destruction. +} +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp new file mode 100644 index 00000000000..1c44c096717 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_interval_posting_list_test.cpp @@ -0,0 +1,80 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_interval_posting_list. 
+ +#include <vespa/log/log.h> +LOG_SETUP("predicate_interval_posting_list_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/searchlib/predicate/predicate_interval_posting_list.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search; +using namespace search::predicate; +namespace { + +struct DummyDocIdLimitProvider : public DocIdLimitProvider { + virtual uint32_t getDocIdLimit() const { return 10000; } + virtual uint32_t getCommittedDocIdLimit() const { return 10000; } +}; + +vespalib::GenerationHandler generation_handler; +vespalib::GenerationHolder generation_holder; +DummyDocIdLimitProvider limit_provider; +SimpleIndexConfig config; +const uint64_t hash = 0x123; + +TEST("require that empty posting list starts at 0.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + btree::EntryRef ref; + PredicateIntervalPostingList<PredicateIndex::BTreeIterator> + posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref)); + EXPECT_EQUAL(0u, posting_list.getDocId()); + EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_FALSE(posting_list.next(0)); +} + +TEST("require that posting list can iterate.") { + PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8); + const auto &interval_index = index.getIntervalIndex(); + for (uint32_t id = 1; id < 100; ++id) { + PredicateTreeAnnotations annotations(id); + auto &vec = annotations.interval_map[hash]; + for (uint32_t i = 0; i < id; ++i) { + vec.push_back(Interval{(i + 1) << 16 | 0xffff}); + } + index.indexDocument(id, annotations); + } + index.commit(); + auto it = interval_index.lookup(hash); + ASSERT_TRUE(it.valid()); + auto ref = it.getData(); + + PredicateIntervalPostingList<PredicateIndex::BTreeIterator> + posting_list(index.getIntervalStore(), interval_index.getBTreePostingList(ref)); + EXPECT_EQUAL(0u, posting_list.getDocId()); 
+ EXPECT_EQUAL(0u, posting_list.getInterval()); + EXPECT_TRUE(posting_list.next(0)); + EXPECT_EQUAL(1u, posting_list.getDocId()); + EXPECT_EQUAL(0x0001ffffu, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + ASSERT_TRUE(posting_list.next(1)); + EXPECT_EQUAL(2u, posting_list.getDocId()); + EXPECT_EQUAL(0x0001ffffu, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + EXPECT_EQUAL(0x0002ffffu, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); + + ASSERT_TRUE(posting_list.next(50)); + EXPECT_EQUAL(51u, posting_list.getDocId()); + for (uint32_t i = 0; i < 50; ++i) { + EXPECT_EQUAL((i + 1) << 16 | 0xffff, posting_list.getInterval()); + ASSERT_TRUE(posting_list.nextInterval()); + } + EXPECT_EQUAL(0x0033ffffu, posting_list.getInterval()); + ASSERT_FALSE(posting_list.nextInterval()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_interval_store_test.cpp b/searchlib/src/tests/predicate/predicate_interval_store_test.cpp new file mode 100644 index 00000000000..bfe6340e222 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_interval_store_test.cpp @@ -0,0 +1,152 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_interval_store. 
+ +#include <vespa/log/log.h> +LOG_SETUP("predicate_interval_store_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/predicate_interval_store.h> + +#include <vespa/searchlib/predicate/predicate_index.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vector> + +using namespace search; +using namespace search::predicate; +using std::vector; + +namespace { + +TEST("require that empty interval list gives invalid ref") { + PredicateIntervalStore store; + vector<Interval> interval_list; + auto ref = store.insert(interval_list); + ASSERT_FALSE(ref.valid()); +} + +Interval single_buf; + +template <typename IntervalT> +void testInsertAndRetrieve(const std::vector<IntervalT> &interval_list) { + std::ostringstream ost; + ost << "Type name: " << typeid(IntervalT).name() << ", intervals:"; + for (auto &i : interval_list) { + ost << " 0x" << std::hex << i.interval; + } + TEST_STATE(ost.str().c_str()); + PredicateIntervalStore store; + auto ref = store.insert(interval_list); + ASSERT_TRUE(ref.valid()); + + uint32_t size; + IntervalT single; + const IntervalT *intervals = store.get(ref, size, &single); + EXPECT_EQUAL(interval_list.size(), size); + ASSERT_TRUE(intervals); + for (size_t i = 0; i < interval_list.size(); ++i) { + EXPECT_EQUAL(interval_list[i], intervals[i]); + } +} + +TEST("require that single interval entry can be inserted") { + testInsertAndRetrieve<Interval>({{0x0001ffff}}); + testInsertAndRetrieve<IntervalWithBounds>({{0x0001ffff, 0x3}}); +} + +TEST("require that multi-interval entry can be inserted") { + testInsertAndRetrieve<Interval>({{0x00010001}, {0x0002ffff}}); + testInsertAndRetrieve<Interval>( + {{0x00010001}, {0x00020002}, {0x0003ffff}}); + testInsertAndRetrieve<Interval>( + {{0x00010001}, {0x00020002}, {0x00030003}, {0x00040004}, + {0x00050005}, {0x00060006}, {0x00070007}, {0x00080008}, + {0x0009ffff}}); + testInsertAndRetrieve<IntervalWithBounds>( + {{0x00010001, 0x4}, {0x0002ffff, 0x10}}); + 
testInsertAndRetrieve<IntervalWithBounds>( + {{0x00010001, 0x4}, {0x00020002, 0x10}, {0x00030003, 0x20}, + {0x00040004, 0x6}, {0x0005ffff, 0x7}}); +} + +TEST("require that multiple multi-interval entries can be retrieved") { + PredicateIntervalStore store; + auto ref = store.insert<Interval>({{1}, {2}}); + ASSERT_TRUE(ref.valid()); + ref = store.insert<Interval>({{3}, {4}}); + ASSERT_TRUE(ref.valid()); + + uint32_t size; + const Interval *intervals = store.get(ref, size, &single_buf); + EXPECT_EQUAL(2u, size); + ASSERT_TRUE(intervals); + EXPECT_EQUAL(3u, intervals[0].interval); + EXPECT_EQUAL(4u, intervals[1].interval); +} + +/* +TEST("require that entries can be removed and reused") { + GenerationHandler gen_handler; + PredicateIntervalStore store(gen_handler); + auto ref = store.insert<IntervalWithBounds>({{0x0001ffff, 5}}); + ASSERT_TRUE(ref.valid()); + store.remove(ref); + + auto ref2 = store.insert<Interval>({{1}, {2}, {3}, {4}, {5}, + {6}, {7}, {8}, {9}}); + ASSERT_TRUE(ref2.valid()); + store.remove(ref2); + store.commit(); + + auto ref3 = store.insert<IntervalWithBounds>({{0x0002ffff, 10}}); + ASSERT_EQUAL(ref.ref(), ref3.ref()); + + uint32_t size; + IntervalWithBounds single; + const IntervalWithBounds *bounds = store.get(ref3, size, &single); + EXPECT_EQUAL(1u, size); + EXPECT_EQUAL(0x0002ffffu, bounds->interval); + EXPECT_EQUAL(10u, bounds->bounds); + + auto ref4 = store.insert<Interval>({{2}, {3}, {4}, {5}, + {6}, {7}, {8}, {9}, {10}}); + ASSERT_EQUAL(ref2.ref(), ref4.ref()); + + const Interval *intervals = store.get(ref4, size, &single_buf); + EXPECT_EQUAL(9u, size); + EXPECT_EQUAL(2u, intervals[0].interval); + EXPECT_EQUAL(10u, intervals[8].interval); +} +*/ + +TEST("require that single interval entries are optimized") { + PredicateIntervalStore store; + auto ref = store.insert<Interval>({{0x0001ffff}}); + ASSERT_TRUE(ref.valid()); + ASSERT_EQUAL(0x0001ffffu, ref.ref()); + + uint32_t size; + const Interval *intervals = store.get(ref, size, 
&single_buf); + ASSERT_EQUAL(intervals, &single_buf); + EXPECT_EQUAL(0x0001ffffu, single_buf.interval); + + store.remove(ref); // Should do nothing +} + +TEST("require that interval refs are reused for identical data.") { + PredicateIntervalStore store; + auto ref = store.insert<Interval>({{0x00010001}, {0x0002ffff}}); + ASSERT_TRUE(ref.valid()); + ASSERT_EQUAL(0x02000040u, ref.ref()); + + auto ref2 = store.insert<Interval>({{0x00010001}, {0x0002ffff}}); + EXPECT_EQUAL(ref.ref(), ref2.ref()); + + uint32_t size; + const Interval *intervals = store.get(ref, size, &single_buf); + EXPECT_EQUAL(0x00010001u, intervals[0].interval); + EXPECT_EQUAL(0x0002ffffu, intervals[1].interval); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp b/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp new file mode 100644 index 00000000000..47c29184dcf --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_range_term_expander_test.cpp @@ -0,0 +1,332 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_range_term_expander. 
+ +#include <vespa/log/log.h> +LOG_SETUP("predicate_range_term_expander_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/predicate_range_term_expander.h> +#include <vespa/vespalib/testkit/testapp.h> + +using search::predicate::PredicateRangeTermExpander; +using std::vector; +using vespalib::string; + +namespace { + +struct MyRangeHandler { + vector<string> expected_labels; + string expected_edge_label; + uint64_t expected_edge_value; + size_t i; + ~MyRangeHandler() { + EXPECT_EQUAL(expected_labels.size(), i); + } + void handleRange(const string &label) { + TEST_STATE(("handleRange: " + label).c_str()); + ASSERT_TRUE(i < expected_labels.size()); + EXPECT_EQUAL(expected_labels[i++], label); + } + void handleEdge(const string &label, uint64_t value) { + TEST_STATE(("handleEdge: " + label).c_str()); + EXPECT_EQUAL(expected_edge_label, label); + EXPECT_EQUAL(expected_edge_value, value); + } +}; + +TEST("require that small range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999", + "key=0-99999", + "key=0-999999", + "key=0-9999999", + "key=0-99999999", + "key=0-999999999", + "key=0-9999999999", + "key=0-99999999999", + "key=0-999999999999", + "key=0-9999999999999", + "key=0-99999999999999", + "key=0-999999999999999", + "key=0-9999999999999999", + "key=0-99999999999999999", + "key=0-999999999999999999"}, "key=40", 2, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that large range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=123456789012345670-123456789012345679", + "key=123456789012345600-123456789012345699", + "key=123456789012345000-123456789012345999", + "key=123456789012340000-123456789012349999", + "key=123456789012300000-123456789012399999", + "key=123456789012000000-123456789012999999", + "key=123456789010000000-123456789019999999", + 
"key=123456789000000000-123456789099999999", + "key=123456789000000000-123456789999999999", + "key=123456780000000000-123456789999999999", + "key=123456700000000000-123456799999999999", + "key=123456000000000000-123456999999999999", + "key=123450000000000000-123459999999999999", + "key=123400000000000000-123499999999999999", + "key=123000000000000000-123999999999999999", + "key=120000000000000000-129999999999999999", + "key=100000000000000000-199999999999999999", + "key=0-999999999999999999"}, + "key=123456789012345670", 8, 0}; + expander.expand("key", 123456789012345678, range_handler); +} + +TEST("require that max range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{}, "key=9223372036854775800", 7, 0}; + expander.expand("key", 9223372036854775807, range_handler); +} + +TEST("require that small negative range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=-49-40", + "key=-99-0", + "key=-999-0", + "key=-9999-0", + "key=-99999-0", + "key=-999999-0", + "key=-9999999-0", + "key=-99999999-0", + "key=-999999999-0", + "key=-9999999999-0", + "key=-99999999999-0", + "key=-999999999999-0", + "key=-9999999999999-0", + "key=-99999999999999-0", + "key=-999999999999999-0", + "key=-9999999999999999-0", + "key=-99999999999999999-0", + "key=-999999999999999999-0"}, "key=-40", 2, 0}; + expander.expand("key", -42, range_handler); +} + +TEST("require that min range is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{}, "key=-9223372036854775800", 8, 0}; + expander.expand("key", -9223372036854775808ull, range_handler); +} +TEST("require that min range - 9 is expanded") { + PredicateRangeTermExpander expander(10); + MyRangeHandler range_handler{{ + "key=-9223372036854775799-9223372036854775790", + "key=-9223372036854775799-9223372036854775700"}, + "key=-9223372036854775790", 9, 0}; + expander.expand("key", -9223372036854775799ll, range_handler); +} + 
+TEST("require that min range is expanded with arity 8") { + PredicateRangeTermExpander expander(8); + MyRangeHandler range_handler{{}, "key=-9223372036854775808", 0, 0}; + expander.expand("key", -9223372036854775808ull, range_handler); +} + +TEST("require that small range is expanded in arity 2") { + PredicateRangeTermExpander expander(2); + MyRangeHandler range_handler{{ + "key=42-43", + "key=40-43", + "key=40-47", + "key=32-47", + "key=32-63", + "key=0-63", + "key=0-127", + "key=0-255", + "key=0-511", + "key=0-1023", + "key=0-2047", + "key=0-4095", + "key=0-8191", + "key=0-16383", + "key=0-32767", + "key=0-65535", + "key=0-131071", + "key=0-262143", + "key=0-524287", + "key=0-1048575", + "key=0-2097151", + "key=0-4194303", + "key=0-8388607", + "key=0-16777215", + "key=0-33554431", + "key=0-67108863", + "key=0-134217727", + "key=0-268435455", + "key=0-536870911", + "key=0-1073741823", + "key=0-2147483647", + "key=0-4294967295", + "key=0-8589934591", + "key=0-17179869183", + "key=0-34359738367", + "key=0-68719476735", + "key=0-137438953471", + "key=0-274877906943", + "key=0-549755813887", + "key=0-1099511627775", + "key=0-2199023255551", + "key=0-4398046511103", + "key=0-8796093022207", + "key=0-17592186044415", + "key=0-35184372088831", + "key=0-70368744177663", + "key=0-140737488355327", + "key=0-281474976710655", + "key=0-562949953421311", + "key=0-1125899906842623", + "key=0-2251799813685247", + "key=0-4503599627370495", + "key=0-9007199254740991", + "key=0-18014398509481983", + "key=0-36028797018963967", + "key=0-72057594037927935", + "key=0-144115188075855871", + "key=0-288230376151711743", + "key=0-576460752303423487", + "key=0-1152921504606846975", + "key=0-2305843009213693951", + "key=0-4611686018427387903", + "key=0-9223372036854775807"}, "key=42", 0, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that small negative range is expanded in arity 2") { + PredicateRangeTermExpander expander(2); + MyRangeHandler range_handler{{ + 
"key=-43-42", + "key=-43-40", + "key=-47-40", + "key=-47-32", + "key=-63-32", + "key=-63-0", + "key=-127-0", + "key=-255-0", + "key=-511-0", + "key=-1023-0", + "key=-2047-0", + "key=-4095-0", + "key=-8191-0", + "key=-16383-0", + "key=-32767-0", + "key=-65535-0", + "key=-131071-0", + "key=-262143-0", + "key=-524287-0", + "key=-1048575-0", + "key=-2097151-0", + "key=-4194303-0", + "key=-8388607-0", + "key=-16777215-0", + "key=-33554431-0", + "key=-67108863-0", + "key=-134217727-0", + "key=-268435455-0", + "key=-536870911-0", + "key=-1073741823-0", + "key=-2147483647-0", + "key=-4294967295-0", + "key=-8589934591-0", + "key=-17179869183-0", + "key=-34359738367-0", + "key=-68719476735-0", + "key=-137438953471-0", + "key=-274877906943-0", + "key=-549755813887-0", + "key=-1099511627775-0", + "key=-2199023255551-0", + "key=-4398046511103-0", + "key=-8796093022207-0", + "key=-17592186044415-0", + "key=-35184372088831-0", + "key=-70368744177663-0", + "key=-140737488355327-0", + "key=-281474976710655-0", + "key=-562949953421311-0", + "key=-1125899906842623-0", + "key=-2251799813685247-0", + "key=-4503599627370495-0", + "key=-9007199254740991-0", + "key=-18014398509481983-0", + "key=-36028797018963967-0", + "key=-72057594037927935-0", + "key=-144115188075855871-0", + "key=-288230376151711743-0", + "key=-576460752303423487-0", + "key=-1152921504606846975-0", + "key=-2305843009213693951-0", + "key=-4611686018427387903-0", + "key=-9223372036854775807-0"}, "key=-42", 0, 0}; + expander.expand("key", -42, range_handler); +} + +TEST("require that upper bound is used") { + PredicateRangeTermExpander expander(10, -99, 9999); + MyRangeHandler range_handler{{ + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999"}, "key=40", 2, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that lower bound is used") { + PredicateRangeTermExpander expander(10, -9999, 99); + MyRangeHandler range_handler{{ + "key=-49-40", + "key=-99-0", + "key=-999-0", + "key=-9999-0"}, "key=-40", 
2, 0}; + expander.expand("key", -42, range_handler); +} + +TEST("require that value outside bounds is not used") { + PredicateRangeTermExpander expander(10, -99, 99); + MyRangeHandler range_handler{{}, "handleEdge is never called", 2, 0}; + expander.expand("key", 100, range_handler); +} + +TEST("require that upper and lower bound > 0 works") { + PredicateRangeTermExpander expander(10, 100, 9999); + MyRangeHandler range_handler{{ + "key=140-149", + "key=100-199", + "key=0-999", + "key=0-9999"}, "key=140", 2, 0}; + expander.expand("key", 142, range_handler); +} + +TEST("require that search close to uneven upper bound is sensible") { + PredicateRangeTermExpander expander(10, -99, 1234); + MyRangeHandler range_handler{{ + "key=40-49", + "key=0-99", + "key=0-999", + "key=0-9999"}, "key=40", 2, 0}; + expander.expand("key", 42, range_handler); +} + +TEST("require that search close to max uneven upper bound is sensible") { + PredicateRangeTermExpander expander(10, 0, 9223372036854771234); + MyRangeHandler range_handler{{ + "key=9223372036854770000-9223372036854770009", + "key=9223372036854770000-9223372036854770099", + "key=9223372036854770000-9223372036854770999"}, + "key=9223372036854770000", 0, 0}; + expander.expand("key", 9223372036854770000, range_handler); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp b/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp new file mode 100644 index 00000000000..a51f3d678d5 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_ref_cache_test.cpp @@ -0,0 +1,106 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_ref_cache. 
+ +#include <vespa/log/log.h> +LOG_SETUP("predicate_ref_cache_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/predicate/predicate_ref_cache.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vector> + +using namespace search; +using namespace search::predicate; + +namespace { + +struct MyBufferStore { + std::vector<uint32_t> store; + const uint32_t *getBuffer(uint32_t ref) const { + ASSERT_LESS(ref, store.size()); + return &store[ref]; + } + uint32_t insert(uint32_t value) { + size_t size = store.size(); + store.push_back(value); + return size | 0x01000000; // size = 1 + } + uint32_t insert(std::vector<uint32_t> data) { + size_t size = store.size(); + uint8_t data_size = data.size(); + if (data.size() >= 0xff) { + store.push_back(data.size()); + data_size = 0xff; + } + store.insert(store.end(), data.begin(), data.end()); + return size | (data_size << 24); + } +}; + +TEST("require that single entries are cached") { + MyBufferStore store; + PredicateRefCache<MyBufferStore> cache(store); + + uint32_t ref = store.insert(42); + uint32_t new_ref = cache.insert(ref); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref2 = store.insert(42); + new_ref = cache.insert(ref2); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref3 = store.insert(44); + new_ref = cache.insert(ref3); + EXPECT_EQUAL(ref3, new_ref); +} + +TEST("require that multivalue entries are cached") { + MyBufferStore store; + PredicateRefCache<MyBufferStore> cache(store); + + std::vector<uint32_t> data1 = {1, 2, 3, 4, 5}; + std::vector<uint32_t> data2 = {1, 2, 3, 4, 6}; + uint32_t ref = store.insert(data1); + uint32_t new_ref = cache.insert(ref); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref2 = store.insert(data1); + new_ref = cache.insert(ref2); + EXPECT_EQUAL(ref, new_ref); + + uint32_t ref3 = store.insert(data2); + new_ref = cache.insert(ref3); + EXPECT_EQUAL(ref3, new_ref); +} + +TEST("require that entries can be looked up") { + MyBufferStore store; + PredicateRefCache<MyBufferStore> 
cache(store); + + uint32_t data = 42; + EXPECT_EQUAL(0u, cache.find(&data, 1)); + uint32_t ref = store.insert(42); + cache.insert(ref); + EXPECT_EQUAL(ref, cache.find(&data, 1)); +} + +TEST("require that cache handles large entries") { + MyBufferStore store; + PredicateRefCache<MyBufferStore> cache(store); + + std::vector<uint32_t> data1(300); + std::vector<uint32_t> data2(300); + data2.back() = 42; + uint32_t ref1 = store.insert(data1); + cache.insert(ref1); + EXPECT_EQUAL(ref1, cache.find(&data1[0], data1.size())); + EXPECT_EQUAL(0u, cache.find(&data2[0], data2.size())); + uint32_t ref2 = store.insert(data2); + uint32_t ref = cache.insert(ref2); + EXPECT_EQUAL(ref, ref2); + EXPECT_EQUAL(ref2, cache.find(&data2[0], data2.size())); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp b/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp new file mode 100644 index 00000000000..f455abced3f --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_tree_analyzer_test.cpp @@ -0,0 +1,157 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for PredicateTreeAnalyzer. 
+ +#include <vespa/log/log.h> +LOG_SETUP("PredicateTreeAnalyzer_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/document/predicate/predicate.h> +#include <vespa/document/predicate/predicate_slime_builder.h> +#include <vespa/searchlib/predicate/predicate_tree_analyzer.h> +#include <vespa/vespalib/testkit/testapp.h> + +using document::PredicateSlimeBuilder; +using namespace search; +using namespace search::predicate; +using document::Predicate; +using vespalib::Slime; +using vespalib::slime::Cursor; +using std::map; +using std::string; + +namespace { +typedef PredicateSlimeBuilder Builder; + +TEST("require that minfeature is 1 for simple term") { + auto slime(Builder().feature("foo").value("bar").build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(1, analyzer.getSize()); + EXPECT_TRUE(analyzer.getSizeMap().empty()); +} + +TEST("require that minfeature is 1 for simple negative term") { + auto slime(Builder().neg().feature("foo").value("bar").build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(2, analyzer.getSize()); +} + +void checkSizeMap(const map<string, int> &map, const string &key, int val) { + auto it = map.find(key); + ASSERT_TRUE(it != map.end()); + EXPECT_EQUAL(val, it->second); +} + +TEST("require that minfeature is sum for and") { + auto slime(Builder() + .and_node({Builder().feature("foo").value("bar"), + Builder().feature("baz").value("qux"), + Builder().feature("quux").value("corge")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(3, analyzer.getMinFeature()); + EXPECT_EQUAL(3, analyzer.getSize()); + EXPECT_EQUAL(3u, analyzer.getSizeMap().size()); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a2", 1)); +} + +TEST("require that minfeature is min for or") { + auto 
slime(Builder().or_node + ({Builder().and_node + ({Builder().feature("foo").value("bar"), + Builder().feature("baz").value("qux"), + Builder().feature("quux").value("corge")}), + Builder().and_node + ({Builder().feature("grault").value("garply"), + Builder().feature("waldo").value("fred")})}) + .build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(2, analyzer.getMinFeature()); + EXPECT_EQUAL(5, analyzer.getSize()); + EXPECT_EQUAL(5u, analyzer.getSizeMap().size()); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a1", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o0a2", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o1a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "o1a1", 1)); +} + +TEST("require that minfeature rounds up") { + auto slime(Builder() + .or_node({Builder().feature("foo").value("bar"), + Builder().feature("foo").value("bar"), + Builder().feature("foo").value("bar")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(3, analyzer.getSize()); +} + +TEST("require that multivalue feature set considers all values") { + { + auto slime(Builder() + .and_node({Builder().feature("foo").value("A").value("B"), + Builder().feature("foo").value("B")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(1, analyzer.getMinFeature()); + EXPECT_EQUAL(2, analyzer.getSize()); + } + { + auto slime(Builder() + .and_node({Builder().feature("foo").value("A").value("B"), + Builder().feature("foo").value("C")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(2, analyzer.getMinFeature()); + EXPECT_EQUAL(2, analyzer.getSize()); + } +} + +TEST("require that not-features don't count towards minfeature calculation") { + auto slime(Builder() + .and_node({Builder().feature("foo").value("A"), + Builder().neg().feature("foo").value("A"), + 
Builder().neg().feature("foo").value("B"), + Builder().feature("foo").value("B")}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(3, analyzer.getMinFeature()); + EXPECT_EQUAL(6, analyzer.getSize()); +} + +TEST("require that not-ranges don't count towards minfeature calculation") { + auto slime(Builder() + .and_node({Builder().feature("foo").range(0, 10), + Builder().neg().feature("foo").range(0, 10), + Builder().neg().feature("bar").range(0, 10), + Builder().feature("bar").range(0, 10)}).build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(3, analyzer.getMinFeature()); + EXPECT_EQUAL(6, analyzer.getSize()); +} + +TEST("require that multilevel AND stores sizes") { + auto slime(Builder().and_node + ({Builder().and_node + ({Builder().feature("foo").value("bar"), + Builder().feature("baz").value("qux"), + Builder().feature("quux").value("corge")}), + Builder().and_node + ({Builder().feature("grault").value("garply"), + Builder().feature("waldo").value("fred")})}) + .build()); + PredicateTreeAnalyzer analyzer(slime->get()); + EXPECT_EQUAL(5, analyzer.getMinFeature()); + EXPECT_EQUAL(5, analyzer.getSize()); + EXPECT_EQUAL(7u, analyzer.getSizeMap().size()); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0", 3)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1", 2)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a1", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a0a2", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1a0", 1)); + TEST_DO(checkSizeMap(analyzer.getSizeMap(), "a1a1", 1)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp b/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp new file mode 100644 index 00000000000..92271cd1c20 --- /dev/null +++ b/searchlib/src/tests/predicate/predicate_tree_annotator_test.cpp @@ -0,0 +1,381 @@ +// Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for PredicateTreeAnnotator. + +#include <vespa/log/log.h> +LOG_SETUP("PredicateTreeAnnotator_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/document/predicate/predicate.h> +#include <vespa/document/predicate/predicate_slime_builder.h> +#include <vespa/searchlib/predicate/predicate_index.h> +#include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/searchlib/predicate/predicate_hash.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <sstream> +#include <vector> + +using document::Predicate; +using std::ostringstream; +using std::pair; +using std::string; +using std::vector; +using vespalib::Slime; +using vespalib::slime::Cursor; +using namespace search; +using namespace search::predicate; +using namespace document::predicate_slime_builder; + +namespace { +Cursor &makeAndNode(Cursor &obj) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_CONJUNCTION); + return obj.setArray(Predicate::CHILDREN); +} + +Cursor &makeOrNode(Cursor &obj) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_DISJUNCTION); + return obj.setArray(Predicate::CHILDREN); +} + +void makeFeatureSet(Cursor &obj, const string &key, const string &value) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET); + obj.setString(Predicate::KEY, key); + Cursor &set = obj.setArray(Predicate::SET); + set.addString(value); +} + +void makeHashedFeatureRange(Cursor &obj, const string &key, + const vector<string> &partitions, + const vector<vector<int64_t> >& edge_partitions) { + obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_RANGE); + obj.setString(Predicate::KEY, key); + Cursor &p = obj.setArray(Predicate::HASHED_PARTITIONS); + for (auto partition : partitions) { + p.addLong(PredicateHash::hash64(partition)); + } + Cursor &e = obj.setArray(Predicate::HASHED_EDGE_PARTITIONS); + for (auto 
edge_partition : edge_partitions) { + ostringstream label; + label << key << "=" << edge_partition[0]; + uint64_t hash = PredicateHash::hash64(label.str()); + int64_t value = edge_partition[1]; + int64_t payload = edge_partition[2]; + + Cursor &o = e.addObject(); + o.setLong(Predicate::HASH, hash); + o.setLong(Predicate::VALUE, value); + o.setLong(Predicate::PAYLOAD, payload); + } +} + +void checkInterval(const PredicateTreeAnnotations &result, + const string &feature, vector<uint32_t> expected) { + TEST_STATE(("Check interval: " + feature).c_str()); + uint64_t hash = PredicateHash::hash64(feature); + auto it = result.interval_map.find(hash); + ASSERT_TRUE(it != result.interval_map.end()); + const auto &intervals = it->second; + ASSERT_EQUAL(expected.size(), intervals.size()); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQUAL(expected[i], intervals[i].interval); + } +} + +void checkBounds(const PredicateTreeAnnotations &result, + const string &feature, + vector<IntervalWithBounds> expected) { + TEST_STATE(("Check bounds: " + feature).c_str()); + uint64_t hash = PredicateHash::hash64(feature); + auto it = result.bounds_map.find(hash); + ASSERT_TRUE(it != result.bounds_map.end()); + const auto &intervals = it->second; + ASSERT_EQUAL(expected.size(), intervals.size()); + for (size_t i = 0; i < expected.size(); ++i) { + EXPECT_EQUAL(expected[i].interval, intervals[i].interval); + EXPECT_EQUAL(expected[i].bounds, intervals[i].bounds); + } +} + +TEST("require that OR intervals are the same") { + Slime slime; + Cursor &children = makeOrNode(slime.setObject()); + makeFeatureSet(children.addObject(), "key1", "value1"); + makeFeatureSet(children.addObject(), "key2", "value2"); + + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime.get(), result); + + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(2u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key1=value1", {0x00010002}); + 
checkInterval(result, "key2=value2", {0x00010002}); +} + +TEST("require that ANDs below ORs get different intervals") { + auto slime = orNode({andNode({featureSet("key1", {"value1"}), + featureSet("key1", {"value1"}), + featureSet("key1", {"value1"})}), + andNode({featureSet("key2", {"value2"}), + featureSet("key2", {"value2"}), + featureSet("key2", {"value2"})})}); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(1u, result.min_feature); + EXPECT_EQUAL(6u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key1=value1", {0x00010001, 0x00020002, 0x00030006}); + checkInterval(result, "key2=value2", {0x00010004, 0x00050005, 0x00060006}); +} + +TEST("require that NOTs get correct intervals") { + auto slime = andNode({featureSet("key", {"value"}), + neg(featureSet("key", {"value"})), + featureSet("key", {"value"}), + neg(featureSet("key", {"value"}))}); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(2u, result.min_feature); // needs key=value and z-star + EXPECT_EQUAL(6u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key=value", + {0x00010001, 0x00020002, 0x00040004, 0x00050005}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00020001, 0x00050004}); +} + +TEST("require that NOT inverts ANDs and ORs") { + auto slime = neg(andNode({featureSet("key", {"value"}), + neg(featureSet("key", {"value"}))})); + PredicateTreeAnnotations result; + PredicateTreeAnnotator::annotate(slime->get(), result); + + EXPECT_EQUAL(1u, result.min_feature); // needs key=value or z-star + EXPECT_EQUAL(3u, result.interval_range); + EXPECT_EQUAL(2u, result.interval_map.size()); + checkInterval(result, "key=value", + {0x00010002, 0x00010003}); + checkInterval(result, PredicateIndex::z_star_compressed_attribute_name, + {0x00020000}); +} + 
+TEST("require that final first NOT-interval is extended") {  // annotates a single negated feature set
+    auto slime = neg(featureSet("key", {"A"}));
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+    EXPECT_EQUAL(1u, result.min_feature);
+    EXPECT_EQUAL(2u, result.interval_range);
+    EXPECT_EQUAL(2u, result.interval_map.size());  // one entry for key=A, one for the z-star attribute
+    checkInterval(result, "key=A", {0x00010001});
+    checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+                  {0x00010000});
+}
+
+TEST("show different types of NOT-intervals") {  // three trees are annotated in turn, reusing slime and result
+    auto slime = andNode({orNode({andNode({featureSet("key", {"A"}),
+                                           neg(featureSet("key", {"B"}))}),
+                                  andNode({neg(featureSet("key", {"C"})),
+                                           featureSet("key", {"D"})})}),
+                          featureSet("foo", {"bar"})});
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+    EXPECT_EQUAL(3u, result.min_feature);
+    EXPECT_EQUAL(7u, result.interval_range);
+    EXPECT_EQUAL(6u, result.interval_map.size());
+    checkInterval(result, "foo=bar", {0x00070007});
+    checkInterval(result, "key=A", {0x00010001});
+    checkInterval(result, "key=B", {0x00020002});
+    checkInterval(result, "key=C", {0x00010004});
+    checkInterval(result, "key=D", {0x00060006});
+    checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+                  {0x00020001, 0x00000006, 0x00040000});
+
+    slime = orNode({neg(featureSet("key", {"A"})),  // second tree: OR of two negated feature sets
+                    neg(featureSet("key", {"B"}))});
+    result = PredicateTreeAnnotations();  // start from fresh annotations before re-annotating
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+    EXPECT_EQUAL(1u, result.min_feature);
+    EXPECT_EQUAL(4u, result.interval_range);
+    EXPECT_EQUAL(3u, result.interval_map.size());
+    checkInterval(result, "key=A", {0x00010003});
+    checkInterval(result, "key=B", {0x00010003});
+    checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+                  {0x00030000, 0x00030000});
+
+    slime = orNode({andNode({neg(featureSet("key", {"A"})),  // third tree: OR of two ANDs of negated feature sets
+                             neg(featureSet("key", {"B"}))}),
+                    andNode({neg(featureSet("key", {"C"})),
+                             neg(featureSet("key", {"D"}))})});
+    result = PredicateTreeAnnotations();  // start from fresh annotations before re-annotating
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+    EXPECT_EQUAL(1u, result.min_feature);
+    EXPECT_EQUAL(8u, result.interval_range);
+    EXPECT_EQUAL(5u, result.interval_map.size());
+    checkInterval(result, "key=A", {0x00010001});
+    checkInterval(result, "key=B", {0x00030007});
+    checkInterval(result, "key=C", {0x00010005});
+    checkInterval(result, "key=D", {0x00070007});
+    checkInterval(result, PredicateIndex::z_star_compressed_attribute_name,
+                  {0x00010000, 0x00070002, 0x00050000,
+                   0x00070006});
+
+}
+
+TEST("require that hashed ranges get correct intervals") {  // two hashed ranges under one AND node
+    Slime slime;
+    Cursor &children = makeAndNode(slime.setObject());
+    makeHashedFeatureRange(
+            children.addObject(), "key",
+            {"key=10-19", "key=20-29"}, {{0, 5, -1}, {30, 0, 3}});
+    makeHashedFeatureRange(
+            children.addObject(), "foo",
+            {"foo=10-19", "foo=20-29"}, {{0, 5, -1}, {30, 0, 3}});
+
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime.get(), result);
+
+    EXPECT_EQUAL(2u, result.min_feature);
+    EXPECT_EQUAL(2u, result.interval_range);
+    EXPECT_EQUAL(4u, result.interval_map.size());
+    EXPECT_EQUAL(4u, result.bounds_map.size());
+    checkInterval(result, "key=10-19", {0x00010001});
+    checkInterval(result, "key=20-29", {0x00010001});
+    checkBounds(result, "key=0", {{0x00010001, 0xffffffff}});
+    checkBounds(result, "key=30", {{0x00010001, 3}});
+
+    checkInterval(result, "foo=10-19", {0x00020002});
+    checkInterval(result, "foo=20-29", {0x00020002});
+    checkBounds(result, "foo=0", {{0x00020002, 0xffffffff}});
+    checkBounds(result, "foo=30", {{0x00020002, 3}});
+}
+
+TEST("require that extreme ranges works") {  // labels and edges at LLONG_MIN and 9223372036854775807
+    Slime slime;
+    Cursor &children = makeAndNode(slime.setObject());
+    makeHashedFeatureRange(
+            children.addObject(), "max range",
+            {"max range=9223372036854775806-9223372036854775807"}, {});
+    makeHashedFeatureRange(
+            children.addObject(), "max edge",
+            {}, {{9223372036854775807, 0, 0x40000001}});
+    makeHashedFeatureRange(
+            children.addObject(), "min range",
+            {"min range=-9223372036854775807-9223372036854775806"}, {});
+    makeHashedFeatureRange(
+            children.addObject(), "min edge",
+            {}, {{LLONG_MIN, 0, 0x40000001}});
+
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime.get(), result);
+
+    EXPECT_EQUAL(4u, result.min_feature);
+    EXPECT_EQUAL(4u, result.interval_range);
+    EXPECT_EQUAL(2u, result.interval_map.size());
+    EXPECT_EQUAL(2u, result.bounds_map.size());
+    checkInterval(result, "max range=9223372036854775806-9223372036854775807",
+                  {0x00010001});
+    checkBounds(result, "max edge=9223372036854775807",
+                {{0x00020002, 0x40000001}});
+    checkInterval(result, "min range=-9223372036854775807-9223372036854775806",
+                  {0x00030003});
+    checkBounds(result, "min edge=-9223372036854775808",
+                {{0x00040004, 0x40000001}});
+}
+
+TEST("require that unique features and all ranges are collected") {  // children 0/1 and 2/3 are identical pairs
+    auto slime = andNode({featureSet("key1", {"value1"}),
+                          featureSet("key1", {"value1"}),
+                          featureRange("key2", 9, 40),
+                          featureRange("key2", 9, 40)});
+    Cursor &c1 = slime->get()[Predicate::CHILDREN][2]  // hashed partitions for the first range child
+                 .setArray(Predicate::HASHED_PARTITIONS);
+    c1.addLong(PredicateHash::hash64("key2=10-19"));
+    c1.addLong(PredicateHash::hash64("key2=20-29"));
+    c1.addLong(PredicateHash::hash64("key2=30-39"));
+    c1.addLong(PredicateHash::hash64("key2=0"));
+    c1.addLong(PredicateHash::hash64("key2=40"));
+    Cursor &c2 = slime->get()[Predicate::CHILDREN][3]  // same partitions for the second range child
+                 .setArray(Predicate::HASHED_PARTITIONS);
+    c2.addLong(PredicateHash::hash64("key2=10-19"));
+    c2.addLong(PredicateHash::hash64("key2=20-29"));
+    c2.addLong(PredicateHash::hash64("key2=30-39"));
+    c2.addLong(PredicateHash::hash64("key2=0"));
+    c2.addLong(PredicateHash::hash64("key2=40"));
+
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+
+    EXPECT_EQUAL(4u, result.interval_range);
+    ASSERT_EQUAL(1u, result.features.size());  // the duplicated feature set is expected only once
+    EXPECT_EQUAL(static_cast<uint64_t>(PredicateHash::hash64("key1=value1")),
+                 result.features[0]);
+    ASSERT_EQUAL(2u, result.range_features.size());  // both (identical) ranges are expected
+    EXPECT_EQUAL("key2", result.range_features[0].label.make_string());
+    EXPECT_EQUAL(9, result.range_features[0].from);
+    EXPECT_EQUAL(40, result.range_features[0].to);
+    EXPECT_EQUAL("key2", result.range_features[1].label.make_string());
+    EXPECT_EQUAL(9, result.range_features[1].from);
+    EXPECT_EQUAL(40, result.range_features[1].to);
+}
+
+TEST("require that z-star feature is only registered once") {  // two negated children under one AND node
+    auto slime = andNode({neg(featureSet("key1", {"value1"})),
+                          neg(featureRange("key2", 10, 19))});
+    Cursor &c = slime->get()[Predicate::CHILDREN][1][Predicate::CHILDREN][0]  // the range inside the second negation
+                .setArray(Predicate::HASHED_PARTITIONS);
+    c.addLong(PredicateHash::hash64("key2=10-19"));
+
+    // simple range will be stored as a feature.
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+
+    EXPECT_EQUAL(4u, result.interval_range);
+    ASSERT_EQUAL(3u, result.features.size());  // key1, a single z-star entry, key2 range
+    EXPECT_EQUAL(PredicateHash::hash64("key1=value1"), result.features[0]);
+    EXPECT_EQUAL(PredicateIndex::z_star_compressed_hash, result.features[1]);
+    EXPECT_EQUAL(PredicateHash::hash64("key2=10-19"), result.features[2]);
+    ASSERT_EQUAL(0u, result.range_features.size());
+}
+
+TEST("require that default open range works") {  // annotate() is called without the two extra arguments
+    auto slime = lessEqual("foo", 39);
+    Cursor &c = slime->get().setArray(Predicate::HASHED_PARTITIONS);
+    c.addLong(PredicateHash::hash64("foo=-9223372036854775808"));
+    c.addLong(PredicateHash::hash64("foo=-9223372036854775807-0"));
+    c.addLong(PredicateHash::hash64("foo=0-31"));
+    c.addLong(PredicateHash::hash64("foo=32-39"));
+
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime->get(), result);
+
+    EXPECT_EQUAL(1u, result.interval_range);
+    EXPECT_EQUAL(0u, result.features.size());
+    ASSERT_EQUAL(1u, result.range_features.size());
+    EXPECT_EQUAL("foo", result.range_features[0].label.make_string());
+    EXPECT_EQUAL(LLONG_MIN, result.range_features[0].from);  // open lower end is expected to be LLONG_MIN here
+    EXPECT_EQUAL(39, result.range_features[0].to);
+}
+
+TEST("require that open range works") {  // same predicate shape, but annotate() gets 8 and 200 as extra arguments
+    auto slime = lessEqual("foo", 39);
+    Cursor &c = slime->get().setArray(Predicate::HASHED_PARTITIONS);
+    c.addLong(PredicateHash::hash64("foo=8-15"));
+    c.addLong(PredicateHash::hash64("foo=16-31"));
+    c.addLong(PredicateHash::hash64("foo=32-39"));
+
+    PredicateTreeAnnotations result;
+    PredicateTreeAnnotator::annotate(slime->get(), result, 8, 200);  // presumably lower/upper bounds - confirm against annotate()
+
+    EXPECT_EQUAL(1u, result.interval_range);
+    EXPECT_EQUAL(0u, result.features.size());
+    ASSERT_EQUAL(1u, result.range_features.size());
+    EXPECT_EQUAL("foo", result.range_features[0].label.make_string());
+    EXPECT_EQUAL(8, result.range_features[0].from);  // open lower end is expected to be 8 here
+    EXPECT_EQUAL(39, result.range_features[0].to);
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp
new file mode 100644
index 00000000000..1751c725044
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_zero_constraint_posting_list_test.cpp
@@ -0,0 +1,58 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_zero_constraint_posting_list.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_zero_constraint_posting_list_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_zero_constraint_posting_list.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {  // stub provider: both limits are the constant 10000
+    virtual uint32_t getDocIdLimit() const { return 10000; }
+    virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;  // file-scope collaborators shared by every PredicateIndex below
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider limit_provider;
+SimpleIndexConfig config;  // default-constructed configuration
+const uint64_t hash = 0x123;  // NOTE(review): not referenced anywhere in this file
+
+TEST("require that empty posting list starts at 0.") {  // no documents are indexed before the list is created
+    PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+    PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin());
+    EXPECT_EQUAL(0u, posting_list.getDocId());
+    EXPECT_EQUAL(0x00010001u, posting_list.getInterval());
+    EXPECT_FALSE(posting_list.next(0));
+}
+
+TEST("require that posting list can iterate.") {  // indexes empty documents with ids 1..99, then walks them
+    PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+    for (uint32_t id = 1; id < 100; ++id) {
+        index.indexEmptyDocument(id);
+    }
+    index.commit();
+    ASSERT_EQUAL(99u, index.getZeroConstraintDocs().size());
+
+    PredicateZeroConstraintPostingList posting_list(index.getZeroConstraintDocs().begin());
+    EXPECT_EQUAL(0u, posting_list.getDocId());  // before the first next() the list reports doc id 0
+    EXPECT_EQUAL(0x00010001u, posting_list.getInterval());
+
+    for (size_t i = 0; i < 99; ++i) {
+        EXPECT_TRUE(posting_list.next(i));  // next(i) is expected to land on doc id i + 1
+        EXPECT_EQUAL(i + 1, posting_list.getDocId());
+        EXPECT_EQUAL(0x00010001u, posting_list.getInterval());
+        EXPECT_FALSE(posting_list.nextInterval());  // each document is expected to expose exactly one interval
+    }
+    EXPECT_FALSE(posting_list.next(99));  // nothing is indexed after doc id 99
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git 
a/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp
new file mode 100644
index 00000000000..2dff14b4417
--- /dev/null
+++ b/searchlib/src/tests/predicate/predicate_zstar_compressed_posting_list_test.cpp
@@ -0,0 +1,95 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for predicate_zstar_compressed_posting_list.
+
+#include <vespa/log/log.h>
+LOG_SETUP("predicate_zstar_compressed_posting_list_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/predicate_tree_annotator.h>
+#include <vespa/searchlib/predicate/predicate_zstar_compressed_posting_list.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search;
+using namespace search::predicate;
+using std::vector;
+
+namespace {
+
+struct DummyDocIdLimitProvider : public DocIdLimitProvider {  // stub provider: both limits are the constant 10000
+    virtual uint32_t getDocIdLimit() const { return 10000; }
+    virtual uint32_t getCommittedDocIdLimit() const { return 10000; }
+};
+
+vespalib::GenerationHandler generation_handler;  // file-scope collaborators shared by every PredicateIndex below
+vespalib::GenerationHolder generation_holder;
+DummyDocIdLimitProvider limit_provider;
+SimpleIndexConfig config;  // default-constructed configuration
+const uint64_t hash = 0x123;  // feature hash under which the test intervals are stored and looked up
+
+TEST("require that empty posting list starts at 0.") {  // uses a default-constructed EntryRef, nothing indexed
+    PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+    btree::EntryRef ref;
+    PredicateZstarCompressedPostingList<PredicateIndex::BTreeIterator>
+        posting_list(index.getIntervalStore(), index.getIntervalIndex().getBTreePostingList(ref));
+    EXPECT_EQUAL(0u, posting_list.getDocId());
+    EXPECT_EQUAL(0u, posting_list.getInterval());
+    EXPECT_FALSE(posting_list.next(0));
+}
+
+TEST("require that posting list can iterate.") {  // three documents with 1, 2 and 4 stored intervals
+    PredicateIndex index(generation_handler, generation_holder, limit_provider, config, 8);
+    const auto &interval_index = index.getIntervalIndex();
+    vector<vector<Interval>> intervals =
+        {{{0x00010000}},
+         {{0x00010000}, {0x0000ffff}},
+         {{0x00010000}, {0x00000003}, {0x00040003}, {0x00060005}}};
+    for (size_t i = 0; i < intervals.size(); ++i) {
+        PredicateTreeAnnotations annotations(1);
+        annotations.interval_map[hash] = intervals[i];  // document i + 1 gets the i-th interval list
+        index.indexDocument(i + 1, annotations);
+    }
+    index.commit();
+    auto it = interval_index.lookup(hash);
+    ASSERT_TRUE(it.valid());
+    auto ref = it.getData();
+
+    PredicateZstarCompressedPostingList<PredicateIndex::BTreeIterator>
+        posting_list(index.getIntervalStore(), interval_index.getBTreePostingList(ref));
+    EXPECT_EQUAL(0u, posting_list.getDocId());  // before the first next() the list reports doc id 0
+    EXPECT_EQUAL(0u, posting_list.getInterval());
+
+    EXPECT_TRUE(posting_list.next(0));  // document 1: one stored interval, two intervals are read back
+    EXPECT_EQUAL(1u, posting_list.getDocId());
+    EXPECT_EQUAL(0x00010000u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0x00020001u, posting_list.getInterval());
+    ASSERT_FALSE(posting_list.nextInterval());
+
+    EXPECT_TRUE(posting_list.next(1));  // document 2: stored {0x00010000, 0x0000ffff}
+    EXPECT_EQUAL(2u, posting_list.getDocId());
+    EXPECT_EQUAL(0x00010000u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0xffff0001u, posting_list.getInterval());
+    ASSERT_FALSE(posting_list.nextInterval());
+
+    ASSERT_TRUE(posting_list.next(2));  // document 3: four stored intervals, six intervals are read back
+    EXPECT_EQUAL(3u, posting_list.getDocId());
+    EXPECT_EQUAL(0x00010000u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0x00030001u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0x00040003u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0x00050004u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0x00060005u, posting_list.getInterval());
+    ASSERT_TRUE(posting_list.nextInterval());
+    EXPECT_EQUAL(0x00070006u, posting_list.getInterval());
+    ASSERT_FALSE(posting_list.nextInterval());
+
+    ASSERT_FALSE(posting_list.next(4));  // no document is indexed beyond id 3
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/simple_index_test.cpp b/searchlib/src/tests/predicate/simple_index_test.cpp
new file mode 100644
index 00000000000..8ba9e6182fb
--- /dev/null
+++ b/searchlib/src/tests/predicate/simple_index_test.cpp
@@ -0,0 +1,333 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for simple_index.
+
+#include <vespa/log/log.h>
+LOG_SETUP("simple_index_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/simple_index.hpp>
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/attribute/predicate_attribute.h>
+
+using namespace search;
+using namespace search::predicate;
+using vespalib::GenerationHolder;
+
+namespace {
+
+struct MyData {  // posting payload used by these tests: a single uint32_t
+    uint32_t data;
+    MyData() : data(0) {}
+    MyData(uint32_t d) : data(d) {}  // intentionally implicit: the tests pass payloads as {id}
+    bool valid() const {  // any non-zero payload counts as valid
+        return data != 0;
+    }
+};
+
+struct MyDataSerializer : PostingSerializer<MyData> {  // writes the payload as one 32-bit integer
+    void serialize(const MyData &data,
+                   vespalib::MMapDataBuffer& buffer) const {
+        buffer.writeInt32(data.data);
+    }
+};
+
+struct MyDataDeserializer : PostingDeserializer<MyData> {  // reads the payload back as one 32-bit integer
+    MyData deserialize(vespalib::MMapDataBuffer& buffer) {
+        return {buffer.readInt32()};
+    }
+};
+
+struct SimpleDocIdLimitProvider : public DocIdLimitProvider {  // limits are plain mutable fields, both starting at 1
+    uint32_t _doc_id_limit = 1;
+    uint32_t _committed_doc_id_limit = 1;
+    virtual uint32_t getDocIdLimit() const { return _doc_id_limit; }
+    virtual uint32_t getCommittedDocIdLimit() const { return _committed_doc_id_limit; }
+};
+
+constexpr uint64_t key = 0x123456;  // default dictionary key, doc id and payload used by the tests
+constexpr uint32_t doc_id = 42;
+const MyData data{100};
+
+constexpr double UPPER_DOCID_FREQ_THRESHOLD = 0.5;  // threshold constants defined for the index configuration
+constexpr double LOWER_DOCID_FREQ_THRESHOLD = 0.25;
+constexpr size_t UPPER_VECTOR_SIZE_THRESHOLD = 10;
+constexpr size_t LOWER_VECTOR_SIZE_THRESHOLD = 8;
+constexpr size_t VECTOR_PRUNE_FREQUENCY = 1;
+constexpr double FOREACH_VECTOR_THRESHOLD = 0.0;
+const auto config = SimpleIndexConfig(UPPER_DOCID_FREQ_THRESHOLD,  // index configuration shared by every Fixture
+                                      LOWER_DOCID_FREQ_THRESHOLD,
+                                      UPPER_VECTOR_SIZE_THRESHOLD,
+                                      LOWER_VECTOR_SIZE_THRESHOLD,
+                                      VECTOR_PRUNE_FREQUENCY,
+                                      FOREACH_VECTOR_THRESHOLD,
+                                      GrowStrategy());
+struct Fixture {  // owns one SimpleIndex<MyData> plus the holder and limit provider it is constructed with
+    GenerationHolder _generation_holder;
+    SimpleDocIdLimitProvider _limit_provider;
+    SimpleIndex<MyData> _index;
+    Fixture() : _generation_holder(), _limit_provider(),
+                _index(_generation_holder, _limit_provider, config) {}
+    ~Fixture() {
+        _generation_holder.clearHoldLists();
+    }
+    SimpleIndex<MyData> &index() {
+        return _index;
+    }
+    void addPosting(uint64_t k, uint32_t id, const MyData &d) {  // grows the doc id limit so that it always exceeds id
+        if (id >= _limit_provider._doc_id_limit) {
+            _limit_provider._doc_id_limit = id + 1;
+        }
+        _index.addPosting(k, id, d);
+    }
+    SimpleIndex<MyData>::DictionaryIterator lookup(uint64_t k) {
+        return _index.lookup(k);
+    }
+    bool hasKey(uint64_t k) {
+        return lookup(k).valid();
+    }
+    std::pair<MyData, bool> removeFromPostingList(uint64_t k, uint32_t id) {
+        return _index.removeFromPostingList(k, id);
+    }
+    bool hasVectorPostingList(uint64_t k) {
+        return _index.getVectorPostingList(k).operator bool();
+    }
+    SimpleIndex<MyData>::VectorIterator getVectorPostingList(uint64_t k) {  // dereferences unconditionally; check hasVectorPostingList() first
+        return *_index.getVectorPostingList(k);
+    }
+    SimpleIndex<MyData>::BTreeIterator getBTreePostingList(btree::EntryRef ref) {
+        return _index.getBTreePostingList(ref);
+    }
+    void commit() {  // commits the index, then publishes the current doc id limit as the committed one
+        _index.commit();
+        _limit_provider._committed_doc_id_limit = _limit_provider._doc_id_limit;
+    }
+};
+
+TEST_F("require that SimpleIndex can insert and remove a value.", Fixture) {
+    f.addPosting(key, doc_id, data);
+    f.commit();
+    auto it = f.lookup(key);
+    ASSERT_TRUE(it.valid());
+    btree::EntryRef ref = it.getData();
+    auto posting_it = f.getBTreePostingList(ref);
+    ASSERT_TRUE(posting_it.valid());
+    EXPECT_EQUAL(doc_id, posting_it.getKey());
+    EXPECT_EQUAL(data.data, posting_it.getData().data);
+
+    auto result = f.removeFromPostingList(key, doc_id);  // first removal returns the stored payload
+    EXPECT_TRUE(result.second);
+    EXPECT_EQUAL(data.data, result.first.data);
+    f.commit();
+
+    result = f.removeFromPostingList(key, doc_id);  // second removal finds nothing
+    EXPECT_FALSE(result.second);
+    EXPECT_FALSE(result.first.valid());
+
+    ASSERT_FALSE(f.hasKey(key));
+}
+
+TEST_F("require that SimpleIndex can insert and remove many values.", Fixture) {
+    for (uint32_t id = 1; id < 100; ++id) {
+        f.addPosting(key, id, {id});
+    }
+    f.commit();
+    auto it = f.lookup(key);
+    ASSERT_TRUE(it.valid());
+    btree::EntryRef ref = it.getData();
+    auto posting_it = f.getBTreePostingList(ref);
+    for (size_t id = 1; id < 100; ++id) {  // postings are expected in ascending doc id order
+        ASSERT_TRUE(posting_it.valid());
+        EXPECT_EQUAL(id, posting_it.getKey());
+        EXPECT_EQUAL(id, posting_it.getData().data);
+        ++posting_it;
+    }
+    ASSERT_FALSE(posting_it.valid());
+    for (uint32_t id = 1; id < 100; ++id) {  // remove one at a time; the key must stay present until the last removal
+        it = f.lookup(key);
+        ASSERT_TRUE(it.valid());
+        ref = it.getData();
+        auto result = f.removeFromPostingList(key, id);
+        EXPECT_TRUE(result.second);
+        EXPECT_EQUAL(id, result.first.data);
+    }
+    f.commit();
+    ASSERT_FALSE(f.hasKey(key));
+}
+
+struct MyObserver : SimpleIndexDeserializeObserver<> {  // records the key reported for each doc id
+    std::map<uint32_t, uint64_t> features;
+    void notifyInsert(uint64_t my_key, uint32_t my_doc_id, uint32_t) {
+        features[my_doc_id] = my_key;
+    }
+    bool hasSeenDoc(uint32_t doc) {
+        return features.find(doc) != features.end();
+    }
+};
+
+TEST_FF("require that SimpleIndex can be serialized and deserialized.", Fixture, Fixture) {  // f1 is the source, f2 the deserialized copy
+    for (uint32_t id = 1; id < 100; ++id) {
+        f1.addPosting(key, id, {id});
+    }
+    f1.commit();
+    vespalib::MMapDataBuffer buffer;
+    f1.index().serialize(buffer, MyDataSerializer());
+    MyObserver observer;
+    MyDataDeserializer deserializer;
+    f2.index().deserialize(buffer, deserializer, observer, PredicateAttribute::PREDICATE_ATTRIBUTE_VERSION);
+
+    auto it = f2.lookup(key);
+    ASSERT_TRUE(it.valid());
+    btree::EntryRef ref = it.getData();
+    auto posting_it = f2.getBTreePostingList(ref);  // ref comes from f2's dictionary, so it must be resolved against f2 (was f1)
+    for (uint32_t id = 1; id < 100; ++id) {
+        ASSERT_TRUE(posting_it.valid());
+        EXPECT_EQUAL(id, posting_it.getKey());
+        EXPECT_EQUAL(id, posting_it.getData().data);
+        EXPECT_TRUE(observer.hasSeenDoc(id));
+        ++posting_it;
+    }
+    EXPECT_FALSE(posting_it.valid());
+}
+
+TEST_F("require that SimpleIndex can update by inserting the same key twice.", Fixture) {
+    f.addPosting(key, doc_id, data);
+
+    MyData new_data{42};
+    f.addPosting(key, doc_id, new_data);  // same key and doc id: the later payload is expected to win
+    f.commit();
+
+    auto it = f.lookup(key);
+    ASSERT_TRUE(it.valid());
+    btree::EntryRef ref = it.getData();
+    auto posting_it = f.getBTreePostingList(ref);
+    ASSERT_TRUE(posting_it.valid());
+    EXPECT_EQUAL(doc_id, posting_it.getKey());
+    EXPECT_EQUAL(new_data.data, posting_it.getData().data);
+}
+
+TEST_F("require that only that btrees exceeding size threshold is promoted to vector", Fixture) {
+    for (uint32_t i = 1; i < 10; ++i) {  // nine postings: no vector expected yet
+        f.addPosting(key, i, {i});
+    }
+    f.commit();
+    ASSERT_TRUE(f.hasKey(key));
+    EXPECT_FALSE(f.hasVectorPostingList(key));
+    f.addPosting(key, 10, {10});  // the tenth posting is expected to trigger promotion
+    f.commit();
+    ASSERT_TRUE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that vectors below size threshold is pruned", Fixture) {
+    for (uint32_t i = 1; i <= 10; ++i) {
+        f.addPosting(key, i, {i});
+    }
+    f.commit();
+    auto it = f.lookup(key);
+    ASSERT_TRUE(it.valid());
+    for (uint32_t i = 10; i > 8; --i) {  // down to eight postings: vector is expected to remain
+        f.removeFromPostingList(key, i);
+    }
+    f.commit();
+    EXPECT_TRUE(f.hasVectorPostingList(key));
+    f.removeFromPostingList(key, 8);  // seven postings left: vector is expected to be pruned
+    f.commit();
+    EXPECT_FALSE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that only btrees with high enough doc frequency is promoted to vector", Fixture) {
+    for (uint32_t i = 100; i > 51; --i) {  // 49 postings with a doc id limit of 101: no vector expected
+        f.addPosting(key, i, {i});
+    }
+    f.commit();
+    auto it = f.lookup(key);
+    ASSERT_TRUE(it.valid());
+    EXPECT_FALSE(f.hasVectorPostingList(key));
+    f.addPosting(key, 51, {51});  // the 50th posting is expected to trigger promotion
+    f.commit();
+    ASSERT_TRUE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that vectors below doc frequency is pruned by removeFromPostingList", Fixture) {
+    for (uint32_t i = 1; i <= 100; ++i) {
+        f.addPosting(key, i, {i});
+    }
+    f.commit();
+    ASSERT_TRUE(f.hasKey(key));
+    EXPECT_TRUE(f.hasVectorPostingList(key));
+    for (uint32_t i = 100; i > 25; --i) {  // 25 postings left: vector is expected to remain
+        f.removeFromPostingList(key, i);
+    }
+    f.commit();
+    EXPECT_TRUE(f.hasVectorPostingList(key));
+    f.removeFromPostingList(key, 25);  // 24 postings left: vector is expected to be pruned
+    f.commit();
+    EXPECT_FALSE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that vectors below doc frequency is pruned by addPosting", Fixture) {
+    for (uint32_t i = 1; i <= 10; ++i) {
+        f.addPosting(key, i, {i});
+    }
+    f.commit();
+    ASSERT_TRUE(f.hasKey(key));
+    EXPECT_TRUE(f.hasVectorPostingList(key));
+    for (uint32_t i = 1; i <= 100; ++i) {  // postings for another key raise the doc id limit to 101
+        f.addPosting(key + 1, i, {i});
+    }
+    f.commit();
+    EXPECT_FALSE(f.hasVectorPostingList(key));
+}
+
+TEST_F("require that promoteOverThresholdVectors promotes posting lists over threshold to vectors", Fixture) {
+    f._limit_provider._doc_id_limit = 100;  // start with a high limit so nothing is promoted while adding
+    for (uint32_t i = 1; i <= 20; ++i) {
+        f.addPosting(key + 0, i, {i});
+        f.addPosting(key + 1, i, {i});
+        f.addPosting(key + 2, i, {i});
+    }
+    for (uint32_t i = 21; i <= 40; ++i) {  // key + 0 and key + 2 end with 40 postings, key + 1 with 20
+        f.addPosting(key + 0, i, {i});
+        f.addPosting(key + 2, i, {i});
+    }
+    f.commit();
+    EXPECT_FALSE(f.hasVectorPostingList(key + 0));
+    EXPECT_FALSE(f.hasVectorPostingList(key + 1));
+    EXPECT_FALSE(f.hasVectorPostingList(key + 2));
+    f._limit_provider._doc_id_limit = 50;  // lower the limit directly, then ask the index to re-evaluate
+    f.index().promoteOverThresholdVectors();
+    f.commit();
+    EXPECT_TRUE(f.hasVectorPostingList(key + 0));
+    EXPECT_FALSE(f.hasVectorPostingList(key + 1));
+    EXPECT_TRUE(f.hasVectorPostingList(key + 2));
+}
+
+TEST_F("require that vector contains correct postings", Fixture) {
+    for (uint32_t i = 1; i <= 100; ++i) {  // every fifth doc id gets the invalid payload MyData{0}
+        f.addPosting(key, i, i % 5 > 0 ? MyData{i * 2} : MyData{0});
+    }
+    f.commit();
+    ASSERT_TRUE(f.hasKey(key));
+    ASSERT_TRUE(f.hasVectorPostingList(key));
+    auto v = f.getVectorPostingList(key);
+
+    EXPECT_EQUAL(1u, v.getKey());
+    EXPECT_EQUAL(2u, v.getData().data);
+
+    for (uint32_t i = 1; i < 100; ++i) {
+        v.linearSeek(i);
+        ASSERT_TRUE(v.valid());
+        if (i % 5 == 0) {  // seeking to an invalid-payload doc id is expected to land on the next one
+            EXPECT_EQUAL(i + 1, v.getKey());
+            EXPECT_EQUAL((i + 1) * 2, v.getData().data);
+        } else {
+            EXPECT_EQUAL(i, v.getKey());
+            EXPECT_EQUAL(i * 2, v.getData().data);
+        }
+    }
+    v.linearSeek(100);  // doc id 100 has the invalid payload and nothing follows it
+    EXPECT_FALSE(v.valid());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/predicate/tree_crumbs_test.cpp b/searchlib/src/tests/predicate/tree_crumbs_test.cpp
new file mode 100644
index 00000000000..2f38bb74507
--- /dev/null
+++ b/searchlib/src/tests/predicate/tree_crumbs_test.cpp
@@ -0,0 +1,65 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for TreeCrumbs.
+
+#include <vespa/log/log.h>
+LOG_SETUP("TreeCrumbs_test");
+#include <vespa/fastos/fastos.h>
+
+#include <vespa/searchlib/predicate/tree_crumbs.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search::predicate;
+
+namespace {
+
+TEST("require that crumbs can set child and resize") {  // each setChild() appends ":<n>"; resize() truncates the crumb
+    TreeCrumbs crumbs;
+    EXPECT_EQUAL(0u, crumbs.size());
+    EXPECT_EQUAL("", crumbs.getCrumb());
+    crumbs.setChild(2);
+    EXPECT_EQUAL(2u, crumbs.size());
+    EXPECT_EQUAL(":2", crumbs.getCrumb());
+    crumbs.setChild(12345);
+    EXPECT_EQUAL(8u, crumbs.size());
+    EXPECT_EQUAL(":2:12345", crumbs.getCrumb());
+    crumbs.resize(2);  // cut back to the first component
+    EXPECT_EQUAL(2u, crumbs.size());
+    EXPECT_EQUAL(":2", crumbs.getCrumb());
+    crumbs.setChild(42);
+    EXPECT_EQUAL(5u, crumbs.size());
+    EXPECT_EQUAL(":2:42", crumbs.getCrumb());
+    crumbs.resize(2);
+    EXPECT_EQUAL(2u, crumbs.size());
+    EXPECT_EQUAL(":2", crumbs.getCrumb());
+    crumbs.resize(0);  // back to the empty crumb
+    EXPECT_EQUAL(0u, crumbs.size());
+    EXPECT_EQUAL("", crumbs.getCrumb());
+}
+
+TEST("require that child counts of 2^32 - 1 is ok") {  // label fixed: 0xffffffff is 2^32 - 1, not 2^31 - 1
+    TreeCrumbs crumbs;
+    EXPECT_EQUAL(0u, crumbs.size());
+    EXPECT_EQUAL("", crumbs.getCrumb());
+    crumbs.setChild(0xffffffff);
+    EXPECT_EQUAL(11u, crumbs.size());  // ':' plus the ten digits of 4294967295
+    EXPECT_EQUAL(":4294967295", crumbs.getCrumb());
+}
+
+TEST("require that child 0 gets number") {  // child index 0 must still be written out as a digit
+    TreeCrumbs crumbs;
+    crumbs.setChild(0);
+    EXPECT_EQUAL(2u, crumbs.size());
+    EXPECT_EQUAL(":0", crumbs.getCrumb());
+}
+
+TEST("require that crumbs can set custom initial char") {  // the second setChild() argument replaces the ':' prefix
+    TreeCrumbs crumbs;
+    crumbs.setChild(0, 'a');
+    crumbs.setChild(1, 'b');
+    crumbs.setChild(2, 'c');
+    EXPECT_EQUAL("a0b1c2", crumbs.getCrumb());
+}
+
+} // namespace
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/prettyfloat/.gitignore b/searchlib/src/tests/prettyfloat/.gitignore
new file mode 100644
index 00000000000..bf0327f3372
--- /dev/null
+++ b/searchlib/src/tests/prettyfloat/.gitignore
@@ -0,0 +1,4 @@
+.depend
+Makefile
+prettyfloat_test
+searchlib_prettyfloat_test_app diff --git a/searchlib/src/tests/prettyfloat/CMakeLists.txt b/searchlib/src/tests/prettyfloat/CMakeLists.txt new file mode 100644 index 00000000000..74e91518030 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_prettyfloat_test_app + SOURCES + prettyfloat.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_prettyfloat_test_app COMMAND searchlib_prettyfloat_test_app) diff --git a/searchlib/src/tests/prettyfloat/DESC b/searchlib/src/tests/prettyfloat/DESC new file mode 100644 index 00000000000..fc4e85bcc09 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/DESC @@ -0,0 +1 @@ +prettyfloat test. Take a look at prettyfloat.cpp for details. diff --git a/searchlib/src/tests/prettyfloat/FILES b/searchlib/src/tests/prettyfloat/FILES new file mode 100644 index 00000000000..fe3e151cf90 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/FILES @@ -0,0 +1 @@ +prettyfloat.cpp diff --git a/searchlib/src/tests/prettyfloat/prettyfloat.cpp b/searchlib/src/tests/prettyfloat/prettyfloat.cpp new file mode 100644 index 00000000000..1ed9b7e1767 --- /dev/null +++ b/searchlib/src/tests/prettyfloat/prettyfloat.cpp @@ -0,0 +1,32 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("prettyfloat_test");
+#include <vespa/vespalib/testkit/testapp.h>
+#include <vespa/searchlib/util/rawbuf.h>
+#include <vespa/searchlib/common/hitrank.h>
+
+using namespace search;
+
+TEST_SETUP(Test);
+
+int
+Test::Main()  // checks that RawBuf renders a rank value of 10 as the text "10", for both rank types
+{
+    TEST_INIT("prettyfloat_test");
+    {  // case 1: SignedHitRank via addSignedHitRank()
+        RawBuf buf(5000);
+        SignedHitRank rank = 10;
+        buf.addSignedHitRank(rank);
+        *buf.GetWritableFillPos() = '\0';  // terminate the written text before comparing it as a string
+        EXPECT_EQUAL(std::string("10"), buf.GetDrainPos());
+    }
+    {  // case 2: HitRank via addHitRank()
+        RawBuf buf(5000);
+        HitRank rank = 10;
+        buf.addHitRank(rank);
+        *buf.GetWritableFillPos() = '\0';  // terminate the written text before comparing it as a string
+        EXPECT_EQUAL(std::string("10"), buf.GetDrainPos());
+    }
+    TEST_DONE();
+}
diff --git a/searchlib/src/tests/query/.gitignore b/searchlib/src/tests/query/.gitignore
new file mode 100644
index 00000000000..8b9d7f9993f
--- /dev/null
+++ b/searchlib/src/tests/query/.gitignore
@@ -0,0 +1,10 @@
+*_test
+.depend
+Makefile
+searchlib_customtypevisitor_test_app
+searchlib_query-old-large_test_app
+searchlib_query-old_test_app
+searchlib_query_visitor_test_app
+searchlib_querybuilder_test_app
+searchlib_stackdumpquerycreator_test_app
+searchlib_templatetermvisitor_test_app
diff --git a/searchlib/src/tests/query/CMakeLists.txt b/searchlib/src/tests/query/CMakeLists.txt
new file mode 100644
index 00000000000..16a75b7142a
--- /dev/null
+++ b/searchlib/src/tests/query/CMakeLists.txt
@@ -0,0 +1,50 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_query_visitor_test_app + SOURCES + query_visitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_query_visitor_test_app COMMAND searchlib_query_visitor_test_app) +vespa_add_executable(searchlib_customtypevisitor_test_app + SOURCES + customtypevisitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_customtypevisitor_test_app COMMAND searchlib_customtypevisitor_test_app) +vespa_add_executable(searchlib_templatetermvisitor_test_app + SOURCES + templatetermvisitor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_templatetermvisitor_test_app COMMAND searchlib_templatetermvisitor_test_app) +vespa_add_executable(searchlib_querybuilder_test_app + SOURCES + querybuilder_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_querybuilder_test_app COMMAND searchlib_querybuilder_test_app) +vespa_add_executable(searchlib_stackdumpquerycreator_test_app + SOURCES + stackdumpquerycreator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stackdumpquerycreator_test_app COMMAND searchlib_stackdumpquerycreator_test_app) +vespa_add_executable(searchlib_query-old_test_app + SOURCES + query-old.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_query-old_test_app COMMAND searchlib_query-old_test_app) +vespa_add_executable(searchlib_query-old-large_test_app + SOURCES + query-old-large.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_query-old-large_test_app COMMAND searchlib_query-old-large_test_app) diff --git a/searchlib/src/tests/query/DESC b/searchlib/src/tests/query/DESC new file mode 100644 index 00000000000..6461797f6bd --- /dev/null +++ b/searchlib/src/tests/query/DESC @@ -0,0 +1 @@ +This is a test of the query interface. 
diff --git a/searchlib/src/tests/query/FILES b/searchlib/src/tests/query/FILES
new file mode 100644
index 00000000000..6f11f149162
--- /dev/null
+++ b/searchlib/src/tests/query/FILES
@@ -0,0 +1,2 @@
+query.cpp
+query-old.cpp
diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp
new file mode 100644
index 00000000000..c9da2757d81
--- /dev/null
+++ b/searchlib/src/tests/query/customtypevisitor_test.cpp
@@ -0,0 +1,157 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for customtypevisitor.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("customtypevisitor_test");
+
+#include <vespa/searchlib/query/tree/customtypevisitor.h>
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using std::string;
+
+using namespace search::query;
+
+namespace {
+
+// Test application. current_state holds the name of the check currently
+// running (set by TEST_CALL) so that DumpState can report it on stderr.
+class Test : public vespalib::TestApp {
+    const char *current_state;
+    virtual void DumpState(bool) {
+        fprintf(stderr, "%s: ERROR: in %s\n", GetName(), current_state);
+    }
+
+    template <class T> void requireThatNodeIsVisited();
+
+public:
+    int Main();
+};
+
+// Gives a term node type a default constructor by forwarding a
+// default-constructed Base::Type, "view", 0 and Weight(0) to Base.
+template <class Base>
+struct InitTerm : Base {
+    InitTerm() : Base(typename Base::Type(), "view", 0, Weight(0)) {}
+};
+
+// Custom, default-constructible subclasses of every query node type.
+struct MyAnd : And {};
+struct MyAndNot : AndNot {};
+struct MyEquiv : Equiv {};
+struct MyNear : Near { MyNear() : Near(1) {} };
+struct MyONear : ONear { MyONear() : ONear(1) {} };
+struct MyOr : Or {};
+struct MyPhrase : Phrase { MyPhrase() : Phrase("view", 0, Weight(42)) {} };
+struct MyRank : Rank {};
+struct MyNumberTerm : InitTerm<NumberTerm> {};
+struct MyLocationTerm : InitTerm<LocationTerm> {};
+struct MyPrefixTerm : InitTerm<PrefixTerm> {};
+struct MyRangeTerm : InitTerm<RangeTerm> {};
+struct MyStringTerm : InitTerm<StringTerm> {};
+struct MySubstrTerm : InitTerm<SubstringTerm> {};
+struct MySuffixTerm : InitTerm<SuffixTerm> {};
+struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} };
+struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm("view", 0, Weight(42)) {} };
+struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct("view", 0, Weight(42)) {} };
+struct MyWandTerm : WandTerm { MyWandTerm() : WandTerm("view", 0, Weight(42), 57, 67, 77.7) {} };
+struct MyPredicateQuery : InitTerm<PredicateQuery> {};
+struct MyRegExpTerm : InitTerm<RegExpTerm> {};
+
+// Type map handed to CustomTypeVisitor: each node role is bound to the
+// matching custom subclass declared above.
+struct MyQueryNodeTypes {
+    typedef MyAnd And;
+    typedef MyAndNot AndNot;
+    typedef MyEquiv Equiv;
+    typedef MyNumberTerm NumberTerm;
+    typedef MyLocationTerm LocationTerm;
+    typedef MyNear Near;
+    typedef MyONear ONear;
+    typedef MyOr Or;
+    typedef MyPhrase Phrase;
+    typedef MyPrefixTerm PrefixTerm;
+    typedef MyRangeTerm RangeTerm;
+    typedef MyRank Rank;
+    typedef MyStringTerm StringTerm;
+    typedef MySubstrTerm SubstringTerm;
+    typedef MySuffixTerm SuffixTerm;
+    typedef MyWeakAnd WeakAnd;
+    typedef MyWeightedSetTerm WeightedSetTerm;
+    typedef MyDotProduct DotProduct;
+    typedef MyWandTerm WandTerm;
+    typedef MyPredicateQuery PredicateQuery;
+    typedef MyRegExpTerm RegExpTerm;
+};
+
+// Visitor that records, per node type T, whether visit(T &) was called.
+// The flag is a function-local static, so it is shared by every
+// MyCustomVisitor instance; callers reset it before each check.
+class MyCustomVisitor : public CustomTypeVisitor<MyQueryNodeTypes>
+{
+public:
+    template <typename T>
+    bool &isVisited() {
+        static bool b;
+        return b;
+    }
+
+    template <typename T> void setVisited() { isVisited<T>() = true; }
+
+    virtual void visit(MyAnd &) { setVisited<MyAnd>(); }
+    virtual void visit(MyAndNot &) { setVisited<MyAndNot>(); }
+    virtual void visit(MyEquiv &) { setVisited<MyEquiv>(); }
+    virtual void visit(MyNumberTerm &) { setVisited<MyNumberTerm>(); }
+    virtual void visit(MyLocationTerm &) { setVisited<MyLocationTerm>(); }
+    virtual void visit(MyNear &) { setVisited<MyNear>(); }
+    virtual void visit(MyONear &) { setVisited<MyONear>(); }
+    virtual void visit(MyOr &) { setVisited<MyOr>(); }
+    virtual void visit(MyPhrase &) { setVisited<MyPhrase>(); }
+    virtual void visit(MyPrefixTerm &) { setVisited<MyPrefixTerm>(); }
+    virtual void visit(MyRangeTerm &) { setVisited<MyRangeTerm>(); }
+    virtual void visit(MyRank &) { setVisited<MyRank>(); }
+    virtual void visit(MyStringTerm &) { setVisited<MyStringTerm>(); }
+    virtual void visit(MySubstrTerm &) { setVisited<MySubstrTerm>(); }
+    virtual void visit(MySuffixTerm &) { setVisited<MySuffixTerm>(); }
+    virtual void visit(MyWeakAnd &) { setVisited<MyWeakAnd>(); }
+    virtual void visit(MyWeightedSetTerm &) { setVisited<MyWeightedSetTerm>(); }
+    virtual void visit(MyDotProduct &) { setVisited<MyDotProduct>(); }
+    virtual void visit(MyWandTerm &) { setVisited<MyWandTerm>(); }
+    virtual void visit(MyPredicateQuery &) { setVisited<MyPredicateQuery>(); }
+    virtual void visit(MyRegExpTerm &) { setVisited<MyRegExpTerm>(); }
+};
+
+// Creates a node of type T, clears T's visited flag, lets the node accept
+// the visitor, and asserts that the matching visit() overload was reached.
+template <class T>
+void Test::requireThatNodeIsVisited() {
+    MyCustomVisitor visitor;
+    Node::UP query(new T);
+    visitor.isVisited<T>() = false;
+    query->accept(visitor);
+    ASSERT_TRUE(visitor.isVisited<T>());
+}
+
+// Stores the stringified check name in current_state, then runs the check.
+// NOTE(review): expands to two bare statements (no do/while wrapper), so it
+// must not be used as the body of an unbraced if/else.
+#define TEST_CALL(func) \
+    current_state = #func; \
+    func();
+
+// Runs the visit check for each custom node type.
+// NOTE(review): MyEquiv, MyLocationTerm and MyWeakAnd have visit() overloads
+// above but are not exercised here - confirm whether that is intentional.
+int
+Test::Main()
+{
+    TEST_INIT("customtypevisitor_test");
+
+    TEST_CALL(requireThatNodeIsVisited<MyAnd>);
+    TEST_CALL(requireThatNodeIsVisited<MyAndNot>);
+    TEST_CALL(requireThatNodeIsVisited<MyNear>);
+    TEST_CALL(requireThatNodeIsVisited<MyONear>);
+    TEST_CALL(requireThatNodeIsVisited<MyOr>);
+    TEST_CALL(requireThatNodeIsVisited<MyPhrase>);
+    TEST_CALL(requireThatNodeIsVisited<MyRangeTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MyRank>);
+    TEST_CALL(requireThatNodeIsVisited<MyNumberTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MyPrefixTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MyStringTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MySubstrTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MySuffixTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MyWeightedSetTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MyDotProduct>);
+    TEST_CALL(requireThatNodeIsVisited<MyWandTerm>);
+    TEST_CALL(requireThatNodeIsVisited<MyPredicateQuery>);
+    TEST_CALL(requireThatNodeIsVisited<MyRegExpTerm>);
+
+    TEST_DONE();
+}
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git a/searchlib/src/tests/query/query-old-large.cpp b/searchlib/src/tests/query/query-old-large.cpp
new file mode 100644
index 00000000000..4e0d0fb85de
--- /dev/null
+++ b/searchlib/src/tests/query/query-old-large.cpp
@@ -0,0 +1,51 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/searchlib/query/query.h>
+#include <vespa/searchlib/query/tree/querybuilder.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/stackdumpcreator.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <limits>
+
+using namespace search;
+using namespace search::query;
+
+namespace {
+
+// Sets the soft RLIMIT_STACK limit to maxStackSize; the hard limit is
+// written back as read.
+// NOTE(review): the return values of getrlimit/setrlimit are ignored. If
+// getrlimit fails, limit is passed to setrlimit partly uninitialized.
+void setMaxStackSize(rlim_t maxStackSize)
+{
+    struct rlimit limit;
+    getrlimit(RLIMIT_STACK, &limit);
+    limit.rlim_cur = maxStackSize;
+    setrlimit(RLIMIT_STACK, &limit);
+}
+
+}
+
+
+// NOTE: This test explicitly sets thread stack size and will fail due to
+// a stack overflow if the stack usage increases.
+// Builds a query of NUMITEMS + 1 two-child AND nodes, converts it to a stack
+// dump and parses it back with a 4 MiB stack limit. Every AND contributes one
+// "a" term and the last one also gets a "b" term, so the parsed query must
+// expose NUMITEMS + 2 leaf terms.
+TEST("testveryLongQueryResultingInBug6850778") {
+    const uint32_t NUMITEMS = 20000;
+    setMaxStackSize(4 * 1024 * 1024);
+    QueryBuilder<SimpleQueryNodeTypes> builder;
+    for (uint32_t i = 0; i <= NUMITEMS; i++) {
+        builder.addAnd(2);
+        builder.addStringTerm("a", "", 0, Weight(0));
+        // Only the final AND gets an explicit second term; presumably the
+        // AND added by the next iteration fills that slot for the others.
+        if (i == NUMITEMS) {
+            builder.addStringTerm("b", "", 0, Weight(0));
+        }
+    }
+    Node::UP node = builder.build();
+    vespalib::string stackDump = StackDumpCreator::create(*node);
+
+    EmptyQueryNodeResult empty;
+    Query q(empty, stackDump);
+    QueryTermList terms;
+    q.getLeafs(terms);
+    ASSERT_EQUAL(NUMITEMS + 2, terms.size());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/query-old.cpp b/searchlib/src/tests/query/query-old.cpp
new file mode 100644
index 00000000000..94eeacc2b4d
--- /dev/null
+++ b/searchlib/src/tests/query/query-old.cpp
@@ -0,0 +1,650 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/query/query.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <limits> + +using namespace search; +using namespace search::query; + +void assertHit(const Hit & h, size_t expWordpos, size_t expContext, int32_t weight) { + EXPECT_EQUAL(h.wordpos(), expWordpos); + EXPECT_EQUAL(h.context(), expContext); + EXPECT_EQUAL(h.weight(), weight); +} + +TEST("testQueryLanguage") { + EmptyQueryNodeResult eqnr; + int64_t ia(0), ib(0); + double da(0), db(0); + + QueryTerm q(eqnr, "7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 7); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7); + EXPECT_EQUAL(db, 7); + + q = QueryTerm(eqnr, "-7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, -7); + + q = QueryTerm(eqnr, "7.5", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7.5); + EXPECT_EQUAL(db, 7.5); + + q = QueryTerm(eqnr, "-7.5", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7.5); + EXPECT_EQUAL(db, -7.5); + + q = QueryTerm(eqnr, "<7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_LESS(db, 7); + EXPECT_GREATER(db, 6.99); + + q = QueryTerm(eqnr, "[;7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 
std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_EQUAL(db, 7); + + q = QueryTerm(eqnr, ">7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 8); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_GREATER(da, 7); + EXPECT_LESS(da, 7.01); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + + q = QueryTerm(eqnr, "[7;]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 7); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + + q = QueryTerm(eqnr, "[-7;7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, 7); + + q = QueryTerm(eqnr, "[-7.1;7.1]", "index", QueryTerm::WORD); + EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7.1); + EXPECT_EQUAL(db, 7.1); + + q = QueryTerm(eqnr, "[500.0;1.7976931348623157E308]", "index", QueryTerm::WORD); + EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. 
+ EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 500.0); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + + const double minusSeven(-7), seven(7); + q = QueryTerm(eqnr, "<-7;7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, nextafterf(minusSeven, seven)); + EXPECT_EQUAL(db, seven); + + q = QueryTerm(eqnr, "<-7;7>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, nextafterf(minusSeven, seven)); + EXPECT_EQUAL(db, nextafterf(seven, minusSeven)); + + q = QueryTerm(eqnr, "<1;2>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 2); + EXPECT_EQUAL(ib, 1); + + q = QueryTerm(eqnr, "[-7;7>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, minusSeven); + EXPECT_EQUAL(db, nextafterf(seven, minusSeven)); + + q = QueryTerm(eqnr, "<-7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, -8); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_LESS(db, -7); + EXPECT_GREATER(db, -7.01); + + q = QueryTerm(eqnr, "[;-7]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_EQUAL(db, -7); + + q = QueryTerm(eqnr, "<;-7]", "index", QueryTerm::WORD); + 
EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_EQUAL(db, -7); + + q = QueryTerm(eqnr, ">-7", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_GREATER(da, -7); + EXPECT_LESS(da, -6.99); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + + q = QueryTerm(eqnr, "[-7;]", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + + q = QueryTerm(eqnr, "[-7;>", "index", QueryTerm::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + + q = QueryTerm(eqnr, "a", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(!q.getAsDoubleTerm(da, db)); + + q = QueryTerm(eqnr, "word", "index", QueryTerm::WORD); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + + q = QueryTerm(eqnr, "prefix", "index", QueryTerm::PREFIXTERM); + EXPECT_TRUE(q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + + q = QueryTerm(eqnr, "substring", "index", QueryTerm::SUBSTRINGTERM); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + + q = QueryTerm(eqnr, "suffix", "index", QueryTerm::SUFFIXTERM); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(q.isSuffix()); + + q = QueryTerm(eqnr, 
"regexp", "index", QueryTerm::REGEXP); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + EXPECT_TRUE(q.isRegex()); +} + +class AllowRewrite : public EmptyQueryNodeResult +{ +public: + DUPLICATE(AllowRewrite); // create duplicate function + + virtual bool getRewriteFloatTerms() const { return true; } +}; + +IMPLEMENT_DUPLICATE(AllowRewrite); + +const char TERM_UNIQ = ParseItem::ITEM_TERM | ParseItem::IF_UNIQUEID; + +TEST("e is not rewritten even if allowed") { + const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQUAL(6u, stackDump.size()); + AllowRewrite empty; + Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode::LP & root = q.getRoot(); + EXPECT_EQUAL(QueryTerm::classId, root->getClass().id()); + const QueryTerm & qt = static_cast<const QueryTerm &>(*root); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("e"), qt.getTerm()); + EXPECT_EQUAL(3u, qt.uniqueId()); +} + +TEST("1.0e is not rewritten by default") { + const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQUAL(9u, stackDump.size()); + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode::LP & root = q.getRoot(); + EXPECT_EQUAL(QueryTerm::classId, root->getClass().id()); + const QueryTerm & qt = static_cast<const QueryTerm &>(*root); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("1.0e"), qt.getTerm()); + EXPECT_EQUAL(3u, qt.uniqueId()); +} + +TEST("1.0e is rewritten if allowed too.") { + const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; + vespalib::stringref stackDump(term, sizeof(term)); + EXPECT_EQUAL(9u, stackDump.size()); + AllowRewrite empty; + Query q(empty, stackDump); + EXPECT_TRUE(q.valid()); + const QueryNode::LP & root = q.getRoot(); + EXPECT_EQUAL(EquivQueryNode::classId, 
root->getClass().id()); + const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(*root); + EXPECT_EQUAL(2u, equiv.size()); + EXPECT_EQUAL(QueryTerm::classId, equiv[0]->getClass().id()); + { + const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("1.0e"), qt.getTerm()); + EXPECT_EQUAL(3u, qt.uniqueId()); + } + EXPECT_EQUAL(PhraseQueryNode::classId, equiv[1]->getClass().id()); + { + const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); + EXPECT_EQUAL(2u, phrase.size()); + EXPECT_EQUAL(QueryTerm::classId, phrase[0]->getClass().id()); + { + const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("1"), qt.getTerm()); + EXPECT_EQUAL(0u, qt.uniqueId()); + } + EXPECT_EQUAL(QueryTerm::classId, phrase[1]->getClass().id()); + { + const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]); + EXPECT_EQUAL("c", qt.index()); + EXPECT_EQUAL(vespalib::stringref("0e"), qt.getTerm()); + EXPECT_EQUAL(0u, qt.uniqueId()); + } + } +} + +TEST("testGetQueryParts") { + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(4); + { + builder.addStringTerm("a", "", 0, Weight(0)); + builder.addPhrase(3, "", 0, Weight(0)); + { + builder.addStringTerm("b", "", 0, Weight(0)); + builder.addStringTerm("c", "", 0, Weight(0)); + builder.addStringTerm("d", "", 0, Weight(0)); + } + builder.addStringTerm("e", "", 0, Weight(0)); + builder.addPhrase(2, "", 0, Weight(0)); + { + builder.addStringTerm("f", "", 0, Weight(0)); + builder.addStringTerm("g", "", 0, Weight(0)); + } + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + QueryTermList terms; + QueryNodeRefList phrases; + q.getLeafs(terms); + q.getPhrases(phrases); + ASSERT_TRUE(terms.size() == 7); + ASSERT_TRUE(phrases.size() == 2); 
+ { + QueryTermList pts; + phrases[0]->getLeafs(pts); + ASSERT_TRUE(pts.size() == 3); + for (size_t i = 0; i < 3; ++i) { + EXPECT_EQUAL(pts[i], terms[i + 1]); + } + } + { + QueryTermList pts; + phrases[1]->getLeafs(pts); + ASSERT_TRUE(pts.size() == 2); + for (size_t i = 0; i < 2; ++i) { + EXPECT_EQUAL(pts[i], terms[i + 5]); + } + } +} + +TEST("testPhraseEvaluate") { + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addPhrase(3, "", 0, Weight(0)); + { + builder.addStringTerm("a", "", 0, Weight(0)); + builder.addStringTerm("b", "", 0, Weight(0)); + builder.addStringTerm("c", "", 0, Weight(0)); + } + Node::UP node = builder.build(); + vespalib::string stackDump = StackDumpCreator::create(*node); + EmptyQueryNodeResult empty; + Query q(empty, stackDump); + QueryNodeRefList phrases; + q.getPhrases(phrases); + QueryTermList terms; + q.getLeafs(terms); + // field 0 + terms[0]->add(0, 0, 1); + terms[1]->add(1, 0, 1); + terms[2]->add(2, 0, 1); + terms[0]->add(7, 0, 1); + terms[1]->add(8, 0, 1); + terms[2]->add(9, 0, 1); + // field 1 + terms[0]->add(4, 1, 1); + terms[1]->add(5, 1, 1); + terms[2]->add(6, 1, 1); + // field 2 (not complete match) + terms[0]->add(1, 2, 1); + terms[1]->add(2, 2, 1); + terms[2]->add(4, 2, 1); + // field 3 + terms[0]->add(0, 3, 1); + terms[1]->add(1, 3, 1); + terms[2]->add(2, 3, 1); + // field 4 (not complete match) + terms[0]->add(1, 4, 1); + terms[1]->add(2, 4, 1); + // field 5 (not complete match) + terms[0]->add(2, 5, 1); + terms[1]->add(1, 5, 1); + terms[2]->add(0, 5, 1); + HitList hits; + PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]); + p->evaluateHits(hits); + ASSERT_TRUE(hits.size() == 4); + EXPECT_EQUAL(hits[0].wordpos(), 2u); + EXPECT_EQUAL(hits[0].context(), 0u); + EXPECT_EQUAL(hits[1].wordpos(), 9u); + EXPECT_EQUAL(hits[1].context(), 0u); + EXPECT_EQUAL(hits[2].wordpos(), 6u); + EXPECT_EQUAL(hits[2].context(), 1u); + EXPECT_EQUAL(hits[3].wordpos(), 2u); + EXPECT_EQUAL(hits[3].context(), 3u); + 
ASSERT_TRUE(p->getFieldInfoSize() == 4); + EXPECT_EQUAL(p->getFieldInfo(0).getHitOffset(), 0u); + EXPECT_EQUAL(p->getFieldInfo(0).getHitCount(), 2u); + EXPECT_EQUAL(p->getFieldInfo(1).getHitOffset(), 2u); + EXPECT_EQUAL(p->getFieldInfo(1).getHitCount(), 1u); + EXPECT_EQUAL(p->getFieldInfo(2).getHitOffset(), 0u); // invalid, but will never be used + EXPECT_EQUAL(p->getFieldInfo(2).getHitCount(), 0u); + EXPECT_EQUAL(p->getFieldInfo(3).getHitOffset(), 3u); + EXPECT_EQUAL(p->getFieldInfo(3).getHitCount(), 1u); +} + +TEST("testHit") { + // positions (0 - (2^24-1)) + assertHit(Hit(0, 0, 0), 0, 0, 0); + assertHit(Hit(256, 0, 1), 256, 0, 1); + assertHit(Hit(16777215, 0, -1), 16777215, 0, -1); + assertHit(Hit(16777216, 0, 1), 0, 1, 1); // overflow + + // contexts (0 - 255) + assertHit(Hit(0, 1, 1), 0, 1, 1); + assertHit(Hit(0, 255, 1), 0, 255, 1); + assertHit(Hit(0, 256, 1), 0, 0, 1); // overflow +} + +void assertInt8Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { + QueryTermSimple q(term, QueryTermSimple::WORD); + QueryTermSimple::RangeResult<int8_t> res = q.getRange<int8_t>(); + EXPECT_EQUAL(true, res.valid); + EXPECT_EQUAL(expAdjusted, res.adjusted); + EXPECT_EQUAL(expLow, (int64_t)res.low); + EXPECT_EQUAL(expHigh, (int64_t)res.high); +} + +void assertInt32Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { + QueryTermSimple q(term, QueryTermSimple::WORD); + QueryTermSimple::RangeResult<int32_t> res = q.getRange<int32_t>(); + EXPECT_EQUAL(true, res.valid); + EXPECT_EQUAL(expAdjusted, res.adjusted); + EXPECT_EQUAL(expLow, (int64_t)res.low); + EXPECT_EQUAL(expHigh, (int64_t)res.high); +} + +void assertInt64Range(const std::string &term, bool expAdjusted, int64_t expLow, int64_t expHigh) { + QueryTermSimple q(term, QueryTermSimple::WORD); + QueryTermSimple::RangeResult<int64_t> res = q.getRange<int64_t>(); + EXPECT_EQUAL(true, res.valid); + EXPECT_EQUAL(expAdjusted, res.adjusted); + EXPECT_EQUAL(expLow, 
(int64_t)res.low); + EXPECT_EQUAL(expHigh, (int64_t)res.high); +} + +TEST("requireThatInt8LimitsAreEnforced") { + //std::numeric_limits<int8_t>::min() -> -128 + //std::numeric_limits<int8_t>::max() -> 127 + + assertInt8Range("-129", true, -128, -128); + assertInt8Range("-128", false, -128, -128); + assertInt8Range("127", false, 127, 127); + assertInt8Range("128", true, 127, 127); + assertInt8Range("[-129;0]", true, -128, 0); + assertInt8Range("[-128;0]", false, -128, 0); + assertInt8Range("[0;127]", false, 0, 127); + assertInt8Range("[0;128]", true, 0, 127); + assertInt8Range("[-130;-129]", true, -128, -128); + assertInt8Range("[128;129]", true, 127, 127); + assertInt8Range("[-129;128]", true, -128, 127); +} + +TEST("requireThatInt32LimitsAreEnforced") { + //std::numeric_limits<int32_t>::min() -> -2147483648 + //std::numeric_limits<int32_t>::max() -> 2147483647 + + int64_t min = std::numeric_limits<int32_t>::min(); + int64_t max = std::numeric_limits<int32_t>::max(); + + assertInt32Range("-2147483649", true, min, min); + assertInt32Range("-2147483648", false, min, min); + assertInt32Range("2147483647", false, max, max); + assertInt32Range("2147483648", true, max, max); + assertInt32Range("[-2147483649;0]", true, min, 0); + assertInt32Range("[-2147483648;0]", false, min, 0); + assertInt32Range("[0;2147483647]", false, 0, max); + assertInt32Range("[0;2147483648]", true, 0, max); + assertInt32Range("[-2147483650;-2147483649]", true, min, min); + assertInt32Range("[2147483648;2147483649]", true, max, max); + assertInt32Range("[-2147483649;2147483648]", true, min, max); +} + +TEST("requireThatInt64LimitsAreEnforced") { + //std::numeric_limits<int64_t>::min() -> -9223372036854775808 + //std::numeric_limits<int64_t>::max() -> 9223372036854775807 + + int64_t min = std::numeric_limits<int64_t>::min(); + int64_t max = std::numeric_limits<int64_t>::max(); + + assertInt64Range("-9223372036854775809", false, min, min); + assertInt64Range("-9223372036854775808", false, min, 
min); + assertInt64Range("9223372036854775807", false, max, max); + assertInt64Range("9223372036854775808", false, max, max); + assertInt64Range("[-9223372036854775809;0]", false, min, 0); + assertInt64Range("[-9223372036854775808;0]", false, min, 0); + assertInt64Range("[0;9223372036854775807]", false, 0, max); + assertInt64Range("[0;9223372036854775808]", false, 0, max); + assertInt64Range("[-9223372036854775810;-9223372036854775809]", false, min, min); + assertInt64Range("[9223372036854775808;9223372036854775809]", false, max, max); + assertInt64Range("[-9223372036854775809;9223372036854775808]", false, min, max); +} + +TEST("require sensible rounding when using integer attributes.") { + assertInt64Range("1.2", false, 1, 1); + assertInt64Range("1.51", false, 2, 2); + assertInt64Range("2.49", false, 2, 2); +} + +TEST("require that we can take floating point values in range search too.") { + assertInt64Range("[1;2]", false, 1, 2); + assertInt64Range("[1.1;2.1]", false, 2, 2); + assertInt64Range("[1.9;3.9]", false, 2, 3); + assertInt64Range("[1.9;3.9]", false, 2, 3); + assertInt64Range("[1.0;3.0]", false, 1, 3); + assertInt64Range("<1.0;3.0>", false, 2, 2); + assertInt64Range("[500.0;1.7976931348623157E308]", false, 500, std::numeric_limits<int64_t>::max()); + assertInt64Range("[500.0;1.6976931348623157E308]", false, 500, std::numeric_limits<int64_t>::max()); + assertInt64Range("[-1.7976931348623157E308;500.0]", false, std::numeric_limits<int64_t>::min(), 500); + assertInt64Range("[-1.6976931348623157E308;500.0]", false, std::numeric_limits<int64_t>::min(), 500); + assertInt64Range("[10;-10]", false, 10, -10); + assertInt64Range("[10.0;-10.0]", false, 10, -10); + assertInt64Range("[1.6976931348623157E308;-1.6976931348623157E308]", false, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min()); + assertInt64Range("[1.7976931348623157E308;-1.7976931348623157E308]", false, std::numeric_limits<int64_t>::max(), std::numeric_limits<int64_t>::min()); +} 
+
+// The expectations show that an exclusive bound ('<' or '>') moves the limit
+// inward by one, so low may end up greater than high for an empty range.
+TEST("require that we handle empty range as expected") {
+    assertInt64Range("[1;1]", false, 1, 1);
+    assertInt64Range("<1;1]", false, 2, 1);
+    assertInt64Range("[0;1>", false, 0, 0);
+    assertInt64Range("[1;1>", false, 1, 0);
+    assertInt64Range("<1;1>", false, 2, 0);
+}
+
+// "[;;500]" carries no bounds, only a range limit: both the integer and the
+// double view must span the full type range and the limit must be 500.
+TEST("require that ascending range can be specified with limit only") {
+    int64_t low_integer = 0;
+    int64_t high_integer = 0;
+    double low_double = 0.0;
+    double high_double = 0.0;
+
+    EmptyQueryNodeResult eqnr;
+    QueryTerm ascending_query(eqnr, "[;;500]", "index", QueryTerm::WORD);
+
+    EXPECT_TRUE(ascending_query.getAsIntegerTerm(low_integer, high_integer));
+    EXPECT_TRUE(ascending_query.getAsDoubleTerm(low_double, high_double));
+    EXPECT_EQUAL(std::numeric_limits<int64_t>::min(), low_integer);
+    EXPECT_EQUAL(std::numeric_limits<int64_t>::max(), high_integer);
+    EXPECT_EQUAL(-std::numeric_limits<double>::max(), low_double);
+    EXPECT_EQUAL(std::numeric_limits<double>::max(), high_double);
+    EXPECT_EQUAL(500, ascending_query.getRangeLimit());
+}
+
+// Same as the ascending case, but with a negative range limit.
+TEST("require that descending range can be specified with limit only") {
+    int64_t low_integer = 0;
+    int64_t high_integer = 0;
+    double low_double = 0.0;
+    double high_double = 0.0;
+
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500]", "index", QueryTerm::WORD);
+
+    EXPECT_TRUE(descending_query.getAsIntegerTerm(low_integer, high_integer));
+    EXPECT_TRUE(descending_query.getAsDoubleTerm(low_double, high_double));
+    EXPECT_EQUAL(std::numeric_limits<int64_t>::min(), low_integer);
+    EXPECT_EQUAL(std::numeric_limits<int64_t>::max(), high_integer);
+    EXPECT_EQUAL(-std::numeric_limits<double>::max(), low_double);
+    EXPECT_EQUAL(std::numeric_limits<double>::max(), high_double);
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+}
+
+// Diversity attribute and max-per-group only: cutoff groups is expected to be
+// the uint32_t maximum and the cutoff is not strict.
+TEST("require that correctly specified diversity can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56;78]", "index", QueryTerm::WORD);
+    EXPECT_TRUE(descending_query.isValid());
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+    EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+    EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+    EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), descending_query.getDiversityCutoffGroups());
+    EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with cutoff groups can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93]", "index", QueryTerm::WORD);
+    EXPECT_TRUE(descending_query.isValid());
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+    EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+    EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+    EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups());
+    EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+// Same shape as the previous test with a different cutoff-groups value (13).
+// It previously reused the previous test's name, which made failure reports
+// ambiguous.
+TEST("require that correctly specified diversity with another cutoff groups value can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56;78;13]", "index", QueryTerm::WORD);
+    EXPECT_TRUE(descending_query.isValid());
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+    EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+    EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+    EXPECT_EQUAL(13u, descending_query.getDiversityCutoffGroups());
+    EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+// A non-numeric cutoff-groups field ("a13.9") leaves the term valid; the
+// cutoff-groups value is expected to be the uint32_t maximum.
+TEST("require that correctly specified diversity with incorrect cutoff groups can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56;78;a13.9]", "index", QueryTerm::WORD);
+    EXPECT_TRUE(descending_query.isValid());
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+    EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+    EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+    EXPECT_EQUAL(std::numeric_limits<uint32_t>::max(), descending_query.getDiversityCutoffGroups());
+    EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+// A cutoff strategy other than "strict" is expected to be non-strict.
+TEST("require that correctly specified diversity with cutoff strategy can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93;anything but strict]", "index", QueryTerm::WORD);
+    EXPECT_TRUE(descending_query.isValid());
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+    EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+    EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+    EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups());
+    EXPECT_FALSE(descending_query.getDiversityCutoffStrict());
+}
+
+TEST("require that correctly specified diversity with strict cutoff strategy can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56;78;93;strict]", "index", QueryTerm::WORD);
+    EXPECT_TRUE(descending_query.isValid());
+    EXPECT_EQUAL(-500, descending_query.getRangeLimit());
+    EXPECT_EQUAL("ab56", descending_query.getDiversityAttribute());
+    EXPECT_EQUAL(78u, descending_query.getMaxPerGroup());
+    EXPECT_EQUAL(93u, descending_query.getDiversityCutoffGroups());
+    EXPECT_TRUE(descending_query.getDiversityCutoffStrict());
+}
+
+// A diversity attribute without max-per-group is expected to yield an
+// invalid term.
+TEST("require that incorrectly specified diversity can be parsed") {
+    EmptyQueryNodeResult eqnr;
+    QueryTerm descending_query(eqnr, "[;;-500;ab56]", "index", QueryTerm::WORD);
+    EXPECT_FALSE(descending_query.isValid());
+}
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp
new file mode 100644
index 00000000000..b98e14604e3
--- /dev/null
+++ b/searchlib/src/tests/query/query_visitor_test.cpp
@@ -0,0 +1,114 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+// Unit tests for query_visitor.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("query_visitor_test");
+
+#include <vespa/searchlib/query/tree/intermediatenodes.h>
+#include <vespa/searchlib/query/tree/point.h>
+#include <vespa/searchlib/query/tree/queryvisitor.h>
+#include <vespa/searchlib/query/tree/simplequery.h>
+#include <vespa/searchlib/query/tree/termnodes.h>
+#include <vespa/vespalib/testkit/testapp.h>
+
+using namespace search::query;
+
+namespace {
+
+// Test application with a single check: each node type dispatches to the
+// matching QueryVisitor::visit() overload.
+class Test : public vespalib::TestApp {
+    void requireThatAllNodesCanBeVisited();
+
+    template <class T> void checkVisit(T *node);
+
+public:
+    int Main();
+};
+
+int
+Test::Main()
+{
+    TEST_INIT("query_visitor_test");
+
+    TEST_DO(requireThatAllNodesCanBeVisited());
+
+    TEST_DONE();
+}
+
+// Visitor that records, per node type T, whether visit(T &) was called.
+// The flag is a function-local static, so it is shared by every MyVisitor
+// instance; checkVisit resets it before use.
+class MyVisitor : public QueryVisitor
+{
+public:
+    template <typename T>
+    bool &isVisited() {
+        static bool b;
+        return b;
+    }
+
+    virtual void visit(And &) { isVisited<And>() = true; }
+    virtual void visit(AndNot &) { isVisited<AndNot>() = true; }
+    virtual void visit(Equiv &) { isVisited<Equiv>() = true; }
+    virtual void visit(NumberTerm &) { isVisited<NumberTerm>() = true; }
+    virtual void visit(LocationTerm &) { isVisited<LocationTerm>() = true; }
+    virtual void visit(Near &) { isVisited<Near>() = true; }
+    virtual void visit(ONear &) { isVisited<ONear>() = true; }
+    virtual void visit(Or &) { isVisited<Or>() = true; }
+    virtual void visit(Phrase &) { isVisited<Phrase>() = true; }
+    virtual void visit(PrefixTerm &) { isVisited<PrefixTerm>() = true; }
+    virtual void visit(RangeTerm &) { isVisited<RangeTerm>() = true; }
+    virtual void visit(Rank &) { isVisited<Rank>() = true; }
+    virtual void visit(StringTerm &) { isVisited<StringTerm>() = true; }
+    virtual void visit(SubstringTerm &) { isVisited<SubstringTerm>() = true; }
+    virtual void visit(SuffixTerm &) { isVisited<SuffixTerm>() = true; }
+    virtual void visit(WeakAnd &) { isVisited<WeakAnd>() = true; }
+    virtual void visit(WeightedSetTerm &)
+    { isVisited<WeightedSetTerm>() = true; }
+    virtual void visit(DotProduct &) { isVisited<DotProduct>() = true; }
+    virtual void visit(WandTerm &) { isVisited<WandTerm>() = true; }
+    virtual void visit(PredicateQuery &)
+    { isVisited<PredicateQuery>() = true; }
+    virtual void visit(RegExpTerm &) { isVisited<RegExpTerm>() = true; }
+};
+
+// Takes ownership of node (wrapped in Node::UP), clears T's visited flag,
+// lets the node accept a MyVisitor and asserts that visit(T &) was reached.
+template <class T>
+void Test::checkVisit(T *node) {
+    Node::UP query(node);
+    MyVisitor visitor;
+    visitor.isVisited<T>() = false;
+    query->accept(visitor);
+    ASSERT_TRUE(visitor.isVisited<T>());
+}
+
+// Runs checkVisit for one Simple* instance of each node type.
+// NOTE(review): Equiv and WeakAnd have visit() overloads in MyVisitor but no
+// checkVisit call here - confirm whether that is intentional.
+void Test::requireThatAllNodesCanBeVisited() {
+    checkVisit<And>(new SimpleAnd);
+    checkVisit<AndNot>(new SimpleAndNot);
+    checkVisit<Near>(new SimpleNear(0));
+    checkVisit<ONear>(new SimpleONear(0));
+    checkVisit<Or>(new SimpleOr);
+    checkVisit<Phrase>(new SimplePhrase("field", 0, Weight(42)));
+    checkVisit<WeightedSetTerm>(
+            new SimpleWeightedSetTerm("field", 0, Weight(42)));
+    checkVisit<DotProduct>(new SimpleDotProduct("field", 0, Weight(42)));
+    checkVisit<WandTerm>(
+            new SimpleWandTerm("field", 0, Weight(42), 57, 67, 77.7));
+    checkVisit<Rank>(new SimpleRank);
+    checkVisit<NumberTerm>(
+            new SimpleNumberTerm("0.42", "field", 0, Weight(0)));
+    const Location location(Point(10, 10), 20, 0);
+    checkVisit<LocationTerm>(
+            new SimpleLocationTerm(location, "field", 0, Weight(0)));
+    checkVisit<PrefixTerm>(new SimplePrefixTerm("t", "field", 0, Weight(0)));
+    checkVisit<RangeTerm>(
+            new SimpleRangeTerm(Range(0, 1), "field", 0, Weight(0)));
+    checkVisit<StringTerm>(new SimpleStringTerm("t", "field", 0, Weight(0)));
+    checkVisit<SubstringTerm>(
+            new SimpleSubstringTerm("t", "field", 0, Weight(0)));
+    checkVisit<SuffixTerm>(new SimpleSuffixTerm("t", "field", 0, Weight(0)));
+    checkVisit<PredicateQuery>(
+            new SimplePredicateQuery(PredicateQueryTerm::UP(),
+                                     "field", 0, Weight(0)));
+    checkVisit<RegExpTerm>(new SimpleRegExpTerm("t", "field", 0, Weight(0)));
+}
+
+} // namespace
+
+TEST_APPHOOK(Test);
diff --git 
a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp new file mode 100644 index 00000000000..b64a46e9b18 --- /dev/null +++ b/searchlib/src/tests/query/querybuilder_test.cpp @@ -0,0 +1,615 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for querybuilder. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("querybuilder_test"); + +#include <vespa/searchlib/parsequery/parse.h> +#include <vespa/searchlib/parsequery/simplequerystack.h> +#include <vespa/searchlib/query/tree/customtypevisitor.h> +#include <vespa/searchlib/query/tree/intermediatenodes.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/querytreecreator.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/searchlib/query/tree/termnodes.h> +#include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <string> + +using std::string; +using search::SimpleQueryStackDumpIterator; +using namespace search::query; + +namespace { + +template <class NodeTypes> void checkQueryTreeTypes(Node *node); + +const string str[] = { "foo", "bar", "baz", "qux", "quux", "corge", + "grault", "garply", "waldo", "fred", "plugh" }; +const string (&view)[11] = str; +const int32_t id[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 }; +const Weight weight[] = { Weight(1), Weight(2), Weight(3), Weight(4), + Weight(5), Weight(6), Weight(7), Weight(8), + Weight(9), Weight(10), Weight(11) }; +const size_t distance = 4; +const string int1 = "42"; +const string float1 = "3.14"; +const Range range(32, 64); +const Point position(100, 100); +const int max_distance = 20; +const uint32_t x_aspect = 0; +const Location location(position, max_distance, x_aspect); + +PredicateQueryTerm::UP 
getPredicateQueryTerm() { + PredicateQueryTerm::UP pqt(new PredicateQueryTerm); + pqt->addFeature("key", "value"); + pqt->addRangeFeature("key2", 42, 0xfff); + return pqt; +} + +template <class NodeTypes> +Node::UP createQueryTree() { + QueryBuilder<NodeTypes> builder; + builder.addAnd(9); + { + builder.addRank(2); + { + builder.addNear(2, distance); + { + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + builder.addSubstringTerm(str[1], view[1], id[1], weight[1]); + } + builder.addONear(2, distance); + { + builder.addSuffixTerm(str[2], view[2], id[2], weight[2]); + builder.addPrefixTerm(str[3], view[3], id[3], weight[3]); + } + } + builder.addOr(3); + { + builder.addPhrase(3, view[4], id[4], weight[4]); + { + builder.addStringTerm(str[4], view[4], id[4], weight[5]); + builder.addStringTerm(str[5], view[5], id[5], weight[6]); + builder.addStringTerm(str[6], view[6], id[6], weight[7]); + } + builder.addPhrase(2, view[4], id[4], weight[4]) + .setRanked(false); + { + builder.addStringTerm(str[4], view[4], id[4], weight[5]); + builder.addStringTerm(str[5], view[5], id[5], weight[6]); + } + builder.addAndNot(2); + { + builder.addNumberTerm(int1, view[7], id[7], weight[7]); + builder.addNumberTerm(float1, view[8], id[8], weight[8]) + .setRanked(false); + } + } + builder.addRangeTerm(range, view[9], id[9], weight[9]); + builder.addLocationTerm(location, view[10], id[10], weight[10]); + builder.addWeakAnd(2, 123, view[0]); + { + builder.addStringTerm(str[4], view[4], id[4], weight[4]); + builder.addStringTerm(str[5], view[5], id[5], weight[5]); + } + builder.addPredicateQuery(getPredicateQueryTerm(), + view[3], id[3], weight[3]); + builder.addDotProduct(3, view[2], id[2], weight[2]); + { + builder.addStringTerm(str[3], view[3], id[3], weight[3]); + builder.addStringTerm(str[4], view[4], id[4], weight[4]); + builder.addStringTerm(str[5], view[5], id[5], weight[5]); + } + builder.addWandTerm(2, view[0], id[0], weight[0], 57, 67, 77.7); + { + 
builder.addStringTerm(str[1], view[1], id[1], weight[1]); + builder.addStringTerm(str[2], view[2], id[2], weight[2]); + } + builder.addRegExpTerm(str[5], view[5], id[5], weight[5]); + } + Node::UP node = builder.build(); + ASSERT_TRUE(node.get()); + return node; +} + +template <class TermType> +bool compareTerms(const TermType &expected, const TermType &actual) { + return EXPECT_TRUE(expected == actual); +} +template <typename T> +bool compareTerms(const std::unique_ptr<T> &expected, + const std::unique_ptr<T> &actual) { + return EXPECT_TRUE(*expected == *actual); +} + +template <class Term> +bool checkTerm(const Term *term, const typename Term::Type &t, const string &f, + int32_t i, Weight w, bool ranked = true, + bool use_position_data = true) { + return EXPECT_TRUE(term != 0) && + (EXPECT_TRUE(compareTerms(t, term->getTerm())) & + EXPECT_EQUAL(f, term->getView()) & + EXPECT_EQUAL(i, term->getId()) & + EXPECT_EQUAL(w.percent(), term->getWeight().percent()) & + EXPECT_EQUAL(ranked, term->isRanked()) & + EXPECT_EQUAL(use_position_data, term->usePositionData())); +} + +template <class NodeTypes> +void checkQueryTreeTypes(Node *node) { + typedef typename NodeTypes::And And; + typedef typename NodeTypes::AndNot AndNot; + typedef typename NodeTypes::NumberTerm NumberTerm; + //typedef typename NodeTypes::NumberTerm FloatTrm; + typedef typename NodeTypes::Near Near; + typedef typename NodeTypes::ONear ONear; + typedef typename NodeTypes::Or Or; + typedef typename NodeTypes::Phrase Phrase; + typedef typename NodeTypes::PrefixTerm PrefixTerm; + typedef typename NodeTypes::RangeTerm RangeTerm; + typedef typename NodeTypes::Rank Rank; + typedef typename NodeTypes::StringTerm StringTerm; + //typedef typename NodeTypes::SubstringTerm SubstrTr; + typedef typename NodeTypes::SuffixTerm SuffixTerm; + typedef typename NodeTypes::LocationTerm LocationTerm; + //typedef typename NodeTypes::WeightedSetTerm WeightedSetTerm; + typedef typename NodeTypes::DotProduct DotProduct; + typedef 
typename NodeTypes::WandTerm WandTerm; + typedef typename NodeTypes::WeakAnd WeakAnd; + typedef typename NodeTypes::PredicateQuery PredicateQuery; + typedef typename NodeTypes::RegExpTerm RegExpTerm; + + ASSERT_TRUE(node); + And *and_node = dynamic_cast<And *>(node); + ASSERT_TRUE(and_node); + EXPECT_EQUAL(9u, and_node->getChildren().size()); + + + Rank *rank = dynamic_cast<Rank *>(and_node->getChildren()[0]); + ASSERT_TRUE(rank); + EXPECT_EQUAL(2u, rank->getChildren().size()); + + Near *near = dynamic_cast<Near *>(rank->getChildren()[0]); + ASSERT_TRUE(near); + EXPECT_EQUAL(2u, near->getChildren().size()); + EXPECT_EQUAL(distance, near->getDistance()); + StringTerm *string_term = + dynamic_cast<StringTerm *>(near->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[0], view[0], id[0], weight[0])); + SubstringTerm *substring_term = + dynamic_cast<SubstringTerm *>(near->getChildren()[1]); + EXPECT_TRUE(checkTerm(substring_term, str[1], view[1], id[1], weight[1])); + + ONear *onear = dynamic_cast<ONear *>(rank->getChildren()[1]); + ASSERT_TRUE(onear); + EXPECT_EQUAL(2u, onear->getChildren().size()); + EXPECT_EQUAL(distance, onear->getDistance()); + SuffixTerm *suffix_term = + dynamic_cast<SuffixTerm *>(onear->getChildren()[0]); + EXPECT_TRUE(checkTerm(suffix_term, str[2], view[2], id[2], weight[2])); + PrefixTerm *prefix_term = + dynamic_cast<PrefixTerm *>(onear->getChildren()[1]); + EXPECT_TRUE(checkTerm(prefix_term, str[3], view[3], id[3], weight[3])); + + + Or *or_node = dynamic_cast<Or *>(and_node->getChildren()[1]); + ASSERT_TRUE(or_node); + EXPECT_EQUAL(3u, or_node->getChildren().size()); + + Phrase *phrase = dynamic_cast<Phrase *>(or_node->getChildren()[0]); + ASSERT_TRUE(phrase); + EXPECT_TRUE(phrase->isRanked()); + EXPECT_EQUAL(weight[4].percent(), phrase->getWeight().percent()); + EXPECT_EQUAL(3u, phrase->getChildren().size()); + string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], 
id[4], weight[4])); + string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[4])); + string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[2]); + EXPECT_TRUE(checkTerm(string_term, str[6], view[6], id[6], weight[4])); + + phrase = dynamic_cast<Phrase *>(or_node->getChildren()[1]); + ASSERT_TRUE(phrase); + EXPECT_TRUE(!phrase->isRanked()); + EXPECT_EQUAL(weight[4].percent(), phrase->getWeight().percent()); + EXPECT_EQUAL(2u, phrase->getChildren().size()); + string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); + string_term = dynamic_cast<StringTerm *>(phrase->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[4])); + + AndNot *and_not = dynamic_cast<AndNot *>(or_node->getChildren()[2]); + ASSERT_TRUE(and_not); + EXPECT_EQUAL(2u, and_not->getChildren().size()); + NumberTerm *integer_term = + dynamic_cast<NumberTerm *>(and_not->getChildren()[0]); + EXPECT_TRUE(checkTerm(integer_term, int1, view[7], id[7], weight[7])); + NumberTerm *float_term = + dynamic_cast<NumberTerm *>(and_not->getChildren()[1]); + EXPECT_TRUE(checkTerm(float_term, float1, view[8], id[8], weight[8], + false)); + + + RangeTerm *range_term = + dynamic_cast<RangeTerm *>(and_node->getChildren()[2]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(checkTerm(range_term, range, view[9], id[9], weight[9])); + + LocationTerm *loc_term = + dynamic_cast<LocationTerm *>(and_node->getChildren()[3]); + ASSERT_TRUE(loc_term); + EXPECT_TRUE(checkTerm(loc_term, location, view[10], id[10], weight[10])); + + + WeakAnd *wand = dynamic_cast<WeakAnd *>(and_node->getChildren()[4]); + ASSERT_TRUE(wand != 0); + EXPECT_EQUAL(123u, wand->getMinHits()); + EXPECT_EQUAL(2u, wand->getChildren().size()); + string_term = dynamic_cast<StringTerm *>(wand->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], 
id[4], weight[4])); + string_term = dynamic_cast<StringTerm *>(wand->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5])); + + PredicateQuery *predicateQuery = + dynamic_cast<PredicateQuery *>(and_node->getChildren()[5]); + ASSERT_TRUE(predicateQuery); + PredicateQueryTerm::UP pqt(new PredicateQueryTerm); + EXPECT_TRUE(checkTerm(predicateQuery, getPredicateQueryTerm(), + view[3], id[3], weight[3])); + + DotProduct *dotProduct = + dynamic_cast<DotProduct *>(and_node->getChildren()[6]); + ASSERT_TRUE(dotProduct); + EXPECT_EQUAL(3u, dotProduct->getChildren().size()); + string_term = dynamic_cast<StringTerm *>(dotProduct->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[3], view[3], id[3], weight[3])); + string_term = dynamic_cast<StringTerm *>(dotProduct->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[4], view[4], id[4], weight[4])); + string_term = dynamic_cast<StringTerm *>(dotProduct->getChildren()[2]); + EXPECT_TRUE(checkTerm(string_term, str[5], view[5], id[5], weight[5])); + + WandTerm *wandTerm = dynamic_cast<WandTerm *>(and_node->getChildren()[7]); + ASSERT_TRUE(wandTerm); + EXPECT_EQUAL(57u, wandTerm->getTargetNumHits()); + EXPECT_EQUAL(67, wandTerm->getScoreThreshold()); + EXPECT_EQUAL(77.7, wandTerm->getThresholdBoostFactor()); + EXPECT_EQUAL(2u, wandTerm->getChildren().size()); + string_term = dynamic_cast<StringTerm *>(wandTerm->getChildren()[0]); + EXPECT_TRUE(checkTerm(string_term, str[1], view[1], id[1], weight[1])); + string_term = dynamic_cast<StringTerm *>(wandTerm->getChildren()[1]); + EXPECT_TRUE(checkTerm(string_term, str[2], view[2], id[2], weight[2])); + + RegExpTerm *regexp_term = + dynamic_cast<RegExpTerm *>(and_node->getChildren()[8]); + EXPECT_TRUE(checkTerm(regexp_term, str[5], view[5], id[5], weight[5])); +} + +struct AbstractTypes { + typedef search::query::And And; + typedef search::query::AndNot AndNot; + typedef search::query::NumberTerm NumberTerm; + typedef 
search::query::LocationTerm LocationTerm; + typedef search::query::Near Near; + typedef search::query::ONear ONear; + typedef search::query::Or Or; + typedef search::query::Phrase Phrase; + typedef search::query::PrefixTerm PrefixTerm; + typedef search::query::RangeTerm RangeTerm; + typedef search::query::Rank Rank; + typedef search::query::StringTerm StringTerm; + typedef search::query::SubstringTerm SubstringTerm; + typedef search::query::SuffixTerm SuffixTerm; + typedef search::query::WeightedSetTerm WeightedSetTerm; + typedef search::query::DotProduct DotProduct; + typedef search::query::WandTerm WandTerm; + typedef search::query::WeakAnd WeakAnd; + typedef search::query::PredicateQuery PredicateQuery; + typedef search::query::RegExpTerm RegExpTerm; +}; + +// Builds a tree with simplequery and checks that the results have the +// correct abstract types. +TEST("require that Query Trees Can Be Built") { + Node::UP node = createQueryTree<SimpleQueryNodeTypes>(); + checkQueryTreeTypes<AbstractTypes>(node.get()); +} + +// Builds a tree with simplequery and checks that the results have the +// correct concrete types. 
+TEST("require that Simple Query Trees Can Be Built") { + Node::UP node = createQueryTree<SimpleQueryNodeTypes>(); + checkQueryTreeTypes<SimpleQueryNodeTypes>(node.get()); +} + +struct MyAnd : And {}; +struct MyAndNot : AndNot {}; +struct MyEquiv : Equiv { + MyEquiv(int32_t i, Weight w) : Equiv(i, w) {} +}; +struct MyNear : Near { MyNear(size_t dist) : Near(dist) {} }; +struct MyONear : ONear { MyONear(size_t dist) : ONear(dist) {} }; +struct MyWeakAnd : WeakAnd { MyWeakAnd(uint32_t minHits, const vespalib::string & v) : WeakAnd(minHits, v) {} }; +struct MyOr : Or {}; +struct MyPhrase : Phrase { + MyPhrase(const string &f, int32_t i, Weight w) : Phrase(f, i, w) {} +}; +struct MyWeightedSetTerm : WeightedSetTerm { + MyWeightedSetTerm(const string &f, int32_t i, Weight w) : WeightedSetTerm(f, i, w) {} +}; +struct MyDotProduct : DotProduct { + MyDotProduct(const string &f, int32_t i, Weight w) : DotProduct(f, i, w) {} +}; +struct MyWandTerm : WandTerm { + MyWandTerm(const string &f, int32_t i, Weight w, uint32_t targetNumHits, + int64_t scoreThreshold, double thresholdBoostFactor) + : WandTerm(f, i, w, targetNumHits, scoreThreshold, thresholdBoostFactor) {} +}; +struct MyRank : Rank {}; +struct MyNumberTerm : NumberTerm { + MyNumberTerm(Type t, const string &f, int32_t i, Weight w) + : NumberTerm(t, f, i, w) { + } +}; +struct MyLocationTerm : LocationTerm { + MyLocationTerm(const Type &t, const string &f, int32_t i, Weight w) + : LocationTerm(t, f, i, w) { + } +}; +struct MyPrefixTerm : PrefixTerm { + MyPrefixTerm(const Type &t, const string &f, int32_t i, Weight w) + : PrefixTerm(t, f, i, w) { + } +}; +struct MyRangeTerm : RangeTerm { + MyRangeTerm(const Type &t, const string &f, int32_t i, Weight w) + : RangeTerm(t, f, i, w) { + } +}; +struct MyStringTerm : StringTerm { + MyStringTerm(const Type &t, const string &f, int32_t i, Weight w) + : StringTerm(t, f, i, w) { + } +}; +struct MySubstringTerm : SubstringTerm { + MySubstringTerm(const Type &t, const string &f, 
int32_t i, Weight w) + : SubstringTerm(t, f, i, w) { + } +}; +struct MySuffixTerm : SuffixTerm { + MySuffixTerm(const Type &t, const string &f, int32_t i, Weight w) + : SuffixTerm(t, f, i, w) { + } +}; +struct MyPredicateQuery : PredicateQuery { + MyPredicateQuery(Type &&t, const string &f, int32_t i, Weight w) + : PredicateQuery(std::move(t), f, i, w) { + } +}; +struct MyRegExpTerm : RegExpTerm { + MyRegExpTerm(const Type &t, const string &f, int32_t i, Weight w) + : RegExpTerm(t, f, i, w) { + } +}; + +struct MyQueryNodeTypes { + typedef MyAnd And; + typedef MyAndNot AndNot; + typedef MyEquiv Equiv; + typedef MyNumberTerm NumberTerm; + typedef MyLocationTerm LocationTerm; + typedef MyNear Near; + typedef MyONear ONear; + typedef MyOr Or; + typedef MyPhrase Phrase; + typedef MyPrefixTerm PrefixTerm; + typedef MyRangeTerm RangeTerm; + typedef MyRank Rank; + typedef MyStringTerm StringTerm; + typedef MySubstringTerm SubstringTerm; + typedef MySuffixTerm SuffixTerm; + typedef MyWeakAnd WeakAnd; + typedef MyWeightedSetTerm WeightedSetTerm; + typedef MyDotProduct DotProduct; + typedef MyWandTerm WandTerm; + typedef MyPredicateQuery PredicateQuery; + typedef MyRegExpTerm RegExpTerm; +}; + +TEST("require that Custom Query Trees Can Be Built") { + Node::UP node = createQueryTree<MyQueryNodeTypes>(); + checkQueryTreeTypes<MyQueryNodeTypes>(node.get()); +} + +TEST("require that Invalid Trees Cannot Be Built") { + // Incomplete tree. + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(1); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("Trying to build incomplete query tree.", builder.error()); + + // Adding a node after build() and before reset() is a no-op. + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("Trying to build incomplete query tree.", builder.error()); + + builder.reset(); + EXPECT_TRUE(builder.error().empty()); + + // Too many nodes. 
+ builder.addAnd(1); + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + builder.addStringTerm(str[1], view[1], id[1], weight[1]); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("QueryBuilder got invalid node structure.", builder.error()); + + // Adding an intermediate node after build() is also a no-op. + builder.addAnd(1); + ASSERT_TRUE(!builder.build().get()); + EXPECT_EQUAL("QueryBuilder got invalid node structure.", builder.error()); +} + +TEST("require that Term Index Can Be Added") { + const int term_index0 = 14; + const int term_index1 = 65; + + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(2); + builder.addStringTerm(str[0], view[0], id[0], weight[0]) + .setTermIndex(term_index0); + builder.addSubstringTerm(str[1], view[1], id[1], weight[1]) + .setTermIndex(term_index1); + + Node::UP node = builder.build(); + ASSERT_TRUE(!builder.hasError()); + Intermediate *intermediate = dynamic_cast<Intermediate *>(node.get()); + ASSERT_TRUE(intermediate); + ASSERT_TRUE(intermediate->getChildren().size() == 2); + Term *term = dynamic_cast<Term *>(intermediate->getChildren()[0]); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_index0, term->getTermIndex()); + term = dynamic_cast<Term *>(intermediate->getChildren()[1]); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_index1, term->getTermIndex()); +} + +TEST("require that Rank Can Be Turned Off") { + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(3); + builder.addStringTerm(str[0], view[0], id[0], weight[0]); + builder.addSubstringTerm(str[1], view[1], id[1], weight[1]) + .setRanked(false); + builder.addPhrase(2, view[2], id[2], weight[2]) + .setRanked(false); + { + builder.addStringTerm(str[2], view[2], id[3], weight[3]); + builder.addStringTerm(str[3], view[2], id[4], weight[4]); + } + + Node::UP node = builder.build(); + ASSERT_TRUE(!builder.hasError()); + Intermediate *intermediate = dynamic_cast<Intermediate *>(node.get()); + ASSERT_TRUE(intermediate); + 
ASSERT_TRUE(intermediate->getChildren().size() == 3); + Term *term = dynamic_cast<Term *>(intermediate->getChildren()[0]); + ASSERT_TRUE(term); + EXPECT_TRUE(term->isRanked()); + term = dynamic_cast<Term *>(intermediate->getChildren()[1]); + ASSERT_TRUE(term); + EXPECT_TRUE(!term->isRanked()); + Phrase *phrase = dynamic_cast<Phrase *>(intermediate->getChildren()[2]); + ASSERT_TRUE(phrase); + EXPECT_TRUE(!phrase->isRanked()); +} + +TEST("require that Using Position Data Can Be Turned Off") { + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(2); + builder.addStringTerm(str[0], view[0], id[0], weight[0]).setPositionData(false); + builder.addPhrase(2, view[1], id[1], weight[1]).setPositionData(false); + builder.addStringTerm(str[2], view[1], id[2], weight[2]); + builder.addStringTerm(str[3], view[1], id[3], weight[3]); + + Node::UP node = builder.build(); + ASSERT_TRUE(!builder.hasError()); + Intermediate * andNode = dynamic_cast<Intermediate *>(node.get()); + ASSERT_TRUE(andNode != NULL); + ASSERT_TRUE(andNode->getChildren().size() == 2); + Term * term = dynamic_cast<Term *>(andNode->getChildren()[0]); + ASSERT_TRUE(term != NULL); + EXPECT_TRUE(!term->usePositionData()); + Phrase * phrase = dynamic_cast<Phrase *>(andNode->getChildren()[1]); + ASSERT_TRUE(phrase != NULL); + EXPECT_TRUE(!phrase->usePositionData()); +} + +TEST("require that Weight Override Works Across Multiple Levels") { + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addPhrase(2, view[0], id[0], weight[0]); + + SimpleStringTerm &string_term_1 = + builder.addStringTerm(str[1], view[1], id[1], weight[1]); + EXPECT_EQUAL(weight[0].percent(), string_term_1.getWeight().percent()); + + builder.addAnd(2); + SimpleStringTerm &string_term_2 = + builder.addStringTerm(str[2], view[2], id[2], weight[2]); + EXPECT_EQUAL(weight[0].percent(), string_term_2.getWeight().percent()); +} + +TEST("require that Query Tree Creator Can Replicate Queries") { + Node::UP node = 
createQueryTree<SimpleQueryNodeTypes>(); + Node::UP new_node = QueryTreeCreator<MyQueryNodeTypes>::replicate(*node); + + checkQueryTreeTypes<SimpleQueryNodeTypes>(node.get()); + checkQueryTreeTypes<MyQueryNodeTypes>(new_node.get()); +} + +TEST("require that Query Tree Creator Can Create Queries From Stack") { + Node::UP node = createQueryTree<MyQueryNodeTypes>(); + string stackDump = StackDumpCreator::create(*node); + SimpleQueryStackDumpIterator iterator(stackDump); + + Node::UP new_node = + QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator); + checkQueryTreeTypes<SimpleQueryNodeTypes>(new_node.get()); +} + +TEST("require that All Range Syntaxes Work") { + + Range range0("[2,42.1]"); + Range range1(">10"); + Range range2("<45.23"); + + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(3); + builder.addRangeTerm(range0, "view", 0, Weight(0)); + builder.addRangeTerm(range1, "view", 0, Weight(0)); + builder.addRangeTerm(range2, "view", 0, Weight(0)); + Node::UP node = builder.build(); + + string stackDump = StackDumpCreator::create(*node); + SimpleQueryStackDumpIterator iterator(stackDump); + + Node::UP new_node = + QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator); + And *and_node = dynamic_cast<And *>(new_node.get()); + ASSERT_TRUE(and_node); + EXPECT_EQUAL(3u, and_node->getChildren().size()); + + RangeTerm *range_term = + dynamic_cast<RangeTerm *>(and_node->getChildren()[0]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(range0 == range_term->getTerm()); + + range_term = dynamic_cast<RangeTerm *>(and_node->getChildren()[1]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(range1 == range_term->getTerm()); + + range_term = dynamic_cast<RangeTerm *>(and_node->getChildren()[2]); + ASSERT_TRUE(range_term); + EXPECT_TRUE(range2 == range_term->getTerm()); +} + +TEST("require that empty intermediate node can be added") { + QueryBuilder<SimpleQueryNodeTypes> builder; + builder.addAnd(0); + Node::UP node = builder.build(); + ASSERT_TRUE(node.get()); + + string 
stackDump = StackDumpCreator::create(*node); + SimpleQueryStackDumpIterator iterator(stackDump); + + Node::UP new_node = + QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator); + And *and_node = dynamic_cast<And *>(new_node.get()); + ASSERT_TRUE(and_node); + EXPECT_EQUAL(0u, and_node->getChildren().size()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/stackdumpquerycreator_test.cpp b/searchlib/src/tests/query/stackdumpquerycreator_test.cpp new file mode 100644 index 00000000000..269947b7059 --- /dev/null +++ b/searchlib/src/tests/query/stackdumpquerycreator_test.cpp @@ -0,0 +1,116 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for stackdumpquerycreator. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("stackdumpquerycreator_test"); + +#include <vespa/searchlib/parsequery/parse.h> +#include <vespa/searchlib/parsequery/stackdumpiterator.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/stackdumpquerycreator.h> +#include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/testkit/testapp.h> + +using search::ParseItem; +using search::RawBuf; +using search::SimpleQueryStackDumpIterator; +using std::string; +using namespace search::query; + +namespace { + +template <typename T> +void append(RawBuf &buf, T i) { + buf.preAlloc(sizeof(T)); + buf.PutToInet(i); +} + +void appendString(RawBuf &buf, const string &s) { + buf.preAlloc(sizeof(uint32_t) + s.size()); + buf.appendCompressedPositiveNumber(s.size()); + buf.append(s.data(), s.size()); +} + +void appendNumTerm(RawBuf &buf, const string &term_string) { + uint8_t typefield = ParseItem::ITEM_NUMTERM | + ParseItem::IF_WEIGHT | + ParseItem::IF_UNIQUEID; + buf.append(typefield); + buf.appendCompressedNumber(2); // weight + buf.appendCompressedPositiveNumber(42); // id + appendString(buf, "view_name"); + 
appendString(buf, term_string); +} + +TEST("requireThatTooLargeNumTermIsTreatedAsFloat") { + const string term_string("99999999999999999999999999999999999"); + RawBuf buf(1024); + appendNumTerm(buf, term_string); + + SimpleQueryStackDumpIterator query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + Node::UP node = + StackDumpQueryCreator<SimpleQueryNodeTypes>::create(query_stack); + ASSERT_TRUE(node.get()); + NumberTerm *term = dynamic_cast<NumberTerm *>(node.get()); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_string, term->getTerm()); +} + +TEST("requireThatTooLargeFloatNumTermIsTreatedAsFloat") { + const string term_string = "1" + string(310, '0') + ".20"; + RawBuf buf(1024); + appendNumTerm(buf, term_string); + + SimpleQueryStackDumpIterator + query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + Node::UP node = + StackDumpQueryCreator<SimpleQueryNodeTypes>::create(query_stack); + ASSERT_TRUE(node.get()); + NumberTerm *term = dynamic_cast<NumberTerm *>(node.get()); + ASSERT_TRUE(term); + EXPECT_EQUAL(term_string, term->getTerm()); +} + +TEST("require that PredicateQueryItem stack dump item can be read") { + RawBuf buf(1024); + uint8_t typefield = ParseItem::ITEM_PREDICATE_QUERY; + buf.append(typefield); + appendString(buf, "view_name"); + + buf.appendCompressedNumber(2); + appendString(buf, "key1"); + appendString(buf, "value1"); + buf.Put64ToInet(-1ULL); + appendString(buf, "key2"); + appendString(buf, "value2"); + buf.Put64ToInet(0xffffULL); + + buf.appendCompressedNumber(2); + appendString(buf, "key3"); + buf.Put64ToInet(42ULL); + buf.Put64ToInet(-1ULL); + appendString(buf, "key4"); + buf.Put64ToInet(84ULL); + buf.Put64ToInet(0xffffULL); + + SimpleQueryStackDumpIterator + query_stack(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + Node::UP node = + StackDumpQueryCreator<SimpleQueryNodeTypes>::create(query_stack); + ASSERT_TRUE(node.get()); + PredicateQuery *p = dynamic_cast<PredicateQuery *>(node.get()); + 
ASSERT_TRUE(p); + const PredicateQueryTerm &term = *p->getTerm(); + ASSERT_EQUAL(2u, term.getFeatures().size()); + ASSERT_EQUAL(2u, term.getRangeFeatures().size()); + ASSERT_EQUAL("value1", term.getFeatures()[0].getValue()); + ASSERT_EQUAL(0xffffffffffffffffULL, + term.getFeatures()[0].getSubQueryBitmap()); + ASSERT_EQUAL("key2", term.getFeatures()[1].getKey()); + ASSERT_EQUAL(42u, term.getRangeFeatures()[0].getValue()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/query/templatetermvisitor_test.cpp b/searchlib/src/tests/query/templatetermvisitor_test.cpp new file mode 100644 index 00000000000..369266f5b2d --- /dev/null +++ b/searchlib/src/tests/query/templatetermvisitor_test.cpp @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for templatetermvisitor. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("templatetermvisitor_test"); + +#include <vespa/searchlib/query/tree/intermediatenodes.h> +#include <vespa/searchlib/query/tree/templatetermvisitor.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/tree/termnodes.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search::query; + +namespace { + +class MyVisitor; + +class Test : public vespalib::TestApp { + void requireThatAllTermsCanBeVisited(); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("templatetermvisitor_test"); + + TEST_DO(requireThatAllTermsCanBeVisited()); + + TEST_DONE(); +} + +class MyVisitor : public TemplateTermVisitor<MyVisitor, SimpleQueryNodeTypes> +{ +public: + template <typename T> + bool &isVisited() { + static bool b; + return b; + } + + template <class TermType> + void visitTerm(TermType &) { isVisited<TermType>() = true; } +}; + +template <class T> +bool checkVisit(T *q) { + Node::UP query(q); + MyVisitor visitor; + visitor.isVisited<T>() = false; + 
query->accept(visitor); + return visitor.isVisited<T>(); +} + +template <class T> +bool checkVisit() { + return checkVisit(new T(typename T::Type(), "field", 0, Weight(0))); +} + +void Test::requireThatAllTermsCanBeVisited() { + EXPECT_TRUE(checkVisit<SimpleNumberTerm>()); + EXPECT_TRUE(checkVisit<SimpleLocationTerm>()); + EXPECT_TRUE(checkVisit<SimplePrefixTerm>()); + EXPECT_TRUE(checkVisit<SimpleRangeTerm>()); + EXPECT_TRUE(checkVisit<SimpleStringTerm>()); + EXPECT_TRUE(checkVisit<SimpleSubstringTerm>()); + EXPECT_TRUE(checkVisit<SimpleSuffixTerm>()); + EXPECT_TRUE(checkVisit<SimplePredicateQuery>()); + EXPECT_TRUE(checkVisit<SimpleRegExpTerm>()); + EXPECT_TRUE(checkVisit(new SimplePhrase("field", 0, Weight(0)))); + EXPECT_TRUE(!checkVisit(new SimpleAnd)); + EXPECT_TRUE(!checkVisit(new SimpleAndNot)); + EXPECT_TRUE(!checkVisit(new SimpleEquiv(17, Weight(100)))); + EXPECT_TRUE(!checkVisit(new SimpleNear(2))); + EXPECT_TRUE(!checkVisit(new SimpleONear(2))); + EXPECT_TRUE(!checkVisit(new SimpleOr)); + EXPECT_TRUE(!checkVisit(new SimpleRank)); +} + +} // namespace + +TEST_APPHOOK(Test); +#include <vespa/vespalib/testkit/testapp.h> diff --git a/searchlib/src/tests/queryeval/.gitignore b/searchlib/src/tests/queryeval/.gitignore new file mode 100644 index 00000000000..7039566e7c2 --- /dev/null +++ b/searchlib/src/tests/queryeval/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +*_test +*_bench +searchlib_queryeval_test_app diff --git a/searchlib/src/tests/queryeval/CMakeLists.txt b/searchlib/src/tests/queryeval/CMakeLists.txt new file mode 100644 index 00000000000..35496b7f99a --- /dev/null +++ b/searchlib/src/tests/queryeval/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_queryeval_test_app + SOURCES + queryeval.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_queryeval_test_app COMMAND searchlib_queryeval_test_app) diff --git a/searchlib/src/tests/queryeval/DESC b/searchlib/src/tests/queryeval/DESC new file mode 100644 index 00000000000..15e6efd489d --- /dev/null +++ b/searchlib/src/tests/queryeval/DESC @@ -0,0 +1 @@ +queryeval test. Take a look at queryeval.cpp for details. diff --git a/searchlib/src/tests/queryeval/FILES b/searchlib/src/tests/queryeval/FILES new file mode 100644 index 00000000000..d082d6f8725 --- /dev/null +++ b/searchlib/src/tests/queryeval/FILES @@ -0,0 +1 @@ +queryeval.cpp diff --git a/searchlib/src/tests/queryeval/blueprint/.cvsignore b/searchlib/src/tests/queryeval/blueprint/.cvsignore new file mode 100644 index 00000000000..a8da5289575 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +blueprint_test diff --git a/searchlib/src/tests/queryeval/blueprint/.gitignore b/searchlib/src/tests/queryeval/blueprint/.gitignore new file mode 100644 index 00000000000..da4bf633103 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/.gitignore @@ -0,0 +1,8 @@ +*_test +.depend +Makefile +lhs.out +rhs.out +searchlib_blueprint_test_app +searchlib_intermediate_blueprints_test_app +searchlib_leaf_blueprints_test_app diff --git a/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt new file mode 100644 index 00000000000..88ba3deeb29 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_blueprint_test_app + SOURCES + blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_blueprint_test_app COMMAND searchlib_blueprint_test_app || diff -u lhs.out rhs.out) +vespa_add_executable(searchlib_leaf_blueprints_test_app + SOURCES + leaf_blueprints_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_leaf_blueprints_test_app COMMAND searchlib_leaf_blueprints_test_app || diff -u lhs.out rhs.out) +vespa_add_executable(searchlib_intermediate_blueprints_test_app + SOURCES + intermediate_blueprints_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_intermediate_blueprints_test_app COMMAND searchlib_intermediate_blueprints_test_app || diff -u lhs.out rhs.out) diff --git a/searchlib/src/tests/queryeval/blueprint/DESC b/searchlib/src/tests/queryeval/blueprint/DESC new file mode 100644 index 00000000000..a2634c017bd --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/DESC @@ -0,0 +1 @@ +blueprint test. Take a look at blueprint_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/blueprint/FILES b/searchlib/src/tests/queryeval/blueprint/FILES new file mode 100644 index 00000000000..89c566c5aea --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/FILES @@ -0,0 +1 @@ +blueprint_test.cpp diff --git a/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp new file mode 100644 index 00000000000..79fec3770b3 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/blueprint_test.cpp @@ -0,0 +1,766 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("blueprint_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/vespalib/objects/objectdumper.h> +#include <vespa/vespalib/objects/visit.h> + +#include "mysearch.h" + +using namespace search::queryeval; +using namespace search::fef; + +namespace { + +//----------------------------------------------------------------------------- + +class MyOr : public IntermediateBlueprint +{ +private: +public: + virtual HitEstimate combine(const std::vector<HitEstimate> &data) const { + return max(data); + } + + virtual FieldSpecBaseList exposeFields() const { + return mixChildrenFields(); + } + + virtual void sort(std::vector<Blueprint*> &children) const { + std::sort(children.begin(), children.end(), GreaterEstimate()); + } + + virtual bool inheritStrict(size_t i) const { + (void)i; + return true; + } + + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("or", subSearches, &md, strict)); + } + + static MyOr& create() { return *(new MyOr()); } + MyOr& add(Blueprint *n) { addChild(UP(n)); return *this; } + MyOr& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + + +class OtherOr : public OrBlueprint +{ +private: +public: + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("or", subSearches, &md, strict)); + } + + static OtherOr& create() { return *(new OtherOr()); } + OtherOr& add(Blueprint *n) { addChild(UP(n)); return *this; } + OtherOr& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + +//----------------------------------------------------------------------------- + +class MyAnd : public AndBlueprint +{ 
+private: +public: + virtual HitEstimate combine(const std::vector<HitEstimate> &data) const { + return min(data); + } + + virtual FieldSpecBaseList exposeFields() const { + return FieldSpecBaseList(); + } + + virtual bool inheritStrict(size_t i) const { + return (i == 0); + } + + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("and", subSearches, &md, strict)); + } + + static MyAnd& create() { return *(new MyAnd()); } + MyAnd& add(Blueprint *n) { addChild(UP(n)); return *this; } + MyAnd& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + + +class OtherAnd : public AndBlueprint +{ +private: +public: + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("and", subSearches, &md, strict)); + } + + static OtherAnd& create() { return *(new OtherAnd()); } + OtherAnd& add(Blueprint *n) { addChild(UP(n)); return *this; } + OtherAnd& add(Blueprint &n) { addChild(UP(&n)); return *this; } +}; + +class OtherAndNot : public AndNotBlueprint +{ +public: + virtual SearchIterator::UP + createIntermediateSearch(const MultiSearch::Children &subSearches, + bool strict, MatchData &md) const + { + return SearchIterator::UP(new MySearch("andnot", subSearches, &md, strict)); + } + + static OtherAndNot& create() { return *(new OtherAndNot()); } + OtherAndNot& add(Blueprint *n) { addChild(UP(n)); return *this; } + OtherAndNot& add(Blueprint &n) { addChild(UP(&n)); return *this; } + +}; + +//----------------------------------------------------------------------------- + +struct MyTerm : SimpleLeafBlueprint { + MyTerm(const FieldSpecBaseList &fields, uint32_t hitEstimate) : SimpleLeafBlueprint(fields) { + setEstimate(HitEstimate(hitEstimate, false)); + } + virtual SearchIterator::UP createLeafSearch(const 
search::fef::TermFieldMatchDataArray &, bool) const { + return SearchIterator::UP(); + } +}; + +//----------------------------------------------------------------------------- + +} // namespace <unnamed> + +class Test : public vespalib::TestApp +{ +private: + MatchData::UP _md; + + static Blueprint::UP ap(Blueprint *b) { return Blueprint::UP(b); } + static Blueprint::UP ap(Blueprint &b) { return Blueprint::UP(&b); } + + SearchIterator::UP create(const Blueprint &blueprint); + bool check_equal(const SearchIterator &a, const SearchIterator &b); + bool check_equal(const Blueprint &a, const Blueprint &b); + bool check_not_equal(const SearchIterator &a, const SearchIterator &b); + bool check_not_equal(const Blueprint &a, const Blueprint &b); + +public: + Test() + : vespalib::TestApp(), + _md(MatchData::makeTestInstance(0, 100, 10)) + { + } + Blueprint::UP buildBlueprint1(); + Blueprint::UP buildBlueprint2(); + void testBlueprintBuilding(); + void testHitEstimateCalculation(); + void testHitEstimatePropagation(); + void testMatchDataPropagation(); + void testChildSorting(); + void testChildAndNotCollapsing(); + void testChildAndCollapsing(); + void testChildOrCollapsing(); + void testSearchCreation(); + void testBlueprintMakeNew(); + void requireThatAsStringWorks(); + void requireThatVisitMembersWorks(); + void requireThatDocIdLimitInjectionWorks(); + int Main(); +}; + +SearchIterator::UP +Test::create(const Blueprint &blueprint) +{ + const_cast<Blueprint &>(blueprint).fetchPostings(true); + SearchIterator::UP search = blueprint.createSearch(*_md, true); + MySearch::verifyAndInfer(search.get(), *_md); + return search; +} + +bool +Test::check_equal(const SearchIterator &a, const SearchIterator &b) +{ + return EXPECT_EQUAL(a.asString(), b.asString()); +} + +bool +Test::check_equal(const Blueprint &a, const Blueprint &b) +{ + SearchIterator::UP searchA = create(a); + SearchIterator::UP searchB = create(b); + TEST_STATE("check_equal"); + bool ok = check_equal(*searchA, 
*searchB); + return ok; +} + +bool +Test::check_not_equal(const SearchIterator &a, const SearchIterator &b) +{ + return EXPECT_NOT_EQUAL(a.asString(), b.asString()); +} + +bool +Test::check_not_equal(const Blueprint &a, const Blueprint &b) +{ + SearchIterator::UP searchA = create(a); + SearchIterator::UP searchB = create(b); + TEST_STATE("check_not_equal"); + bool ok = check_not_equal(*searchA, *searchB); + return ok; +} + +Blueprint::UP +Test::buildBlueprint1() +{ + return ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(10).addField(1, 11).create()) + .add(MyLeafSpec(20).addField(1, 21).create()) + .add(MyLeafSpec(30).addField(1, 31).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(100).addField(2, 22).create()) + .add(MyLeafSpec(200).addField(2, 42).create()) + ) + ); +} + +Blueprint::UP +Test::buildBlueprint2() +{ + return ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(10).addField(1, 11).create()) + .add(MyLeafSpec(20).addField(1, 21).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(100).addField(2, 22).create()) + .add(MyLeafSpec(200).addField(2, 32).create()) + .add(MyLeafSpec(300).addField(2, 42).create()) + ) + ); +} + +void +Test::testBlueprintBuilding() +{ + Blueprint::UP root1 = buildBlueprint1(); + Blueprint::UP root2 = buildBlueprint2(); + SearchIterator::UP search1 = create(*root1); + SearchIterator::UP search2 = create(*root2); + // fprintf(stderr, "%s\n", search1->asString().c_str()); + // fprintf(stderr, "%s\n", search2->asString().c_str()); +} + +void +Test::testHitEstimateCalculation() +{ + { + Blueprint::UP leaf = ap(MyLeafSpec(37).create()); + EXPECT_EQUAL(37u, leaf->getState().estimate().estHits); + EXPECT_EQUAL(0u, leaf->getState().numFields()); + } + { + Blueprint::UP a1 = ap(MyAnd::create() + .add(MyLeafSpec(7).addField(1, 11).create()) + .add(MyLeafSpec(4).addField(1, 21).create()) + .add(MyLeafSpec(6).addField(1, 31).create())); + EXPECT_EQUAL(4u, a1->getState().estimate().estHits); + } + { + Blueprint::UP 
a2 = ap(MyAnd::create() + .add(MyLeafSpec(4).addField(1, 1).create()) + .add(MyLeafSpec(7).addField(2, 2).create()) + .add(MyLeafSpec(6).addField(3, 3).create())); + EXPECT_EQUAL(4u, a2->getState().estimate().estHits); + } + { + Blueprint::UP o1 = ap(MyOr::create() + .add(MyLeafSpec(7).addField(1, 11).create()) + .add(MyLeafSpec(4).addField(1, 21).create()) + .add(MyLeafSpec(6).addField(1, 31).create())); + EXPECT_EQUAL(7u, o1->getState().estimate().estHits); + } + { + Blueprint::UP o2 = ap(MyOr::create() + .add(MyLeafSpec(4).addField(1, 1).create()) + .add(MyLeafSpec(7).addField(2, 2).create()) + .add(MyLeafSpec(6).addField(3, 3).create())); + EXPECT_EQUAL(7u, o2->getState().estimate().estHits); + } + { + Blueprint::UP a = ap(MyAnd::create() + .add(MyLeafSpec(0).create()) + .add(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, a->getState().estimate().estHits); + EXPECT_EQUAL(true, a->getState().estimate().empty); + } + { + Blueprint::UP o = ap(MyOr::create() + .add(MyLeafSpec(0).create()) + .add(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, o->getState().estimate().estHits); + EXPECT_EQUAL(false, o->getState().estimate().empty); + } + { + Blueprint::UP tree1 = buildBlueprint1(); + EXPECT_EQUAL(30u, tree1->getState().estimate().estHits); + + Blueprint::UP tree2 = buildBlueprint2(); + EXPECT_EQUAL(20u, tree2->getState().estimate().estHits); + } +} + +void +Test::testHitEstimatePropagation() +{ + MyLeaf *leaf1 = new MyLeaf(FieldSpecBaseList()); + leaf1->estimate(10); + + MyLeaf *leaf2 = new MyLeaf(FieldSpecBaseList()); + leaf2->estimate(20); + + MyLeaf *leaf3 = new MyLeaf(FieldSpecBaseList()); + leaf3->estimate(30); + + MyOr *parent = new MyOr(); + MyOr *grandparent = new MyOr(); + + Blueprint::UP root(grandparent); + + parent->addChild(ap(leaf1)); + parent->addChild(ap(leaf3)); + grandparent->addChild(ap(leaf2)); + grandparent->addChild(ap(parent)); + EXPECT_EQUAL(30u, root->getState().estimate().estHits); + + // edit + leaf3->estimate(50); + 
EXPECT_EQUAL(50u, root->getState().estimate().estHits); + + // remove + ASSERT_TRUE(parent->childCnt() == 2); + Blueprint::UP tmp = parent->removeChild(1); + ASSERT_TRUE(tmp.get() == leaf3); + EXPECT_EQUAL(1u, parent->childCnt()); + EXPECT_EQUAL(20u, root->getState().estimate().estHits); + + // add + leaf3->estimate(25); + EXPECT_EQUAL(20u, root->getState().estimate().estHits); + parent->addChild(std::move(tmp)); + EXPECT_TRUE(tmp.get() == 0); + EXPECT_EQUAL(25u, root->getState().estimate().estHits); +} + +void +Test::testMatchDataPropagation() +{ + { + Blueprint::UP leaf = ap(MyLeafSpec(0, true).create()); + EXPECT_EQUAL(0u, leaf->getState().numFields()); + } + { + Blueprint::UP leaf = ap(MyLeafSpec(42) + .addField(1, 41) + .addField(2, 72).create()); + EXPECT_EQUAL(42u, leaf->getState().estimate().estHits); + ASSERT_TRUE(leaf->getState().numFields() == 2); + EXPECT_EQUAL(1u, leaf->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, leaf->getState().field(1).getFieldId()); + EXPECT_EQUAL(41u, leaf->getState().field(0).getHandle()); + EXPECT_EQUAL(72u, leaf->getState().field(1).getHandle()); + } + { + Blueprint::UP a = ap(MyAnd::create() + .add(MyLeafSpec(7).addField(1, 11).create()) + .add(MyLeafSpec(4).addField(1, 21).create()) + .add(MyLeafSpec(6).addField(1, 31).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + } + { + MyOr &o = MyOr::create() + .add(MyLeafSpec(1).addField(1, 1).create()) + .add(MyLeafSpec(2).addField(2, 2).create()); + + Blueprint::UP a = ap(o); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(2u, a->getState().estimate().estHits); + + o.add(MyLeafSpec(5).addField(2, 2).create()); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + 
EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(5u, a->getState().estimate().estHits); + + o.add(MyLeafSpec(5).addField(2, 32).create()); + EXPECT_EQUAL(0u, a->getState().numFields()); + o.removeChild(3); + EXPECT_EQUAL(2u, a->getState().numFields()); + o.add(MyLeafSpec(0, true).create()); + EXPECT_EQUAL(0u, a->getState().numFields()); + } +} + +void +Test::testChildAndNotCollapsing() +{ + Blueprint::UP unsorted = ap(OtherAndNot::create() + .add(OtherAndNot::create() + .add(OtherAndNot::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ); + + Blueprint::UP sorted = ap(OtherAndNot::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + .add(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ); + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + +void +Test::testChildAndCollapsing() +{ + Blueprint::UP unsorted = ap(OtherAnd::create() + .add(OtherAnd::create() + .add(OtherAnd::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + 
.add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ) + .add(OtherAnd::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + ); + + Blueprint::UP sorted = ap(OtherAnd::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ); + + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + +void +Test::testChildOrCollapsing() +{ + Blueprint::UP unsorted = ap(OtherOr::create() + .add(OtherOr::create() + .add(OtherOr::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(OtherOr::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + ) + .add(OtherOr::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + ); + + Blueprint::UP sorted = ap(OtherOr::create() + .add(MyLeafSpec(300).addField(1, 31).create()) + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(30).addField(3, 73).create()) + 
.add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(1).addField(2, 42).create()) + ); + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + +void +Test::testChildSorting() +{ + Blueprint::UP unsorted = ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + .add(MyLeafSpec(300).addField(1, 31).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(1).addField(2, 42).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(3).addField(2, 62).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + ); + + Blueprint::UP sorted = ap(MyAnd::create() + .add(MyOr::create() + .add(MyLeafSpec(3).addField(2, 62).create()) + .add(MyLeafSpec(2).addField(2, 52).create()) + .add(MyLeafSpec(1).addField(2, 42).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(30).addField(3, 73).create()) + .add(MyLeafSpec(20).addField(3, 83).create()) + .add(MyLeafSpec(10).addField(3, 93).create()) + ) + .add(MyOr::create() + .add(MyLeafSpec(300).addField(1, 31).create()) + .add(MyLeafSpec(200).addField(1, 11).create()) + .add(MyLeafSpec(100).addField(1, 21).create()) + ) + ); + + TEST_DO(check_not_equal(*sorted, *unsorted)); + unsorted = Blueprint::optimize(std::move(unsorted)); + TEST_DO(check_equal(*sorted, *unsorted)); +} + + +void +Test::testSearchCreation() +{ + { + Blueprint::UP l = ap(MyLeafSpec(3) + .addField(1, 1) + .addField(2, 2) + .addField(3, 3).create()); + SearchIterator::UP leafsearch = create(*l); + + MySearch *lw = new MySearch("leaf", true, true); + lw->addHandle(1).addHandle(2).addHandle(3); + 
SearchIterator::UP wantleaf(lw); + + TEST_DO(check_equal(*wantleaf, *leafsearch)); + } + { + Blueprint::UP a = ap(MyAnd::create() + .add(MyLeafSpec(1).addField(1, 1).create()) + .add(MyLeafSpec(2).addField(2, 2).create())); + SearchIterator::UP andsearch = create(*a); + + MySearch *l1 = new MySearch("leaf", true, true); + MySearch *l2 = new MySearch("leaf", true, false); + l1->addHandle(1); + l2->addHandle(2); + MySearch *aw = new MySearch("and", false, true); + aw->add(l1); + aw->add(l2); + SearchIterator::UP wanted(aw); + TEST_DO(check_equal(*wanted, *andsearch)); + } + { + Blueprint::UP o = ap(MyOr::create() + .add(MyLeafSpec(1).addField(1, 11).create()) + .add(MyLeafSpec(2).addField(2, 22).create())); + SearchIterator::UP orsearch = create(*o); + + MySearch *l1 = new MySearch("leaf", true, true); + MySearch *l2 = new MySearch("leaf", true, true); + l1->addHandle(11); + l2->addHandle(22); + MySearch *ow = new MySearch("or", false, true); + ow->add(l1); + ow->add(l2); + SearchIterator::UP wanted(ow); + TEST_DO(check_equal(*wanted, *orsearch)); + } +} + +template<typename T> +Blueprint::UP makeNew(T *orig) +{ + return Blueprint::UP(new T(*orig)); +} + +void +Test::testBlueprintMakeNew() +{ + Blueprint::UP orig = ap(MyOr::create() + .add(MyLeafSpec(1).addField(1, 11).create()) + .add(MyLeafSpec(2).addField(2, 22).create())); + orig->setSourceId(42); + MyOr *myOr = dynamic_cast<MyOr*>(orig.get()); + ASSERT_TRUE(myOr != 0); + Blueprint::UP copy1 = makeNew(myOr); + Blueprint::UP copy2 = makeNew(myOr); + TEST_DO(check_equal(*copy1, *copy2)); + TEST_DO(check_not_equal(*orig, *copy1)); + TEST_DO(check_not_equal(*orig, *copy2)); + EXPECT_TRUE(dynamic_cast<MyOr*>(copy1.get()) != 0); + EXPECT_TRUE(dynamic_cast<MyOr*>(copy2.get()) != 0); + EXPECT_EQUAL(42u, orig->getSourceId()); + EXPECT_EQUAL(42u, copy1->getSourceId()); + EXPECT_EQUAL(2u, orig->getState().numFields()); + EXPECT_EQUAL(0u, copy1->getState().numFields()); +} + +vespalib::string +getExpectedBlueprint() +{ + 
return "(anonymous namespace)::MyOr {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 5\n" + " handle: 7\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 9\n" + " tree_size: 2\n" + " allow_termwise_eval: 0\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " children: std::vector {\n" + " [0]: (anonymous namespace)::MyTerm {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 5\n" + " handle: 7\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 9\n" + " tree_size: 1\n" + " allow_termwise_eval: 1\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " }\n" + " }\n" + "}\n"; +} + +struct BlueprintFixture +{ + MyOr _blueprint; + BlueprintFixture() : _blueprint() { + _blueprint.add(new MyTerm(FieldSpecBaseList().add(FieldSpecBase(5, 7)), 9)); + } +}; + +void +Test::requireThatAsStringWorks() +{ + BlueprintFixture f; + EXPECT_EQUAL(getExpectedBlueprint(), f._blueprint.asString()); +} + +void +Test::requireThatVisitMembersWorks() +{ + BlueprintFixture f; + vespalib::ObjectDumper dumper; + visit(dumper, "", &f._blueprint); + EXPECT_EQUAL(getExpectedBlueprint(), dumper.toString()); +} + +void +Test::requireThatDocIdLimitInjectionWorks() +{ + BlueprintFixture f; + ASSERT_GREATER(f._blueprint.childCnt(), 0u); + const MyTerm &term = dynamic_cast<MyTerm&>(f._blueprint.getChild(0)); + EXPECT_EQUAL(0u, term.get_docid_limit()); + f._blueprint.setDocIdLimit(1000); + EXPECT_EQUAL(1000u, term.get_docid_limit()); +} + +int +Test::Main() +{ + TEST_DEBUG("lhs.out", "rhs.out"); + TEST_INIT("blueprint_test"); + testBlueprintBuilding(); + testHitEstimateCalculation(); + testHitEstimatePropagation(); + testMatchDataPropagation(); + testChildSorting(); + testChildAndNotCollapsing(); + testChildAndCollapsing(); + testChildOrCollapsing(); + testSearchCreation(); + 
testBlueprintMakeNew(); + requireThatAsStringWorks(); + requireThatVisitMembersWorks(); + requireThatDocIdLimitInjectionWorks(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp new file mode 100644 index 00000000000..161537104e0 --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/intermediate_blueprints_test.cpp @@ -0,0 +1,1332 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("blueprint_test"); + +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/searchlib/queryeval/equiv_blueprint.h> +#include <vespa/searchlib/queryeval/searchable.h> + +#include "mysearch.h" + +#include <vespa/searchlib/queryeval/multisearch.h> +#include <vespa/searchlib/queryeval/andnotsearch.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/nearsearch.h> +#include <vespa/searchlib/queryeval/ranksearch.h> +#include <vespa/searchlib/queryeval/wand/weak_and_search.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/vespalib/io/fileutil.h> +#include <vespa/searchlib/test/diskindex/testdiskindex.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/diskindex/zcpostingiterators.h> + +#include <algorithm> + +using namespace search::queryeval; +using namespace search::fef; +using namespace search::query; + +struct WeightOrder { + bool operator()(const wand::Term &t1, const wand::Term &t2) const { + return (t1.weight < t2.weight); 
+ } +}; + +Blueprint::UP ap(Blueprint *b) { return Blueprint::UP(b); } +Blueprint::UP ap(Blueprint &b) { return Blueprint::UP(&b); } + +TEST("test AndNot Blueprint") { + AndNotBlueprint b; + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + } + { + AndNotBlueprint a; + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(10).create()); + Blueprint::UP c2 = ap(MyLeafSpec(20).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c3.get(), children[1]); + EXPECT_EQUAL(c4.get(), children[2]); + EXPECT_EQUAL(c2.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test And Blueprint") { + AndBlueprint b; + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, 
b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(5u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + } + { + AndBlueprint a; + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(40).create()); + Blueprint::UP c3 = ap(MyLeafSpec(10).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c3.get(), children[0]); + EXPECT_EQUAL(c1.get(), children[1]); + EXPECT_EQUAL(c4.get(), children[2]); + EXPECT_EQUAL(c2.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test Or Blueprint") { + OrBlueprint b; + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + 
est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + } + { + OrBlueprint &o = *(new OrBlueprint()); + o.addChild(ap(MyLeafSpec(1).addField(1, 1).create())); + o.addChild(ap(MyLeafSpec(2).addField(2, 2).create())); + + Blueprint::UP a(&o); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(2u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 2).create())); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(5u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 3).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + o.removeChild(3); + EXPECT_EQUAL(2u, a->getState().numFields()); + o.addChild(ap(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + } + { + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(10).create()); + Blueprint::UP c2 = ap(MyLeafSpec(20).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c3.get(), children[0]); + EXPECT_EQUAL(c4.get(), children[1]); + EXPECT_EQUAL(c2.get(), children[2]); + EXPECT_EQUAL(c1.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(true, b.inheritStrict(1)); + EXPECT_EQUAL(true, b.inheritStrict(2)); + 
EXPECT_EQUAL(true, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test Near Blueprint") { /* Exercises NearBlueprint(7). combine() is expected to report the smallest child estimate (10, 10, 5) and to turn empty as soon as one empty estimate is present; a child's field is not exposed; sort() is expected to order children by ascending estimate; only child index 0 inherits strictness. */ + NearBlueprint b(7); + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(5u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + } + { /* a child carrying field (1,1) must not show up in exposeFields() */ + NearBlueprint a(7); + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { /* sort: leaves created with 40, 10, 30, 20 are expected back as c2, c4, c3, c1 (ascending) */ + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(40).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(30).create()); + Blueprint::UP c4 = ap(MyLeafSpec(20).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c2.get(), children[0]); + EXPECT_EQUAL(c4.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c1.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test ONear Blueprint") { /* Exercises ONearBlueprint(8). The combine() expectations match the Near test (smallest estimate; empty once any child is empty) and no child field is exposed, but sort() is expected to leave the children in their original order; only child index 0 inherits strictness. */ + ONearBlueprint b(8); + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); +
est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(5u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + } + { /* a child carrying field (1,1) must not show up in exposeFields() */ + ONearBlueprint a(8); + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { /* sort: the order 20, 10, 40, 30 must come back unchanged (c1, c2, c3, c4) */ + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test Rank Blueprint") { /* Exercises RankBlueprint. combine() is expected to keep reporting the first estimate (10) no matter what is appended afterwards, including an empty estimate; a child's field is not exposed; sort() is expected to leave the children in their original order; only child index 0 inherits strictness. */ + RankBlueprint b; + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); +
est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + } + { /* a child carrying field (1,1) must not show up in exposeFields() */ + RankBlueprint a; + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { /* sort: the order 20, 10, 40, 30 must come back unchanged (c1, c2, c3, c4) */ + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(false, b.inheritStrict(1)); + EXPECT_EQUAL(false, b.inheritStrict(2)); + EXPECT_EQUAL(false, b.inheritStrict(-1)); + } + // createSearch tested by iterator unit test +} + +TEST("test SourceBlender Blueprint") { /* Exercises SourceBlenderBlueprint. combine() is expected to report the largest child estimate and to stay non-empty after an empty estimate is appended; the field-exposure expectations are the same as in the Or test; sort() is expected to leave the children in their original order; every probed index inherits strictness. NOTE(review): `selector` is a null pointer that is only bound to a reference ("not needed here"); forming a reference through a null pointer is formally undefined behavior. */ + ISourceSelector *selector = 0; // not needed here + SourceBlenderBlueprint b(*selector); + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); +
EXPECT_EQUAL(20u, b.combine(est).estHits); + } + { /* field exposure: two children with distinct fields give a two-field state; a child repeating (2,2) keeps two fields and raises the estimate to 5; a child with (2,3) drops the field count to 0 until it is removed; a field-less empty leaf also drops the count to 0 */ + SourceBlenderBlueprint &o = *(new SourceBlenderBlueprint(*selector)); + o.addChild(ap(MyLeafSpec(1).addField(1, 1).create())); + o.addChild(ap(MyLeafSpec(2).addField(2, 2).create())); + + Blueprint::UP a(&o); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(2u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 2).create())); + ASSERT_TRUE(a->getState().numFields() == 2); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(5u, a->getState().estimate().estHits); + + o.addChild(ap(MyLeafSpec(5).addField(2, 3).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + o.removeChild(3); + EXPECT_EQUAL(2u, a->getState().numFields()); + o.addChild(ap(MyLeafSpec(0, true).create())); + EXPECT_EQUAL(0u, a->getState().numFields()); + } + { /* sort: the order 20, 10, 40, 30 must come back unchanged (c1, c2, c3, c4) */ + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(20).create()); + Blueprint::UP c2 = ap(MyLeafSpec(10).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(true, b.inheritStrict(1)); + EXPECT_EQUAL(true, b.inheritStrict(2)); + EXPECT_EQUAL(true, b.inheritStrict(-1)); + } + // createSearch tested
by iterator unit test +} + +TEST("test SourceBlender below AND optimization") { /* Builds an AND (`top`) over three plain leaves and four SourceBlenders (three sharing selector_1, one using selector_2) and checks that Blueprint::optimize() turns it into the hand-built `expect` tree: plain leaves in order 1, 2, 3, then the selector_2 blender, then a single selector_1 blender holding one AND per source id (3, 2, 1) with that source's leaves from all three original blenders. The final checks require the trees to differ before optimization, match after `top` is optimized, and still match after `expect` is optimized too. The selector pointers (null and the fake address 100) serve only to tell "same selector" from "different selector". NOTE(review): binding references through them is formally undefined behavior. */ + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + //------------------------------------------------------------------------- + AndBlueprint *top = new AndBlueprint(); + Blueprint::UP top_bp(top); + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + AndBlueprint *expect = new AndBlueprint(); + Blueprint::UP expect_bp(expect); + expect->addChild(ap(MyLeafSpec(1).create())); + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new
SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + AndBlueprint *sub_and = new AndBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + AndBlueprint *sub_and = new AndBlueprint(); + sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + AndBlueprint *sub_and = new AndBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_bp->asString(), top_bp->asString()); + top_bp = Blueprint::optimize(std::move(top_bp)); + EXPECT_EQUAL(expect_bp->asString(), top_bp->asString()); + expect_bp = Blueprint::optimize(std::move(expect_bp)); + EXPECT_EQUAL(expect_bp->asString(), top_bp->asString()); +} + +TEST("test SourceBlender below OR optimization") { /* Same input shape as the AND variant, but under an OR. The expected optimized tree starts with a single selector_1 blender holding one OR per source id (3, 2, 1), each with its leaves in order 2000/1000, 200/100/300, 20/10/30 by source, followed by the selector_2 blender and then the plain leaves in order 3, 2, 1. The trees must differ before optimization, match after `top` is optimized, and still match after `expect` is optimized too. The inner nodes are OrBlueprints although the local variable is named sub_and. */ + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + //------------------------------------------------------------------------- + OrBlueprint *top = new OrBlueprint(); + Blueprint::UP top_up(top); + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); +
top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + OrBlueprint *expect = new OrBlueprint(); + Blueprint::UP expect_up(expect); + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new
OrBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + expect->addChild(ap(MyLeafSpec(3).create())); + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(1).create())); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test SourceBlender below AND_NOT optimization") { /* AND_NOT variant. The first child is itself a selector_1 blender (leaf 42) and is expected to stay in first position untouched. The three later selector_1 blenders are expected to be merged into one blender holding one OR per source id (3, 2, 1), followed by the selector_2 blender and the plain leaves in order 3, 2, 1. The trees must differ before optimization, match after `top` is optimized, and still match after `expect` is optimized too. */ + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + //------------------------------------------------------------------------- + AndNotBlueprint *top = new AndNotBlueprint(); + Blueprint::UP top_up(top); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); +
blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + AndNotBlueprint *expect = new AndNotBlueprint(); + Blueprint::UP expect_up(expect); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(2); + sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(1); +
sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + expect->addChild(ap(MyLeafSpec(3).create())); + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(1).create())); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test SourceBlender below RANK optimization") { /* RANK variant. The first child (a selector_1 blender with leaf 42) is expected to stay first, and the plain leaves are expected to keep their original order 2, 1, 3. They are followed by the selector_2 blender and finally by one merged selector_1 blender holding one OR per source id (3, 2, 1). The trees must differ before optimization, match after `top` is optimized, and still match after `expect` is optimized too. */ + ISourceSelector *selector_1 = 0; // the one + ISourceSelector *selector_2 = reinterpret_cast<ISourceSelector*>(100); // not the one + //------------------------------------------------------------------------- + RankBlueprint *top = new RankBlueprint(); + Blueprint::UP top_up(top); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + top->addChild(ap(MyLeafSpec(2).create())); + top->addChild(ap(MyLeafSpec(1).create())); + top->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); +
top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + top->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + blender->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + top->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + RankBlueprint *expect = new RankBlueprint(); + Blueprint::UP expect_up(expect); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_1); + blender->addChild(ap(MyLeafSpec(42).create()->setSourceId(1))); + expect->addChild(ap(blender)); + } + expect->addChild(ap(MyLeafSpec(2).create())); + expect->addChild(ap(MyLeafSpec(1).create())); + expect->addChild(ap(MyLeafSpec(3).create())); + { + SourceBlenderBlueprint *blender = new SourceBlenderBlueprint(*selector_2); + blender->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + expect->addChild(ap(blender)); + } + { + SourceBlenderBlueprint *blender(new SourceBlenderBlueprint(*selector_1)); + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(3); + sub_and->addChild(ap(MyLeafSpec(300).create()->setSourceId(3))); + sub_and->addChild(ap(MyLeafSpec(30).create()->setSourceId(3))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(2); +
sub_and->addChild(ap(MyLeafSpec(2000).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(200).create()->setSourceId(2))); + sub_and->addChild(ap(MyLeafSpec(20).create()->setSourceId(2))); + blender->addChild(ap(sub_and)); + } + { + OrBlueprint *sub_and = new OrBlueprint(); + sub_and->setSourceId(1); + sub_and->addChild(ap(MyLeafSpec(1000).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(100).create()->setSourceId(1))); + sub_and->addChild(ap(MyLeafSpec(10).create()->setSourceId(1))); + blender->addChild(ap(sub_and)); + } + expect->addChild(ap(blender)); + } + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty root node optimization and safeness") { /* Builds five trees and requires each one to print, after Blueprint::optimize(), exactly like a bare EmptyBlueprint: (1) a single empty leaf, (2) an AND with one empty child, (3) an AND_NOT whose first child is empty, (4) a RANK whose first child is empty, and (5) an OR where all three children are empty. MyLeafSpec(0, true) is the form used throughout for an empty leaf. */ + //------------------------------------------------------------------------- + // tests leaf node elimination + Blueprint::UP top1_up(ap(MyLeafSpec(0, true).create())); + //------------------------------------------------------------------------- + // tests intermediate node elimination + Blueprint::UP top2_up(ap((new AndBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(20).create())))); + //------------------------------------------------------------------------- + // tests safety of empty AND_NOT child removal + Blueprint::UP top3_up(ap((new AndNotBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(20).create())))); + //------------------------------------------------------------------------- + // tests safety of empty RANK child removal + Blueprint::UP top4_up(ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(20).create())))); + //------------------------------------------------------------------------- + // tests safety of empty OR child removal + Blueprint::UP top5_up(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up(new EmptyBlueprint()); + //------------------------------------------------------------------------- + top1_up = Blueprint::optimize(std::move(top1_up)); + top2_up = Blueprint::optimize(std::move(top2_up)); + top3_up = Blueprint::optimize(std::move(top3_up)); + top4_up = Blueprint::optimize(std::move(top4_up)); + top5_up = Blueprint::optimize(std::move(top5_up)); + EXPECT_EQUAL(expect_up->asString(), top1_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top2_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top3_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top4_up->asString()); + EXPECT_EQUAL(expect_up->asString(), top5_up->asString()); +} + +TEST("and with one empty child is optimized away") { /* A SourceBlender holds a plain leaf and an AND that contains one empty leaf. After Blueprint::optimize() the whole AND is expected to be replaced by an EmptyBlueprint while the blender and its other child remain. Unlike the neighbouring tests, the expected tree is compared as built, without being optimized itself. `selector` is a null placeholder. */ + ISourceSelector *selector = 0; + Blueprint::UP top(ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap(MyLeafSpec(10).create())). + addChild(ap((new AndBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(20).create())))))); + top = Blueprint::optimize(std::move(top)); + Blueprint::UP expect_up(ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(new EmptyBlueprint())))); + EXPECT_EQUAL(expect_up->asString(), top->asString()); +} + +TEST("test single child optimization") { /* A chain of single-child nodes, AND_NOT over AND over OR over SourceBlender over RANK over leaf 42, is expected to collapse to just the SourceBlender with the leaf as its direct child. The trees must differ before optimization, match after `top_up` is optimized, and still match after `expect_up` is optimized too. `selector` is a null placeholder. */ + ISourceSelector *selector = 0; + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new AndNotBlueprint())-> + addChild(ap((new AndBlueprint())-> + addChild(ap((new OrBlueprint())-> + addChild(ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(42).create())))))))))))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new SourceBlenderBlueprint(*selector))-> + addChild(ap(MyLeafSpec(42).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty OR child optimization") { /* An OR with eight children, four of them empty leaves (MyLeafSpec(0, true)), is expected to optimize to an OR holding only the four non-empty leaves in order 30, 20, 10, 0. The leaf built with MyLeafSpec(0) and no `true` flag is kept. */ + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty AND_NOT child optimization") { /* An AND_NOT whose first child is leaf 42, followed by a mix of empty and non-empty leaves, is expected to optimize to leaf 42 first and then only the non-empty remaining leaves in order 30, 20, 10, 0. The trees must differ before optimization, match after `top_up` is optimized, and still match after `expect_up` is optimized too. */ + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new AndNotBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new AndNotBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("test empty RANK child optimization") { /* Same input as the AND_NOT variant but under a RANK. The empty leaves are expected to be dropped while the surviving children keep their original relative order: 42, 20, 10, 0, 30. */ + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(10).create())).
+ addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())). + addChild(ap(MyLeafSpec(0, true).create())))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new RankBlueprint())-> + addChild(ap(MyLeafSpec(42).create())). + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(10).create())). + addChild(ap(MyLeafSpec(0).create())). + addChild(ap(MyLeafSpec(30).create())))); + //------------------------------------------------------------------------- + EXPECT_NOT_EQUAL(expect_up->asString(), top_up->asString()); + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); + expect_up = Blueprint::optimize(std::move(expect_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("require that replaced blueprints retain source id") { /* Two replacement cases are checked. (1) An empty root leaf with source id 13 must optimize to an EmptyBlueprint that still reports source id 13. (2) An AND with source id 42 and a single child with source id 55 must be replaced by that child, which must then report the parent's id 42. The static_cast is needed because addChild is called on the result of setSourceId. */ + //------------------------------------------------------------------------- + // replace empty root with empty search + Blueprint::UP top1_up(ap(MyLeafSpec(0, true).create()->setSourceId(13))); + Blueprint::UP expect1_up(new EmptyBlueprint()); + expect1_up->setSourceId(13); + //------------------------------------------------------------------------- + // replace self with single child + Blueprint::UP top2_up(ap(static_cast<AndBlueprint&>((new AndBlueprint())->setSourceId(42)).
+ addChild(ap(MyLeafSpec(30).create()->setSourceId(55))))); + Blueprint::UP expect2_up(ap(MyLeafSpec(30).create()->setSourceId(42))); + //------------------------------------------------------------------------- + top1_up = Blueprint::optimize(std::move(top1_up)); + top2_up = Blueprint::optimize(std::move(top2_up)); + EXPECT_EQUAL(expect1_up->asString(), top1_up->asString()); + EXPECT_EQUAL(expect2_up->asString(), top2_up->asString()); + EXPECT_EQUAL(13u, top1_up->getSourceId()); + EXPECT_EQUAL(42u, top2_up->getSourceId()); +} + +TEST("test Equiv Blueprint") { /* Builds an EquivBlueprint over fields (1,1), (2,2), (3,3) and adds four terms with estimates 5, 10, 20 and 50. The resulting state is expected to expose exactly the three constructor fields (ids and handles 1, 2, 3), to estimate 50 hits and to be non-empty. The outer instance `b` is constructed but not used by any visible check. */ + FieldSpecBaseList fields; + search::fef::MatchDataLayout subLayout; + fields.add(FieldSpecBase(1, 1)); + fields.add(FieldSpecBase(2, 2)); + fields.add(FieldSpecBase(3, 3)); + EquivBlueprint b(fields, subLayout); + { + EquivBlueprint &o = *(new EquivBlueprint(fields, subLayout)); + o.addTerm(ap(MyLeafSpec(5).addField(1, 4).create()), 1.0); + o.addTerm(ap(MyLeafSpec(10).addField(1, 5).create()), 1.0); + o.addTerm(ap(MyLeafSpec(20).addField(1, 6).create()), 1.0); + o.addTerm(ap(MyLeafSpec(50).addField(2, 7).create()), 1.0); + + Blueprint::UP a(&o); + ASSERT_TRUE(a->getState().numFields() == 3); + EXPECT_EQUAL(1u, a->getState().field(0).getFieldId()); + EXPECT_EQUAL(2u, a->getState().field(1).getFieldId()); + EXPECT_EQUAL(3u, a->getState().field(2).getFieldId()); + + EXPECT_EQUAL(1u, a->getState().field(0).getHandle()); + EXPECT_EQUAL(2u, a->getState().field(1).getHandle()); + EXPECT_EQUAL(3u, a->getState().field(2).getHandle()); + + EXPECT_EQUAL(50u, a->getState().estimate().estHits); + EXPECT_EQUAL(false, a->getState().estimate().empty); + } + // createSearch tested by iterator unit test +} + + +TEST("test WeakAnd Blueprint") { /* Exercises WeakAndBlueprint. combine() is expected to report the largest child estimate and to stay non-empty after an empty estimate is appended; a child's field is not exposed; sort() is expected to leave the children in their original order; every probed index inherits strictness. The last section also creates real searches from three FakeBlueprint terms. */ + WeakAndBlueprint b(1000); + { // combine + std::vector<Blueprint::HitEstimate> est; + EXPECT_EQUAL(true, b.combine(est).empty); + EXPECT_EQUAL(0u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(10, false)); + EXPECT_EQUAL(false, b.combine(est).empty); +
EXPECT_EQUAL(10u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(20, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(5, false)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + est.push_back(Blueprint::HitEstimate(0, true)); + EXPECT_EQUAL(false, b.combine(est).empty); + EXPECT_EQUAL(20u, b.combine(est).estHits); + } + { /* a child carrying field (1,1) must not show up in exposeFields() */ + WeakAndBlueprint a(1000); + a.addChild(ap(MyLeafSpec(10).addField(1, 1).create())); + EXPECT_EQUAL(0u, a.exposeFields().size()); + } + { /* sort: the order 10, 20, 40, 30 must come back unchanged (c1, c2, c3, c4) */ + std::vector<Blueprint *> children; + Blueprint::UP c1 = ap(MyLeafSpec(10).create()); + Blueprint::UP c2 = ap(MyLeafSpec(20).create()); + Blueprint::UP c3 = ap(MyLeafSpec(40).create()); + Blueprint::UP c4 = ap(MyLeafSpec(30).create()); + children.push_back(c1.get()); + children.push_back(c2.get()); + children.push_back(c3.get()); + children.push_back(c4.get()); + b.sort(children); + EXPECT_EQUAL(c1.get(), children[0]); + EXPECT_EQUAL(c2.get(), children[1]); + EXPECT_EQUAL(c3.get(), children[2]); + EXPECT_EQUAL(c4.get(), children[3]); + } + { + EXPECT_EQUAL(true, b.inheritStrict(0)); + EXPECT_EQUAL(true, b.inheritStrict(1)); + EXPECT_EQUAL(true, b.inheritStrict(2)); + EXPECT_EQUAL(true, b.inheritStrict(-1)); + } + { /* search creation: three fake terms x (docs 1, 2, 5), z (docs 1, 4) and y (doc 2) are added with weights 120, 140 and 130. The strict search must be a WeakAndSearch with N = 456 and three terms; once sorted with WeightOrder the terms carry weights 120, 130, 140 and estimates 3, 1, 2. The non-strict search must match docs 1, 2, 4, 5 and reject 3 and 6. */ + FieldSpec field("foo", 1, 1); + FakeResult x = FakeResult().doc(1).doc(2).doc(5); + FakeResult y = FakeResult().doc(2); + FakeResult z = FakeResult().doc(1).doc(4); + { + WeakAndBlueprint wa(456); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + wa.addTerm(Blueprint::UP(new FakeBlueprint(field, x)), 120); + wa.addTerm(Blueprint::UP(new FakeBlueprint(field, z)), 140); + wa.addTerm(Blueprint::UP(new FakeBlueprint(field, y)), 130); + { + wa.fetchPostings(true); + SearchIterator::UP search = wa.createSearch(*md, true); + EXPECT_TRUE(dynamic_cast<WeakAndSearch*>(search.get()) != 0); + WeakAndSearch &s =
dynamic_cast<WeakAndSearch&>(*search); + EXPECT_EQUAL(456u, s.getN()); + ASSERT_EQUAL(3u, s.getTerms().size()); + EXPECT_GREATER(s.get_max_score(0), 0.0); + EXPECT_GREATER(s.get_max_score(1), 0.0); + EXPECT_GREATER(s.get_max_score(2), 0.0); + wand::Terms terms = s.getTerms(); + std::sort(terms.begin(), terms.end(), WeightOrder()); + EXPECT_EQUAL(120, terms[0].weight); + EXPECT_EQUAL(3u, terms[0].estHits); + EXPECT_EQUAL(0u, terms[0].maxScore); // NB: not set + EXPECT_EQUAL(130, terms[1].weight); + EXPECT_EQUAL(1u, terms[1].estHits); + EXPECT_EQUAL(0u, terms[1].maxScore); // NB: not set + EXPECT_EQUAL(140, terms[2].weight); + EXPECT_EQUAL(2u, terms[2].estHits); + EXPECT_EQUAL(0u, terms[2].maxScore); // NB: not set + } + { + wa.fetchPostings(false); + SearchIterator::UP search = wa.createSearch(*md, false); + EXPECT_TRUE(dynamic_cast<WeakAndSearch*>(search.get()) != 0); + EXPECT_TRUE(search->seek(1)); + EXPECT_TRUE(search->seek(2)); + EXPECT_FALSE(search->seek(3)); + EXPECT_TRUE(search->seek(4)); + EXPECT_TRUE(search->seek(5)); + EXPECT_FALSE(search->seek(6)); + } + } + } +} + +TEST("require_that_unpack_of_or_over_multisearch_is_optimized") { + Blueprint::UP child1( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). + addChild(ap(MyLeafSpec(10).addField(3,3).create())))); + Blueprint::UP child2( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(4,4).create())). + addChild(ap(MyLeafSpec(20).addField(5,5).create())). + addChild(ap(MyLeafSpec(10).addField(6,6).create())))); + Blueprint::UP top_up( + ap((new OrBlueprint())-> + addChild(std::move(child1)).
+ addChild(std::move(child2)))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::FullUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::FullUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::SelectiveUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(4)->tagAsNotNeeded(); + md->resolveTermField(6)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::SelectiveUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(5)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::NoUnpack>", + top_up->createSearch(*md, false)->getClassName()); +} + +TEST("require_that_unpack_of_or_is_optimized") { + Blueprint::UP top_up( + ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). 
+ addChild(ap(MyLeafSpec(10).addField(3,3).create())))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::FullUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::(anonymous namespace)::SelectiveUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<false, search::queryeval::NoUnpack>", + top_up->createSearch(*md, false)->getClassName()); +} + +TEST("require_that_unpack_of_and_is_optimized") { + Blueprint::UP top_up( + ap((new AndBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). + addChild(ap(MyLeafSpec(10).addField(3,3).create())))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::FullUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::SelectiveUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::NoUnpack>", + top_up->createSearch(*md, false)->getClassName()); +} + +TEST("require_that_unpack_optimization_is_honoured_by_parents") { + Blueprint::UP top_up( + ap((new AndBlueprint())-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,1).create())). + addChild(ap(MyLeafSpec(20).addField(2,2).create())). 
+ addChild(ap(MyLeafSpec(10).addField(3,3).create())))))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::FullUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(2)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::FullUnpack>", + top_up->createSearch(*md, false)->getClassName()); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::NoUnpack>", + top_up->createSearch(*md, false)->getClassName()); +} + +namespace { + +SimpleStringTerm +makeTerm(const std::string & term) +{ + return SimpleStringTerm(term, "field", 0, search::query::Weight(0)); +} + +} + +TEST("require that children does not optimize when parents refuse them to") { + FakeRequestContext requestContext; + search::diskindex::TestDiskIndex index; + vespalib::mkdir("index", false); + index.buildSchema(); + index.openIndex("index/1", false, true, false, false, false); + FieldSpecBaseList fields; + fields.add(FieldSpecBase(1, 11)); + fields.add(FieldSpecBase(2, 22)); + search::fef::MatchDataLayout subLayout; + search::fef::TermFieldHandle idxth21 = subLayout.allocTermField(2); + search::fef::TermFieldHandle idxth22 = subLayout.allocTermField(2); + search::fef::TermFieldHandle idxth1 = subLayout.allocTermField(1); + Blueprint::UP top_up( + ap((new EquivBlueprint(fields, subLayout))-> + addTerm(index.getIndex().createBlueprint(requestContext, + FieldSpec("f2", 2, idxth22, true), + makeTerm("w2")), + 1.0). + addTerm(index.getIndex().createBlueprint(requestContext, + FieldSpec("f1", 1, idxth1), + makeTerm("w1")), + 1.0). 
+ addTerm(index.getIndex().createBlueprint(requestContext, + FieldSpec("f2", 2, idxth21), makeTerm("w2")), + 1.0))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + SearchIterator::UP search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); + { + const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); + EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName()); + } + + md->resolveTermField(12)->tagAsNotNeeded(); + search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); + { + const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); + EXPECT_EQUAL("search::BitVectorIteratorStrict", e.getChildren()[0]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[1]->getClassName()); + EXPECT_EQUAL("search::diskindex::Zc4RareWordPosOccIterator<true>", e.getChildren()[2]->getClassName()); + } +} + +TEST("require_that_unpack_optimization_is_overruled_by_equiv") { + FieldSpecBaseList fields; + fields.add(FieldSpecBase(1, 1)); + fields.add(FieldSpecBase(2, 2)); + fields.add(FieldSpecBase(3, 3)); + search::fef::MatchDataLayout subLayout; + search::fef::TermFieldHandle idxth1 = subLayout.allocTermField(1); + search::fef::TermFieldHandle idxth2 = subLayout.allocTermField(2); + search::fef::TermFieldHandle idxth3 = subLayout.allocTermField(3); + Blueprint::UP top_up( + ap((new EquivBlueprint(fields, subLayout))-> + addTerm(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).addField(1,idxth1).create())). + addChild(ap(MyLeafSpec(20).addField(2,idxth2).create())). 
+ addChild(ap(MyLeafSpec(10).addField(3,idxth3).create()))), + 1.0))); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + top_up->fetchPostings(false); + SearchIterator::UP search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); + { + const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + e.getChildren()[0]->getClassName()); + } + + md->resolveTermField(2)->tagAsNotNeeded(); + search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); + { + const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + e.getChildren()[0]->getClassName()); + } + + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + search = top_up->createSearch(*md, true); + EXPECT_EQUAL("search::queryeval::EquivImpl<true>", search->getClassName()); + { + const MultiSearch & e = dynamic_cast<const MultiSearch &>(*search); + EXPECT_EQUAL("search::queryeval::OrLikeSearch<true, search::queryeval::(anonymous namespace)::FullUnpack>", + e.getChildren()[0]->getClassName()); + } +} + +TEST("require that children of near are not optimized") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new NearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). 
+ addChild(ap(MyLeafSpec(30).create())))))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new NearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). + addChild(ap(MyLeafSpec(30).create())))))); + //------------------------------------------------------------------------- + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST("require that children of onear are not optimized") { + //------------------------------------------------------------------------- + Blueprint::UP top_up( + ap((new ONearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create()->estimate(20))). + addChild(ap(MyLeafSpec(0, true).create()->estimate(0, true))))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create()->estimate(0, true))). + addChild(ap(MyLeafSpec(30).create()->estimate(30))))))); + //------------------------------------------------------------------------- + Blueprint::UP expect_up( + ap((new ONearBlueprint(10))-> + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(20).create())). + addChild(ap(MyLeafSpec(0, true).create())))). + addChild(ap((new OrBlueprint())-> + addChild(ap(MyLeafSpec(0, true).create())). 
+ addChild(ap(MyLeafSpec(30).create())))))); + //------------------------------------------------------------------------- + top_up = Blueprint::optimize(std::move(top_up)); + EXPECT_EQUAL(expect_up->asString(), top_up->asString()); +} + +TEST_MAIN() { TEST_DEBUG("lhs.out", "rhs.out"); TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp new file mode 100644 index 00000000000..a2353184c9f --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/leaf_blueprints_test.cpp @@ -0,0 +1,125 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("blueprint_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/vespalib/objects/visit.h> + +using namespace search::queryeval; +using namespace search::fef; + +class Test : public vespalib::TestApp +{ +public: + void testEmptyBlueprint(); + void testSimpleBlueprint(); + void testFakeBlueprint(); + int Main(); +}; + +void +Test::testEmptyBlueprint() +{ + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + EmptyBlueprint empty(FieldSpecBase(1, 11)); + EmptyBlueprint copy(empty); + ASSERT_TRUE(copy.getState().numFields() == 1u); + EXPECT_EQUAL(1u, copy.getState().field(0).getFieldId()); + EXPECT_EQUAL(11u, copy.getState().field(0).getHandle()); + + copy.fetchPostings(true); + SearchIterator::UP search = copy.createSearch(*md, true); + + SimpleResult res; + res.search(*search); + SimpleResult expect; // empty + EXPECT_EQUAL(res, expect); +} + +void +Test::testSimpleBlueprint() +{ + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + SimpleResult a; + a.addHit(3).addHit(5).addHit(7); + SimpleBlueprint simple(a); + simple.tag("tag"); + SimpleBlueprint 
copy(simple); + EXPECT_EQUAL("tag", copy.tag()); + copy.fetchPostings(true); + SearchIterator::UP search = copy.createSearch(*md, true); + + SimpleResult res; + res.search(*search); + SimpleResult expect; + expect.addHit(3).addHit(5).addHit(7); + EXPECT_EQUAL(res, expect); +} + +void +Test::testFakeBlueprint() +{ + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + FakeResult fake; + fake.doc(10).len(50).pos(2).pos(3) + .doc(25).len(10).pos(5); + + uint32_t fieldId = 0; + TermFieldHandle handle = 0; + FakeBlueprint orig(FieldSpec("<field>", fieldId, handle), fake); + FakeBlueprint copy(orig); + + copy.fetchPostings(true); + SearchIterator::UP search = copy.createSearch(*md, true); + search->initFullRange(); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(10u, search->getDocId()); + { + search->unpack(10u); + TermFieldMatchData &data = *md->resolveTermField(handle); + EXPECT_EQUAL(fieldId, data.getFieldId()); + EXPECT_EQUAL(10u, data.getDocId()); + EXPECT_EQUAL(10u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(50u, itr.getFieldLength()); + EXPECT_EQUAL(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2u, itr.getPosition()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + EXPECT_TRUE(search->seek(25)); + EXPECT_EQUAL(25u, search->getDocId()); + { + search->unpack(25u); + TermFieldMatchData &data = *md->resolveTermField(handle); + EXPECT_EQUAL(fieldId, data.getFieldId()); + EXPECT_EQUAL(25u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(10u, itr.getFieldLength()); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(5u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + EXPECT_TRUE(!search->seek(50)); + EXPECT_TRUE(search->isAtEnd()); +} + +int +Test::Main() +{ + TEST_INIT("leaf_blueprints_test"); + testEmptyBlueprint(); + testSimpleBlueprint(); + 
testFakeBlueprint(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/blueprint/mysearch.h b/searchlib/src/tests/queryeval/blueprint/mysearch.h new file mode 100644 index 00000000000..7ab852b384f --- /dev/null +++ b/searchlib/src/tests/queryeval/blueprint/mysearch.h @@ -0,0 +1,155 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/vespalib/objects/visit.h> + +namespace search { +namespace queryeval { + +//----------------------------------------------------------------------------- + +class MySearch : public SearchIterator +{ +public: + typedef MultiSearch::Children Children; + typedef std::vector<SearchIterator::UP> MyChildren; + typedef search::fef::TermFieldMatchDataArray TFMDA; + typedef search::fef::MatchData MatchData; + +private: + std::string _tag; + bool _isLeaf; + bool _isStrict; + MyChildren _children; + TFMDA _match; + MatchData *_md; + + std::vector<uint32_t> _handles; + +protected: + virtual void doSeek(uint32_t) {} + virtual void doUnpack(uint32_t) {} + +public: + MySearch(const std::string &tag, bool leaf, bool strict) + : _tag(tag), _isLeaf(leaf), _isStrict(strict), _children(), + _match(), _md(0) {} + + MySearch(const std::string &tag, const TFMDA &tfmda, bool strict) + : _tag(tag), _isLeaf(true), _isStrict(strict), _children(), + _match(tfmda), _md(0) {} + + MySearch(const std::string &tag, const Children &children, + MatchData *md, bool strict) + : _tag(tag), _isLeaf(false), _isStrict(strict), _children(), + _match(), _md(md) { + for (size_t i(0); i < children.size(); i++) { + _children.emplace_back(children[i]); + } + } + + MySearch &add(SearchIterator *search) { + _children.emplace_back(search); + return *this; + } + + MySearch &addHandle(uint32_t handle) { + _handles.push_back(handle); + return *this; + } + + bool verifyAndInferImpl(MatchData &md) { + bool ok = true; + if 
(!_isLeaf) { + ok &= (_md == &md); + } + for (size_t i = 0; i < _children.size(); ++i) { + MySearch *child = dynamic_cast<MySearch *>(_children[i].get()); + ok &= (child != 0); + if (child != 0) { + ok &= child->verifyAndInferImpl(md); + } + } + for (size_t i = 0; i < _match.size(); ++i) { + search::fef::TermFieldMatchData *tfmd = _match[i]; + _handles.push_back(search::fef::IllegalHandle); + for (search::fef::TermFieldHandle j = 0; j < md.getNumTermFields(); ++j) { + if (md.resolveTermField(j) == tfmd) { + _handles.back() = j; + break; + } + } + ok &= (_handles.back() != search::fef::IllegalHandle); + } + return ok; + } + + static bool verifyAndInfer(SearchIterator *search, MatchData &md) { + MySearch *self = dynamic_cast<MySearch *>(search); + if (self == 0) { + return false; + } else { + return self->verifyAndInferImpl(md); + } + } + + virtual void visitMembers(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "_tag", _tag); + visit(visitor, "_isLeaf", _isLeaf); + visit(visitor, "_isStrict", _isStrict); + visit(visitor, "_children", _children); + visit(visitor, "_handles", _handles); + } + + virtual ~MySearch() {} +}; + +//----------------------------------------------------------------------------- + +class MyLeaf : public SimpleLeafBlueprint +{ + typedef search::fef::TermFieldMatchDataArray TFMDA; + +public: + virtual SearchIterator::UP + createLeafSearch(const TFMDA &tfmda, bool strict) const + { + return SearchIterator::UP(new MySearch("leaf", tfmda, strict)); + } + + MyLeaf(const FieldSpecBaseList &fields) + : SimpleLeafBlueprint(fields) + {} + + MyLeaf &estimate(uint32_t hits, bool empty = false) { + setEstimate(HitEstimate(hits, empty)); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +class MyLeafSpec +{ +private: + FieldSpecBaseList _fields; + Blueprint::HitEstimate _estimate; + +public: + explicit MyLeafSpec(uint32_t estHits, bool empty = false) + : _fields(), _estimate(estHits, 
empty) {} + + MyLeafSpec &addField(uint32_t fieldId, uint32_t handle) { + _fields.add(FieldSpecBase(fieldId, handle)); + return *this; + } + MyLeaf *create() const { + MyLeaf *leaf = new MyLeaf(_fields); + leaf->estimate(_estimate.estHits, _estimate.empty); + return leaf; + } +}; + +//----------------------------------------------------------------------------- + +} // namespace queryeval +} // namespace search diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore new file mode 100644 index 00000000000..9e6565f9d16 --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +booleanmatchiteratorwrapper_test diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore new file mode 100644 index 00000000000..b568b87514a --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +booleanmatchiteratorwrapper_test +searchlib_booleanmatchiteratorwrapper_test_app diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt new file mode 100644 index 00000000000..cf701c430aa --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_booleanmatchiteratorwrapper_test_app + SOURCES + booleanmatchiteratorwrapper_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_booleanmatchiteratorwrapper_test_app COMMAND searchlib_booleanmatchiteratorwrapper_test_app) diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC new file mode 100644 index 00000000000..097198d38ef --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/DESC @@ -0,0 +1 @@ +booleanmatchiteratorwrapper test. Take a look at booleanmatchiteratorwrapper.cpp for details. diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES new file mode 100644 index 00000000000..a47b5b35a40 --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/FILES @@ -0,0 +1 @@ +booleanmatchiteratorwrapper.cpp diff --git a/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp new file mode 100644 index 00000000000..940f825b691 --- /dev/null +++ b/searchlib/src/tests/queryeval/booleanmatchiteratorwrapper/booleanmatchiteratorwrapper_test.cpp @@ -0,0 +1,133 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("booleanmatchiteratorwrapper_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/test/initrange.h> + +using namespace search::fef; +using namespace search::queryeval; +using search::BitVector; +using search::BitVectorIterator; + +struct DummyItr : public SearchIterator { + static uint32_t seekCnt; + static uint32_t unpackCnt; + static uint32_t dtorCnt; + static uint32_t _unpackedDocId; + TermFieldMatchData *match; + + DummyItr(TermFieldMatchData *m) { + match = m; + } + + ~DummyItr() { + ++dtorCnt; + } + + void doSeek(uint32_t docid) { + ++seekCnt; + if (docid <= 10) { + setDocId(10); + } else if (docid <= 20) { + setDocId(20); + } else { + setAtEnd(); + } + } + + void doUnpack(uint32_t docid) { + ++unpackCnt; + if (match != 0) { + _unpackedDocId = docid; + } + } +}; +uint32_t DummyItr::seekCnt = 0; +uint32_t DummyItr::unpackCnt = 0; +uint32_t DummyItr::dtorCnt = 0; +uint32_t DummyItr::_unpackedDocId = 0; + + +TEST("mostly everything") { + EXPECT_EQUAL(DummyItr::seekCnt, 0u); + EXPECT_EQUAL(DummyItr::unpackCnt, 0u); + EXPECT_EQUAL(DummyItr::dtorCnt, 0u); + { // without wrapper + TermFieldMatchData match; + DummyItr::_unpackedDocId = 0; + SearchIterator::UP search(new DummyItr(&match)); + search->initFullRange(); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->getDocId(), 10u); + EXPECT_TRUE(search->seek(10)); + search->unpack(10); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 10u); + EXPECT_TRUE(!search->seek(15)); + EXPECT_EQUAL(search->getDocId(), 20u); + EXPECT_TRUE(search->seek(20)); + search->unpack(20); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 20u); + EXPECT_TRUE(!search->seek(25)); + EXPECT_TRUE(search->isAtEnd()); + } + 
EXPECT_EQUAL(DummyItr::seekCnt, 3u); + EXPECT_EQUAL(DummyItr::unpackCnt, 2u); + EXPECT_EQUAL(DummyItr::dtorCnt, 1u); + { // with wrapper + TermFieldMatchData match; + TermFieldMatchDataArray tfmda; + tfmda.add(&match); + DummyItr::_unpackedDocId = 0; + SearchIterator::UP search(new BooleanMatchIteratorWrapper(SearchIterator::UP(new DummyItr(&match)), tfmda)); + search->initFullRange(); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->getDocId(), 10u); + EXPECT_TRUE(search->seek(10)); + search->unpack(10); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(15)); + EXPECT_EQUAL(search->getDocId(), 20u); + EXPECT_TRUE(search->seek(20)); + search->unpack(20); + EXPECT_EQUAL(DummyItr::_unpackedDocId, 0u); + EXPECT_TRUE(!search->seek(25)); + EXPECT_TRUE(search->isAtEnd()); + } + EXPECT_EQUAL(DummyItr::seekCnt, 6u); + EXPECT_EQUAL(DummyItr::unpackCnt, 2u); + EXPECT_EQUAL(DummyItr::dtorCnt, 2u); + { // with wrapper, without match data + SearchIterator::UP search(new BooleanMatchIteratorWrapper(SearchIterator::UP(new DummyItr(0)), TermFieldMatchDataArray())); + search->initFullRange(); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->getDocId(), 10u); + EXPECT_TRUE(search->seek(10)); + search->unpack(10); + EXPECT_TRUE(!search->seek(15)); + EXPECT_EQUAL(search->getDocId(), 20u); + EXPECT_TRUE(search->seek(20)); + search->unpack(20); + EXPECT_TRUE(!search->seek(25)); + EXPECT_TRUE(search->isAtEnd()); + } + EXPECT_EQUAL(DummyItr::seekCnt, 9u); + EXPECT_EQUAL(DummyItr::unpackCnt, 2u); + EXPECT_EQUAL(DummyItr::dtorCnt, 3u); +} + +TEST("Test boolean wrapper iterators adheres to initRange") { + search::test::InitRangeVerifier ir; + TermFieldMatchDataArray tfmda; + BooleanMatchIteratorWrapper relaxed(ir.createIterator(ir.getExpectedDocIds(), false), tfmda); + ir.verify(relaxed); + BooleanMatchIteratorWrapper strict(ir.createIterator(ir.getExpectedDocIds(), true), tfmda); + ir.verify(strict); +} + 
+TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/dot_product/.gitignore b/searchlib/src/tests/queryeval/dot_product/.gitignore new file mode 100644 index 00000000000..a22cb6c5ea0 --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/.gitignore @@ -0,0 +1 @@ +searchlib_dot_product_test_app diff --git a/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt b/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt new file mode 100644 index 00000000000..91b78f2e54d --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_dot_product_test_app + SOURCES + dot_product_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_dot_product_test_app COMMAND searchlib_dot_product_test_app) diff --git a/searchlib/src/tests/queryeval/dot_product/FILES b/searchlib/src/tests/queryeval/dot_product/FILES new file mode 100644 index 00000000000..cf1bcd96ec4 --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/FILES @@ -0,0 +1 @@ +dot_product_test.cpp diff --git a/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp new file mode 100644 index 00000000000..e9dcc34219b --- /dev/null +++ b/searchlib/src/tests/queryeval/dot_product/dot_product_test.cpp @@ -0,0 +1,219 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("dot_product_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/dot_product_search.h> + +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/dot_product_search.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/test/document_weight_attribute_helper.h> +#include <memory> +#include <string> +#include <map> + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using search::test::InitRangeVerifier; +using search::test::DocumentWeightAttributeHelper; + +namespace { + +void setupFakeSearchable(FakeSearchable &fake) { + for (size_t docid = 1; docid < 10; ++docid) { + std::string token1 = vespalib::make_string("%zu", docid); + std::string token2 = vespalib::make_string("1%zu", docid); + std::string token3 = vespalib::make_string("2%zu", docid); + + fake.addResult("field", token1, FakeResult().doc(docid).weight(docid).pos(0)); + fake.addResult("multi-field", token1, FakeResult().doc(docid).weight(docid).pos(0)); + fake.addResult("multi-field", token2, FakeResult().doc(docid).weight(2 * docid).pos(0)); + fake.addResult("multi-field", token3, FakeResult().doc(docid).weight(3 * docid).pos(0)); + } +} + +struct DP { + static const uint32_t fieldId = 0; + static const TermFieldHandle handle = 0; + std::vector<std::pair<std::string, uint32_t> > tokens; + + DP &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + 
Node::UP createNode() const { + SimpleDotProduct *node = new SimpleDotProduct("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + FakeRequestContext requestContext; + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + EXPECT_TRUE(dynamic_cast<DotProductSearch*>(sb.get()) != 0); + sb->initFullRange(); + FakeResult result; + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + double score = md->resolveTermField(handle)->getRawScore(); + EXPECT_EQUAL((int)score, score); + result.score(score); + } + } + return result; + } +}; + +struct MockSearch : public SearchIterator { + int seekCnt; + uint32_t _initial; + MockSearch(uint32_t initial) : SearchIterator(), seekCnt(0), _initial(initial) { } + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + setDocId(_initial); + } + virtual void doSeek(uint32_t) { + ++seekCnt; + setAtEnd(); + } + virtual void doUnpack(uint32_t) {} +}; + +struct MockFixture { + MockSearch *mock; + TermFieldMatchData tfmd; + std::unique_ptr<SearchIterator> search; + MockFixture(uint32_t initial) : mock(0), tfmd(), search() { + std::vector<SearchIterator*> children; + std::vector<TermFieldMatchData*> childMatch; + std::vector<int32_t> weights; + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + mock = new MockSearch(initial); + children.push_back(mock); + 
childMatch.push_back(md->resolveTermField(0)); + weights.push_back(1); + search = DotProductSearch::create(children, tfmd, childMatch, weights, std::move(md)); + } +}; + +} // namespace <unnamed> + +TEST("test Simple") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).score(30 * 3) + .doc(5).score(50 * 5) + .doc(7).score(70 * 7); + DP ws = DP().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + + EXPECT_EQUAL(expect, ws.search(index, "field", true)); + EXPECT_EQUAL(expect, ws.search(index, "field", false)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST("test Multi") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).score(30 * 3 + 130 * 2 * 3 + 230 * 3 * 3) + .doc(5).score(50 * 5 + 150 * 2 * 5) + .doc(7).score(70 * 7); + DP ws = DP().add("7", 70).add("5", 50).add("3", 30) + .add("15", 150).add("13", 130) + .add("23", 230).add("100", 1000); + + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(1)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(0, mock->seekCnt); +} + +TEST_F("test Eager Matching Child", MockFixture(5)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(3)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(search.seek(5)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(!search.seek(7)); + EXPECT_TRUE(search.isAtEnd()); + 
// Builds a DotProductSearch over seven strict child iterators that together
// cover the verifier's expected hits, then lets InitRangeVerifier exercise
// the resulting iterator.
TEST("verify initRange with search iterator children") {
    using DocIds = InitRangeVerifier::DocIds;
    const size_t num_children = 7;
    InitRangeVerifier ir;

    // Deal the expected docids round-robin over the children.
    auto full_list = ir.getExpectedDocIds();
    std::vector<DocIds> split_lists(num_children);
    for (size_t pos = 0; pos != full_list.size(); ++pos) {
        DocIds &target = split_lists[pos % num_children];
        target.push_back(full_list[pos]);
    }

    // One child iterator per partial list; the raw pointers are handed to
    // DotProductSearch::create() below.
    bool strict = true;
    std::vector<SearchIterator*> children;
    for (auto &docs : split_lists) {
        auto child = ir.createIterator(docs, strict);
        children.push_back(child.release());
    }

    TermFieldMatchData tfmd;
    std::vector<int32_t> weights(num_children, 1);
    std::vector<fef::TermFieldMatchData*> no_child_match; // unpack not called
    MatchData::UP no_match_data; // unpack not called
    SearchIterator::UP itr = DotProductSearch::create(children, tfmd, no_child_match, weights,
                                                      std::move(no_match_data));
    ir.verify(*itr);
}
+Makefile +equiv_test diff --git a/searchlib/src/tests/queryeval/equiv/.gitignore b/searchlib/src/tests/queryeval/equiv/.gitignore new file mode 100644 index 00000000000..d28d4650b98 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_equiv_test_app diff --git a/searchlib/src/tests/queryeval/equiv/CMakeLists.txt b/searchlib/src/tests/queryeval/equiv/CMakeLists.txt new file mode 100644 index 00000000000..695e9b87121 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_equiv_test_app + SOURCES + equiv_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_equiv_test_app COMMAND searchlib_equiv_test_app) diff --git a/searchlib/src/tests/queryeval/equiv/DESC b/searchlib/src/tests/queryeval/equiv/DESC new file mode 100644 index 00000000000..e294d10cb23 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/DESC @@ -0,0 +1 @@ +equiv test. Take a look at equiv_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/equiv/FILES b/searchlib/src/tests/queryeval/equiv/FILES new file mode 100644 index 00000000000..79adf32c1d5 --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/FILES @@ -0,0 +1 @@ +equiv_test.cpp diff --git a/searchlib/src/tests/queryeval/equiv/equiv_test.cpp b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp new file mode 100644 index 00000000000..3d97d05995f --- /dev/null +++ b/searchlib/src/tests/queryeval/equiv/equiv_test.cpp @@ -0,0 +1,130 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Exercises an EquivBlueprint that combines three fake terms:
 *   a: field 1 ("foo"), doc 5 at position 1
 *   b: field 2 ("bar"), doc 5 at position 2
 *   c: field 2 ("bar"), doc 5 at position 3 and doc 10 at position 4
 *
 * The search is run twice, first strict and then non-strict, and the
 * unpacked match data for fields 1 and 2 is checked for docs 5 and 10.
 */
void
Test::testEquiv()
{
    FakeResult a;
    FakeResult b;
    FakeResult c;

    a.doc(5).pos(1);
    b.doc(5).pos(2);
    c.doc(5).pos(3).doc(10).pos(4);

    // Each child term gets its own handle in the sub layout; two of them
    // are allocated for field 2.
    MatchDataLayout subLayout;
    TermFieldHandle fbh11 = subLayout.allocTermField(1);
    TermFieldHandle fbh21 = subLayout.allocTermField(2);
    TermFieldHandle fbh22 = subLayout.allocTermField(2);

    // Output side: one (field id, handle) pair per field, resolved below
    // via md->resolveTermField(1) and md->resolveTermField(2).
    FieldSpecBaseList fields;
    fields.add(FieldSpecBase(1, 1));
    fields.add(FieldSpecBase(2, 2));
    EquivBlueprint *eq_b = new EquivBlueprint(fields, subLayout);

    eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("foo", 1, fbh11), a)), 1.0);
    eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("bar", 2, fbh21), b)), 1.0);
    eq_b->addTerm(Blueprint::UP(new FakeBlueprint(FieldSpec("bar", 2, fbh22), c)), 1.0);

    // From here on the blueprint is owned by 'bp'.
    Blueprint::UP bp(eq_b);
    for (int i = 0; i <= 1; ++i) {
        // First pass strict, second pass non-strict.
        bool strict = (i == 0);
        TEST_STATE(strict ? "strict" : "non-strict");
        MatchData::UP md = MatchData::makeTestInstance(0, 100, 10);
        bp->fetchPostings(strict);
        SearchIterator::UP search = bp->createSearch(*md, strict);
        search->initFullRange();

        // Doc 3 is not a hit. The strict iterator is expected to already
        // stand on doc 5; the non-strict one stays at beginId() and needs an
        // explicit seek(5).
        EXPECT_TRUE(!search->seek(3));
        if (!strict) {
            EXPECT_EQUAL(SearchIterator::beginId(), search->getDocId());
            EXPECT_TRUE(search->seek(5u));
        }
        EXPECT_EQUAL(5u, search->getDocId());
        { // test doc 5 results
            search->unpack(5u);
            {
                // Field 1: the single position from term a.
                TermFieldMatchData &data = *md->resolveTermField(1);
                EXPECT_EQUAL(1u, data.getFieldId());
                EXPECT_EQUAL(5u, data.getDocId());
                FieldPositionsIterator itr = data.getIterator();
                EXPECT_EQUAL(1u, itr.size());
                ASSERT_TRUE(itr.valid());
                EXPECT_EQUAL(1u, itr.getPosition());
                itr.next();
                EXPECT_TRUE(!itr.valid());
            }
            {
                // Field 2: positions from terms b and c, in the order 2, 3.
                TermFieldMatchData &data = *md->resolveTermField(2);
                EXPECT_EQUAL(2u, data.getFieldId());
                EXPECT_EQUAL(5u, data.getDocId());
                FieldPositionsIterator itr = data.getIterator();
                EXPECT_EQUAL(2u, itr.size());
                ASSERT_TRUE(itr.valid());
                EXPECT_EQUAL(2u, itr.getPosition());
                itr.next();
                ASSERT_TRUE(itr.valid());
                EXPECT_EQUAL(3u, itr.getPosition());
                itr.next();
                EXPECT_TRUE(!itr.valid());
            }
        }
        // Doc 7 is not a hit. Strict is expected to have advanced to doc 10;
        // non-strict still reports doc 5 until seek(10) is called.
        EXPECT_TRUE(!search->seek(7));
        if (!strict) {
            EXPECT_EQUAL(5u, search->getDocId());
            EXPECT_TRUE(search->seek(10u));
        }
        EXPECT_EQUAL(10u, search->getDocId());
        { // test doc 10 results
            search->unpack(10u);
            // Field 1 has no hit for doc 10, so its match data still carries
            // the docid from the previous unpack.
            EXPECT_EQUAL(5u, md->resolveTermField(1)->getDocId()); // no match
            {
                // Field 2: the single position from term c.
                TermFieldMatchData &data = *md->resolveTermField(2);
                EXPECT_EQUAL(2u, data.getFieldId());
                EXPECT_EQUAL(10u, data.getDocId());
                FieldPositionsIterator itr = data.getIterator();
                EXPECT_EQUAL(1u, itr.size());
                ASSERT_TRUE(itr.valid());
                EXPECT_EQUAL(4u, itr.getPosition());
                itr.next();
                EXPECT_TRUE(!itr.valid());
            }
        }
        // No hits past doc 10: strict reports end, non-strict keeps
        // reporting the last docid it matched.
        EXPECT_TRUE(!search->seek(13));
        if (strict) {
            EXPECT_TRUE(search->isAtEnd());
        } else {
            EXPECT_EQUAL(10u, search->getDocId());
        }
    }
}
TEST_INIT("equiv_test"); + testEquiv(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/fake_searchable/.cvsignore b/searchlib/src/tests/queryeval/fake_searchable/.cvsignore new file mode 100644 index 00000000000..56a6e2188be --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +fake_searchable_test diff --git a/searchlib/src/tests/queryeval/fake_searchable/.gitignore b/searchlib/src/tests/queryeval/fake_searchable/.gitignore new file mode 100644 index 00000000000..42b48509660 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_fake_searchable_test_app diff --git a/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt b/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt new file mode 100644 index 00000000000..75eaae7d9ed --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_fake_searchable_test_app + SOURCES + fake_searchable_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_fake_searchable_test_app COMMAND searchlib_fake_searchable_test_app) diff --git a/searchlib/src/tests/queryeval/fake_searchable/DESC b/searchlib/src/tests/queryeval/fake_searchable/DESC new file mode 100644 index 00000000000..75ce65796f0 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/DESC @@ -0,0 +1 @@ +fake_searchable test. Take a look at fake_searchable_test.cpp for details. 
diff --git a/searchlib/src/tests/queryeval/fake_searchable/FILES b/searchlib/src/tests/queryeval/fake_searchable/FILES new file mode 100644 index 00000000000..b02a791e332 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/FILES @@ -0,0 +1 @@ +fake_searchable_test.cpp diff --git a/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp new file mode 100644 index 00000000000..c3ff31625d3 --- /dev/null +++ b/searchlib/src/tests/queryeval/fake_searchable/fake_searchable_test.cpp @@ -0,0 +1,379 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("fake_searchable_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/query/tree/intermediatenodes.h> +#include <vespa/searchlib/query/tree/termnodes.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/fef/matchdata.h> + +using namespace search::queryeval; +using namespace search::query; +using namespace search::fef; + +class Test : public vespalib::TestApp { +public: + int Main(); + void testTestFakeResult(); + void testTerm(); + void testPhrase(); + void testWeightedSet(); + void testMultiField(); + void testPhraseWithEmptyChild(); +private: + FakeRequestContext _requestContext; +}; + +void +Test::testTestFakeResult() +{ + EXPECT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6).elem(6).doc(6), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6).elem(6).doc(6)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(1).elem(5).len(15).weight(5).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + 
FakeResult().doc(5).elem(1).len(15).weight(5).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(19).weight(5).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(1).pos(5)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(1)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).doc(6)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).elem(6)); + + EXPECT_NOT_EQUAL(FakeResult().doc(5).elem(5).len(15).weight(5).pos(5), + FakeResult().doc(5).elem(5).len(15).weight(5).pos(5).pos(6)); +} + +void +Test::testTerm() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(5).pos(3)); + + SimpleStringTerm termNode("word1", "viewfoo", 1, w); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, termNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? 
"strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_EQUAL(5u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testPhrase() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(3).pos(7).doc(5).pos(3)); + source.addResult("fieldfoo", "word2", + FakeResult().doc(2).pos(1).doc(3).pos(10).doc(5).pos(4)); + + SimplePhrase phraseNode("viewfoo", 1, w); + phraseNode.append(Node::UP(new SimpleStringTerm("word1", "viewfoo", 2, w))); + phraseNode.append(Node::UP(new SimpleStringTerm("word2", "viewfoo", 3, w))); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, phraseNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? 
"strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_EQUAL(5u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testWeightedSet() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "friend1", + FakeResult().doc(3).doc(5).doc(7).doc(9)); + source.addResult("fieldfoo", "friend2", + FakeResult().doc(3).doc(4).doc(5).doc(6)); + source.addResult("fieldfoo", "friend3", + FakeResult().doc(5)); + + SimpleWeightedSetTerm weightedSet("fieldfoo", 1, w); + weightedSet.append(Node::UP(new SimpleStringTerm("friend1", "fieldfoo", 2, Weight(1)))); + weightedSet.append(Node::UP(new SimpleStringTerm("friend2", "fieldfoo", 3, Weight(2)))); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, weightedSet); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? 
"strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(2)); + if (strict) { + EXPECT_EQUAL(3u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(3u)); + } + EXPECT_EQUAL(3u, search->getDocId()); + { // test doc 3 results + search->unpack(3u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(3u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(2u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2, itr.getElementWeight()); + itr.next(); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1, itr.getElementWeight()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(search->seek(4)); + EXPECT_TRUE(search->seek(5)); + EXPECT_TRUE(search->seek(6)); + EXPECT_TRUE(search->seek(7)); + EXPECT_TRUE(!search->seek(8)); + EXPECT_TRUE(search->seek(9)); + { // test doc 9 results + search->unpack(9u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(9u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(1, itr.getElementWeight()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testMultiField() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(5).pos(3)); + source.addResult("fieldbar", "word1", + FakeResult().doc(5).pos(7).doc(10).pos(2)); + + SimpleStringTerm termNode("word1", "viewfoobar", 1, w); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + fields.add(FieldSpec("fieldbar", 2, 2)); + Blueprint::UP bp = 
source.createBlueprint(_requestContext, fields, termNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_EQUAL(5u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(5u)); + } + EXPECT_EQUAL(5u, search->getDocId()); + { // test doc 5 results + search->unpack(5u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(3u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + { + TermFieldMatchData &data = *md->resolveTermField(2); + EXPECT_EQUAL(2u, data.getFieldId()); + EXPECT_EQUAL(5u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(7u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(7)); + if (strict) { + EXPECT_EQUAL(10u, search->getDocId()); + } else { + EXPECT_TRUE(search->seek(10u)); + } + EXPECT_EQUAL(10u, search->getDocId()); + { // test doc 10 results + search->unpack(10u); + { + TermFieldMatchData &data = *md->resolveTermField(1); + EXPECT_EQUAL(1u, data.getFieldId()); + EXPECT_NOT_EQUAL(10u, data.getDocId()); + } + { + TermFieldMatchData &data = *md->resolveTermField(2); + EXPECT_EQUAL(2u, data.getFieldId()); + EXPECT_EQUAL(10u, data.getDocId()); + FieldPositionsIterator itr = data.getIterator(); + EXPECT_EQUAL(1u, itr.size()); + ASSERT_TRUE(itr.valid()); + EXPECT_EQUAL(2u, itr.getPosition()); + itr.next(); + EXPECT_TRUE(!itr.valid()); + } + } + EXPECT_TRUE(!search->seek(13)); + if (strict) 
{ + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +void +Test::testPhraseWithEmptyChild() +{ + Weight w(100); + + FakeSearchable source; + source.addResult("fieldfoo", "word1", + FakeResult().doc(3).pos(7).doc(5).pos(3)); + + SimplePhrase phraseNode("viewfoo", 1, w); + phraseNode.append(Node::UP(new SimpleStringTerm("word1", "viewfoo", 2, w))); + phraseNode.append(Node::UP(new SimpleStringTerm("word2", "viewfoo", 3, w))); + + FieldSpecList fields; + fields.add(FieldSpec("fieldfoo", 1, 1)); + Blueprint::UP bp = source.createBlueprint(_requestContext, fields, phraseNode); + for (int i = 0; i <= 1; ++i) { + bool strict = (i == 0); + TEST_STATE(strict ? "strict" : "non-strict"); + MatchData::UP md = MatchData::makeTestInstance(0, 100, 10); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + + EXPECT_TRUE(!search->seek(3)); + if (strict) { + EXPECT_TRUE(search->isAtEnd()); + } + } +} + +int +Test::Main() +{ + TEST_INIT("fake_searchable_test"); + testTestFakeResult(); + testTerm(); + testPhrase(); + testWeightedSet(); + testMultiField(); + testPhraseWithEmptyChild(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/getnodeweight/.gitignore b/searchlib/src/tests/queryeval/getnodeweight/.gitignore new file mode 100644 index 00000000000..a050aeb9215 --- /dev/null +++ b/searchlib/src/tests/queryeval/getnodeweight/.gitignore @@ -0,0 +1 @@ +searchlib_getnodeweight_test_app diff --git a/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt b/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt new file mode 100644 index 00000000000..5502e2c033b --- /dev/null +++ b/searchlib/src/tests/queryeval/getnodeweight/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
/**
 * Test application for getWeightFromNode(): checks the weight reported for
 * each of the Simple* query node types.
 */
class Test : public vespalib::TestApp {
public:
    int32_t getWeight(const Node &node);
    int Main();
};

/** Returns the weight extracted from 'node', expressed as a percentage. */
int32_t
Test::getWeight(const Node &node) {
    return getWeightFromNode(node).percent();
}

/**
 * Node types constructed without a Weight argument (And, AndNot, Near,
 * ONear, Or, Rank) are expected to report 0; every node constructed with
 * Weight(42) is expected to report 42.
 */
int
Test::Main()
{
    TEST_INIT("getweight_test");
    EXPECT_EQUAL(0, getWeight(SimpleAnd()));
    EXPECT_EQUAL(0, getWeight(SimpleAndNot()));
    EXPECT_EQUAL(42, getWeight(SimpleEquiv(0, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleNumberTerm("foo", "bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleLocationTerm(Location(), "bar", 1, Weight(42))));
    EXPECT_EQUAL(0, getWeight(SimpleNear(5)));
    EXPECT_EQUAL(0, getWeight(SimpleONear(5)));
    EXPECT_EQUAL(0, getWeight(SimpleOr()));
    EXPECT_EQUAL(42, getWeight(SimplePhrase("bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimplePrefixTerm("foo", "bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleRangeTerm(Range(), "bar", 1, Weight(42))));
    EXPECT_EQUAL(0, getWeight(SimpleRank()));
    EXPECT_EQUAL(42, getWeight(SimpleStringTerm("foo", "bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleSubstringTerm("foo", "bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleSuffixTerm("foo", "bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleWeightedSetTerm("bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleDotProduct("bar", 1, Weight(42))));
    EXPECT_EQUAL(42, getWeight(SimpleWandTerm("bar", 1, Weight(42), 57, 67, 77.7)));
    TEST_DONE();
}
diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES b/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES new file mode 100644 index 00000000000..b514a3cf512 --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/FILES @@ -0,0 +1 @@ +monitoring_search_iterator_test.cpp diff --git a/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp b/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp new file mode 100644 index 00000000000..a559be21ea3 --- /dev/null +++ b/searchlib/src/tests/queryeval/monitoring_search_iterator/monitoring_search_iterator_test.cpp @@ -0,0 +1,325 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/monitoring_search_iterator.h> +#include <vespa/searchlib/queryeval/monitoring_dump_iterator.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/simplesearch.h> +#include <vespa/searchlib/queryeval/test/searchhistory.h> +#include <vespa/vespalib/objects/objectdumper.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> + +using namespace search::queryeval; +using namespace search::queryeval::test; +using namespace search::fef; +using search::BitVector; +using search::BitVectorIterator; +using std::make_unique; + +struct HistorySearchIterator : public SearchIterator +{ + SearchHistory _history; + mutable bool _getPostingInfoCalled; + HistorySearchIterator() : _history(), _getPostingInfoCalled(false) {} + virtual void doSeek(uint32_t docId) { + _history.seek("x", docId); + setDocId(docId); + } + virtual void doUnpack(uint32_t docId) { 
_history.unpack("x", docId); } + virtual const PostingInfo *getPostingInfo() const { + _getPostingInfoCalled = true; + return NULL; + } +}; + +struct SimpleFixture +{ + MonitoringSearchIterator _itr; + SimpleResult _res; + SimpleFixture() + : _itr("SimpleIterator", + SearchIterator::UP(new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(8))), + false), + _res() + { + _res.search(_itr); + } +}; + +struct AdvancedFixture +{ + MonitoringSearchIterator _itr; + AdvancedFixture() + : _itr("AdvancedIterator", + SearchIterator::UP(new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(8). + addHit(16).addHit(32).addHit(64).addHit(128))), + true) + { + } +}; + +struct HistoryFixture +{ + MonitoringSearchIterator _itr; + HistoryFixture() + : _itr("HistoryIterator", SearchIterator::UP(new HistorySearchIterator()), false) + { + } +}; + +struct TreeFixture +{ + MonitoringSearchIterator::UP _itr; + SimpleResult _res; + TreeFixture() + : _itr() + { + MultiSearch::Children children; + children.push_back(new MonitoringSearchIterator("child1", + SearchIterator::UP + (new SimpleSearch(SimpleResult().addHit(2).addHit(4).addHit(6))), + false)); + children.push_back(new MonitoringSearchIterator("child2", + SearchIterator::UP + (new SimpleSearch(SimpleResult().addHit(3).addHit(4).addHit(5))), + false)); + _itr.reset(new MonitoringSearchIterator("and", + SearchIterator::UP(AndSearch::create(children, true)), + false)); + _res.search(*_itr); + } +}; + +TEST_F("require that number of seeks is collected", SimpleFixture) +{ + EXPECT_EQUAL(4u, f._itr.getStats().getNumSeeks()); + EXPECT_EQUAL(4.0 / 3.0, f._itr.getStats().getNumSeeksPerUnpack()); +} + +TEST_F("require that number of unpacks is collected", SimpleFixture) +{ + EXPECT_EQUAL(3u, f._itr.getStats().getNumUnpacks()); +} + +TEST_F("require that docId stepping is collected (root iterator)", SimpleFixture) +{ + EXPECT_EQUAL(4u, f._itr.getStats().getNumDocIdSteps()); + EXPECT_EQUAL(1, f._itr.getStats().getAvgDocIdSteps()); +} + 
+TEST_F("require that docId stepping is collected (child iterator)", AdvancedFixture) +{ + f._itr.seek(1); // 2 - 1 + EXPECT_EQUAL(1u, f._itr.getStats().getNumDocIdSteps()); + f._itr.seek(19); // 19 - 2 + EXPECT_EQUAL(18u, f._itr.getStats().getNumDocIdSteps()); + f._itr.seek(64); // 64 - 32 + EXPECT_EQUAL(50u, f._itr.getStats().getNumDocIdSteps()); + f._itr.seek(74); // 74 - 64 + EXPECT_EQUAL(60u, f._itr.getStats().getNumDocIdSteps()); + EXPECT_EQUAL(60 / 4, f._itr.getStats().getAvgDocIdSteps()); +} + +TEST_F("require that hit skipping is collected ", AdvancedFixture) +{ + f._itr.seek(1); + EXPECT_EQUAL(0u, f._itr.getStats().getNumHitSkips()); + f._itr.seek(4); + EXPECT_EQUAL(0u, f._itr.getStats().getNumHitSkips()); + f._itr.seek(16); + EXPECT_EQUAL(1u, f._itr.getStats().getNumHitSkips()); + f._itr.seek(120); + EXPECT_EQUAL(3u, f._itr.getStats().getNumHitSkips()); + EXPECT_EQUAL(3.0 / 4.0, f._itr.getStats().getAvgHitSkips()); +} + +TEST_F("require that results from underlying iterator is exposed through monitoring iterator", SimpleFixture) +{ + EXPECT_EQUAL(SimpleResult().addHit(2).addHit(4).addHit(8), f._res); +} + +TEST_F("require that calls are forwarded to underlying iterator", HistoryFixture) +{ + f._itr.seek(2); + EXPECT_EQUAL(2u, f._itr.getDocId()); + f._itr.unpack(2); + f._itr.seek(4); + EXPECT_EQUAL(4u, f._itr.getDocId()); + f._itr.unpack(4); + f._itr.seek(8); + EXPECT_EQUAL(8u, f._itr.getDocId()); + f._itr.unpack(8); + f._itr.getPostingInfo(); + const HistorySearchIterator &hsi = dynamic_cast<const HistorySearchIterator &>(f._itr.getIterator()); + EXPECT_EQUAL(SearchHistory().seek("x", 2).unpack("x", 2).seek("x", 4).unpack("x", 4).seek("x", 8).unpack("x", 8), + hsi._history); + EXPECT_TRUE(hsi._getPostingInfoCalled); +} + +void +addIterator(MonitoringSearchIterator::Dumper &d, + const vespalib::string &name, + int64_t numSeeks, + double avgDocIdSteps, + double avgHitSkips, + int64_t numUnpacks, + double numSeeksPerUnpack) +{ + d.openStruct("void", 
"search::queryeval::MonitoringSearchIterator"); + d.visitString("iteratorName", name); + { + d.openStruct("void", "MonitoringSearchIterator::Stats"); + d.visitInt("numSeeks", numSeeks); + d.visitFloat("avgDocIdSteps", avgDocIdSteps); + d.visitFloat("avgHitSkips", avgHitSkips); + d.visitInt("numUnpacks", numUnpacks); + d.visitFloat("numSeeksPerUnpack", numSeeksPerUnpack); + d.closeStruct(); + } + d.closeStruct(); +} + +TEST("require that dumper can handle formatting on several levels") +{ + MonitoringSearchIterator::Dumper d(2, 6, 6, 10, 3); + addIterator(d, "root", 1, 1.1, 11.22, 11, 111.3); + { + d.openStruct("children", "void"); + addIterator(d, "c.1", 222222, 2.1111, 22.2222, 222000, 222.4444); + { + d.openStruct("children", "void"); + addIterator(d, "c.1.1", 333333, 3.1111, 33.2222, 333000, 333333.4444); + addIterator(d, "c.1.2", 444, 4.22, 4.33, 440, 4.44); + d.closeStruct(); + } + addIterator(d, "c.2", 555, 5.22, 5.33, 550, 5.44); + { + d.openStruct("children", "void"); + addIterator(d, "c.2.1", 666666, 6.1111, 66.2222, 333000, 666666.4444); + addIterator(d, "c.2.2", 777, 7.22, 7.33, 770, 7.44); + d.closeStruct(); + } + d.closeStruct(); + } + EXPECT_EQUAL( + "root: 1 seeks, 1.100 steps/seek, 11.220 skips/seek, 11 unpacks, 111.300 seeks/unpack\n" + " c.1: 222222 seeks, 2.111 steps/seek, 22.222 skips/seek, 222000 unpacks, 222.444 seeks/unpack\n" + " c.1.1: 333333 seeks, 3.111 steps/seek, 33.222 skips/seek, 333000 unpacks, 333333.444 seeks/unpack\n" + " c.1.2: 444 seeks, 4.220 steps/seek, 4.330 skips/seek, 440 unpacks, 4.440 seeks/unpack\n" + " c.2: 555 seeks, 5.220 steps/seek, 5.330 skips/seek, 550 unpacks, 5.440 seeks/unpack\n" + " c.2.1: 666666 seeks, 6.111 steps/seek, 66.222 skips/seek, 333000 unpacks, 666666.444 seeks/unpack\n" + " c.2.2: 777 seeks, 7.220 steps/seek, 7.330 skips/seek, 770 unpacks, 7.440 seeks/unpack\n", + d.toString()); +} + +TEST_F("require that single iterator can be dumped compact", AdvancedFixture) +{ + f._itr.seek(6); + 
f._itr.seek(16); + f._itr.unpack(16); + MonitoringSearchIterator::Dumper dumper; + visit(dumper, "", f._itr); + EXPECT_EQUAL("AdvancedIterator: 2 seeks, 7.00 steps/seek, 1.00 skips/seek, 1 unpacks, 2.00 seeks/unpack\n", + dumper.toString()); +} + +TEST_F("require that iterator tree can be dumped compact", TreeFixture) +{ + MonitoringSearchIterator::Dumper dumper; + visit(dumper, "", f._itr.get()); + EXPECT_EQUAL("and: 2 seeks, 1.00 steps/seek, 0.00 skips/seek, 1 unpacks, 2.00 seeks/unpack\n" + " child1: 3 seeks, 1.00 steps/seek, 0.00 skips/seek, 1 unpacks, 3.00 seeks/unpack\n" + " child2: 3 seeks, 1.67 steps/seek, 0.00 skips/seek, 1 unpacks, 3.00 seeks/unpack\n", + dumper.toString()); +} + +TEST_F("require that single iterator can be dumped verbosely", AdvancedFixture) +{ + f._itr.seek(6); + f._itr.seek(16); + f._itr.unpack(16); + vespalib::ObjectDumper dumper; + visit(dumper, "", &f._itr); + EXPECT_EQUAL("search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'AdvancedIterator'\n" + " iteratorType: 'search::queryeval::SimpleSearch'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 2\n" + " numDocIdSteps: 14\n" + " avgDocIdSteps: 7\n" + " numHitSkips: 2\n" + " avgHitSkips: 1\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 2\n" + " }\n" + " tag: '<null>'\n" + "}\n", + dumper.toString()); +} + +TEST_F("require that iterator tree can be dumped verbosely", TreeFixture) +{ + vespalib::ObjectDumper dumper; + visit(dumper, "", f._itr.get()); + EXPECT_EQUAL("search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'and'\n" + " iteratorType: 'search::queryeval::AndSearchStrict<search::queryeval::(anonymous namespace)::FullUnpack>'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 2\n" + " numDocIdSteps: 2\n" + " avgDocIdSteps: 1\n" + " numHitSkips: 0\n" + " avgHitSkips: 0\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 2\n" + " }\n" + " children: std::vector {\n" + " [0]: search::queryeval::MonitoringSearchIterator {\n" + " 
iteratorName: 'child1'\n" + " iteratorType: 'search::queryeval::SimpleSearch'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 3\n" + " numDocIdSteps: 3\n" + " avgDocIdSteps: 1\n" + " numHitSkips: 0\n" + " avgHitSkips: 0\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 3\n" + " }\n" + " tag: '<null>'\n" + " }\n" + " [1]: search::queryeval::MonitoringSearchIterator {\n" + " iteratorName: 'child2'\n" + " iteratorType: 'search::queryeval::SimpleSearch'\n" + " stats: MonitoringSearchIterator::Stats {\n" + " numSeeks: 3\n" + " numDocIdSteps: 5\n" + " avgDocIdSteps: 1.66667\n" + " numHitSkips: 0\n" + " avgHitSkips: 0\n" + " numUnpacks: 1\n" + " numSeeksPerUnpack: 3\n" + " }\n" + " tag: '<null>'\n" + " }\n" + " }\n" + "}\n", + dumper.toString()); +} + +MonitoringSearchIterator::UP +create(SearchIterator::UP child) { + return make_unique<MonitoringSearchIterator>("test", std::move(child), false); +} + +TEST("test monitoring search iterator handles initRange accoring to spec") { + search::test::InitRangeVerifier ir; + ir.verify(*create(ir.createIterator(ir.getExpectedDocIds(), false))); + ir.verify(*make_unique<MonitoringDumpIterator>(create(ir.createIterator(ir.getExpectedDocIds(), false)))); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore b/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore new file mode 100644 index 00000000000..415cfe14f11 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/.gitignore @@ -0,0 +1,2 @@ +searchlib_multibitvectoriterator_test_app +searchlib_multibitvectoriterator_bench_app diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt b/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt new file mode 100644 index 00000000000..1bac095225f --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_multibitvectoriterator_test_app + SOURCES + multibitvectoriterator_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multibitvectoriterator_test_app COMMAND searchlib_multibitvectoriterator_test_app) +vespa_add_executable(searchlib_multibitvectoriterator_bench_app + SOURCES + multibitvectoriterator_bench.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multibitvectoriterator_bench_app COMMAND searchlib_multibitvectoriterator_bench_app and no no 10 100000000 50 50 50 BENCHMARK) diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/DESC b/searchlib/src/tests/queryeval/multibitvectoriterator/DESC new file mode 100644 index 00000000000..96fc26f5950 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/DESC @@ -0,0 +1 @@ +multibitvectoriterator test. Take a look at multibitvectoriterator_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/FILES b/searchlib/src/tests/queryeval/multibitvectoriterator/FILES new file mode 100644 index 00000000000..7ae4331d090 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/FILES @@ -0,0 +1,2 @@ +multibitvectoriterator_test.cpp +multibitvectoriterator_bench.cpp diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp new file mode 100644 index 00000000000..8912be56351 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_bench.cpp @@ -0,0 +1,138 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("multibitvectoriterator_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/multibitvectoriterator.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> + +using namespace search::queryeval; +using namespace search::fef; +using namespace search; + +//----------------------------------------------------------------------------- + +class Test : public vespalib::TestApp +{ +public: + void benchmark(); + int Main(); + template <typename T> + void testSearch(bool strict); +private: + void searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit); + void setup(); + std::vector< BitVector::UP > _bvs; + uint32_t _numSearch; + uint32_t _numDocs; + bool _strict; + bool _optimize; + vespalib::string _type; + std::vector<int> _fillLimits; +}; + +void Test::setup() +{ + for(size_t i(0); i < _fillLimits.size(); i++) { + _bvs.push_back(BitVector::create(_numDocs)); + BitVector & bv(*_bvs.back()); + for (size_t j(0); j < bv.size(); j++) { + int r = rand(); + if (r < _fillLimits[i]) { + bv.setBit(j); + } + } + bv.invalidateCachedCount(); + LOG(info, "Filled bitvector %ld with %d bits", i, bv.countTrueBits()); + } +} + +typedef std::vector<uint32_t> H; + +H +seek(SearchIterator & s, uint32_t docIdLimit) +{ + H h; + for (uint32_t docId(0); docId < docIdLimit; ) { + if (s.seek(docId)) { + h.push_back(docId); + docId++; + } else { + if (s.getDocId() > docId) { + docId = s.getDocId(); + } else { + docId++; + } + } + //printf("docId = %u\n", docId); + } + return h; +} + +void +Test::benchmark() +{ + if (_type == "and") { + LOG(info, "Testing 'and'"); + for (size_t i(0); i < _numSearch; i++) { + 
testSearch<AndSearch>(_strict); + } + } else { + LOG(info, "Testing 'or'"); + for (size_t i(0); i < _numSearch; i++) { + testSearch<OrSearch>(_strict); + } + } +} + +template <typename T> +void +Test::testSearch(bool strict) +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + MultiSearch::Children andd; + for (size_t i(0); i < _bvs.size(); i++) { + andd.push_back(BitVectorIterator::create(_bvs[i].get(), tfmda, strict).release()); + } + SearchIterator::UP s(T::create(andd, strict)); + if (_optimize) { + LOG(info, "Optimizing iterator"); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + } + H h = seek(*s, _numDocs); + LOG(info, "Found %ld hits", h.size()); +} + +int +Test::Main() +{ + TEST_INIT("multibitvectoriterator_benchmark"); + if (_argc < 6) { + LOG(info, "%s <'and/or'> <'strict/no-strict'> <'optimize/no-optimize> <numsearch> <numdocs> <fill 1> [<fill N>]", _argv[0]); + return -1; + } + _type = _argv[1]; + _strict = _argv[2] == vespalib::string("strict"); + _optimize = _argv[3] == vespalib::string("optimize"); + _numSearch = strtoul(_argv[4], NULL, 0); + _numDocs = strtoul(_argv[5], NULL, 0); + for (int i(6); i < _argc; i++) { + _fillLimits.push_back((RAND_MAX/100) * strtoul(_argv[i], NULL, 0)); + } + LOG(info, "Start setup of '%s' isearch with %ld vectors with %d documents", _type.c_str(), _fillLimits.size(), _numDocs); + setup(); + LOG(info, "Start benchmark"); + benchmark(); + LOG(info, "Done benchmark"); + TEST_FLUSH(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp new file mode 100644 index 00000000000..f3a25d675b2 --- /dev/null +++ b/searchlib/src/tests/queryeval/multibitvectoriterator/multibitvectoriterator_test.cpp @@ -0,0 +1,531 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("multibitvectoriterator_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/multibitvectoriterator.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/queryeval/truesearch.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/andnotsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> + +using namespace search::queryeval; +using namespace search::fef; +using namespace search; + +//----------------------------------------------------------------------------- + +class Test : public vespalib::TestApp +{ +public: + void testAndNot(); + void testAnd(); + void testBug7163266(); + void testOr(); + void testAndWith(); + void testEndGuard(); + template<typename T> + void testThatOptimizePreservesUnpack(); + template <typename T> + void testOptimizeCommon(bool isAnd); + template <typename T> + void testOptimizeAndOr(); + template <typename T> + void testSearch(bool strict); + int Main(); +private: + void verifySelectiveUnpack(SearchIterator & s, const TermFieldMatchData * tfmd); + void searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit); + void setup(); + std::vector< BitVector::UP > _bvs; +}; + +void Test::setup() +{ + srand(7); + for(size_t i(0); i < 3; i++) { + _bvs.push_back(BitVector::create(10000)); + BitVector & bv(*_bvs.back()); + for (size_t j(0); j < bv.size(); j++) { + int r = rand(); + if (r & 0x1) { + bv.setBit(j); + } + } + } +} + +typedef std::vector<uint32_t> H; + +H +seekNoReset(SearchIterator & s, uint32_t start, uint32_t docIdLimit) +{ + H h; + for (uint32_t docId(start); docId < docIdLimit; ) { + if (s.seek(docId)) { + h.push_back(docId); + docId++; + } else { + if 
(s.getDocId() > docId) { + docId = s.getDocId(); + } else { + docId++; + } + } + //printf("docId = %u\n", docId); + } + return h; +} + +H +seek(SearchIterator & s, uint32_t docIdLimit) +{ + s.resetRange(); + s.initFullRange(); + return seekNoReset(s, 1, docIdLimit); +} + +void +Test::testAndWith() +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(AndSearch::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + + s->initFullRange(); + H firstHits2 = seekNoReset(*s, 1, 130); + SearchIterator::UP filter(s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9)); + H lastHits2F = seekNoReset(*s, 130, _bvs[0]->size()); + + children.clear(); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda, false).release()); + s.reset(AndSearch::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + s->initFullRange(); + H firstHits3 = seekNoReset(*s, 1, 130); + H lastHits3 = seekNoReset(*s, 130, _bvs[0]->size()); + //These constants will change if srand(7) is changed. 
+ EXPECT_EQUAL(30u, firstHits2.size()); + EXPECT_EQUAL(19u, firstHits3.size()); + EXPECT_EQUAL(1234u, lastHits2F.size()); + ASSERT_EQUAL(lastHits3.size(), lastHits2F.size()); + for (size_t i(0); i < lastHits3.size(); i++) { + EXPECT_EQUAL(lastHits3[i], lastHits2F[i]); + } + } +} + +void +Test::testAndNot() +{ + testOptimizeCommon<AndNotSearch>(false); + testSearch<AndNotSearch>(false); + testSearch<AndNotSearch>(true); +} + +void +Test::testAnd() +{ + testOptimizeCommon<AndSearch>(true); + testOptimizeAndOr<AndSearch>(); + testSearch<AndSearch>(false); + testSearch<AndSearch>(true); +} + +void +Test::testOr() +{ + testOptimizeCommon< OrSearch >(false); + testOptimizeAndOr< OrSearch >(); + testSearch<OrSearch>(false); + testSearch<OrSearch>(true); +} + +void +Test::testBug7163266() +{ + TermFieldMatchData tfmd[30]; + TermFieldMatchDataArray tfmda[30]; + for (size_t i(0); i < 30; i++) { + tfmda[i].add(&tfmd[i]); + } + _bvs[0]->setBit(1); + _bvs[1]->setBit(1); + MultiSearch::Children children; + UnpackInfo unpackInfo; + for (size_t i(0); i < 28; i++) { + children.push_back(new TrueSearch(tfmd[2])); + unpackInfo.add(i); + } + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda[0], false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda[1], false).release()); + SearchIterator::UP s(AndSearch::create(children, false, unpackInfo)); + const MultiSearch * ms = dynamic_cast<const MultiSearch *>(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(30u, ms->getChildren().size()); + EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::SelectiveUnpack>", s->getClassName()); + for (size_t i(0); i < 28; i++) { + EXPECT_TRUE(ms->needUnpack(i)); + } + EXPECT_FALSE(ms->needUnpack(28)); + EXPECT_FALSE(ms->needUnpack(29)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + ms = dynamic_cast<const MultiSearch *>(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(29u, ms->getChildren().size()); 
+ EXPECT_EQUAL("search::queryeval::AndSearchNoStrict<search::queryeval::(anonymous namespace)::SelectiveUnpack>", s->getClassName()); + for (size_t i(0); i < 28; i++) { + EXPECT_TRUE(ms->needUnpack(i)); + } + EXPECT_TRUE(ms->needUnpack(28)); // NB: force unpack all +} + +template<typename T> +void +Test::testThatOptimizePreservesUnpack() +{ + TermFieldMatchData tfmd[4]; + TermFieldMatchDataArray tfmda[4]; + for (size_t i(0); i < 4; i++) { + tfmda[i].add(&tfmd[i]); + } + _bvs[0]->setBit(1); + _bvs[1]->setBit(1); + _bvs[2]->setBit(1); + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda[0], false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda[1], false).release()); + children.push_back(new TrueSearch(tfmd[2])); + children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda[3], false).release()); + UnpackInfo unpackInfo; + unpackInfo.add(1); + unpackInfo.add(2); + SearchIterator::UP s(T::create(children, false, unpackInfo)); + s->initFullRange(); + const MultiSearch * ms = dynamic_cast<const MultiSearch *>(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(4u, ms->getChildren().size()); + verifySelectiveUnpack(*s, tfmd); + tfmd[1].resetOnlyDocId(0); + tfmd[2].resetOnlyDocId(0); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + s->resetRange(); + s->initFullRange(); + ms = dynamic_cast<const MultiSearch *>(s.get()); + EXPECT_TRUE(ms != NULL); + EXPECT_EQUAL(2u, ms->getChildren().size()); + verifySelectiveUnpack(*s, tfmd); +} + +void +Test::verifySelectiveUnpack(SearchIterator & s, const TermFieldMatchData * tfmd) +{ + s.seek(1); + EXPECT_EQUAL(0u, tfmd[0].getDocId()); + EXPECT_EQUAL(0u, tfmd[1].getDocId()); + EXPECT_EQUAL(0u, tfmd[2].getDocId()); + EXPECT_EQUAL(0u, tfmd[3].getDocId()); + s.unpack(1); + EXPECT_EQUAL(0u, tfmd[0].getDocId()); + EXPECT_EQUAL(1u, tfmd[1].getDocId()); + EXPECT_EQUAL(1u, tfmd[2].getDocId()); + EXPECT_EQUAL(0u, tfmd[3].getDocId()); +} + +void 
+Test::searchAndCompare(SearchIterator::UP s, uint32_t docIdLimit) +{ + H a = seek(*s, docIdLimit); + SearchIterator * p = s.get(); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + if (s.get() != p) { + H b = seek(*s, docIdLimit); + EXPECT_FALSE(a.empty()); + EXPECT_EQUAL(a.size(), b.size()); + for (size_t i(0); i < a.size(); i++) { + EXPECT_EQUAL(a[i], b[i]); + } + } +} + +template <typename T> +void +Test::testSearch(bool strict) +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + uint32_t docIdLimit(_bvs[0]->size()); + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release()); + SearchIterator::UP s(T::create(children, strict)); + searchAndCompare(std::move(s), docIdLimit); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, strict).release()); + SearchIterator::UP s(T::create(children, strict)); + searchAndCompare(std::move(s), docIdLimit); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, strict).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, strict).release()); + children.push_back(BitVectorIterator::create(_bvs[2].get(), tfmda, strict).release()); + SearchIterator::UP s(T::create(children, strict)); + searchAndCompare(std::move(s), docIdLimit); + } +} + +template <typename T> +void +Test::testOptimizeCommon(bool isAnd) +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch 
& m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(1u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const BitVectorIterator *>(m.getChildren()[0]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const BitVectorIterator *>(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast<const BitVectorIterator *>(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const 
EmptySearch *>(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1]) != NULL); + EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1])->isStrict()); + } + { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1]) != NULL); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[1])->isStrict()); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + SearchIterator::UP filter(s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9)); + + if (isAnd) { + EXPECT_TRUE(nullptr == filter.get()); + } else { + EXPECT_FALSE(nullptr == filter.get()); + } + + children.clear(); + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + s.reset(T::create(children, true)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + filter = s->andWith(BitVectorIterator::create(_bvs[2].get(), tfmda, false), 9); + + if 
(isAnd) { + EXPECT_TRUE(nullptr == filter.get()); + } else { + EXPECT_FALSE(nullptr == filter.get()); + } + } +} + +template <typename T> +void +Test::testOptimizeAndOr() +{ + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(s.get()) != NULL); + EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(s.get())->isStrict()); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL); + EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, false).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + 
EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL); + EXPECT_FALSE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(new EmptySearch()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL); + } + { + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, false).release()); + children.push_back(new EmptySearch()); + + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const T *>(s.get()) != NULL); + const MultiSearch & m(dynamic_cast<const MultiSearch &>(*s)); + EXPECT_EQUAL(2u, m.getChildren().size()); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(m.getChildren()[0]) != NULL); + EXPECT_TRUE(dynamic_cast<const 
MultiBitVectorIteratorBase *>(m.getChildren()[0])->isStrict()); + EXPECT_TRUE(dynamic_cast<const EmptySearch *>(m.getChildren()[1]) != NULL); + } +} + +void +Test::testEndGuard() +{ + typedef AndSearch T; + TermFieldMatchData tfmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tfmd); + + MultiSearch::Children children; + children.push_back(BitVectorIterator::create(_bvs[0].get(), tfmda, true).release()); + children.push_back(BitVectorIterator::create(_bvs[1].get(), tfmda, true).release()); + SearchIterator::UP s(T::create(children, false)); + s = MultiBitVectorIteratorBase::optimize(std::move(s)); + s->initFullRange(); + EXPECT_TRUE(s.get() != NULL); + EXPECT_TRUE(dynamic_cast<const MultiBitVectorIteratorBase *>(s.get()) != NULL); + MultiSearch & m(dynamic_cast<MultiSearch &>(*s)); + EXPECT_TRUE(m.seek(0) || !m.seek(0)); + EXPECT_TRUE(m.seek(3) || !m.seek(3)); + EXPECT_FALSE(m.seek(_bvs[0]->size()+987)); +} + +int +Test::Main() +{ + TEST_INIT("multibitvectoriterator_test"); + setup(); + testBug7163266(); + testThatOptimizePreservesUnpack<OrSearch>(); + testThatOptimizePreservesUnpack<AndSearch>(); + TEST_FLUSH(); + testEndGuard(); + TEST_FLUSH(); + testAndNot(); + TEST_FLUSH(); + testAnd(); + TEST_FLUSH(); + testOr(); + TEST_FLUSH(); + testAndWith(); + TEST_FLUSH(); + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore b/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore new file mode 100644 index 00000000000..0a4881f0952 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/.gitignore @@ -0,0 +1 @@ +searchlib_parallel_weak_and_test_app diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt b/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt new file mode 100644 index 00000000000..b76286bea65 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_parallel_weak_and_test_app + SOURCES + parallel_weak_and_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_parallel_weak_and_test_app COMMAND searchlib_parallel_weak_and_test_app) diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/DESC b/searchlib/src/tests/queryeval/parallel_weak_and/DESC new file mode 100644 index 00000000000..f58343f384b --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/DESC @@ -0,0 +1 @@ +parallel_weak_and test. Take a look at parallel_weak_and_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/FILES b/searchlib/src/tests/queryeval/parallel_weak_and/FILES new file mode 100644 index 00000000000..972727bfa00 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/FILES @@ -0,0 +1,2 @@ +weak_and_test.cpp +weak_and_bench.cpp diff --git a/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp new file mode 100644 index 00000000000..74aa052e486 --- /dev/null +++ b/searchlib/src/tests/queryeval/parallel_weak_and/parallel_weak_and_test.cpp @@ -0,0 +1,681 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h> +#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/test/eagerchild.h> +#include <vespa/searchlib/queryeval/test/leafspec.h> +#include <vespa/searchlib/queryeval/test/searchhistory.h> +#include <vespa/searchlib/queryeval/test/wandspec.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/test/document_weight_attribute_helper.h> +#include <vespa/searchlib/queryeval/document_weight_search_iterator.h> +#include <vespa/searchlib/fef/fef.h> + +using namespace search::query; +using namespace search::queryeval; +using namespace search::queryeval::test; + +typedef search::feature_t feature_t; +typedef wand::score_t score_t; +typedef ParallelWeakAndSearch::MatchParams MatchParams; +typedef ParallelWeakAndSearch::RankParams RankParams; +using search::test::InitRangeVerifier; +using search::test::DocumentWeightAttributeHelper; +using search::IDocumentWeightAttribute; +using search::fef::TermFieldMatchData; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldHandle; + + +struct Scores : public std::vector<score_t> +{ + Scores &add(score_t val) { + push_back(val); + return *this; + } +}; + +struct ScoresHistory : public std::vector<Scores> +{ + ScoresHistory &add(const Scores &s) { + push_back(s); + return *this; + } +}; + +std::ostream &operator << (std::ostream &out, const ScoresHistory &hist) +{ + out << "ScoresHistory:\n"; + for (size_t i = 0; i < hist.size(); ++i) { + const Scores &scores = hist[i]; + out << "[" << i << "]: "; + for (size_t j = 0; j < scores.size(); ++j) { + if (j != 0) 
{ + out << ","; + } + out << scores[j]; + } + out << std::endl; + } + return out; +} + +struct TestHeap : public WeakAndHeap +{ + ScoresHistory history; + + TestHeap(uint32_t scoresToTrack_) : WeakAndHeap(scoresToTrack_), history() {} + virtual void adjust(score_t *begin, score_t *end) { + Scores scores; + for (score_t *itr = begin; itr != end; ++itr) { + scores.add(*itr); + } + history.push_back(scores); + setMinScore(1); + } + virtual size_t size() const { return history.size(); } +}; + +template <typename HeapType> +struct WandTestSpec : public WandSpec +{ + HeapType heap; + TermFieldMatchData rootMatchData; + MatchParams matchParams; + + WandTestSpec(uint32_t scoresToTrack, uint32_t scoresAdjustFrequency = 1, + score_t scoreThreshold = 0, double thresholdBoostFactor = 1) + : WandSpec(), + heap(scoresToTrack), + rootMatchData(), + matchParams(heap, scoreThreshold, thresholdBoostFactor, scoresAdjustFrequency) + {} + SearchIterator *create() { + MatchData::UP childrenMatchData = createMatchData(); + MatchData *tmp = childrenMatchData.get(); + return new TrackedSearch("PWAND", getHistory(), ParallelWeakAndSearch::create(getTerms(tmp), + matchParams, + RankParams(rootMatchData, + std::move(childrenMatchData)), + true)); + } +}; + +typedef WandTestSpec<TestHeap> WandSpecWithTestHeap; +typedef WandTestSpec<SharedWeakAndPriorityQueue> WandSpecWithRealHeap; + +FakeResult +doSearch(SearchIterator &sb, const TermFieldMatchData &tfmd) +{ + FakeResult retval; + sb.initFullRange(); + for (sb.seek(1); ! 
sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + sb.unpack(sb.getDocId()); + retval.doc(sb.getDocId()); + feature_t score = tfmd.getRawScore(); + retval.score(score); + } + return retval; +} + +SimpleResult +asSimpleResult(const FakeResult &result) +{ + SimpleResult retval; + for (size_t i = 0; i < result.inspect().size(); ++i) { + retval.addHit(result.inspect()[i].docId); + } + return retval; +} + +struct WandBlueprintSpec +{ + static const uint32_t fieldId = 0; + static const TermFieldHandle handle = 0; + std::vector<std::pair<std::string, int32_t> > tokens; + uint32_t docIdLimit = 0; + FakeRequestContext requestContext; + + WandBlueprintSpec &add(const std::string &token, int32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode(uint32_t scoresToTrack = 100, + score_t scoreThreshold = 0, + double thresholdBoostFactor = 1) const { + SimpleWandTerm *node = new SimpleWandTerm("view", 0, Weight(0), + scoresToTrack, scoreThreshold, thresholdBoostFactor); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, + Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + Blueprint::UP blueprint(Searchable &searchable, const std::string &field, const search::query::Node &term) const { + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, term); + EXPECT_TRUE(dynamic_cast<ParallelWeakAndBlueprint*>(bp.get()) != 0); + return bp; + } + + SearchIterator::UP iterator(Searchable &searchable, const std::string &field) const { + Node::UP term = createNode(); + Blueprint::UP bp = blueprint(searchable, field, *term); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + bp->fetchPostings(true); + bp->setDocIdLimit(docIdLimit); + SearchIterator::UP sb = bp->createSearch(*md, true); + EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0); + return 
sb; + } + + FakeResult search(Searchable &searchable, const std::string &field) const { + Node::UP term = createNode(); + return search(searchable, field, *term); + } + + FakeResult search(Searchable &searchable, const std::string &field, const search::query::Node &term) const { + Blueprint::UP bp = blueprint(searchable, field, term); + MatchData::UP md(MatchData::makeTestInstance(0, 1, 1)); + bp->fetchPostings(true); + bp->setDocIdLimit(docIdLimit); + SearchIterator::UP sb = bp->createSearch(*md, true); + EXPECT_TRUE(dynamic_cast<ParallelWeakAndSearch*>(sb.get()) != 0); + return doSearch(*sb, *md->resolveTermField(handle)); + } +}; + +struct FixtureBase +{ + WandSpecWithRealHeap spec; + FakeResult result; + FixtureBase(uint32_t scoresToTrack, + uint32_t scoresAdjustFrequency, + score_t scoreThreshold = 0, + double boostFactor = 1.0) + : spec(scoresToTrack, scoresAdjustFrequency, scoreThreshold, boostFactor), + result() {} + void prepare() { + SearchIterator::UP si(spec.create()); + result = doSearch(*si, spec.rootMatchData); + } +}; + +struct AlgoSimpleFixture : public FixtureBase +{ + AlgoSimpleFixture() : FixtureBase(2, 1) { + spec.leaf(LeafSpec("A", 1).doc(1, 1).doc(2, 2).doc(3, 3).doc(4, 4).doc(5, 5).doc(6, 6)); + spec.leaf(LeafSpec("B", 4).doc(1, 1).doc(3, 3).doc(5, 5)); + prepare(); + } +}; + +struct AlgoAdvancedFixture : public FixtureBase +{ + AlgoAdvancedFixture() : FixtureBase(100, 1) { + spec.leaf(LeafSpec("1").doc(1, 1).doc(11, 1).doc(111, 1)); + spec.leaf(LeafSpec("2").doc(2, 1).doc(12, 1).doc(112, 1)); + spec.leaf(LeafSpec("3").doc(3, 1).doc(13, 1).doc(113, 1)); + spec.leaf(LeafSpec("4").doc(4, 1).doc(14, 1).doc(114, 1)); + spec.leaf(LeafSpec("5").doc(5, 1).doc(15, 1).doc(115, 1)); + prepare(); + } +}; + +struct AlgoSubsearchFixture : public FixtureBase +{ + AlgoSubsearchFixture() : FixtureBase(2, 1) { + spec.leaf(LeafSpec("A", 10).itr(new EagerChild(search::endDocId))); + spec.leaf(LeafSpec("B", 20).itr(new EagerChild(10))); + prepare(); + } +}; + 
+struct AlgoSameScoreFixture : public FixtureBase +{ + AlgoSameScoreFixture() : FixtureBase(1, 1) { + spec.leaf(LeafSpec("A").doc(1, 1).doc(2, 1)); + prepare(); + } +}; + +struct AlgoScoreThresholdFixture : public FixtureBase +{ + AlgoScoreThresholdFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30)); + spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40)); + prepare(); + } +}; + +struct AlgoLargeScoresFixture : public FixtureBase +{ + AlgoLargeScoresFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + spec.leaf(LeafSpec("A", 60000).doc(1, 60000).doc(2, 70000)); + spec.leaf(LeafSpec("B", 70000).doc(1, 80000).doc(3, 90000)); + prepare(); + } +}; + +struct AlgoExhaustPastFixture : public FixtureBase +{ + AlgoExhaustPastFixture(score_t scoreThreshold) : FixtureBase(3, 1, scoreThreshold) { + spec.leaf(LeafSpec("A", 1).doc(1, 20).doc(3, 40).doc(5, 10)); + spec.leaf(LeafSpec("B", 1).doc(5, 10)); + spec.leaf(LeafSpec("C", 1).doc(5, 10)); + prepare(); + } +}; + + +TEST_F("require that algorithm prunes bad hits after enough good ones are obtained", AlgoSimpleFixture) +{ + FakeResult expect = FakeResult() + .doc(1).score(1 * 1 + 4 * 1) + .doc(2).score(1 * 2) + .doc(3).score(1 * 3 + 4 * 3) + .doc(5).score(1 * 5 + 4 * 5); + EXPECT_EQUAL(expect, f.result); +} + +TEST_F("require that algorithm uses subsearches as expected", AlgoSimpleFixture) { + EXPECT_EQUAL(SearchHistory() + .seek("PWAND", 1).seek("B", 1).step("B", 1).unpack("B", 1).step("PWAND", 1) + .unpack("PWAND", 1).seek("A", 1).step("A", 1).unpack("A", 1) + .seek("PWAND", 2).seek("B", 2).step("B", 3).seek("A", 2).step("A", 2).unpack("A", 2).step("PWAND", 2) + .unpack("PWAND", 2) + .seek("PWAND", 3).unpack("B", 3).step("PWAND", 3) + .unpack("PWAND", 3).seek("A", 3).step("A", 3).unpack("A", 3) + .seek("PWAND", 4).seek("B", 4).step("B", 5).seek("A", 4).step("A", 4).unpack("A", 4).unpack("B", 5).step("PWAND", 5) + .unpack("PWAND", 
5).seek("A", 5).step("A", 5).unpack("A", 5) + .seek("PWAND", 6).seek("B", 6).step("B", search::endDocId).step("PWAND", search::endDocId), + f.spec.getHistory()); +} + +TEST_F("require that algorithm considers documents in the right order", AlgoAdvancedFixture) +{ + EXPECT_EQUAL(SimpleResult() + .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) + .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) + .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), asSimpleResult(f.result)); +} + +TEST_F("require that algorithm take initial docid for subsearches into account", AlgoSubsearchFixture) +{ + EXPECT_EQUAL(FakeResult().doc(10).score(20), f.result); + EXPECT_EQUAL(SearchHistory().seek("PWAND", 1).unpack("B", 10).step("PWAND", 10).unpack("PWAND", 10) + .seek("PWAND", 11).seek("B", 11).step("B", search::endDocId).step("PWAND", search::endDocId), + f.spec.getHistory()); +} + +TEST_F("require that algorithm uses first match when two matches have same score", AlgoSameScoreFixture) +{ + EXPECT_EQUAL(FakeResult().doc(1).score(100), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (all hits greater)", AlgoScoreThresholdFixture(29)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (2 hits greater)", AlgoScoreThresholdFixture(30)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(3).score(2 * 40), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (1 hit greater)", AlgoScoreThresholdFixture(50)) +{ + EXPECT_EQUAL(FakeResult() + .doc(3).score(2 * 40), f.result); +} + +TEST_F("require that algorithm uses initial score threshold (0 hits greater)", AlgoScoreThresholdFixture(80)) +{ + EXPECT_EQUAL(FakeResult(), f.result); +} + +TEST_F("require that algorithm handle large scores", AlgoLargeScoresFixture(60000L * 70000L)) +{ + EXPECT_EQUAL(FakeResult() + 
.doc(1).score(60000L * 60000L + 70000L * 80000L) + .doc(3).score(70000L * 90000L), f.result); +} + +TEST_F("require that algorithm steps all present terms when past is empty", AlgoExhaustPastFixture(25)) +{ + EXPECT_EQUAL(FakeResult() + .doc(3).score(40) + .doc(5).score(30), f.result); +} + +struct HeapFixture +{ + WandSpecWithTestHeap spec; + SimpleResult result; + HeapFixture() : spec(2, 2), result() { + spec.leaf(LeafSpec("A", 1).doc(1, 1).doc(2, 2).doc(3, 3).doc(4, 4).doc(5, 5).doc(6, 6)); + SearchIterator::UP sb(spec.create()); + result.search(*sb); + } +}; + +TEST_F("require that scores are collected in batches before adjusting heap", HeapFixture) +{ + EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(4).addHit(5).addHit(6), + f.result); + EXPECT_EQUAL(ScoresHistory().add(Scores().add(1).add(2)) + .add(Scores().add(3).add(4)) + .add(Scores().add(5).add(6)), + f.spec.heap.history); +} + + +struct SearchFixture : public FixtureBase +{ + SearchFixture() : FixtureBase(10, 1) { + spec.leaf(LeafSpec("A", 1).doc(1, 10).doc(2, 30)); + spec.leaf(LeafSpec("B", 2).doc(1, 20).doc(3, 40)); + prepare(); + } +}; + +TEST_F("require that dot product score is calculated", SearchFixture) +{ + FakeResult expect = FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40); + EXPECT_EQUAL(expect, f.result); +} + + +struct BlueprintFixtureBase +{ + WandBlueprintSpec spec; + FakeSearchable searchable; + BlueprintFixtureBase() : spec(), searchable() {} + Blueprint::UP blueprint(const search::query::Node &term) { + return spec.blueprint(searchable, "field", term); + } + SearchIterator::UP iterator() { + return spec.iterator(searchable, "field"); + } + FakeResult search(const search::query::Node &term) { + return spec.search(searchable, "field", term); + } + FakeResult search() { + return spec.search(searchable, "field"); + } +}; + +struct BlueprintHitsFixture : public BlueprintFixtureBase +{ + FakeResult createResult(size_t hits) { + 
FakeResult result; + for (size_t i = 0; i < hits; ++i) { + result.doc(i + 1); + } + result.minMax(1, 10); + return result; + } + BlueprintHitsFixture(size_t hits_a, size_t hits_b, size_t docs) : BlueprintFixtureBase() { + spec.docIdLimit = docs + 1; + spec.add("A", 20).add("B", 10); + searchable.addResult("field", "A", createResult(hits_a)); + searchable.addResult("field", "B", createResult(hits_b)); + } + bool maxScoreFirst() { + SearchIterator::UP itr = iterator(); + const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get()); + ASSERT_EQUAL(2u, wand->get_num_terms()); + return (wand->get_term_weight(0) == 20); + } +}; + +struct ThresholdBoostFixture : public FixtureBase +{ + FakeResult result; + ThresholdBoostFixture(double boost) : FixtureBase(1, 1, 800, boost) { + spec.leaf(LeafSpec("A").doc(1, 10)); + spec.leaf(LeafSpec("B").doc(2, 20)); + spec.leaf(LeafSpec("C").doc(3, 30)); + spec.leaf(LeafSpec("D").doc(4, 42)); + SearchIterator::UP si(spec.create()); + result = doSearch(*si, spec.rootMatchData); + } +}; + +struct BlueprintFixture : public BlueprintFixtureBase +{ + BlueprintFixture() : BlueprintFixtureBase() { + searchable.addResult("field", "A", FakeResult().doc(1).weight(10).pos(0).doc(2).weight(30).pos(0).minMax(0, 30)); + searchable.addResult("field", "B", FakeResult().doc(1).weight(20).pos(0).doc(3).weight(40).pos(0).minMax(0, 40)); + spec.add("A", 1).add("B", 2); + } +}; + +struct BlueprintLargeScoresFixture : public BlueprintFixtureBase +{ + BlueprintLargeScoresFixture() : BlueprintFixtureBase() { + searchable.addResult("field", "A", FakeResult().doc(1).weight(60000).pos(0).doc(2).weight(70000).pos(0).minMax(0, 70000)); + searchable.addResult("field", "B", FakeResult().doc(1).weight(80000).pos(0).doc(3).weight(90000).pos(0).minMax(0, 90000)); + spec.add("A", 60000).add("B", 70000); + } +}; + +struct BlueprintAsStringFixture : public BlueprintFixtureBase +{ + BlueprintAsStringFixture() : BlueprintFixtureBase() { + 
searchable.addResult("field", "A", FakeResult().doc(1).weight(10).pos(0).doc(2).weight(30).pos(0).minMax(0, 30)); + spec.add("A", 5); + } +}; + + +TEST_F("require that hit estimate is calculated", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(); + Blueprint::UP bp = f.blueprint(*term); + EXPECT_EQUAL(4u, bp->getState().estimate().estHits); +} + +TEST_F("require that blueprint picks up docid limit", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(57, 67, 77.7); + Blueprint::UP bp = f.blueprint(*term); + const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); + EXPECT_EQUAL(0u, pbp->get_docid_limit()); + bp->setDocIdLimit(1000); + EXPECT_EQUAL(1000u, pbp->get_docid_limit()); +} + +TEST_F("require that scores to track, score threshold and threshold boost factor is passed down from query node to blueprint", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(57, 67, 77.7); + Blueprint::UP bp = f.blueprint(*term); + const ParallelWeakAndBlueprint * pbp = dynamic_cast<const ParallelWeakAndBlueprint *>(bp.get()); + EXPECT_EQUAL(57u, pbp->getScores().getScoresToTrack()); + EXPECT_EQUAL(67u, pbp->getScoreThreshold()); + EXPECT_EQUAL(77.7, pbp->getThresholdBoostFactor()); +} + +TEST_F("require that search iterator is correctly setup and executed", BlueprintFixture) +{ + FakeResult expect = FakeResult() + .doc(1).score(1 * 10 + 2 * 20) + .doc(2).score(1 * 30) + .doc(3).score(2 * 40); + EXPECT_EQUAL(expect, f.search()); +} + +TEST_F("require that initial score threshold can be specified (1 hit greater)", BlueprintFixture) +{ + Node::UP term = f.spec.createNode(3, 50); + EXPECT_EQUAL(FakeResult() + .doc(3).score(2 * 40), f.search(*term)); +} + +TEST_F("require that large scores are handled", BlueprintLargeScoresFixture) +{ + Node::UP term = f.spec.createNode(3, 60000L * 70000L); + EXPECT_EQUAL(FakeResult() + .doc(1).score(60000L * 60000L + 70000L * 80000L) + .doc(3).score(70000L * 90000L), f.search(*term)); +} + 
+TEST_F("require that docid limit is propagated to search iterator", BlueprintFixture()) +{ + f1.spec.docIdLimit = 4050; + SearchIterator::UP itr = f1.iterator(); + const ParallelWeakAndSearch *wand = dynamic_cast<ParallelWeakAndSearch*>(itr.get()); + EXPECT_EQUAL(4050u, wand->getMatchParams().docIdLimit); +} + +TEST_FFF("require that terms are sorted for maximum skipping", + BlueprintHitsFixture(50, 50, 100), + BlueprintHitsFixture(60, 50, 100), + BlueprintHitsFixture(80, 50, 100)) +{ + EXPECT_TRUE(f1.maxScoreFirst()); + EXPECT_TRUE(f2.maxScoreFirst()); + EXPECT_FALSE(f3.maxScoreFirst()); +} + +TEST_FF("require that threshold boosting works as expected", ThresholdBoostFixture(1.0), ThresholdBoostFixture(2.0)) +{ + EXPECT_EQUAL(FakeResult() + .doc(1).score(1000) + .doc(2).score(2000) + .doc(3).score(3000) + .doc(4).score(4200), f1.result); + EXPECT_EQUAL(FakeResult() + .doc(2).score(2000) + .doc(4).score(4200), f2.result); +} + +TEST_F("require that asString() on blueprint works", BlueprintAsStringFixture) +{ + Node::UP term = f.spec.createNode(57, 67); + Blueprint::UP bp = f.blueprint(*term); + vespalib::string expStr = "search::queryeval::ParallelWeakAndBlueprint {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 0\n" + " handle: 0\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 2\n" + " tree_size: 2\n" + " allow_termwise_eval: 0\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " _weights: std::vector {\n" + " [0]: 5\n" + " }\n" + " _terms: std::vector {\n" + " [0]: search::queryeval::FakeBlueprint {\n" + " isTermLike: true\n" + " fields: FieldList {\n" + " [0]: Field {\n" + " fieldId: 0\n" + " handle: 0\n" + " isFilter: false\n" + " }\n" + " }\n" + " estimate: HitEstimate {\n" + " empty: false\n" + " estHits: 2\n" + " tree_size: 1\n" + " allow_termwise_eval: 1\n" + " }\n" + " sourceId: 4294967295\n" + " docid_limit: 0\n" + " }\n" + " }\n" + "}\n"; + 
EXPECT_EQUAL(expStr, bp->asString()); +} + +using MatchParams = ParallelWeakAndSearch::MatchParams; +using RankParams = ParallelWeakAndSearch::RankParams; + +struct DummyHeap : public WeakAndHeap { + DummyHeap() : WeakAndHeap(9001) {} + void adjust(score_t *, score_t *) override {} +}; + +SearchIterator::UP create_wand(bool use_dwa, + TermFieldMatchData &tfmd, + const MatchParams &matchParams, + const std::vector<int32_t> &weights, + const std::vector<IDocumentWeightAttribute::LookupResult> &dict_entries, + const IDocumentWeightAttribute &attr, + bool strict) +{ + if (use_dwa) { + return ParallelWeakAndSearch::create(tfmd, matchParams, weights, dict_entries, attr, strict); + } + // use search iterators as children + MatchDataLayout layout; + std::vector<TermFieldHandle> handles; + for (size_t i = 0; i < weights.size(); ++i) { + handles.push_back(layout.allocTermField(tfmd.getFieldId())); + } + MatchData::UP childrenMatchData = layout.createMatchData(); + assert(childrenMatchData->getNumTermFields() == dict_entries.size()); + wand::Terms terms; + for (size_t i = 0; i < dict_entries.size(); ++i) { + terms.push_back(wand::Term(new DocumentWeightSearchIterator(*(childrenMatchData->resolveTermField(handles[i])), attr, dict_entries[i]), + weights[i], + dict_entries[i].posting_size, + childrenMatchData->resolveTermField(handles[i]))); + } + assert(terms.size() == dict_entries.size()); + return SearchIterator::UP(ParallelWeakAndSearch::create(terms, matchParams, RankParams(tfmd, std::move(childrenMatchData)), strict)); +} + +TEST("verify initRange") { + const size_t num_children = 7; + InitRangeVerifier ir; + DocumentWeightAttributeHelper helper; + helper.add_docs(ir.getDocIdLimit()); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + helper.set_doc(full_list[i], i % num_children, 1); + } + std::vector<int32_t> weights(num_children, 1); + for (bool use_dwa: {false, true}) { + for (bool strict: {false, true}) { + DummyHeap 
dummy_heap; + TermFieldMatchData tfmd; + MatchParams match_params(dummy_heap, dummy_heap.getMinScore(), 1.0, 1); + match_params.setDocIdLimit(ir.getDocIdLimit()); + std::vector<IDocumentWeightAttribute::LookupResult> dict_entries; + for (size_t i = 0; i < num_children; ++i) { + dict_entries.push_back(helper.dwa().lookup(vespalib::make_string("%zu", i).c_str())); + } + auto search = create_wand(use_dwa, tfmd, match_params, weights, dict_entries, helper.dwa(), strict); + ir.verify(*search); + } + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/predicate/.gitignore b/searchlib/src/tests/queryeval/predicate/.gitignore new file mode 100644 index 00000000000..7f94446d571 --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/.gitignore @@ -0,0 +1,2 @@ +searchlib_predicate_blueprint_test_app +searchlib_predicate_search_test_app diff --git a/searchlib/src/tests/queryeval/predicate/CMakeLists.txt b/searchlib/src/tests/queryeval/predicate/CMakeLists.txt new file mode 100644 index 00000000000..e1c4ebf9aa8 --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_predicate_blueprint_test_app + SOURCES + predicate_blueprint_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_blueprint_test_app COMMAND searchlib_predicate_blueprint_test_app) +vespa_add_executable(searchlib_predicate_search_test_app + SOURCES + predicate_search_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_predicate_search_test_app COMMAND searchlib_predicate_search_test_app) diff --git a/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp new file mode 100644 index 00000000000..3b609849141 --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/predicate_blueprint_test.cpp @@ -0,0 +1,241 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_blueprint. + +#include <vespa/log/log.h> +LOG_SETUP("predicate_blueprint_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/predicate_attribute.h> +#include <vespa/searchlib/predicate/predicate_tree_annotator.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/query/tree/predicate_query_term.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/predicate_blueprint.h> +#include <vespa/searchlib/predicate/predicate_hash.h> +#include <vespa/vespalib/testkit/testapp.h> + +using namespace search; +using namespace search::predicate; +using search::fef::TermFieldMatchDataArray; +using search::query::PredicateQueryTerm; +using search::query::SimplePredicateQuery; +using search::query::Weight; +using search::queryeval::FieldSpecBase; +using search::queryeval::PredicateBlueprint; +using search::queryeval::SearchIterator; + 
+namespace { + +struct Fixture { + FieldSpecBase field; + AttributeVector::SP attribute; + vespalib::GenerationHandler generation_handler; + SimplePredicateQuery query; + + using IntervalRange = PredicateAttribute::IntervalRange; + + Fixture() + : field(42, 0), + attribute(new PredicateAttribute("f", attribute::Config(attribute::BasicType::PREDICATE))), + query(PredicateQueryTerm::UP(new PredicateQueryTerm), + "view", 0, Weight(1)) { + query.getTerm()->addFeature("key", "value"); + query.getTerm()->addRangeFeature("range_key", 42); + } + PredicateAttribute & guard() { + return dynamic_cast<PredicateAttribute &>(*attribute); + } + PredicateIndex & index() { + return predicate().getIndex(); + } + PredicateAttribute & predicate() { return static_cast<PredicateAttribute &>(*attribute); } + void resize(uint32_t doc_id) { + while (predicate().getNumDocs() <= doc_id) { + uint32_t tmp; + predicate().addDoc(tmp); + PredicateAttribute::MinFeatureHandle mfh = predicate().getMinFeatureVector(); + const_cast<uint8_t *>(mfh.first)[tmp] = 0; + } + } + void setIntervalRange(uint32_t doc_id, IntervalRange interval_range) { + const_cast<IntervalRange *>(predicate().getIntervalRangeVector())[doc_id] = interval_range; + } + void indexEmptyDocument(uint32_t doc_id, IntervalRange ir = 0x1) { + resize(doc_id); + index().indexEmptyDocument(doc_id); + setIntervalRange(doc_id, ir); + predicate().updateMaxIntervalRange(ir); + predicate().commit(false); + } + void indexDocument(uint32_t doc_id, const PredicateTreeAnnotations &annotations, IntervalRange ir = 0xffff) { + resize(doc_id); + index().indexDocument(doc_id, annotations); + setIntervalRange(doc_id, ir); + predicate().updateMaxIntervalRange(ir); + predicate().commit(false); + } +}; + +TEST_F("require that blueprint with empty index estimates empty.", Fixture) { + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_TRUE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); 
+} + +TEST_F("require that blueprint with zero-constraint doc estimates non-empty.", + Fixture) { + f.indexEmptyDocument(42); + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(1u, blueprint.getState().estimate().estHits); +} + +const int min_feature = 1; +const uint32_t doc_id = 2; +const uint32_t interval = 0x0001ffff; + +TEST_F("require that blueprint with posting list entry estimates non-empty.", + Fixture) { + PredicateTreeAnnotations annotations(min_feature); + annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector<Interval>{{interval}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + +TEST_F("require that blueprint with 'bounds' posting list entry estimates " + "non-empty.", Fixture) { + PredicateTreeAnnotations annotations(min_feature); + annotations.bounds_map[PredicateHash::hash64("range_key=40")] = + std::vector<IntervalWithBounds>{{interval, 0x80000003}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + +TEST_F("require that blueprint with zstar-compressed estimates non-empty.", + Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateIndex::z_star_compressed_hash] = + std::vector<Interval>{{0xfffe0000}}; + f.indexDocument(doc_id, annotations); + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + EXPECT_FALSE(blueprint.getState().estimate().empty); + EXPECT_EQUAL(0u, blueprint.getState().estimate().estHits); +} + +TEST_F("require that blueprint can create search", Fixture) { + PredicateTreeAnnotations annotations(1); + 
annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector<Interval>{{interval}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId()); + EXPECT_FALSE(it->seek(doc_id - 1)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); + EXPECT_TRUE(it->isAtEnd()); +} + +TEST_F("require that blueprint can create more advanced search", Fixture) { + PredicateTreeAnnotations annotations(2); + annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector<Interval>{{0x00010001}}; + annotations.bounds_map[PredicateHash::hash64("range_key=40")] = + std::vector<IntervalWithBounds>{{0x00020010, 0x40000005}}; // [40..44] + f.indexDocument(doc_id, annotations, 0x10); + f.indexEmptyDocument(doc_id + 2); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), it->getDocId()); + EXPECT_FALSE(it->seek(doc_id - 1)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); + EXPECT_EQUAL(doc_id + 2, it->getDocId()); + EXPECT_TRUE(it->seek(doc_id + 2)); + EXPECT_FALSE(it->seek(doc_id + 3)); + EXPECT_TRUE(it->isAtEnd()); +} + +TEST_F("require that blueprint can create NOT search", Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateIndex::z_star_hash] = + std::vector<Interval>{{0x00010000}, {0xffff0001}}; + 
f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); +} + +TEST_F("require that blueprint can create compressed NOT search", Fixture) { + PredicateTreeAnnotations annotations(1); + annotations.interval_map[PredicateIndex::z_star_compressed_hash] = + std::vector<Interval>{{0xfffe0000}}; + f.indexDocument(doc_id, annotations); + + PredicateBlueprint blueprint(f.field, f.guard(), f.query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_TRUE(it->seek(doc_id)); + EXPECT_EQUAL(doc_id, it->getDocId()); + EXPECT_FALSE(it->seek(doc_id + 1)); +} + +TEST_F("require that blueprint can set up search with subqueries", Fixture) { + PredicateTreeAnnotations annotations(2); + annotations.interval_map[PredicateHash::hash64("key=value")] = + std::vector<Interval>{{0x00010001}}; + annotations.interval_map[PredicateHash::hash64("key2=value")] = + std::vector<Interval>{{0x0002ffff}}; + f.indexDocument(doc_id, annotations); + + SimplePredicateQuery query(PredicateQueryTerm::UP(new PredicateQueryTerm), + "view", 0, Weight(1)); + query.getTerm()->addFeature("key", "value", 1); + query.getTerm()->addFeature("key2", "value", 2); + + PredicateBlueprint blueprint(f.field, f.guard(), query); + blueprint.fetchPostings(true); + TermFieldMatchDataArray tfmda; + SearchIterator::UP it = blueprint.createLeafSearch(tfmda, true); + ASSERT_TRUE(it.get()); + it->initFullRange(); + EXPECT_FALSE(it->seek(doc_id)); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git 
a/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp new file mode 100644 index 00000000000..5954d51ec9b --- /dev/null +++ b/searchlib/src/tests/queryeval/predicate/predicate_search_test.cpp @@ -0,0 +1,370 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for predicate_search. + +#include <vespa/log/log.h> +LOG_SETUP("predicate_search_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/queryeval/predicate_search.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/util/arraysize.h> + +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using namespace search::queryeval; +using namespace search::predicate; +using std::pair; +using std::vector; +using vespalib::arraysize; + +namespace { + +class MyPostingList : public PredicatePostingList { + vector<pair<uint32_t, uint32_t>> _entries; + size_t _index; + uint32_t _interval; + + void setInterval(uint32_t interval) { _interval = interval; } + +public: + MyPostingList(const vector<pair<uint32_t, uint32_t>> &entries) + : _entries(entries), + _index(0) { + } + MyPostingList(std::initializer_list<pair<uint32_t, uint32_t>> ilist) + : _entries(ilist.begin(), ilist.end()), + _index(0) { + } + + bool next(uint32_t doc_id) override { + if (_index < _entries.size()) { + while (_entries[_index].first <= doc_id) { + ++_index; + if (_index == _entries.size()) { + setDocId(search::endDocId); + return false; + } + } + setDocId(_entries[_index].first); + setInterval(_entries[_index].second); + return true; + } + setDocId(search::endDocId); + return false; + } + + bool nextInterval() override { + if (_index + 1 < _entries.size() && + _entries[_index].first == _entries[_index + 1].first) { + 
++_index; + setInterval(_entries[_index].second); + return true; + } + return false; + } + uint32_t getInterval() const override { return _interval; } +}; + +template <int N> +vector<PredicatePostingList::UP> +make_posting_lists_vector(MyPostingList (&plists)[N]) { + vector<PredicatePostingList::UP> posting_lists; + for (int i = 0; i < N; ++i) { + posting_lists.emplace_back(std::make_unique<MyPostingList>(plists[i])); + } + return posting_lists; +} + +TermFieldMatchDataArray tfmda; +typedef std::vector<uint8_t> CV; +typedef std::vector<uint8_t> MF; +typedef std::vector<uint16_t> IR; + +TEST("Require that the skipping is efficient") { + const uint8_t min_feature[] = { 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7}; + const uint8_t kv[] = { 6,7,6,7,6,7,6,8,6,5,6,7,6,0,6,7, + 7,6,7,6,6,6,6,7,7,7,8,7,8,7,7,7,6,7}; + SkipMinFeature::UP skip = SkipMinFeature::create(min_feature, kv, 34); + EXPECT_EQUAL(1u, skip->next()); + EXPECT_EQUAL(3u, skip->next()); + EXPECT_EQUAL(5u, skip->next()); + EXPECT_EQUAL(7u, skip->next()); + EXPECT_EQUAL(11u, skip->next()); + EXPECT_EQUAL(15u, skip->next()); + EXPECT_EQUAL(16u, skip->next()); + EXPECT_EQUAL(18u, skip->next()); + EXPECT_EQUAL(23u, skip->next()); + EXPECT_EQUAL(24u, skip->next()); + EXPECT_EQUAL(25u, skip->next()); + EXPECT_EQUAL(26u, skip->next()); + EXPECT_EQUAL(27u, skip->next()); + EXPECT_EQUAL(28u, skip->next()); + EXPECT_EQUAL(29u, skip->next()); + EXPECT_EQUAL(30u, skip->next()); + EXPECT_EQUAL(31u, skip->next()); + EXPECT_EQUAL(33u, skip->next()); +} + +TEST("require that empty search yields no results") { + vector<PredicatePostingList::UP> posting_lists; + MF mf(3); CV cv(3); IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, std::move(posting_lists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(2)); + EXPECT_TRUE(search.isAtEnd()); +} + +TEST("require that simple search yields result") { + 
MyPostingList plists[] = {{{2, 0x0001ffff}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 1}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(1)); + EXPECT_EQUAL(2u, search.getDocId()); + EXPECT_TRUE(search.seek(2)); + EXPECT_EQUAL(2u, search.getDocId()); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.isAtEnd()); +} + +TEST("require that minFeature (K) is used to prune results") { + MyPostingList plists[] = {{{2, 0x0001ffff}}, + {{5, 0x0001ffff}}}; + MF mf{0, 0, 3, 0, 0, 0}; + CV cv{1, 0, 0, 0, 0, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); + EXPECT_EQUAL(5u, search.getDocId()); +} + +TEST("require that a high K (min_feature - 1) can yield results") { + MyPostingList plists[] = {{{2, 0x00010001}}, + {{2, 0x0002ffff}}}; + MF mf{0, 0, 2}; + CV cv{0, 0, 2}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that we can skip past entries") { + MyPostingList plists[] = {{{2, 0x0001ffff}, + {5, 0x0001ffff}}}; + MF mf{0, 0, 0, 0, 0, 0}; + CV cv{0, 0, 1, 0, 0, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(5)); +} + +TEST("require that posting lists are sorted after advancing") { + MyPostingList plists[] = {{{1, 0x0001ffff}, + {5, 0x0001ffff}}, + {{2, 0x0001ffff}, + {4, 0x0001ffff}}}; + MF mf{0, 2, 0, 0, 0, 0}; + CV cv{0, 1, 1, 0, 1, 1}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + 
EXPECT_FALSE(search.seek(1)); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.seek(4)); +} + +TEST("require that short interval ranges works") { + MyPostingList plists[] = {{{1, 0x00010001}, + {5, 0x00010001}}, + {{2, 0x00010001}, + {4, 0x00010001}}}; + MF mf{0, 2, 0, 0, 0, 0}; + CV cv{0, 1, 1, 0, 1, 1}; + IR ir(6, 0x0001); + PredicateSearch search(&mf[0], &ir[0], 0x1, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(1)); + EXPECT_FALSE(search.seek(3)); + EXPECT_TRUE(search.seek(4)); +} + +TEST("require that empty posting lists work") { + MyPostingList plists[] = {{}}; + MF mf(3); CV cv(3); IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_EQUAL(SearchIterator::beginId(), search.getDocId()); + EXPECT_FALSE(search.seek(2)); + EXPECT_TRUE(search.isAtEnd()); +} + +TEST("require that shorter posting list ending is ok") { + MyPostingList plists[] = {{{1, 0x0001ffff}, + {2, 0x0001ffff}}, + {{4, 0x0001ffff}}}; + MF mf{0, 0, 0, 0, 0}; + CV cv{0, 1, 1, 0, 1}; + IR ir(5, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(1)); + EXPECT_TRUE(search.seek(4)); +} + +TEST("require that sorting works for many posting lists") { + MyPostingList plists[] = {{{1, 0x0001ffff}, + {2, 0x0001ffff}}, + {{2, 0x0001ffff}, + {4, 0x0001ffff}}, + {{2, 0x0001ffff}, + {5, 0x0001ffff}}, + {{2, 0x0001ffff}, + {4, 0x0001ffff}}, + {{2, 0x0001ffff}, + {5, 0x0001ffff}}}; + MF mf{0, 1, 5, 0, 2, 2}; + CV cv{0, 1, 5, 0, 2, 2}; + IR ir(6, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(1)); + EXPECT_TRUE(search.seek(2)); + EXPECT_TRUE(search.seek(4)); + EXPECT_TRUE(search.seek(5)); +} + +TEST("require that insufficient interval coverage 
prevents match") { + MyPostingList plists[] = {{{2, 0x00010001}, + {3, 0x0002ffff}}}; + MF mf{0, 0, 0, 0}; + CV cv{0, 0, 1, 1}; + IR ir(4, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); + EXPECT_FALSE(search.seek(3)); +} + +TEST("require that intervals are sorted") { + MyPostingList plists[] = {{{2, 0x00010001}}, + {{2, 0x0003ffff}}, + {{2, 0x00020002}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 3}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that NOT is supported - no match") { + MyPostingList plists[] = {{{2, 0x00010001}}, // [l, r] + {{2, 0x00010000}, // [l, r]* + {2, 0xffff0001}}}; // [r+1, r+1]* + MF mf{0, 0, 0}; + CV cv{0, 0, 3}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); +} + +TEST("require that NOT is supported - match") { + MyPostingList plists[] = {{{2, 0x00010000}, // [l, r]* + {2, 0xffff0001}}}; // [r+1, r+1]* + MF mf{0, 0, 0}; + CV cv{0, 0, 2}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that NOT is supported - no match because of previous term") { + MyPostingList plists[] = {{{2, 0x00020001}, // [l, r]* + {2, 0xffff0002}}}; // [r+1, r+1]* + MF mf{0, 0, 0}; + CV cv{0, 0, 2}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_FALSE(search.seek(2)); +} + +TEST("require that NOT is supported - subqueries") { + MyPostingList plists[] = {{{2, 0x00010001}}, // [l, r] + {{2, 0x00010000}, // [l, r]* + {2, 0xffff0001}}}; // [r+1, r+1]* + 
plists[0].setSubquery(0xffff); + MF mf{0, 0, 0}; + CV cv{0, 0, 3}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that there can be many intervals") { + MyPostingList plists[] = {{{2, 0x00010001}, + {2, 0x00020002}, + {2, 0x00030003}, + {2, 0x0001ffff}, + {2, 0x00040004}, + {2, 0x00050005}, + {2, 0x00060006}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 7}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that match can require multiple postinglists.") { + MyPostingList plists[] = {{{2, 0x00010001}}, + {{2, 0x0002000b}, + {2, 0x00030003}}, + {{2, 0x00040003}}, + {{2, 0x00050004}}, + {{2, 0x00010008}, + {2, 0x00060006}}, + {{2, 0x00020002}, + {2, 0x0007ffff}}}; + MF mf{0, 0, 0}; + CV cv{0, 0, 9}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), tfmda); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); +} + +TEST("require that subquery bitmap is unpacked to subqueries.") { + MyPostingList plists[] = {{{2, 0x0001ffff}}}; + TermFieldMatchDataArray array; + TermFieldMatchData data; + array.add(&data); + MF mf{0, 0, 0}; + CV cv{0, 0, 1}; + IR ir(3, 0xffff); + PredicateSearch search(&mf[0], &ir[0], 0xffff, cv, make_posting_lists_vector(plists), array); + search.initFullRange(); + EXPECT_TRUE(search.seek(2)); + search.unpack(2); + EXPECT_EQUAL(0xffffffffffffffffULL, + static_cast<unsigned long long>(data.getSubqueries())); +} + + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/queryeval.cpp b/searchlib/src/tests/queryeval/queryeval.cpp new file mode 100644 index 00000000000..3a2070a1fd8 --- /dev/null +++ b/searchlib/src/tests/queryeval/queryeval.cpp @@ -0,0 +1,691 @@ +// Copyright 2016 
Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> + +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/queryeval/andnotsearch.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/booleanmatchiteratorwrapper.h> +#include <vespa/searchlib/queryeval/nearsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/simplesearch.h> +#include <vespa/searchlib/queryeval/ranksearch.h> +#include <vespa/searchlib/queryeval/truesearch.h> +#include <vespa/searchlib/queryeval/emptysearch.h> +#include <vespa/searchlib/queryeval/sourceblendersearch.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/attribute/singlenumericattribute.hpp> +#include <vespa/searchlib/attribute/singlesmallnumericattribute.h> +#include <vespa/vespalib/test/insertion_operators.h> + +#include <vespa/searchlib/fef/fef.h> + +using namespace search::queryeval; +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataArray; +using search::BitVector; +using search::BitVectorIterator; +using search::test::InitRangeVerifier; + +//----------------------------------------------------------------------------- + +template <typename T, typename V=std::vector<T> > +class Collect +{ +private: + V _data; + +public: + Collect &add(const T &t) { + _data.push_back(t); + return *this; + } + operator const V &() const { return _data; } +}; + +SearchIterator *simple(const std::string &tag) { + return &((new SimpleSearch(SimpleResult()))->tag(tag)); +} + +Collect<SearchIterator*, MultiSearch::Children> search2(const std::string &t1, const 
std::string &t2) { + return Collect<SearchIterator*, MultiSearch::Children>().add(simple(t1)).add(simple(t2)); +} + + +class ISourceSelectorDummy : public ISourceSelector +{ + static SourceStore _sourceStoreDummy; + +public: + static Iterator::UP + makeDummyIterator() + { + return Iterator::UP(new Iterator(_sourceStoreDummy)); + } +}; + +ISourceSelector::SourceStore ISourceSelectorDummy::_sourceStoreDummy("foo"); + +ISourceSelector::Iterator::UP selector() { + return ISourceSelectorDummy::makeDummyIterator(); +} + +//----------------------------------------------------------------------------- + +void testMultiSearch(SearchIterator & search) { + MultiSearch & ms = dynamic_cast<MultiSearch &>(search); + ms.initRange(3, 309); + EXPECT_EQUAL(2u, ms.getDocId()); + EXPECT_EQUAL(309u, ms.getEndId()); + for (const auto & child : ms.getChildren()) { + EXPECT_EQUAL(2u, child->getDocId()); + EXPECT_EQUAL(309u, child->getEndId()); + } +} + +TEST("test that OR.andWith is a NOOP") { + TermFieldMatchData tfmd; + MultiSearch::Children ch; + ch.push_back(new TrueSearch(tfmd)); + ch.push_back(new TrueSearch(tfmd)); + SearchIterator::UP search(OrSearch::create(ch, true)); + SearchIterator::UP filter(new TrueSearch(tfmd)); + + EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 1).get()); +} + +TEST("test that non-strict AND.andWith is a NOOP") { + TermFieldMatchData tfmd; + MultiSearch::Children ch; + ch.push_back(new TrueSearch(tfmd)); + ch.push_back(new TrueSearch(tfmd)); + SearchIterator::UP search(AndSearch::create(ch, false)); + SearchIterator::UP filter(new TrueSearch(tfmd)); + filter = search->andWith(std::move(filter), 8); + EXPECT_TRUE(nullptr != filter.get()); +} + +TEST("test that strict AND.andWith steals filter and places it correctly based on estimate") { + TermFieldMatchData tfmd; + MultiSearch::Children ch; + ch.push_back(new TrueSearch(tfmd)); + ch.push_back(new TrueSearch(tfmd)); + SearchIterator::UP search(AndSearch::create(ch, true)); + 
static_cast<AndSearch &>(*search).estimate(7); + SearchIterator::UP filter(new TrueSearch(tfmd)); + SearchIterator * filterP = filter.get(); + + EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get()); + const MultiSearch::Children & andChildren = static_cast<MultiSearch &>(*search).getChildren(); + EXPECT_EQUAL(3u, andChildren.size()); + EXPECT_EQUAL(ch[0], andChildren[0]); + EXPECT_EQUAL(filterP, andChildren[1]); + EXPECT_EQUAL(ch[1], andChildren[2]); + + SearchIterator::UP filter2(new TrueSearch(tfmd)); + SearchIterator * filter2P = filter2.get(); + EXPECT_TRUE(nullptr == search->andWith(std::move(filter2), 6).get()); + EXPECT_EQUAL(4u, andChildren.size()); + EXPECT_EQUAL(filter2P, andChildren[0]); + EXPECT_EQUAL(ch[0], andChildren[1]); + EXPECT_EQUAL(filterP, andChildren[2]); + EXPECT_EQUAL(ch[1], andChildren[3]); +} + +class NonStrictTrueSearch : public TrueSearch +{ +public: + NonStrictTrueSearch(TermFieldMatchData & tfmd) : TrueSearch(tfmd) { } + Trinary is_strict() const override { return Trinary::False; } +}; + +TEST("test that strict AND.andWith does not place non-strict iterator first") { + TermFieldMatchData tfmd; + MultiSearch::Children ch; + ch.push_back(new TrueSearch(tfmd)); + ch.push_back(new TrueSearch(tfmd)); + SearchIterator::UP search(AndSearch::create(ch, true)); + static_cast<AndSearch &>(*search).estimate(7); + SearchIterator::UP filter(new NonStrictTrueSearch(tfmd)); + SearchIterator * filterP = filter.get(); + EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 6).get()); + const MultiSearch::Children & andChildren = static_cast<MultiSearch &>(*search).getChildren(); + EXPECT_EQUAL(3u, andChildren.size()); + EXPECT_EQUAL(ch[0], andChildren[0]); + EXPECT_EQUAL(filterP, andChildren[1]); + EXPECT_EQUAL(ch[1], andChildren[2]); +} + +TEST("test that strict rank search forwards to its greedy first child") { + TermFieldMatchData tfmd; + SearchIterator::UP search( + RankSearch::create( + Collect<SearchIterator*, 
MultiSearch::Children>() + .add(AndSearch::create(search2("a", "b"), true)) + .add(new TrueSearch(tfmd)), + true) + ); + SearchIterator::UP filter(new TrueSearch(tfmd)); + EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get()); +} + +TEST("test that non-strict rank search does NOT forward to its greedy first child") { + TermFieldMatchData tfmd; + SearchIterator::UP search( + RankSearch::create( + Collect<SearchIterator*, MultiSearch::Children>() + .add(AndSearch::create(search2("a", "b"), true)) + .add(new TrueSearch(tfmd)), + false) + ); + SearchIterator::UP filter(new TrueSearch(tfmd)); + EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get()); +} + +TEST("test that strict andnot search forwards to its greedy first child") { + TermFieldMatchData tfmd; + SearchIterator::UP search( + AndNotSearch::create( + Collect<SearchIterator*, MultiSearch::Children>() + .add(AndSearch::create(search2("a", "b"), true)) + .add(new TrueSearch(tfmd)), + true) + ); + SearchIterator::UP filter(new TrueSearch(tfmd)); + EXPECT_TRUE(nullptr == search->andWith(std::move(filter), 8).get()); +} + +TEST("test that non-strict andnot search does NOT forward to its greedy first child") { + TermFieldMatchData tfmd; + SearchIterator::UP search( + AndNotSearch::create( + Collect<SearchIterator*, MultiSearch::Children>() + .add(AndSearch::create(search2("a", "b"), true)) + .add(new TrueSearch(tfmd)), + false) + ); + SearchIterator::UP filter(new TrueSearch(tfmd)); + EXPECT_TRUE(nullptr != search->andWith(std::move(filter), 8).get()); +} + +TEST("testAnd") { + SimpleResult a; + SimpleResult b; + a.addHit(5).addHit(10).addHit(16).addHit(30); + b.addHit(3).addHit(5).addHit(17).addHit(30).addHit(52); + + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + AndBlueprint *and_b = new AndBlueprint(); + and_b->addChild(Blueprint::UP(new SimpleBlueprint(a))); + and_b->addChild(Blueprint::UP(new SimpleBlueprint(b))); + Blueprint::UP bp(and_b); + bp->fetchPostings(true); + 
SearchIterator::UP and_ab = bp->createSearch(*md, true); + + EXPECT_TRUE(dynamic_cast<const AndSearch *>(and_ab.get()) != nullptr); + EXPECT_EQUAL(4u, dynamic_cast<AndSearch &>(*and_ab).estimate()); + SimpleResult res; + res.search(*and_ab); + SimpleResult expect; + expect.addHit(5).addHit(30); + + EXPECT_EQUAL(res, expect); +} + +TEST("mutisearch and initRange") { +} + +TEST("testOr") { + { + SimpleResult a; + SimpleResult b; + a.addHit(5).addHit(10); + b.addHit(5).addHit(17).addHit(30); + + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + OrBlueprint *or_b = new OrBlueprint(); + or_b->addChild(Blueprint::UP(new SimpleBlueprint(a))); + or_b->addChild(Blueprint::UP(new SimpleBlueprint(b))); + Blueprint::UP bp(or_b); + bp->fetchPostings(true); + SearchIterator::UP or_ab = bp->createSearch(*md, true); + + SimpleResult res; + res.search(*or_ab); + SimpleResult expect; + expect.addHit(5).addHit(10).addHit(17).addHit(30); + + EXPECT_EQUAL(res, expect); + } + { + TermFieldMatchData tfmd; + MultiSearch::Children ch; + ch.push_back(new TrueSearch(tfmd)); + ch.push_back(new TrueSearch(tfmd)); + ch.push_back(new TrueSearch(tfmd)); + SearchIterator::UP orSearch(OrSearch::create(ch, true)); + testMultiSearch(*orSearch); + } +} + +class TestInsertRemoveSearch : public MultiSearch +{ +public: + TestInsertRemoveSearch(const MultiSearch::Children & children) : + MultiSearch(children), + _accumRemove(0), + _accumInsert(0) + { } + virtual void onRemove(size_t index) { _accumRemove += index; } + virtual void onInsert(size_t index) { _accumInsert += index; } + size_t _accumRemove; + size_t _accumInsert; +private: + virtual void doSeek(uint32_t docid) { (void) docid; } +}; + +TEST("testMultiSearch") { + MultiSearch::Children children; + children.push_back(new EmptySearch()); + children.push_back(new EmptySearch()); + children.push_back(new EmptySearch()); + TestInsertRemoveSearch ms(children); + EXPECT_EQUAL(3u, ms.getChildren().size()); + EXPECT_EQUAL(children[0], 
ms.getChildren()[0]); + EXPECT_EQUAL(children[1], ms.getChildren()[1]); + EXPECT_EQUAL(children[2], ms.getChildren()[2]); + EXPECT_EQUAL(0u, ms._accumInsert); + EXPECT_EQUAL(0u, ms._accumRemove); + + EXPECT_EQUAL(children[1], ms.remove(1).get()); + EXPECT_EQUAL(2u, ms.getChildren().size()); + EXPECT_EQUAL(children[0], ms.getChildren()[0]); + EXPECT_EQUAL(children[2], ms.getChildren()[1]); + EXPECT_EQUAL(0u, ms._accumInsert); + EXPECT_EQUAL(1u, ms._accumRemove); + + children.push_back(new EmptySearch()); + ms.insert(1, SearchIterator::UP(children.back())); + EXPECT_EQUAL(3u, ms.getChildren().size()); + EXPECT_EQUAL(children[0], ms.getChildren()[0]); + EXPECT_EQUAL(children[3], ms.getChildren()[1]); + EXPECT_EQUAL(children[2], ms.getChildren()[2]); + EXPECT_EQUAL(1u, ms._accumInsert); + EXPECT_EQUAL(1u, ms._accumRemove); +} + +class DummySingleValueBitNumericAttributeBlueprint : public SimpleLeafBlueprint +{ +public: + DummySingleValueBitNumericAttributeBlueprint(const SimpleResult & result) : + SimpleLeafBlueprint(FieldSpecBaseList()), + _a("a"), + _sc(), + _tfmd() + { + for (size_t i(0); i < result.getHitCount(); i++) { + size_t docId(result.getHit(i)); + uint32_t curDoc(0); + for (_a.addDoc(curDoc); curDoc < docId; _a.addDoc(curDoc)); + _a.update(docId, 1); + } + _a.commit(); + _sc = _a.getSearch(search::QueryTermSimple::UP(new search::QueryTermSimple("1", search::QueryTermSimple::WORD)), + search::AttributeVector::SearchContext::Params().useBitVector(true)); + } + virtual SearchIterator::UP + createLeafSearch(const TermFieldMatchDataArray &tfmda, bool strict) const + { + (void) tfmda; + return _sc->createIterator(&_tfmd, strict); + } +private: + search::SingleValueBitNumericAttribute _a; + search::AttributeVector::SearchContext::UP _sc; + mutable TermFieldMatchData _tfmd; +}; + + +TEST("testAndNot") { + { + SimpleResult a; + SimpleResult b; + a.addHit(5).addHit(10); + b.addHit(5).addHit(17).addHit(30); + + MatchData::UP md(MatchData::makeTestInstance(0, 100, 
10)); + AndNotBlueprint *andnot_b = new AndNotBlueprint(); + andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a))); + andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(b))); + Blueprint::UP bp(andnot_b); + bp->fetchPostings(true); + SearchIterator::UP andnot_ab = bp->createSearch(*md, true); + + SimpleResult res; + res.search(*andnot_ab); + SimpleResult expect; + expect.addHit(10); + + EXPECT_EQUAL(res, expect); + } + { + SimpleResult a; + SimpleResult b; + a.addHit(1).addHit(5).addHit(10); + b.addHit(5).addHit(17).addHit(30); + + MatchData::UP md(MatchData::makeTestInstance(2, 100, 10)); + AndNotBlueprint *andnot_b = new AndNotBlueprint(); + andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a))); + andnot_b->addChild(Blueprint::UP(new DummySingleValueBitNumericAttributeBlueprint(b))); + Blueprint::UP bp(andnot_b); + bp->fetchPostings(true); + SearchIterator::UP andnot_ab = bp->createSearch(*md, true); + EXPECT_TRUE(dynamic_cast<const OptimizedAndNotForBlackListing *>(andnot_ab.get()) != NULL); + + SimpleResult res; + res.search(*andnot_ab); + SimpleResult expect; + expect.addHit(1).addHit(10); + + EXPECT_EQUAL(res, expect); + } + { + SimpleResult a; + SimpleResult b; + SimpleResult c; + a.addHit(1).addHit(5).addHit(10); + b.addHit(5).addHit(17).addHit(30); + c.addHit(1).addHit(5).addHit(10).addHit(17).addHit(30); + + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + AndNotBlueprint *andnot_b = new AndNotBlueprint(); + andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(a))); + andnot_b->addChild(Blueprint::UP(new SimpleBlueprint(b))); + + AndBlueprint *and_b = new AndBlueprint(); + and_b->addChild(Blueprint::UP(new SimpleBlueprint(c))); + and_b->addChild(Blueprint::UP(andnot_b)); + Blueprint::UP bp(and_b); + bp->fetchPostings(true); + SearchIterator::UP and_cab = bp->createSearch(*md, true); + + SimpleResult res; + res.search(*and_cab); + SimpleResult expect; + expect.addHit(1).addHit(10); + + EXPECT_EQUAL(res, expect); + } + { + } +} + 
+TEST("testRank") { + { + SimpleResult a; + SimpleResult b; + a.addHit(5).addHit(10).addHit(16).addHit(30); + b.addHit(3).addHit(5).addHit(17).addHit(30).addHit(52); + + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + RankBlueprint *rank_b = new RankBlueprint(); + rank_b->addChild(Blueprint::UP(new SimpleBlueprint(a))); + rank_b->addChild(Blueprint::UP(new SimpleBlueprint(b))); + Blueprint::UP bp(rank_b); + bp->fetchPostings(true); + SearchIterator::UP rank_ab = bp->createSearch(*md, true); + + SimpleResult res; + res.search(*rank_ab); + SimpleResult expect; + expect.addHit(5).addHit(10).addHit(16).addHit(30); + + EXPECT_EQUAL(res, expect); + } +} + +TEST("testDump") { + typedef SourceBlenderSearch::Child Source; + SearchIterator::UP search( + AndSearch::create( + Collect<SearchIterator*, MultiSearch::Children>() + .add(AndNotSearch::create(search2("+", "-"), true)) + .add(AndSearch::create(search2("and_a", "and_b"), true)) + .add(new BooleanMatchIteratorWrapper(SearchIterator::UP(simple("wrapped")), TermFieldMatchDataArray())) + .add(new NearSearch(search2("near_a", "near_b"), + TermFieldMatchDataArray(), + 5u, true)) + .add(new ONearSearch(search2("onear_a", "onear_b"), + TermFieldMatchDataArray(), 10, true)) + .add(OrSearch::create(search2("or_a", "or_b"), false)) + .add(RankSearch::create(search2("rank_a", "rank_b"),false)) + .add(SourceBlenderSearch::create(selector(), Collect<Source, SourceBlenderSearch::Children>() + .add(Source(simple("blend_a"), 2)) + .add(Source(simple("blend_b"), 4)), true)) + , true)); + vespalib::string sas = search->asString(); + EXPECT_TRUE(sas.size() > 50); + // fprintf(stderr, "%s", search->asString().c_str()); +} + +TEST("testFieldSpec") { + EXPECT_EQUAL(8u, sizeof(FieldSpecBase)); + EXPECT_EQUAL(72u, sizeof(FieldSpec)); +} + + +const size_t unpack_child_cnt = 500; +const size_t max_unpack_size = 31; +const size_t max_unpack_index = 255; + +std::vector<size_t> vectorize(const UnpackInfo &unpack) { + 
std::vector<size_t> list; + unpack.each([&](size_t i){list.push_back(i);}, unpack_child_cnt); + return list; +} + +std::vector<size_t> fill_vector(size_t begin, size_t end) { + std::vector<size_t> list; + for (size_t i = begin; i < end; ++i) { + list.push_back(i); + } + return list; +} + +void verify_unpack(const UnpackInfo &unpack, const std::vector<size_t> &expect) { + std::vector<size_t> actual = vectorize(unpack); + EXPECT_EQUAL(unpack.empty(), expect.empty()); + EXPECT_EQUAL(unpack.unpackAll(), (expect.size() == unpack_child_cnt)); + EXPECT_EQUAL(expect, actual); + size_t child_idx = 0; + for (size_t next_unpack: expect) { + while (child_idx < next_unpack) { + EXPECT_FALSE(unpack.needUnpack(child_idx++)); + } + EXPECT_TRUE(unpack.needUnpack(child_idx++)); + } +} + +TEST("require that unpack info has expected memory footprint") { + EXPECT_EQUAL(32u, sizeof(UnpackInfo)); +} + +TEST("require that unpack info starts out empty") { + verify_unpack(UnpackInfo(), {}); +} + +TEST("require that unpack info force all unpacks all children") { + verify_unpack(UnpackInfo().forceAll(), fill_vector(0, unpack_child_cnt)); +} + +TEST("require that adding a large index to unpack info forces unpack all") { + UnpackInfo unpack; + unpack.add(0); + unpack.add(max_unpack_index); + verify_unpack(unpack, {0, max_unpack_index}); + unpack.add(max_unpack_index + 1); + verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); +} + +TEST("require that adding too many children to unpack info forces unpack all") { + UnpackInfo unpack; + std::vector<size_t> expect; + for (size_t i = 0; i < max_unpack_size; ++i) { + unpack.add(i); + expect.push_back(i); + } + verify_unpack(unpack, expect); + unpack.add(100); + verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); +} + +TEST("require that adding normal unpack info indexes works") { + UnpackInfo unpack; + unpack.add(3).add(5).add(7).add(14).add(50); + verify_unpack(unpack, {3,5,7,14,50}); +} + +TEST("require that adding unpack info indexes out 
of order works") { + UnpackInfo unpack; + unpack.add(5).add(3).add(7).add(50).add(14); + verify_unpack(unpack, {3,5,7,14,50}); +} + +TEST("require that basic insert remove of unpack info works") { + UnpackInfo unpack; + unpack.insert(1).insert(3); + verify_unpack(unpack, {1, 3}); + unpack.insert(0); + verify_unpack(unpack, {0, 2, 4}); + unpack.insert(3); + verify_unpack(unpack, {0, 2, 3, 5}); + unpack.remove(1); + verify_unpack(unpack, {0, 1, 2, 4}); + unpack.remove(1); + verify_unpack(unpack, {0, 1, 3}); + unpack.remove(1); + verify_unpack(unpack, {0, 2}); + unpack.remove(2); + verify_unpack(unpack, {0}); + unpack.remove(0); + verify_unpack(unpack, {}); +} + +TEST("require that inserting too many indexs into unpack info forces unpack all") { + for (bool unpack_inserted: {true, false}) { + UnpackInfo unpack; + for (size_t i = 0; i < max_unpack_size; ++i) { + unpack.add(i); + } + EXPECT_FALSE(unpack.unpackAll()); + unpack.insert(0, unpack_inserted); + if (unpack_inserted) { + verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); + } else { + verify_unpack(unpack, fill_vector(1, max_unpack_size + 1)); + } + } +} + +TEST("require that implicitly overflowing indexes during insert in unpack info forces unpack all") { + for (bool unpack_inserted: {true, false}) { + UnpackInfo unpack; + unpack.insert(max_unpack_index); + EXPECT_FALSE(unpack.unpackAll()); + unpack.insert(5, unpack_inserted); + verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); + } +} + +TEST("require that inserting a too high index into unpack info forces unpack all") { + for (bool unpack_inserted: {true, false}) { + UnpackInfo unpack; + for (size_t i = 0; i < 10; ++i) { + unpack.add(i); + } + EXPECT_FALSE(unpack.unpackAll()); + unpack.insert(max_unpack_index + 1, unpack_inserted); + if (unpack_inserted) { + verify_unpack(unpack, fill_vector(0, unpack_child_cnt)); + } else { + verify_unpack(unpack, fill_vector(0, 10)); + } + } +} + +TEST("require that we can insert indexes into unpack info that we 
do not unpack") { + UnpackInfo unpack; + unpack.add(10).add(20).add(30); + verify_unpack(unpack, {10, 20, 30}); + unpack.insert(5, false).insert(15, false).insert(25, false).insert(35, false); + verify_unpack(unpack, {11, 22, 33}); +} + +TEST("testTrueSearch") { + EXPECT_EQUAL(16u, sizeof(EmptySearch)); + EXPECT_EQUAL(24u, sizeof(TrueSearch)); + + TermFieldMatchData tfmd; + TrueSearch t(tfmd); + EXPECT_EQUAL(0u, t.getDocId()); + EXPECT_EQUAL(0u, t.getEndId()); + t.initRange(7, 10); + EXPECT_EQUAL(6u, t.getDocId()); + EXPECT_EQUAL(10u, t.getEndId()); + EXPECT_TRUE(t.seek(9)); + EXPECT_EQUAL(9u, t.getDocId()); + EXPECT_FALSE(t.isAtEnd()); + EXPECT_TRUE(t.seek(10)); + EXPECT_EQUAL(10u, t.getDocId()); + EXPECT_TRUE(t.isAtEnd()); + t.resetRange(); + t.initRange(4, 14); + EXPECT_EQUAL(3u, t.getDocId()); + EXPECT_EQUAL(14u, t.getEndId()); + EXPECT_FALSE(t.isAtEnd()); +} + +TEST("test InitRangeVerifier") { + InitRangeVerifier ir; + EXPECT_EQUAL(207u, ir.getDocIdLimit()); + EXPECT_EQUAL(41u, ir.getExpectedDocIds().size()); + auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), 300); + size_t numInverted = 300 - 41 - 1; + EXPECT_EQUAL(numInverted, inverted.size()); + EXPECT_EQUAL(2u, inverted[0]); + EXPECT_EQUAL(299u, inverted[numInverted - 1]); + ir.verify(*ir.createIterator(ir.getExpectedDocIds(), false)); + ir.verify(*ir.createIterator(ir.getExpectedDocIds(), true)); +} + +TEST("Test multisearch and andsearchstrict iterators adheres to initRange") { + InitRangeVerifier ir; + ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), false).release(), + ir.createFullIterator().release() }, false)); + + ir.verify( AndSearch::create({ ir.createIterator(ir.getExpectedDocIds(), true).release(), + ir.createFullIterator().release() }, true)); +} + +TEST("Test andnotsearchstrict iterators adheres to initRange") { + InitRangeVerifier ir; + + TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), false).release(), + 
ir.createEmptyIterator().release() }, false))); + TEST_DO(ir.verify( AndNotSearch::create({ir.createIterator(ir.getExpectedDocIds(), true).release(), + ir.createEmptyIterator().release() }, true))); + + auto inverted = InitRangeVerifier::invert(ir.getExpectedDocIds(), ir.getDocIdLimit()); + TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator().release(), + ir.createIterator(inverted, false).release() }, false))); + TEST_DO(ir.verify( AndNotSearch::create({ir.createFullIterator().release(), + ir.createIterator(inverted, false).release() }, true))); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/simple_phrase/.cvsignore b/searchlib/src/tests/queryeval/simple_phrase/.cvsignore new file mode 100644 index 00000000000..78f4563a999 --- /dev/null +++ b/searchlib/src/tests/queryeval/simple_phrase/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +simple_phrase_test diff --git a/searchlib/src/tests/queryeval/simple_phrase/.gitignore b/searchlib/src/tests/queryeval/simple_phrase/.gitignore new file mode 100644 index 00000000000..bfdb1a61782 --- /dev/null +++ b/searchlib/src/tests/queryeval/simple_phrase/.gitignore @@ -0,0 +1,4 @@ +*_test +.depend +Makefile +searchlib_simple_phrase_test_app diff --git a/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt b/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt new file mode 100644 index 00000000000..bc9e664a8cf --- /dev/null +++ b/searchlib/src/tests/queryeval/simple_phrase/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_simple_phrase_test_app + SOURCES + simple_phrase_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_simple_phrase_test_app COMMAND searchlib_simple_phrase_test_app) diff --git a/searchlib/src/tests/queryeval/simple_phrase/DESC b/searchlib/src/tests/queryeval/simple_phrase/DESC new file mode 100644 index 00000000000..4b1ad693a7b --- /dev/null +++ b/searchlib/src/tests/queryeval/simple_phrase/DESC @@ -0,0 +1 @@ +simple_phrase test. Take a look at simple_phrase_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/simple_phrase/FILES b/searchlib/src/tests/queryeval/simple_phrase/FILES new file mode 100644 index 00000000000..31d8e1af993 --- /dev/null +++ b/searchlib/src/tests/queryeval/simple_phrase/FILES @@ -0,0 +1 @@ +simple_phrase_test.cpp diff --git a/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp new file mode 100644 index 00000000000..f813d7203d0 --- /dev/null +++ b/searchlib/src/tests/queryeval/simple_phrase/simple_phrase_test.cpp @@ -0,0 +1,341 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("simple_phrase_test"); + +#include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/simple_phrase_blueprint.h> +#include <vespa/searchlib/queryeval/simple_phrase_search.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <algorithm> +#include <string> +#include <vector> + +using namespace search::queryeval; + +using search::fef::MatchData; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldHandle; +using search::query::SimpleStringTerm; +using search::query::Weight; +using std::unique_ptr; +using std::copy; +using std::string; +using std::vector; + +namespace { + +struct MyTerm : public search::queryeval::SimpleLeafBlueprint { + MyTerm(const FieldSpec &field, uint32_t hits) + : search::queryeval::SimpleLeafBlueprint(field) + { + setEstimate(HitEstimate(hits, (hits == 0))); + } + virtual SearchIterator::UP createLeafSearch( + const search::fef::TermFieldMatchDataArray &, bool) const + { + return SearchIterator::UP(); + } +}; + +class Test : public vespalib::TestApp { + void requireThatIteratorFindsSimplePhrase(bool useBlueprint); + void requireThatIteratorFindsLongPhrase(bool useBlueprint); + void requireThatStrictIteratorFindsNextMatch(bool useBlueprint); + void requireThatPhrasesAreUnpacked(bool useBlueprint); + void requireThatTermsCanBeEvaluatedInPriorityOrder(); + void requireThatBlueprintExposesFieldWithEstimate(); + void requireThatBlueprintForcesPositionDataOnChildren(); + void requireThatIteratorHonorsFutureDoom(); + void requireThatIteratorHonorsDoom(); + void 
requireThatDoomIsPropagated(); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("phrasesearch_test"); + + TEST_DO(requireThatIteratorFindsSimplePhrase(false)); + TEST_DO(requireThatIteratorFindsLongPhrase(false)); + TEST_DO(requireThatStrictIteratorFindsNextMatch(false)); + TEST_DO(requireThatPhrasesAreUnpacked(false)); + TEST_DO(requireThatTermsCanBeEvaluatedInPriorityOrder()); + + TEST_DO(requireThatIteratorFindsSimplePhrase(true)); + TEST_DO(requireThatIteratorFindsLongPhrase(true)); + TEST_DO(requireThatStrictIteratorFindsNextMatch(true)); + TEST_DO(requireThatPhrasesAreUnpacked(true)); + TEST_DO(requireThatBlueprintExposesFieldWithEstimate()); + TEST_DO(requireThatBlueprintForcesPositionDataOnChildren()); + TEST_DO(requireThatIteratorHonorsFutureDoom()); + TEST_DO(requireThatIteratorHonorsDoom()); + TEST_DO(requireThatDoomIsPropagated()); + + TEST_DONE(); +} + +const string field = "field"; +const uint32_t fieldId = 1; +const uint32_t doc_match = 42; +const uint32_t doc_no_match = 43; +const uint32_t phrase_handle = 1; + +class PhraseSearchTest +{ +private: + FakeRequestContext _requestContext; + FakeSearchable _index; + FieldSpec _phrase_fs; + SimplePhraseBlueprint _phrase; + std::vector<Blueprint::UP> _children; + MatchData::UP _md; + vector<uint32_t> _order; + uint32_t _pos; + bool _strict; + +public: + PhraseSearchTest(bool expiredDoom=false) : + _requestContext(nullptr, expiredDoom ? 
0 : std::numeric_limits<int64_t>::max()), + _index(), + _phrase_fs(field, fieldId, phrase_handle), + _phrase(_phrase_fs, _requestContext), + _children(), + _md(MatchData::makeTestInstance(0, 100, 10)), + _order(), + _pos(1), + _strict(false) + {} + + TermFieldHandle childHandle(uint32_t idx) const { return (10 * idx + 11); } + + void setStrict(bool strict) { _strict = strict; } + void setOrder(const vector<uint32_t> &order) { _order = order; } + const TermFieldMatchData &tmd() const { return *_md->resolveTermField(phrase_handle); } + + PhraseSearchTest &addTerm(const string &term, bool last) { + return addTerm(term, FakeResult() + .doc(doc_match).pos(_pos) + .doc(doc_no_match).pos(_pos + last)); + } + + PhraseSearchTest &addTerm(const string &term, const FakeResult &r) { + _index.addResult(field, term, r); + ++_pos; + SimpleStringTerm term_node(term, field, 0, Weight(0)); + { + // make one child blueprint for explicit use + FieldSpecList fields; + fields.add(FieldSpec(field, fieldId, + childHandle(_children.size()))); + _children.push_back(_index.createBlueprint(_requestContext, fields, term_node)); + } + { + // and one to be used by the phrase blueprint + FieldSpecList fields; + fields.add(_phrase.getNextChildField(_phrase_fs)); + _phrase.addTerm(_index.createBlueprint(_requestContext, fields, term_node)); + } + _order.push_back(_order.size()); + return *this; + } + + void + fetchPostings(bool useBlueprint) + { + if (useBlueprint) { + _phrase.fetchPostings(_strict); + return; + } + for (size_t i = 0; i < _children.size(); ++i) { + _children[i]->fetchPostings(_strict); + } + } + + // NB: using blueprint will ignore eval order override + SearchIterator *createSearch(bool useBlueprint) { + SearchIterator::UP search; + if (useBlueprint) { + search = _phrase.createSearch(*_md, _strict); + } else { + search::fef::TermFieldMatchDataArray childMatch; + for (size_t i = 0; i < _children.size(); ++i) { + childMatch.add(_md->resolveTermField(childHandle(i))); + } + 
SimplePhraseSearch::Children children; + for (size_t i = 0; i < _children.size(); ++i) { + children.push_back(_children[i]->createSearch(*_md, _strict).release()); + } + search.reset(new SimplePhraseSearch(children, MatchData::UP(), + childMatch, _order, + *_md->resolveTermField(phrase_handle), + _strict)); + } + search->initFullRange(); + return search.release(); + } +}; + +void Test::requireThatIteratorFindsSimplePhrase(bool useBlueprint) { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(useBlueprint); + unique_ptr<SearchIterator> search(test.createSearch(useBlueprint)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void Test::requireThatIteratorHonorsFutureDoom() { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(false); + vespalib::Clock clock; + vespalib::Doom futureDoom(clock, std::numeric_limits<int64_t>::max()); + unique_ptr<SearchIterator> search(test.createSearch(false)); + static_cast<SimplePhraseSearch &>(*search).setDoom(&futureDoom); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void Test::requireThatIteratorHonorsDoom() { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(false); + vespalib::Clock clock; + vespalib::Doom futureDoom(clock, 0); + unique_ptr<SearchIterator> search(test.createSearch(false)); + static_cast<SimplePhraseSearch &>(*search).setDoom(&futureDoom); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->beginId(), search->getDocId()); + EXPECT_TRUE(!search->seek(doc_match)); + EXPECT_TRUE(search->isAtEnd()); + EXPECT_TRUE(!search->seek(doc_no_match)); + EXPECT_TRUE(search->isAtEnd()); +} + +void Test::requireThatDoomIsPropagated() { + PhraseSearchTest test(true); + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(true); + 
unique_ptr<SearchIterator> search(test.createSearch(true)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(search->beginId(), search->getDocId()); + EXPECT_TRUE(!search->seek(doc_match)); + EXPECT_TRUE(search->isAtEnd()); + EXPECT_TRUE(!search->seek(doc_no_match)); + EXPECT_TRUE(search->isAtEnd()); +} + +void Test::requireThatIteratorFindsLongPhrase(bool useBlueprint) { + PhraseSearchTest test; + test.addTerm("foo", 0).addTerm("bar", 0).addTerm("baz", 0) + .addTerm("qux", 1); + + test.fetchPostings(useBlueprint); + unique_ptr<SearchIterator> search(test.createSearch(useBlueprint)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void Test::requireThatStrictIteratorFindsNextMatch(bool useBlueprint) { + PhraseSearchTest test; + test.setStrict(true); + test.addTerm("foo", 0).addTerm("bar", 1); + + test.fetchPostings(useBlueprint); + unique_ptr<SearchIterator> search(test.createSearch(useBlueprint)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_EQUAL(doc_match, search->getDocId()); + EXPECT_TRUE(!search->seek(doc_no_match)); + EXPECT_TRUE(search->isAtEnd()); +} + +void Test::requireThatPhrasesAreUnpacked(bool useBlueprint) { + PhraseSearchTest test; + test.addTerm("foo", FakeResult() + .doc(doc_match).pos(1).pos(11).pos(21)); + test.addTerm("bar", FakeResult() + .doc(doc_match).pos(2).pos(16).pos(22)); + test.fetchPostings(useBlueprint); + unique_ptr<SearchIterator> search(test.createSearch(useBlueprint)); + EXPECT_TRUE(search->seek(doc_match)); + search->unpack(doc_match); + + EXPECT_EQUAL(doc_match, test.tmd().getDocId()); + EXPECT_EQUAL(2, std::distance(test.tmd().begin(), test.tmd().end())); + EXPECT_EQUAL(1u, test.tmd().begin()->getPosition()); + EXPECT_EQUAL(21u, (test.tmd().begin() + 1)->getPosition()); +} + +void Test::requireThatTermsCanBeEvaluatedInPriorityOrder() { + vector<uint32_t> order; + order.push_back(2); + order.push_back(0); + order.push_back(1); + PhraseSearchTest test; + 
test.addTerm("foo", 0).addTerm("bar", 1).addTerm("baz", 1); + test.setOrder(order); + + test.fetchPostings(false); + unique_ptr<SearchIterator> search(test.createSearch(false)); + EXPECT_TRUE(!search->seek(1u)); + EXPECT_TRUE(search->seek(doc_match)); + EXPECT_TRUE(!search->seek(doc_no_match)); +} + +void +Test::requireThatBlueprintExposesFieldWithEstimate() +{ + FakeRequestContext requestContext; + FieldSpec f("foo", 1, 1); + SimplePhraseBlueprint phrase(f, requestContext); + ASSERT_TRUE(phrase.getState().numFields() == 1); + EXPECT_EQUAL(f.getFieldId(), phrase.getState().field(0).getFieldId()); + EXPECT_EQUAL(f.getHandle(), phrase.getState().field(0).getHandle()); + + EXPECT_EQUAL(true, phrase.getState().estimate().empty); + EXPECT_EQUAL(0u, phrase.getState().estimate().estHits); + + phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 10))); + EXPECT_EQUAL(false, phrase.getState().estimate().empty); + EXPECT_EQUAL(10u, phrase.getState().estimate().estHits); + + phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 5))); + EXPECT_EQUAL(false, phrase.getState().estimate().empty); + EXPECT_EQUAL(5u, phrase.getState().estimate().estHits); + + phrase.addTerm(Blueprint::UP(new MyTerm(phrase.getNextChildField(f), 20))); + EXPECT_EQUAL(false, phrase.getState().estimate().empty); + EXPECT_EQUAL(5u, phrase.getState().estimate().estHits); +} + +void +Test::requireThatBlueprintForcesPositionDataOnChildren() +{ + FakeRequestContext requestContext; + FieldSpec f("foo", 1, 1, true); + SimplePhraseBlueprint phrase(f, requestContext); + EXPECT_TRUE(f.isFilter()); + EXPECT_TRUE(!phrase.getNextChildField(f).isFilter()); +} + +} // namespace + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/queryeval/sourceblender/.gitignore b/searchlib/src/tests/queryeval/sourceblender/.gitignore new file mode 100644 index 00000000000..e0d4b433d65 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile 
+sourceblender_test +searchlib_sourceblender_test_app diff --git a/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt b/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt new file mode 100644 index 00000000000..e566cb0fdf0 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sourceblender_test_app + SOURCES + sourceblender.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_sourceblender_test_app COMMAND searchlib_sourceblender_test_app) diff --git a/searchlib/src/tests/queryeval/sourceblender/DESC b/searchlib/src/tests/queryeval/sourceblender/DESC new file mode 100644 index 00000000000..437dd818eb7 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/DESC @@ -0,0 +1 @@ +sourceblender test. Take a look at sourceblender.cpp for details. diff --git a/searchlib/src/tests/queryeval/sourceblender/FILES b/searchlib/src/tests/queryeval/sourceblender/FILES new file mode 100644 index 00000000000..97be7bcaf53 --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/FILES @@ -0,0 +1 @@ +sourceblender.cpp diff --git a/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp b/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp new file mode 100644 index 00000000000..2cfcf4e3f1d --- /dev/null +++ b/searchlib/src/tests/queryeval/sourceblender/sourceblender.cpp @@ -0,0 +1,169 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("sourceblender_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/sourceblendersearch.h> +#include <vespa/searchlib/queryeval/simplesearch.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/common/bitvectoriterator.h> +#include <vespa/searchlib/attribute/fixedsourceselector.h> + +using namespace search::queryeval; +using namespace search::fef; +using namespace search; +using std::make_unique; + +/** + * Proxy search used to verify unpack pattern + **/ +class UnpackChecker : public SearchIterator +{ +private: + SearchIterator::UP _search; + SimpleResult _unpacked; + +protected: + virtual void doSeek(uint32_t docid) { + _search->seek(docid); + setDocId(_search->getDocId()); + } + virtual void doUnpack(uint32_t docid) { + _unpacked.addHit(docid); + _search->unpack(docid); + } + +public: + UnpackChecker(SearchIterator *search) : _search(search), _unpacked() {} + const SimpleResult &getUnpacked() const { return _unpacked; } +}; + +class MySelector : public search::FixedSourceSelector +{ +public: + MySelector(int defaultSource) : search::FixedSourceSelector(defaultSource, "fs") { } + MySelector & set(Source s, uint32_t docId) { + setSource(s, docId); + return *this; + } +}; + +//----------------------------------------------------------------------------- + +TEST("test strictness") { + MatchData::UP md(MatchData::makeTestInstance(0, 100, 10)); + for (uint32_t i = 0; i < 2; ++i) { + bool strict = (i == 0); + + SimpleResult a; + SimpleResult b; + + a.addHit(2).addHit(5).addHit(6).addHit(8); + b.addHit(3).addHit(5).addHit(6).addHit(7); + + MySelector *sel = new MySelector(5); + sel->set(2, 1).set(3, 2).set(5, 2).set(7, 1); + + SourceBlenderBlueprint *blend_b = new 
SourceBlenderBlueprint(*sel); + Blueprint::UP a_b(new SimpleBlueprint(a)); + Blueprint::UP b_b(new SimpleBlueprint(b)); + a_b->setSourceId(1); + b_b->setSourceId(2); + blend_b->addChild(std::move(a_b)); + blend_b->addChild(std::move(b_b)); + Blueprint::UP bp(blend_b); + bp->fetchPostings(strict); + SearchIterator::UP search = bp->createSearch(*md, strict); + search->initFullRange(); + SearchIterator &blend = *search; + + EXPECT_TRUE(!blend.seek(1u)); + if (strict) { + EXPECT_EQUAL(2u, blend.getDocId()); + } else { + EXPECT_EQUAL(blend.beginId(), blend.getDocId()); + } + EXPECT_TRUE(blend.seek(5)); + EXPECT_EQUAL(5u, blend.getDocId()); + EXPECT_TRUE(!blend.seek(6)); + if (strict) { + EXPECT_TRUE(blend.isAtEnd()); + } else { + EXPECT_EQUAL(5u, blend.getDocId()); + } + delete sel; + } +} + +TEST("test full sourceblender search") { + SimpleResult a; + SimpleResult b; + SimpleResult c; + + a.addHit(2).addHit(11).addHit(21).addHit(34); + b.addHit(3).addHit(11).addHit(21).addHit(33); + c.addHit(4).addHit(11).addHit(21).addHit(32); + + // these are all handed over to the blender + UnpackChecker *ua = new UnpackChecker(new SimpleSearch(a)); + UnpackChecker *ub = new UnpackChecker(new SimpleSearch(b)); + UnpackChecker *uc = new UnpackChecker(new SimpleSearch(c)); + auto sel = make_unique<MySelector>(5); + + sel->set(2, 1).set(3, 2).set(11, 2).set(21, 3).set(34, 1); + SourceBlenderSearch::Children abc; + abc.push_back(SourceBlenderSearch::Child(ua, 1)); + abc.push_back(SourceBlenderSearch::Child(ub, 2)); + abc.push_back(SourceBlenderSearch::Child(uc, 3)); + + SearchIterator::UP blend(SourceBlenderSearch::create(sel->createIterator(), abc, true)); + SimpleResult result; + result.search(*blend); + + SimpleResult expect_result; + expect_result.addHit(2).addHit(3).addHit(11).addHit(21).addHit(34); + + SimpleResult expect_unpacked_a; + expect_unpacked_a.addHit(2).addHit(34); + + SimpleResult expect_unpacked_b; + expect_unpacked_b.addHit(3).addHit(11); + + SimpleResult 
expect_unpacked_c; + expect_unpacked_c.addHit(21); + + EXPECT_EQUAL(expect_result, result); + EXPECT_EQUAL(expect_unpacked_a, ua->getUnpacked()); + EXPECT_EQUAL(expect_unpacked_b, ub->getUnpacked()); + EXPECT_EQUAL(expect_unpacked_c, uc->getUnpacked()); +} + +using search::test::InitRangeVerifier; + +SourceBlenderSearch::Children +createChildren(const std::vector<InitRangeVerifier::DocIds> & indexes, const InitRangeVerifier & ir, bool strict) { + SourceBlenderSearch::Children children; + for (size_t index(0); index < indexes.size(); index++) { + children.emplace_back(ir.createIterator(indexes[index], strict).release(), index); + } + return children; +} + +TEST("test init range") { + InitRangeVerifier ir; + std::vector<InitRangeVerifier::DocIds> indexes(3); + auto sel = make_unique<MySelector>(ir.getDocIdLimit()); + for (uint32_t docId : ir.getExpectedDocIds()) { + const size_t indexId = docId%indexes.size(); + sel->set(docId, indexId); + indexes[indexId].push_back(docId); + } + TermFieldMatchData tfmd; + TEST_DO(ir.verify(SourceBlenderSearch::create(sel->createIterator(), createChildren(indexes, ir, false), false))); + TEST_DO(ir.verify(SourceBlenderSearch::create(sel->createIterator(), createChildren(indexes, ir, true), true))); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore b/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore new file mode 100644 index 00000000000..b0ce58fa658 --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/.gitignore @@ -0,0 +1,6 @@ +/log.txt +/report.head +/report.html +/plot.* +/*.png +searchlib_sparse_vector_benchmark_test_app diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt b/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt new file mode 100644 index 00000000000..8d4aa8c10be --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/CMakeLists.txt @@ -0,0 +1,8 @@ +# 
Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sparse_vector_benchmark_test_app + SOURCES + sparse_vector_benchmark_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sparse_vector_benchmark_test_app COMMAND searchlib_sparse_vector_benchmark_test_app BENCHMARK) diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES b/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES new file mode 100644 index 00000000000..48eda2416c9 --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/FILES @@ -0,0 +1 @@ +sparse_vector_benchmark_test.cpp diff --git a/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp new file mode 100644 index 00000000000..68d7dec0f87 --- /dev/null +++ b/searchlib/src/tests/queryeval/sparse_vector_benchmark/sparse_vector_benchmark_test.cpp @@ -0,0 +1,429 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> + +#include "../weak_and/rise_wand.h" +#include "../weak_and/rise_wand.hpp" +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/queryeval/andnotsearch.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/dot_product_search.h> +#include <vespa/searchlib/queryeval/fake_search.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/wand/weak_and_search.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/vespalib/util/box.h> +#include <vespa/vespalib/util/stringfmt.h> + +using namespace search::fef; +using namespace search::queryeval; +using namespace vespalib; + +namespace { + +//----------------------------------------------------------------------------- + +struct Writer { + FILE *file; + Writer(const std::string &file_name) { + file = fopen(file_name.c_str(), "w"); + assert(file != 0); + } + void write(const char *data, size_t size) const { + fwrite(data, 1, size, file); + } + void fmt(const char *format, ...) 
const +#ifdef __GNUC__ + __attribute__ ((format (printf,2,3))) +#endif + { + va_list ap; + va_start(ap, format); + vfprintf(file, format, ap); + va_end(ap); + } + ~Writer() { fclose(file); } +}; + +//----------------------------------------------------------------------------- + +// top-level html report (global, used by plots and graphs directly) +class Report +{ +private: + Writer _html; + +public: + Report(const std::string &file) : _html(file) { + _html.fmt("<html>\n"); + _html.fmt("<head><title>Sparse Vector Search Benchmark Report</title></head>\n"); + _html.fmt("<body>\n"); + _html.fmt("<h1>Sparse Vector Search Benchmark Report</h1>\n"); + } + void addPlot(const std::string &title, const std::string &png_file) { + _html.fmt("<h3>%s</h3>\n", title.c_str()); + _html.fmt("<img src=\"%s\">\n", png_file.c_str()); + } + ~Report() { + _html.fmt("<h2>Test Log with Numbers</h2>\n"); + _html.fmt("<pre>\n"); + // html file needs external termination + } +}; + +Report report("report.head"); + +//----------------------------------------------------------------------------- + +// a single graph within a plot +class Graph +{ +private: + Writer _writer; + +public: + typedef std::unique_ptr<Graph> UP; + Graph(const std::string &file) : _writer(file) {} + void addValue(double x, double y) { _writer.fmt("%g %g\n", x, y); } +}; + +// a plot possibly containing multiple graphs +class Plot +{ +private: + std::string _name; + int _graphs; + Writer _writer; + static int _plots; + +public: + typedef std::unique_ptr<Plot> UP; + + Plot(const std::string &title) : _name(vespalib::make_string("plot.%d", _plots++)), _graphs(0), + _writer(vespalib::make_string("%s.gnuplot", _name.c_str())) { + std::string png_file = vespalib::make_string("%s.png", _name.c_str()); + _writer.fmt("set term png size 1200,800\n"); + _writer.fmt("set output '%s'\n", png_file.c_str()); + _writer.fmt("set title '%s'\n", title.c_str()); + _writer.fmt("set xlabel 'term count'\n"); + _writer.fmt("set ylabel 'time 
(ms)'\n"); + report.addPlot(title, png_file); + } + + ~Plot() { + _writer.fmt("\n"); + } + + Graph::UP createGraph(const std::string &legend) { + std::string file = vespalib::make_string("%s.graph.%d", _name.c_str(), _graphs); + _writer.fmt("%s '%s' using 1:2 title '%s' w lines", + (_graphs == 0) ? "plot " : ",", file.c_str(), legend.c_str()); + ++_graphs; + return Graph::UP(new Graph(file)); + } + + static UP createPlot(const std::string &title) { return UP(new Plot(title)); } +}; + +int Plot::_plots = 0; + +//----------------------------------------------------------------------------- + +uint32_t default_weight = 100; +double max_time = 1000000.0; + +//----------------------------------------------------------------------------- + +struct ChildFactory { + ChildFactory() {} + virtual std::string name() const = 0; + virtual SearchIterator *createChild(uint32_t idx, uint32_t limit) const = 0; + virtual ~ChildFactory() {} +}; + +struct SparseVectorFactory { + virtual std::string name() const = 0; + virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const = 0; + virtual ~SparseVectorFactory() {} +}; + +struct FilterStrategy { + virtual std::string name() const = 0; + virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const = 0; + virtual ~FilterStrategy() {} +}; + +//----------------------------------------------------------------------------- + +struct ModSearch : SearchIterator { + uint32_t step; + uint32_t limit; + ModSearch(uint32_t step_in, uint32_t limit_in) : step(step_in), limit(limit_in) { setDocId(step); } + virtual void doSeek(uint32_t docid) { + assert(docid > getDocId()); + uint32_t hit = (docid / step) * step; + if (hit < docid) { + hit += step; + } + if (hit < limit) { + assert(hit >= docid); + setDocId(hit); + } else { + setAtEnd(); + } + } + virtual void doUnpack(uint32_t) {} +}; + +struct ModSearchFactory : 
ChildFactory { + uint32_t bias; + ModSearchFactory() : bias(1) {} + explicit ModSearchFactory(int b) : bias(b) {} + virtual std::string name() const { + return vespalib::make_string("ModSearch(%u)", bias); + } + virtual SearchIterator *createChild(uint32_t idx, uint32_t limit) const { + return new ModSearch(bias + idx, limit); + } +}; + +//----------------------------------------------------------------------------- + +struct VespaWandFactory : SparseVectorFactory { + uint32_t n; + VespaWandFactory(uint32_t n_in) : n(n_in) {} + virtual std::string name() const { + return vespalib::make_string("VespaWand(%u)", n); + } + virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + wand::Terms terms; + for (size_t i = 0; i < childCnt; ++i) { + terms.push_back(wand::Term(childFactory.createChild(i, limit), default_weight, limit / (i + 1))); + } + return WeakAndSearch::create(terms, n, true); + } +}; + +struct RiseWandFactory : SparseVectorFactory { + uint32_t n; + RiseWandFactory(uint32_t n_in) : n(n_in) {} + virtual std::string name() const { + return vespalib::make_string("RiseWand(%u)", n); + } + virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + wand::Terms terms; + for (size_t i = 0; i < childCnt; ++i) { + terms.push_back(wand::Term(childFactory.createChild(i, limit), default_weight, limit / (i + 1))); + } + return new rise::TermFrequencyRiseWand(terms, n); + } +}; + +struct WeightedSetFactory : SparseVectorFactory { + mutable TermFieldMatchData tfmd; + virtual std::string name() const { + return vespalib::make_string("WeightedSet"); + } + virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + std::vector<SearchIterator*> terms; + std::vector<int32_t> weights; + for (size_t i = 0; i < childCnt; ++i) { + terms.push_back(childFactory.createChild(i, limit)); + 
weights.push_back(default_weight); + } + return WeightedSetTermSearch::create(terms, tfmd, weights); + } +}; + +struct DotProductFactory : SparseVectorFactory { + mutable TermFieldMatchData tfmd; + virtual std::string name() const { + return vespalib::make_string("DotProduct"); + } + virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + MatchDataLayout layout; + std::vector<TermFieldHandle> handles; + for (size_t i = 0; i < childCnt; ++i) { + handles.push_back(layout.allocTermField(0)); + } + std::vector<SearchIterator*> terms; + std::vector<TermFieldMatchData*> childMatch; + std::vector<int32_t> weights; + MatchData::UP md = layout.createMatchData(); + for (size_t i = 0; i < childCnt; ++i) { + terms.push_back(childFactory.createChild(i, limit)); + childMatch.push_back(md->resolveTermField(handles[i])); + weights.push_back(default_weight); + } + return DotProductSearch::create(terms, tfmd, childMatch, weights, std::move(md)).release(); + } +}; + +struct OrFactory : SparseVectorFactory { + virtual std::string name() const { + return vespalib::make_string("Or"); + } + virtual SearchIterator *createSparseVector(ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + OrSearch::Children children; + for (size_t i = 0; i < childCnt; ++i) { + children.push_back(childFactory.createChild(i, limit)); + } + return OrSearch::create(children, true); + } +}; + +//----------------------------------------------------------------------------- + +struct NoFilterStrategy : FilterStrategy { + virtual std::string name() const { + return vespalib::make_string("NoFilter"); + } + virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + return vectorFactory.createSparseVector(childFactory, childCnt, limit); + } +}; + +struct PositiveFilterBeforeStrategy : FilterStrategy { + virtual std::string name() const { + return 
vespalib::make_string("PositiveBefore"); + } + virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + AndSearch::Children children; + children.push_back(new ModSearch(2, limit)); // <- 50% hits (hardcoded) + children.push_back(vectorFactory.createSparseVector(childFactory, childCnt, limit)); + return AndSearch::create(children, true); + } +}; + +struct NegativeFilterAfterStrategy : FilterStrategy { + virtual std::string name() const { + return vespalib::make_string("NegativeAfter"); + } + virtual SearchIterator *createRoot(SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) const { + AndNotSearch::Children children; + children.push_back(vectorFactory.createSparseVector(childFactory, childCnt, limit)); + children.push_back(new ModSearch(2, limit)); // <- 50% hits (hardcoded) + return AndNotSearch::create(children, true); + } +}; + +//----------------------------------------------------------------------------- + +struct Result { + double time_ms; + uint32_t num_hits; + Result() : time_ms(max_time), num_hits(0) {} + Result(double t, uint32_t n) : time_ms(t), num_hits(n) {} + void combine(const Result &r) { + if (time_ms == max_time) { + *this = r; + } else { + assert(num_hits == r.num_hits); + time_ms = std::min(time_ms, r.time_ms); + } + } + std::string toString() const { + return vespalib::make_string("%u hits, %g ms", num_hits, time_ms); + } +}; + +Result run_single_benchmark(FilterStrategy &filterStrategy, SparseVectorFactory &vectorFactory, ChildFactory &childFactory, uint32_t childCnt, uint32_t limit) { + SearchIterator::UP search(filterStrategy.createRoot(vectorFactory, childFactory, childCnt, limit)); + SearchIterator &sb = *search; + uint32_t num_hits = 0; + FastOS_Time timer; + timer.SetNow(); + for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) { + ++num_hits; + sb.unpack(sb.getDocId()); + } + return 
Result(timer.MilliSecsToNow(), num_hits); +} + +//----------------------------------------------------------------------------- + +// one setup is used to produce all graphs in a single plot +class Setup +{ +private: + FilterStrategy &_filterStrategy; + ChildFactory &_childFactory; + uint32_t _limit; + Plot::UP _plot; + + std::string make_title() const { + return vespalib::make_string("%u docs, filter:%s, terms:%s", _limit, _filterStrategy.name().c_str(), _childFactory.name().c_str()); + } + +public: + Setup(FilterStrategy &fs, ChildFactory &cf, uint32_t lim) : _filterStrategy(fs), _childFactory(cf), _limit(lim) { + _plot = Plot::createPlot(make_title()); + fprintf(stderr, "benchmark setup: %s\n", make_title().c_str()); + } + + void benchmark(SparseVectorFactory &svf, const std::vector<uint32_t> &child_counts) { + Graph::UP graph = _plot->createGraph(svf.name()); + fprintf(stderr, " search operator: %s\n", svf.name().c_str()); + for (size_t i = 0; i < child_counts.size(); ++i) { + uint32_t childCnt = child_counts[i]; + Result result; + for (int j = 0; j < 5; ++j) { + result.combine(run_single_benchmark(_filterStrategy, svf, _childFactory, childCnt, _limit)); + } + graph->addValue(childCnt, result.time_ms); + fprintf(stderr, " %u children => %s\n", childCnt, result.toString().c_str()); + } + } +}; + +//----------------------------------------------------------------------------- + +void benchmark_all_operators(Setup &setup, const std::vector<uint32_t> &child_counts) { + VespaWandFactory vespaWand256(256); + RiseWandFactory riseWand256(256); + WeightedSetFactory weightedSet; + DotProductFactory dotProduct; + OrFactory plain_or; + setup.benchmark(vespaWand256, child_counts); + setup.benchmark(riseWand256, child_counts); + setup.benchmark(weightedSet, child_counts); + setup.benchmark(dotProduct, child_counts); + setup.benchmark(plain_or, child_counts); +} + +//----------------------------------------------------------------------------- + +Box<uint32_t> 
make_full_child_counts() { + return Box<uint32_t>() + .add(10).add(20).add(30).add(40).add(50).add(60).add(70).add(80).add(90) + .add(100).add(125).add(150).add(175) + .add(200).add(250).add(300).add(350).add(400).add(450) + .add(500).add(600).add(700).add(800).add(900) + .add(1000).add(1200).add(1400).add(1600).add(1800) + .add(2000); +} + +//----------------------------------------------------------------------------- + +} // namespace <unnamed> + +TEST_FFF("benchmark", NoFilterStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) { + benchmark_all_operators(f3, make_full_child_counts()); +} + +TEST_FFF("benchmark", NoFilterStrategy(), ModSearchFactory(8), Setup(f1, f2, 5000000)) { + benchmark_all_operators(f3, make_full_child_counts()); +} + +TEST_FFF("benchmark", PositiveFilterBeforeStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) { + benchmark_all_operators(f3, make_full_child_counts()); +} + +TEST_FFF("benchmark", NegativeFilterAfterStrategy(), ModSearchFactory(), Setup(f1, f2, 5000000)) { + benchmark_all_operators(f3, make_full_child_counts()); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/termwise_eval/.gitignore b/searchlib/src/tests/queryeval/termwise_eval/.gitignore new file mode 100644 index 00000000000..b6b345775f6 --- /dev/null +++ b/searchlib/src/tests/queryeval/termwise_eval/.gitignore @@ -0,0 +1 @@ +searchlib_termwise_eval_test_app diff --git a/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt b/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt new file mode 100644 index 00000000000..ab9362f6e99 --- /dev/null +++ b/searchlib/src/tests/queryeval/termwise_eval/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_termwise_eval_test_app + SOURCES + termwise_eval_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_termwise_eval_test_app COMMAND searchlib_termwise_eval_test_app) diff --git a/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp new file mode 100644 index 00000000000..625d9928048 --- /dev/null +++ b/searchlib/src/tests/queryeval/termwise_eval/termwise_eval_test.cpp @@ -0,0 +1,641 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/vespalib/util/stringfmt.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/andnotsearch.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/termwise_search.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/queryeval/termwise_blueprint_helper.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/common/bitvectoriterator.h> + +using namespace vespalib; +using namespace search; +using namespace search::fef; +using namespace search::queryeval; + +//----------------------------------------------------------------------------- + +const uint32_t my_field = 0; + +//----------------------------------------------------------------------------- + +struct MyTerm : public SearchIterator { + size_t pos; + bool is_strict; + std::vector<uint32_t> hits; + MyTerm(const std::vector<uint32_t> &hits_in, bool is_strict_in) + : pos(0), is_strict(is_strict_in), hits(hits_in) {} + void initRange(uint32_t beginid, uint32_t endid) override { + SearchIterator::initRange(beginid, endid); + if (is_strict) { + doSeek(beginid); + } + } + void 
resetRange() override { + SearchIterator::resetRange(); + pos = 0; + } + void doSeek(uint32_t docid) override { + while ((pos < hits.size()) && (hits[pos] < docid)) { + ++pos; + } + if (is_strict) { + if ((pos == hits.size()) || isAtEnd(hits[pos])) { + setAtEnd(); + } else { + setDocId(hits[pos]); + } + } else { + if (isAtEnd(docid)) { + setAtEnd(); + } else if ((pos < hits.size()) && (hits[pos] == docid)) { + setDocId(docid); + } + } + } + void doUnpack(uint32_t) override {} + void visitMembers(vespalib::ObjectVisitor &visitor) const { + visit(visitor, "hits", hits); + visit(visitor, "strict", is_strict); + } +}; + +struct MyBlueprint : SimpleLeafBlueprint { + std::vector<uint32_t> hits; + MyBlueprint(const std::vector<uint32_t> &hits_in) + : SimpleLeafBlueprint(FieldSpecBaseList()), hits(hits_in) + { + setEstimate(HitEstimate(hits.size(), hits.empty())); + } + MyBlueprint(const std::vector<uint32_t> &hits_in, bool allow_termwise_eval) + : SimpleLeafBlueprint(FieldSpecBaseList()), hits(hits_in) + { + setEstimate(HitEstimate(hits.size(), hits.empty())); + set_allow_termwise_eval(allow_termwise_eval); + } + MyBlueprint(const std::vector<uint32_t> &hits_in, bool allow_termwise_eval, TermFieldHandle handle) + : SimpleLeafBlueprint(FieldSpecBase(my_field, handle)), hits(hits_in) + { + setEstimate(HitEstimate(hits.size(), hits.empty())); + set_allow_termwise_eval(allow_termwise_eval); + } + SearchIterator::UP createLeafSearch(const fef::TermFieldMatchDataArray &, + bool strict) const override + { + return SearchIterator::UP(new MyTerm(hits, strict)); + } +}; + +struct MyOr : OrBlueprint { + bool use_my_value; + bool my_value; + MyOr(bool use_my_value_in, bool my_value_in = true) + : use_my_value(use_my_value_in), my_value(my_value_in) {} + bool supports_termwise_children() const override { + if (use_my_value) { + return my_value; + } + // the default value for intermediate blueprints + return IntermediateBlueprint::supports_termwise_children(); + } +}; + 
+//-----------------------------------------------------------------------------
+
+// Unpack info with nothing added to it.
+UnpackInfo no_unpack() { return UnpackInfo(); }
+
+// Unpack info with only child 0 added.
+UnpackInfo selective_unpack() {
+    UnpackInfo first_only;
+    first_only.add(0); // 'only unpack first child' => trigger selective unpack
+    return first_only;
+}
+
+// Shorthands building raw iterator trees. The returned pointers are owning;
+// wrap the root with UP() below.
+
+SearchIterator *TERM(std::initializer_list<uint32_t> hits, bool strict) {
+    SearchIterator *term = new MyTerm(hits, strict);
+    return term;
+}
+
+SearchIterator *ANDNOT(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = AndNotSearch::create(children, strict);
+    return node;
+}
+
+SearchIterator *AND(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = AndSearch::create(children, strict);
+    return node;
+}
+
+// 'z' variants pass no_unpack(), 's' variants pass selective_unpack()
+
+SearchIterator *ANDz(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = AndSearch::create(children, strict, no_unpack());
+    return node;
+}
+
+SearchIterator *ANDs(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = AndSearch::create(children, strict, selective_unpack());
+    return node;
+}
+
+SearchIterator *OR(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = OrSearch::create(children, strict);
+    return node;
+}
+
+SearchIterator *ORz(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = OrSearch::create(children, strict, no_unpack());
+    return node;
+}
+
+SearchIterator *ORs(std::initializer_list<SearchIterator *> children, bool strict) {
+    SearchIterator *node = OrSearch::create(children, strict, selective_unpack());
+    return node;
+}
+
+//-----------------------------------------------------------------------------
+
+// Takes ownership of a raw pointer.
+template <typename T>
+std::unique_ptr<T> UP(T *t) {
+    std::unique_ptr<T> owner(t);
+    return owner;
+}
+
+//-----------------------------------------------------------------------------
+
+// AND of three ORs over fixed pseudo terms; only the strictness of the
+// top-level AND is controlled by the caller.
+SearchIterator::UP make_search(bool strict) {
+    SearchIterator *first = OR({TERM({2,7}, true),
+                                TERM({4,8}, true),
+                                TERM({5,6,9}, true)}, true);
+    SearchIterator *second = OR({TERM({1,4,7}, false),
+                                 TERM({2,5,8}, true),
+                                 TERM({3,6}, false)}, false);
+    SearchIterator *third = OR({TERM({1,2,3}, false),
+                                TERM({4,6}, false),
+                                TERM({8,9}, false)}, false);
+    return UP(AND({first, second, third}, strict));
+}
+
+SearchIterator::UP make_filter_search(bool strict) { + return UP(ANDNOT({TERM({1,2,3,4,5,6,7,8,9}, true), + TERM({1,9}, false), + TERM({3,7}, true), + TERM({5}, false)}, strict)); +} + +void add_if_inside(uint32_t docid, uint32_t begin, uint32_t end, std::vector<uint32_t> &expect) { + if (docid >= begin && docid < end) { + expect.push_back(docid); + } +} + +std::vector<uint32_t> make_expect(uint32_t begin, uint32_t end) { + std::vector<uint32_t> expect; + add_if_inside(2, begin, end, expect); + add_if_inside(4, begin, end, expect); + add_if_inside(6, begin, end, expect); + add_if_inside(8, begin, end, expect); + return expect; +} + +void verify(const std::vector<uint32_t> &expect, SearchIterator &search, uint32_t begin, uint32_t end) { + std::vector<uint32_t> actual; + search.initRange(begin, end); + for (uint32_t docid = begin; docid < end; ++docid) { + if (search.seek(docid)) { + actual.push_back(docid); + } + } + EXPECT_EQUAL(expect, actual); +} + +//----------------------------------------------------------------------------- + +MatchData::UP make_match_data() { + uint32_t num_features = 0; + uint32_t num_handles = 100; + uint32_t num_fields = 1; + return MatchData::makeTestInstance(num_features, num_handles, num_fields); +} + +//----------------------------------------------------------------------------- + +TEST("require that pseudo term produces correct results") { + TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 1, 6)); + TEST_DO(verify({1,2,3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 1, 6)); + TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, true)), 3, 6)); + TEST_DO(verify({3,4,5}, *UP(TERM({1,2,3,4,5}, false)), 3, 6)); + TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, true)), 1, 4)); + TEST_DO(verify({1,2,3}, *UP(TERM({1,2,3,4,5}, false)), 1, 4)); +} + +TEST("require that normal search gives expected results") { + auto search = make_search(true); + TEST_DO(verify(make_expect(1, 10), *search, 1, 10)); +} + +TEST("require that filter search gives 
expected results") { + auto search = make_filter_search(true); + TEST_DO(verify(make_expect(1, 10), *search, 1, 10)); +} + +TEST("require that termwise AND/OR search produces appropriate results") { + for (uint32_t begin: {1, 2, 5}) { + for (uint32_t end: {6, 7, 10}) { + for (bool strict_search: {true, false}) { + for (bool strict_wrapper: {true, false}) { + TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s", + begin, end, strict_search ? "true" : "false", + strict_wrapper ? "true" : "false").c_str()); + auto search = make_termwise(make_search(strict_search), strict_wrapper); + TEST_DO(verify(make_expect(begin, end), *search, begin, end)); + } + } + } + } +} + +TEST("require that termwise filter search produces appropriate results") { + for (uint32_t begin: {1, 2, 5}) { + for (uint32_t end: {6, 7, 10}) { + for (bool strict_search: {true, false}) { + for (bool strict_wrapper: {true, false}) { + TEST_STATE(make_string("begin: %u, end: %u, strict_search: %s, strict_wrapper: %s", + begin, end, strict_search ? "true" : "false", + strict_wrapper ? 
"true" : "false").c_str()); + auto search = make_termwise(make_filter_search(strict_search), strict_wrapper); + TEST_DO(verify(make_expect(begin, end), *search, begin, end)); + } + } + } + } +} + +TEST("require that termwise ANDNOT with single term works") { + TEST_DO(verify({2,3,4}, *make_termwise(UP(ANDNOT({TERM({1,2,3,4,5}, true)}, true)), true), 2, 5)); +} + +TEST("require that pseudo term is rewindable") { + auto search = UP(TERM({1,2,3,4,5}, true)); + TEST_DO(verify({3,4,5}, *search, 3, 6)); + search->resetRange(); + TEST_DO(verify({1,2,3,4}, *search, 1, 5)); +} + +TEST("require that termwise wrapper is rewindable") { + auto search = make_termwise(make_search(true), true); + TEST_DO(verify(make_expect(3, 7), *search, 3, 7)); + search->resetRange(); + TEST_DO(verify(make_expect(1, 5), *search, 1, 5)); +} + +//----------------------------------------------------------------------------- + +TEST("require that leaf blueprints allow termwise evaluation by default") { + MyBlueprint bp({}); + EXPECT_TRUE(bp.getState().allow_termwise_eval()); +} + +TEST("require that leaf blueprints can enable/disable termwise evaluation") { + MyBlueprint enable({}, true); + MyBlueprint disable({}, false); + EXPECT_TRUE(enable.getState().allow_termwise_eval()); + EXPECT_FALSE(disable.getState().allow_termwise_eval()); +} + +TEST("require that intermediate blueprints disallow termwise evaluation by default") { + MyOr bp(false); + bp.addChild(UP(new MyBlueprint({}, true))); + bp.addChild(UP(new MyBlueprint({}, true))); + EXPECT_FALSE(bp.getState().allow_termwise_eval()); +} + +TEST("require that intermediate blueprints can enable/disable termwise evaluation") { + MyOr enable(true, true); + enable.addChild(UP(new MyBlueprint({}, true))); + enable.addChild(UP(new MyBlueprint({}, true))); + EXPECT_TRUE(enable.getState().allow_termwise_eval()); + MyOr disable(true, false); + disable.addChild(UP(new MyBlueprint({}, true))); + disable.addChild(UP(new MyBlueprint({}, true))); + 
EXPECT_FALSE(disable.getState().allow_termwise_eval()); +} + +TEST("require that intermediate blueprints cannot be termwise unless all its children are termwise") { + MyOr bp(true, true); + bp.addChild(UP(new MyBlueprint({}, true))); + bp.addChild(UP(new MyBlueprint({}, false))); + EXPECT_FALSE(bp.getState().allow_termwise_eval()); +} + +//----------------------------------------------------------------------------- + +TEST("require that leafs have tree size 1") { + MyBlueprint bp({}); + EXPECT_EQUAL(1u, bp.getState().tree_size()); +} + +TEST("require that tree size is accumulated correctly by intermediate nodes") { + MyOr bp(false); + EXPECT_EQUAL(1u, bp.getState().tree_size()); + bp.addChild(UP(new MyBlueprint({}))); + bp.addChild(UP(new MyBlueprint({}))); + EXPECT_EQUAL(3u, bp.getState().tree_size()); + auto child = UP(new MyOr(false)); + child->addChild(UP(new MyBlueprint({}))); + child->addChild(UP(new MyBlueprint({}))); + bp.addChild(std::move(child)); + EXPECT_EQUAL(6u, bp.getState().tree_size()); +} + +//----------------------------------------------------------------------------- + +TEST("require that any blueprint node can obtain the root") { + MyOr bp(false); + bp.addChild(UP(new MyBlueprint({1,2,3}))); + bp.addChild(UP(new MyBlueprint({1,2,3,4,5,6}))); + EXPECT_TRUE(&bp != &bp.getChild(0)); + EXPECT_TRUE(&bp != &bp.getChild(1)); + EXPECT_TRUE(&bp == &bp.getChild(0).root()); + EXPECT_TRUE(&bp == &bp.getChild(1).root()); + EXPECT_TRUE(&bp == &bp.root()); +} + +//----------------------------------------------------------------------------- + +TEST("require that match data keeps track of the termwise limit") { + auto md = make_match_data(); + EXPECT_EQUAL(1.0, md->get_termwise_limit()); + md->set_termwise_limit(0.03); + EXPECT_EQUAL(0.03, md->get_termwise_limit()); +} + +//----------------------------------------------------------------------------- + +TEST("require that terwise test search string dump is detailed enough") { + 
EXPECT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), + make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString()); + + EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), + make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, false), TERM({3}, true)}, true)), true)->asString()); + + EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), + make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, false)), true)->asString()); + + EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), + make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), false)->asString()); + + EXPECT_NOT_EQUAL(make_termwise(UP(OR({TERM({1,2,3}, true), TERM({2,3}, true), TERM({3}, true)}, true)), true)->asString(), + make_termwise(UP(OR({TERM({1,2,3}, true), TERM({3}, true), TERM({2,3}, true)}, true)), true)->asString()); +} + +//----------------------------------------------------------------------------- + +TEST("require that basic termwise evaluation works") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new MyBlueprint({2}, true, 2))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), + make_termwise(UP(OR({TERM({1}, strict), TERM({2}, strict)}, strict)), strict)->asString()); + } +} + +TEST("require that the hit rate must be high enough for termwise evaluation to be activated") { + auto md = make_match_data(); + md->set_termwise_limit(1.0); // <- + 
md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new MyBlueprint({2}, true, 2))); + for (bool strict: {true, false}) { + EXPECT_TRUE(my_or.createSearch(*md, strict)->asString().find("TermwiseSearch") == vespalib::string::npos); + } +} + +TEST("require that enough unranked termwise terms are present for termwise evaluation to be activated") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // <- not termwise + my_or.addChild(UP(new MyBlueprint({3}, true, 3))); // <- ranked + for (bool strict: {true, false}) { + EXPECT_TRUE(my_or.createSearch(*md, strict)->asString().find("TermwiseSearch") == vespalib::string::npos); + } +} + +TEST("require that termwise evaluation can be multi-level, but not duplicated") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + auto child = UP(new OrBlueprint()); + child->addChild(UP(new MyBlueprint({2}, true, 2))); + child->addChild(UP(new MyBlueprint({3}, true, 3))); + my_or.addChild(std::move(child)); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), + make_termwise(UP(OR({TERM({1}, strict), ORz({TERM({2}, strict), TERM({3}, strict)}, strict)}, strict)), strict)->asString()); + } +} + +//----------------------------------------------------------------------------- + +TEST("require that OR can be completely termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + 
md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new MyBlueprint({2}, true, 2))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), + make_termwise(UP(OR({TERM({1}, strict), TERM({2}, strict)}, strict)), strict)->asString()); + } +} + +TEST("require that OR can be partially termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new MyBlueprint({2}, true, 2))); + my_or.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), + UP(ORs({make_termwise(UP(OR({TERM({1}, strict), TERM({3}, strict)}, strict)), strict).release(), TERM({2}, strict)}, strict))->asString()); + } +} + +TEST("require that OR puts termwise subquery at the right place") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(2)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, true, 1))); + my_or.addChild(UP(new MyBlueprint({2}, true, 2))); + my_or.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), + UP(ORs({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, strict), TERM({3}, strict)}, strict)), strict).release()}, strict))->asString()); + } +} + +TEST("require that OR can use termwise eval also when having non-termwise children") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + OrBlueprint 
my_or; + my_or.addChild(UP(new MyBlueprint({1}, false, 1))); + my_or.addChild(UP(new MyBlueprint({2}, true, 2))); + my_or.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_or.createSearch(*md, strict)->asString(), + UP(ORz({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, strict), TERM({3}, strict)}, strict)), strict).release()}, strict))->asString()); + } +} + +//----------------------------------------------------------------------------- + +TEST("require that AND can be completely termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + AndBlueprint my_and; + my_and.addChild(UP(new MyBlueprint({1}, true, 1))); + my_and.addChild(UP(new MyBlueprint({2}, true, 2))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + make_termwise(UP(AND({TERM({1}, strict), TERM({2}, false)}, strict)), strict)->asString()); + } +} + +TEST("require that AND can be partially termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + AndBlueprint my_and; + my_and.addChild(UP(new MyBlueprint({1}, true, 1))); + my_and.addChild(UP(new MyBlueprint({2}, true, 2))); + my_and.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + UP(ANDs({make_termwise(UP(AND({TERM({1}, strict), TERM({3}, false)}, strict)), strict).release(), TERM({2}, false)}, strict))->asString()); + } +} + +TEST("require that AND puts termwise subquery at the right place") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(2)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + AndBlueprint my_and; + my_and.addChild(UP(new MyBlueprint({1}, true, 1))); + 
my_and.addChild(UP(new MyBlueprint({2}, true, 2))); + my_and.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + UP(ANDs({TERM({1}, strict), make_termwise(UP(AND({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString()); + } +} + +TEST("require that AND can use termwise eval also when having non-termwise children") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + md->resolveTermField(2)->tagAsNotNeeded(); + md->resolveTermField(3)->tagAsNotNeeded(); + AndBlueprint my_and; + my_and.addChild(UP(new MyBlueprint({1}, false, 1))); + my_and.addChild(UP(new MyBlueprint({2}, true, 2))); + my_and.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_and.createSearch(*md, strict)->asString(), + UP(ANDz({TERM({1}, strict), make_termwise(UP(AND({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString()); + } +} + +//----------------------------------------------------------------------------- + +TEST("require that ANDNOT can be completely termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + AndNotBlueprint my_andnot; + my_andnot.addChild(UP(new MyBlueprint({1}, true, 1))); + my_andnot.addChild(UP(new MyBlueprint({2}, true, 2))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(), + make_termwise(UP(ANDNOT({TERM({1}, strict), TERM({2}, false)}, strict)), strict)->asString()); + } +} + +TEST("require that ANDNOT can be partially termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + AndNotBlueprint my_andnot; + my_andnot.addChild(UP(new MyBlueprint({1}, true, 1))); + my_andnot.addChild(UP(new MyBlueprint({2}, true, 2))); + my_andnot.addChild(UP(new MyBlueprint({3}, true, 3))); + for 
(bool strict: {true, false}) { + EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(), + UP(ANDNOT({TERM({1}, strict), make_termwise(UP(OR({TERM({2}, false), TERM({3}, false)}, false)), false).release()}, strict))->asString()); + } +} + +TEST("require that ANDNOT can be partially termwise with first child being termwise") { + auto md = make_match_data(); + md->set_termwise_limit(0.0); + md->resolveTermField(1)->tagAsNotNeeded(); + AndNotBlueprint my_andnot; + my_andnot.addChild(UP(new MyBlueprint({1}, true, 1))); + my_andnot.addChild(UP(new MyBlueprint({2}, false, 2))); + my_andnot.addChild(UP(new MyBlueprint({3}, true, 3))); + for (bool strict: {true, false}) { + EXPECT_EQUAL(my_andnot.createSearch(*md, strict)->asString(), + UP(ANDNOT({make_termwise(UP(ANDNOT({TERM({1}, strict), TERM({3}, false)}, strict)), strict).release(), TERM({2}, false)}, strict))->asString()); + } +} + +//----------------------------------------------------------------------------- + +TEST("require that termwise blueprint helper calculates unpack info correctly") { + OrBlueprint my_or; + my_or.addChild(UP(new MyBlueprint({1}, false, 1))); // termwise not allowed + my_or.addChild(UP(new MyBlueprint({2}, false, 2))); // termwise not allowed and ranked + my_or.addChild(UP(new MyBlueprint({3}, true, 3))); + my_or.addChild(UP(new MyBlueprint({4}, true, 4))); // ranked + my_or.addChild(UP(new MyBlueprint({5}, true, 5))); + MultiSearch::Children dummy_searches(5, nullptr); + UnpackInfo unpack; // non-termwise unpack info + unpack.add(1); + unpack.add(3); + TermwiseBlueprintHelper helper(my_or, dummy_searches, unpack); + EXPECT_EQUAL(helper.children.size(), 3u); + EXPECT_EQUAL(helper.termwise.size(), 2u); + EXPECT_EQUAL(helper.first_termwise, 2u); + EXPECT_TRUE(!helper.termwise_unpack.needUnpack(0)); + EXPECT_TRUE(helper.termwise_unpack.needUnpack(1)); + EXPECT_TRUE(!helper.termwise_unpack.needUnpack(2)); + EXPECT_TRUE(helper.termwise_unpack.needUnpack(3)); + 
EXPECT_TRUE(!helper.termwise_unpack.needUnpack(4)); + EXPECT_TRUE(!helper.termwise_unpack.needUnpack(5)); +} + +TEST("test that init range works for terwise too.") { + search::test::InitRangeVerifier ir; + ir.verify(*make_termwise(ir.createIterator(ir.getExpectedDocIds(), false), false)); + ir.verify(*make_termwise(ir.createIterator(ir.getExpectedDocIds(), true), true)); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/weak_and/.gitignore b/searchlib/src/tests/queryeval/weak_and/.gitignore new file mode 100644 index 00000000000..5bbecb89249 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and/.gitignore @@ -0,0 +1,7 @@ +/weak_and_bench +/weak_and_test_expensive +/parallel_weak_and_bench +searchlib_weak_and_test_app +searchlib_parallel_weak_and_bench_app +searchlib_weak_and_bench_app +searchlib_weak_and_test_expensive_app diff --git a/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt new file mode 100644 index 00000000000..b3839ac75f3 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and/CMakeLists.txt @@ -0,0 +1,30 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_weak_and_test_app
+    SOURCES
+    weak_and_test.cpp
+    DEPENDS
+    searchlib
+    searchlib_test
+)
+vespa_add_test(NAME searchlib_weak_and_test_app COMMAND searchlib_weak_and_test_app)
+vespa_add_executable(searchlib_weak_and_test_expensive_app
+    SOURCES
+    weak_and_test_expensive.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_test_expensive_app NO_VALGRIND COMMAND searchlib_weak_and_test_expensive_app)
+vespa_add_executable(searchlib_weak_and_bench_app
+    SOURCES
+    weak_and_bench.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_weak_and_bench_app COMMAND searchlib_weak_and_bench_app BENCHMARK) # test name must be unique; named after its own executable
+vespa_add_executable(searchlib_parallel_weak_and_bench_app
+    SOURCES
+    parallel_weak_and_bench.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_parallel_weak_and_bench_app COMMAND searchlib_parallel_weak_and_bench_app BENCHMARK) # test name must be unique; named after its own executable
diff --git a/searchlib/src/tests/queryeval/weak_and/FILES b/searchlib/src/tests/queryeval/weak_and/FILES
new file mode 100644
index 00000000000..972727bfa00
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/FILES
@@ -0,0 +1,2 @@
+weak_and_test.cpp
+weak_and_bench.cpp
diff --git a/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp b/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp
new file mode 100644
index 00000000000..8e5b7fc7b85
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/parallel_weak_and_bench.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "wand_bench_setup.hpp"
+// Unfiltered runs: Vespa parallel WAND vs. RISE dot-product WAND (both constructed with 1000), for 10/100/1000 terms and limit 10000000.
+TEST_FF("benchmark", VespaParallelWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaParallelWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaParallelWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", DotProductRiseWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+// Filtered runs: the same factories wrapped in FilterFactory(f1, 2) before being handed to WandSetup.
+TEST_FFF("benchmark", VespaParallelWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaParallelWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaParallelWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", DotProductRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.h b/searchlib/src/tests/queryeval/weak_and/rise_wand.h
new file mode 100644
index 00000000000..f130f0d1012
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.h
@@ -0,0 +1,132 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/queryeval/wand/wand_parts.h>
+#include <vespa/vespalib/util/priority_queue.h>
+#include <functional>
+
+using search::queryeval::wand::DotProductScorer;
+using search::queryeval::wand::TermFrequencyScorer;
+using namespace search::queryeval;
+
+namespace rise {
+// Scorer that unpacks the term's iterator for the hit and scores it with the term's stored maxScore.
+struct TermFreqScorer
+{
+    static int64_t calculateMaxScore(const wand::Term &term) {
+        return TermFrequencyScorer::calculateMaxScore(term);
+    }
+    static int64_t calculateScore(const wand::Term &term, uint32_t docId) {
+        term.search->unpack(docId);
+        return term.maxScore;
+    }
+};
+// WAND iterator: Scorer supplies per-term max/actual scores, Cmp compares the accumulated upper bound with the threshold.
+template <typename Scorer, typename Cmp>
+class RiseWand : public search::queryeval::SearchIterator
+{
+public:
+    typedef uint32_t docid_t;
+    typedef uint64_t score_t;
+    typedef search::queryeval::wand::Terms Terms;
+    typedef search::queryeval::SearchIterator *PostingStreamPtr;
+
+private:
+    // comparator class that compares two streams. The variables a and b are
+    // logically indices into the streams vector.
+    class StreamComparator // no std::binary_function base: it was removed in C++17 and std::sort never needed it
+    {
+    private:
+        const docid_t *_streamDocIds;
+        //const addr_t *const *_streamPayloads;
+
+    public:
+        StreamComparator(const docid_t *streamDocIds);
+        //const addr_t *const *streamPayloads);
+        inline bool operator()(const uint16_t a, const uint16_t b);
+    };
+
+    // number of streams present in the query
+    uint32_t _numStreams;
+
+    // we own our substreams
+    std::vector<PostingStreamPtr> _streams;
+
+    size_t _lastPivotIdx;
+
+    // array of current doc ids for the various streams
+    docid_t *_streamDocIds;
+
+    // two arrays of indices into the _streams vector. This is used for merge.
+    // inplace_merge is not as efficient as the copy merge.
+    uint16_t *_streamIndices;
+    uint16_t *_streamIndicesAux;
+
+    // comparator that compares two streams
+    StreamComparator _streamComparator;
+
+    //-------------------------------------------------------------------------
+    // variables used for scoring and pruning
+
+    size_t _n;
+    score_t _limit;
+    score_t *_streamScores;
+    vespalib::PriorityQueue<score_t> _scores;
+    Terms _terms;
+
+    //-------------------------------------------------------------------------
+
+    /**
+     * Find the pivot feature index
+     *
+     * @param threshold score threshold
+     * @param pivotIdx pivot index
+     *
+     * @return whether a valid pivot index is found
+     */
+    bool _findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx);
+
+    /**
+     * let the first numStreamsToMove streams in the stream
+     * vector move to the next doc, and sort them.
+     *
+     * @param numStreamsToMove the number of streams that should move
+     */
+    void _moveStreamsAndSort(const uint32_t numStreamsToMove);
+
+    /**
+     * let the first numStreamsToMove streams in the stream
+     * vector move to desiredDocId or to the first docId greater than
+     * desiredDocId if desiredDocId does not exist in this stream,
+     * and sort them.
+     *
+     * @param numStreamsToMove the number of streams that should move
+     * @param desiredDocId desired doc id
+     *
+     */
+    void _moveStreamsToDocAndSort(const uint32_t numStreamsToMove, const docid_t desiredDocId);
+
+    /**
+     * do sort and merge for WAND
+     *
+     * @param numStreamsToSort the number of streams (starting from the first one) that should
+     * be sorted and then merged with the rest
+     *
+     */
+    void _sortMerge(const uint32_t numStreamsToSort);
+
+public:
+    RiseWand(const Terms &terms, uint32_t n);
+    virtual ~RiseWand();
+    void next();
+    void doSeek(uint32_t docid) override;
+    void doUnpack(uint32_t docid) override;
+};
+
+typedef RiseWand<TermFreqScorer, std::greater_equal<uint64_t> > TermFrequencyRiseWand;
+typedef RiseWand<DotProductScorer, std::greater<uint64_t> > DotProductRiseWand;
+
+} // namespace rise
+
diff --git a/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp
new file mode 100644
index 00000000000..02420e6c35d
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/rise_wand.hpp
@@ -0,0 +1,238 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastos/fastos.h>
+#include "rise_wand.h"
+#include <vespa/searchlib/queryeval/wand/wand_parts.h>
+#include <math.h>
+#include <iostream>
+
+using search::queryeval::wand::TermFrequencyScorer;
+
+namespace rise {
+// Takes the iterators in `terms` as streams, seeds per-stream scores from Scorer::calculateMaxScore and sorts stream indices by current docid.
+template <typename Scorer, typename Cmp>
+RiseWand<Scorer, Cmp>::RiseWand(const Terms &terms, uint32_t n)
+    : _numStreams(0),
+      _streams(),
+      _lastPivotIdx(0),
+      _streamDocIds(new docid_t[terms.size()]),
+      _streamIndices(new uint16_t[terms.size()]),
+      _streamIndicesAux(new uint16_t[terms.size()]),
+      _streamComparator(_streamDocIds),
+      _n(n),
+      _limit(1),
+      _streamScores(new score_t[terms.size()]),
+      _scores(),
+      _terms(terms)
+{
+    for (size_t i = 0; i < terms.size(); ++i) {
+        _terms[i].maxScore = Scorer::calculateMaxScore(terms[i]);
+        _streamScores[i] = _terms[i].maxScore;
+        _streams.push_back(terms[i].search);
+    }
+    _numStreams = _streams.size();
+    if (_numStreams == 0) {
+        setAtEnd();
+    }
+    for (uint32_t i=0; i<_numStreams; ++i) {
+        _streamIndices[i] = i;
+    }
+    for (uint32_t i=0; i<_numStreams; ++i) {
+        _streamDocIds[i] = _streams[i]->getDocId();
+    }
+    std::sort(_streamIndices, _streamIndices+_numStreams, _streamComparator);
+}
+
+template <typename Scorer, typename Cmp>
+RiseWand<Scorer, Cmp>::~RiseWand()
+{
+    for (size_t i = 0; i < _streams.size(); ++i) {
+        delete _streams[i];
+    }
+    delete [] _streamScores;
+    delete [] _streamIndicesAux;
+    delete [] _streamIndices;
+    delete [] _streamDocIds;
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::next()
+{
+
+    // We do not check whether the stream is already at the end
+    // here based on the assumption that application won't call
+    // next() for streams that are already at the end, or at least
+    // won't do this frequently.
+
+    uint32_t pivotIdx;
+    docid_t pivotDocId = search::endDocId;
+    score_t threshold = _limit;
+
+    while (true) {
+
+        if (!_findPivotFeatureIdx(threshold, pivotIdx)) {
+            setAtEnd();
+            return;
+        }
+
+        pivotDocId = _streamDocIds[_streamIndices[pivotIdx]];
+
+        if (_streamDocIds[_streamIndices[0]] == _streamDocIds[_streamIndices[pivotIdx]]) {
+
+            // Found candidate. All cursors before (*_streams)[pivotIdx] point to
+            // the same doc and this doc is the candidate for full evaluation.
+            setDocId(pivotDocId);
+
+            // Advance pivotIdx sufficiently so that all instances of pivotDocId are included
+            while (pivotIdx < _numStreams-1 && _streamDocIds[_streamIndices[pivotIdx+1]] == pivotDocId) {
+                ++pivotIdx;
+            }
+
+            _lastPivotIdx = pivotIdx;
+            return; // scoring and threshold adjustment is done in doUnpack
+
+        } else { // not all cursors up to the pivot are aligned at the same doc yet
+
+            // decreases pivotIdx to the first stream pointing at the pivotDocId
+            while (pivotIdx && _streamDocIds[_streamIndices[pivotIdx-1]] == pivotDocId) {
+                --pivotIdx;
+            }
+
+            _moveStreamsToDocAndSort(pivotIdx, pivotDocId);
+        }
+
+    } /* while (true) */
+}
+// Accumulates stream max scores in docid order until Cmp(accumUB, threshold) holds; the last stream added is the pivot.
+template <typename Scorer, typename Cmp>
+bool
+RiseWand<Scorer, Cmp>::_findPivotFeatureIdx(const score_t threshold, uint32_t &pivotIdx)
+{
+    uint32_t idx;
+    score_t accumUB = 0;
+    for (idx=0;
+         !Cmp()(accumUB, threshold) && idx < _numStreams;
+         ++idx) {
+        accumUB += _streamScores[_streamIndices[idx]];
+    }
+
+    if( Cmp()(accumUB, threshold) ) {
+        pivotIdx = idx - 1;
+        return true;
+    }
+    return false;
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::_moveStreamsAndSort(const uint32_t numStreamsToMove)
+{
+    for (uint32_t i=0; i<numStreamsToMove; ++i) {
+        _streams[_streamIndices[i]]->seek(_streams[_streamIndices[i]]->getDocId() + 1);
+        _streamDocIds[_streamIndices[i]] = _streams[_streamIndices[i]]->getDocId();
+    }
+    _sortMerge(numStreamsToMove);
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::_moveStreamsToDocAndSort(const uint32_t numStreamsToMove,
+                                                const docid_t desiredDocId)
+{
+    for (uint32_t i=0; i<numStreamsToMove; ++i) {
+        _streams[_streamIndices[i]]->seek(desiredDocId);
+        _streamDocIds[_streamIndices[i]] = _streams[_streamIndices[i]]->getDocId();
+    }
+    _sortMerge(numStreamsToMove);
+}
+// Sorts the first numStreamsToMove indices in the aux array, merges them with the (already ordered) rest, then drops trailing exhausted streams.
+template <typename Scorer, typename Cmp>
+inline
+void RiseWand<Scorer, Cmp>::_sortMerge(const uint32_t numStreamsToMove)
+{
+    for (uint32_t i=0; i<numStreamsToMove; ++i) {
+        _streamIndicesAux[i] = _streamIndices[i];
+    }
+    std::sort(_streamIndicesAux, _streamIndicesAux+numStreamsToMove, _streamComparator);
+
+    uint16_t j=numStreamsToMove, k=0, i=0;
+    while (i < numStreamsToMove && j < _numStreams) {
+        if (_streamComparator(_streamIndicesAux[i], _streamIndices[j])) {
+            _streamIndices[k++] = _streamIndicesAux[i++];
+        }
+        else {
+            _streamIndices[k++] = _streamIndices[j++];
+        }
+    }
+
+    if (j == _numStreams) {
+        while (i < numStreamsToMove) {
+            _streamIndices[k++] = _streamIndicesAux[i++];
+        }
+    }
+
+    while (_numStreams &&
+           _streamDocIds[_streamIndices[_numStreams-1]] == search::endDocId) {
+        --_numStreams;
+    }
+}
+
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::doSeek(uint32_t docid)
+{
+    if (getDocId() != beginId() && (docid - 1) == getDocId()) {
+        _moveStreamsAndSort(_lastPivotIdx + 1);
+    } else {
+        _moveStreamsToDocAndSort(_numStreams, docid);
+    }
+    next();
+}
+// Scores the hit over streams [0, _lastPivotIdx], keeps at most _n scores and sets _limit to _scores.front() once _n are held.
+template <typename Scorer, typename Cmp>
+void
+RiseWand<Scorer, Cmp>::doUnpack(uint32_t docid)
+{
+    score_t score = 0;
+    for (size_t i = 0; i <= _lastPivotIdx; ++i) {
+        score += Scorer::calculateScore(_terms[_streamIndices[i]], docid);
+    }
+    if (_scores.size() < _n || _scores.front() < score) {
+        _scores.push(score);
+        if (_scores.size() > _n) {
+            _scores.pop_front();
+        }
+        if (_scores.size() == _n) {
+            _limit = _scores.front();
+        }
+    }
+}
+
+/**
+ ************ BEGIN STREAM COMPARATOR *********************
+ */
+template <typename Scorer, typename Cmp>
+RiseWand<Scorer, Cmp>::StreamComparator::StreamComparator(
+        const docid_t *streamDocIds)
+    : _streamDocIds(streamDocIds)
+{
+}
+
+template <typename Scorer, typename Cmp>
+inline bool
+RiseWand<Scorer, Cmp>::StreamComparator::operator()(const uint16_t a,
+                                                    const uint16_t b)
+{
+    if (_streamDocIds[a] < _streamDocIds[b]) return true;
+    return false;
+}
+
+/**
+ ************ END STREAM COMPARATOR *********************
+ */
+
+} // namespace rise
+
diff --git a/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
new file mode 100644
index 00000000000..4c7116edfc4
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/wand_bench_setup.hpp
@@ -0,0 +1,248 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/vespalib/testkit/test_kit.h>
+#include <vespa/searchlib/queryeval/andsearch.h>
+#include <vespa/searchlib/queryeval/andnotsearch.h>
+#include <vespa/searchlib/queryeval/fake_search.h>
+#include <vespa/searchlib/queryeval/orsearch.h>
+#include <vespa/searchlib/queryeval/wand/parallel_weak_and_search.h>
+#include <vespa/searchlib/queryeval/simpleresult.h>
+#include <vespa/searchlib/queryeval/wand/weak_and_search.h>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/termfieldmatchdata.h>
+#include <vespa/searchlib/fef/termfieldmatchdataarray.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include "rise_wand.h"
+#include "rise_wand.hpp"
+
+using namespace search::fef;
+using namespace search::queryeval;
+using namespace vespalib;
+
+typedef ParallelWeakAndSearch::MatchParams PWMatchParams;
+typedef ParallelWeakAndSearch::RankParams PWRankParams;
+
+namespace {
+// Counters for hits, seeks, unpacks and the docs/hits skipped by seeks; print() writes them to stderr.
+struct Stats {
+    size_t hitCnt;
+    size_t seekCnt;
+    size_t unpackCnt;
+    size_t skippedDocs;
+    size_t skippedHits;
+    Stats() : hitCnt(0), seekCnt(0), unpackCnt(0),
+              skippedDocs(0), skippedHits(0) {}
+    void hit() {
+        ++hitCnt;
+    }
+    void seek(size_t docs, size_t hits) {
+        ++seekCnt;
+        skippedDocs += docs;
+        skippedHits += hits;
+    }
+    void unpack() {
+        ++unpackCnt;
+    }
+    void print() const {
+        fprintf(stderr, "Stats: hits=%zu, seeks=%zu, unpacks=%zu, skippedDocs=%zu, skippedHits=%zu\n",
+                hitCnt, seekCnt, unpackCnt, skippedDocs, skippedHits);
+    }
+};
+// Synthetic iterator that hits every docid which is a multiple of `step` and below `limit`, recording seeks/unpacks in `stats`.
+struct ModSearch : SearchIterator {
+    Stats &stats;
+    uint32_t step;
+    uint32_t limit;
+    MinMaxPostingInfo info;
+    TermFieldMatchData *tfmd;
+    ModSearch(Stats &stats_in, uint32_t step_in, uint32_t limit_in, int32_t maxWeight, TermFieldMatchData *tfmd_in)
+        : stats(stats_in), step(step_in), limit(limit_in), info(0, maxWeight), tfmd(tfmd_in) { }
+    void initRange(uint32_t begin, uint32_t end) override {
+        SearchIterator::initRange(begin, end);
+        setDocId(step);
+    }
+    void doSeek(uint32_t docid) override {
+        assert(docid > getDocId());
+        uint32_t skippedDocs = (docid - getDocId() - 1);
+        uint32_t skippedHits = (skippedDocs / step);
+        stats.seek(skippedDocs, skippedHits);
+        uint32_t hit = (docid / step) * step; // round down to a multiple of step, then bump up if below docid
+        if (hit < docid) {
+            hit += step;
+        }
+        if (hit < limit) {
+            assert(hit >= docid);
+            setDocId(hit);
+        } else {
+            setAtEnd();
+        }
+    }
+    void doUnpack(uint32_t docid) override {
+        if (tfmd != nullptr) {
+            tfmd->reset(docid);
+            search::fef::TermFieldMatchDataPosition pos;
+            pos.setElementWeight(info.getMaxWeight());
+            tfmd->appendPosition(pos);
+        }
+        stats.unpack();
+    }
+    virtual const PostingInfo *getPostingInfo() const { return &info; } // NOTE(review): presumably overrides SearchIterator::getPostingInfo - confirm, then mark override
+};
+// Interface for the benchmarked implementations: a display name and a way to build an iterator from WAND terms.
+struct WandFactory {
+    virtual std::string name() const = 0;
+    virtual SearchIterator::UP create(const wand::Terms &terms) = 0;
+    virtual ~WandFactory() {}
+};
+
+struct VespaWandFactory : WandFactory {
+    uint32_t n;
+    VespaWandFactory(uint32_t n_in) : n(n_in) {}
+    std::string name() const override { return make_string("VESPA WAND (n=%u)", n); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(WeakAndSearch::create(terms, n, true));
+    }
+};
+
+struct VespaArrayWandFactory : WandFactory {
+    uint32_t n;
+    VespaArrayWandFactory(uint32_t n_in) : n(n_in) {}
+    std::string name() const override { return make_string("VESPA ARRAY WAND (n=%u)", n); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(WeakAndSearch::createArrayWand(terms, n, true));
+    }
+};
+
+struct VespaHeapWandFactory : WandFactory {
+    uint32_t n;
+    VespaHeapWandFactory(uint32_t n_in) : n(n_in) {}
+    std::string name() const override { return make_string("VESPA HEAP WAND (n=%u)", n); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(WeakAndSearch::createHeapWand(terms, n, true));
+    }
+};
+
+struct VespaParallelWandFactory : public WandFactory {
+    SharedWeakAndPriorityQueue scores;
+    TermFieldMatchData rootMatchData;
+    VespaParallelWandFactory(uint32_t n) : scores(n), rootMatchData() {}
+    std::string name() const override { return make_string("VESPA PWAND (n=%u)", scores.getScoresToTrack()); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(ParallelWeakAndSearch::create(terms,
+                        PWMatchParams(scores, 0, 1, 1),
+                        PWRankParams(rootMatchData, MatchData::UP()), true));
+    }
+};
+
+struct VespaParallelArrayWandFactory : public VespaParallelWandFactory {
+    VespaParallelArrayWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+    std::string name() const override { return make_string("VESPA ARRAY PWAND (n=%u)", scores.getScoresToTrack()); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(ParallelWeakAndSearch::createArrayWand(terms,
+                        PWMatchParams(scores, 0, 1, 1),
+                        PWRankParams(rootMatchData, MatchData::UP()), true));
+    }
+};
+
+struct VespaParallelHeapWandFactory : public VespaParallelWandFactory {
+    VespaParallelHeapWandFactory(uint32_t n) : VespaParallelWandFactory(n) {}
+    std::string name() const override { return make_string("VESPA HEAP PWAND (n=%u)", scores.getScoresToTrack()); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(ParallelWeakAndSearch::createHeapWand(terms,
+                        PWMatchParams(scores, 0, 1, 1),
+                        PWRankParams(rootMatchData, MatchData::UP()), true));
+    }
+};
+
+struct TermFrequencyRiseWandFactory : WandFactory {
+    uint32_t n;
+    TermFrequencyRiseWandFactory(uint32_t n_in) : n(n_in) {}
+    std::string name() const override { return make_string("RISE WAND TF (n=%u)", n); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(new rise::TermFrequencyRiseWand(terms, n));
+    }
+};
+
+struct DotProductRiseWandFactory : WandFactory {
+    uint32_t n;
+    DotProductRiseWandFactory(uint32_t n_in) : n(n_in) {}
+    std::string name() const override { return make_string("RISE WAND DP (n=%u)", n); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        return SearchIterator::UP(new rise::DotProductRiseWand(terms, n));
+    }
+};
+// Decorator: combines the wrapped factory's iterator with a ModSearch of step n through AndNotSearch::create.
+struct FilterFactory : WandFactory {
+    WandFactory &factory;
+    Stats stats;
+    uint32_t n;
+    FilterFactory(WandFactory &f, uint32_t n_in) : factory(f), n(n_in) {}
+    std::string name() const override { return make_string("Filter (mod=%u) [%s]", n, factory.name().c_str()); }
+    SearchIterator::UP create(const wand::Terms &terms) override {
+        AndNotSearch::Children children;
+        children.push_back(factory.create(terms).release());
+        children.push_back(new ModSearch(stats, n, search::endDocId, n, nullptr));
+        return SearchIterator::UP(AndNotSearch::create(children, true));
+    }
+};
+// Benchmark driver: perform() walks a fresh iterator to the end (hit + unpack per doc) and tracks the minimum elapsed time.
+struct Setup {
+    Stats stats;
+    double minTimeMs;
+    Setup() : stats(), minTimeMs(10000000.0) {}
+    virtual ~Setup() {}
+    virtual std::string name() const = 0;
+    virtual SearchIterator::UP create() = 0;
+    void perform() {
+        SearchIterator::UP search = create();
+        SearchIterator &sb = *search;
+        FastOS_Time timer;
+        timer.SetNow();
+        for (sb.seek(1); !sb.isAtEnd(); sb.seek(sb.getDocId() + 1)) {
+            stats.hit();
+            sb.unpack(sb.getDocId());
+        }
+        double ms = timer.MilliSecsToNow();
+        if (ms < minTimeMs) {
+            minTimeMs = ms;
+        }
+    }
+    void benchmark() {
+        fprintf(stderr, "running benchmark for %s...\n", name().c_str());
+        for (size_t i = 0; i < 5; ++i) {
+            perform();
+            if (i == 0) { // stats keep accumulating across runs, so only the first run is reported
+                stats.print();
+            }
+        }
+        fprintf(stderr, "time (ms): %g\n", minTimeMs);
+    }
+};
+// Setup with childCnt ModSearch terms (steps 1..childCnt, each with weight 100) handed to the given factory.
+struct WandSetup : Setup {
+    WandFactory &factory;
+    uint32_t childCnt;
+    uint32_t limit;
+    uint32_t weight;
+    MatchData::UP matchData;
+    WandSetup(WandFactory &f, uint32_t c, uint32_t l) : Setup(), factory(f), childCnt(c), limit(l), weight(100), matchData() {}
+    std::string name() const override {
+        return make_string("Wand Setup (terms=%u,docs=%u) [%s]", childCnt, limit, factory.name().c_str());
+    }
+    SearchIterator::UP create() override {
+        MatchDataLayout layout;
+        std::vector<TermFieldHandle> handles;
+        for (size_t i = 0; i < childCnt; ++i) {
+            handles.push_back(layout.allocTermField(0));
+        }
+        matchData = layout.createMatchData();
+        wand::Terms terms;
+        for (size_t i = 1; i <= childCnt; ++i) {
+            TermFieldMatchData *tfmd = matchData->resolveTermField(handles[i-1]);
+            terms.push_back(wand::Term(new ModSearch(stats, i, limit, i, tfmd), weight, limit / i, tfmd));
+        }
+        return factory.create(terms);
+    }
+};
+
+} // namespace <unnamed>
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp
new file mode 100644
index 00000000000..1eba66a524f
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_bench.cpp
@@ -0,0 +1,19 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include "wand_bench_setup.hpp"
+// Unfiltered runs: Vespa WAND vs. RISE term-frequency WAND (both with n=1000), for 10/100/1000 terms and limit 10000000.
+TEST_FF("benchmark", VespaWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1, 10, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1, 100, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", VespaWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+TEST_FF("benchmark", TermFrequencyRiseWandFactory(1000), WandSetup(f1, 1000, 10000000)) { f2.benchmark(); }
+// Filtered runs: the same factories wrapped in FilterFactory(f1, 2) before being handed to WandSetup.
+TEST_FFF("benchmark", VespaWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 10, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 100, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", VespaWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+TEST_FFF("benchmark", TermFrequencyRiseWandFactory(1000), FilterFactory(f1, 2), WandSetup(f2, 1000, 10000000)) { f3.benchmark(); }
+
+TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
new file mode 100644
index 00000000000..3c64db1eb84
--- /dev/null
+++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test.cpp
@@ -0,0 +1,128 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/queryeval/fake_search.h> +#include <vespa/searchlib/queryeval/wand/weak_and_search.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/simplesearch.h> +#include <vespa/searchlib/queryeval/test/eagerchild.h> +#include <vespa/searchlib/queryeval/test/leafspec.h> +#include <vespa/searchlib/queryeval/test/searchhistory.h> +#include <vespa/searchlib/queryeval/test/trackedsearch.h> +#include <vespa/searchlib/queryeval/test/wandspec.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/test/initrange.h> + +using namespace search::fef; +using namespace search::queryeval; +using namespace search::queryeval::test; +using search::test::InitRangeVerifier; + +typedef SearchHistory History; + +namespace { + +struct MyWandSpec : public WandSpec +{ + uint32_t n; + + MyWandSpec(uint32_t n_) : WandSpec(), n(n_) {} + SearchIterator *create() { + return new TrackedSearch("WAND", getHistory(), WeakAndSearch::create(getTerms(), n, true)); + } +}; + +struct SimpleWandFixture { + MyWandSpec spec; + SimpleResult hits; + SimpleWandFixture() : spec(2), hits() { + spec.leaf(LeafSpec("foo").doc(1).doc(2).doc(3).doc(4).doc(5).doc(6)); + spec.leaf(LeafSpec("bar").doc(1).doc(3).doc(5)); + SearchIterator::UP search(spec.create()); + hits.search(*search); + } +}; + +struct AdvancedWandFixture { + MyWandSpec spec; + SimpleResult hits; + AdvancedWandFixture() : spec(100), hits() { + spec.leaf(LeafSpec("1").doc(1).doc(11).doc(111)); + spec.leaf(LeafSpec("2").doc(2).doc(12).doc(112)); + spec.leaf(LeafSpec("3").doc(3).doc(13).doc(113)); + spec.leaf(LeafSpec("4").doc(4).doc(14).doc(114)); + spec.leaf(LeafSpec("5").doc(5).doc(15).doc(115)); + SearchIterator::UP search(spec.create()); + hits.search(*search); + } +}; + +struct WeightOrder { + bool operator()(const wand::Term 
&t1, const wand::Term &t2) const { + return (t1.weight < t2.weight); + } +}; + +} // namespace <unnamed> + +TEST_F("require that wand prunes bad hits after enough good ones are obtained", SimpleWandFixture) { + EXPECT_EQUAL(SimpleResult().addHit(1).addHit(2).addHit(3).addHit(5), f.hits); +} + +TEST_F("require that wand uses subsearches as expected", SimpleWandFixture) { + EXPECT_EQUAL(History() + .seek("WAND", 1).seek("bar", 1).step("bar", 1).step("WAND", 1) + .unpack("WAND", 1).seek("foo", 1).step("foo", 1).unpack("bar", 1).unpack("foo", 1) + .seek("WAND", 2).seek("bar", 2).step("bar", 3).seek("foo", 2).step("foo", 2).step("WAND", 2) + .unpack("WAND", 2).unpack("foo", 2) + .seek("WAND", 3).step("WAND", 3) + .unpack("WAND", 3).seek("foo", 3).step("foo", 3).unpack("bar", 3).unpack("foo", 3) + .seek("WAND", 4).seek("bar", 4).step("bar", 5).seek("foo", 5).step("foo", 5).step("WAND", 5) + .unpack("WAND", 5).unpack("bar", 5).unpack("foo", 5) + .seek("WAND", 6).seek("bar", 6).step("bar", search::endDocId).step("WAND", search::endDocId), + f.spec.getHistory()); +} + +TEST_F("require that documents are considered in the right order", AdvancedWandFixture) { + EXPECT_EQUAL(SimpleResult() + .addHit(1).addHit(2).addHit(3).addHit(4).addHit(5) + .addHit(11).addHit(12).addHit(13).addHit(14).addHit(15) + .addHit(111).addHit(112).addHit(113).addHit(114).addHit(115), f.hits); +} + +TEST("require that initial docid for subsearches are taken into account") { + History history; + wand::Terms terms; + terms.push_back(wand::Term(new TrackedSearch("foo", history, new EagerChild(search::endDocId)), 100, 1)); + terms.push_back(wand::Term(new TrackedSearch("bar", history, new EagerChild(10)), 100, 2)); + SearchIterator::UP search(new TrackedSearch("WAND", history, WeakAndSearch::create(terms, 2, true))); + SimpleResult hits; + hits.search(*search); + EXPECT_EQUAL(SimpleResult().addHit(10), hits); + EXPECT_EQUAL(History().seek("WAND", 1).step("WAND", 10).unpack("WAND", 10).unpack("bar", 10) + 
.seek("WAND", 11).seek("bar", 11).step("bar", search::endDocId).step("WAND", search::endDocId), + history); +} + +TEST("verify initRange with search iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + using DocIds = InitRangeVerifier::DocIds; + std::vector<DocIds> split_lists(num_children); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + split_lists[i % num_children].push_back(full_list[i]); + } + for (bool strict: {false, true}) { + wand::Terms terms; + for (size_t i = 0; i < num_children; ++i) { + terms.emplace_back(ir.createIterator(split_lists[i], strict).release(), + 100, split_lists[i].size()); + } + SearchIterator::UP itr(WeakAndSearch::create(terms, -1, strict)); + ir.verify(*itr); + } +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp new file mode 100644 index 00000000000..8f60b6dd8c7 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and/weak_and_test_expensive.cpp @@ -0,0 +1,102 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include "wand_bench_setup.hpp" + +using namespace rise; + +namespace { + +template <typename WeakAndType, typename RiseType> +void checkWandHits(WandFactory &vespa, WandFactory &rise, uint32_t step, uint32_t filter) { + WandSetup vespaSetup(vespa, 500, 5000000); + WandSetup riseSetup(rise, 500, 5000000); + SearchIterator::UP s1 = vespaSetup.create(); + s1->initFullRange(); + SearchIterator::UP s2 = riseSetup.create(); + s2->initFullRange(); + ASSERT_TRUE(dynamic_cast<WeakAndType*>(s1.get()) != 0); + ASSERT_TRUE(dynamic_cast<WeakAndType*>(s2.get()) == 0); + ASSERT_TRUE(dynamic_cast<RiseType*>(s2.get()) != 0); + ASSERT_TRUE(dynamic_cast<RiseType*>(s1.get()) == 0); + s1->seek(1); + s2->seek(1); + while (!s1->isAtEnd() && + !s2->isAtEnd()) + { + ASSERT_EQUAL(s1->getDocId(), s2->getDocId()); + if ((filter == 0) || ((s1->getDocId() % filter) != 0)) { + s1->unpack(s1->getDocId()); + s2->unpack(s2->getDocId()); + } + s1->seek(s1->getDocId() + step); + s2->seek(s2->getDocId() + step); + } + ASSERT_TRUE(s1->isAtEnd()); + ASSERT_TRUE(s2->isAtEnd()); +} + +} // namespace <unnamed> + +TEST("require that mod search works") { + Stats stats; + SearchIterator::UP search(new ModSearch(stats, 3, 8, 3, NULL)); + SimpleResult hits; + hits.search(*search); + EXPECT_EQUAL(SimpleResult().addHit(3).addHit(6), hits); +} + +//---- WeakAndSearch ------------------------------------------------------------------------------ + +TEST_FF("require that (array) WAND and RISE WAND gives the same hits", + VespaArrayWandFactory(500), TermFrequencyRiseWandFactory(500)) +{ + checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 1, 0); +} + +TEST_FF("require that (heap) WAND and RISE WAND gives the same hits", + VespaHeapWandFactory(500), TermFrequencyRiseWandFactory(500)) +{ + checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 1, 0); +} + +TEST_FF("require that (array) WAND and RISE WAND gives the same hits with filtering and skipping", + 
VespaArrayWandFactory(500), TermFrequencyRiseWandFactory(500)) +{ + checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 123, 5); +} + +TEST_FF("require that (heap) WAND and RISE WAND gives the same hits with filtering and skipping", + VespaHeapWandFactory(500), TermFrequencyRiseWandFactory(500)) +{ + checkWandHits<WeakAndSearch, TermFrequencyRiseWand>(f1, f2, 123, 5); +} + + +//---- ParallelWeakAndSearch ---------------------------------------------------------------------- + +TEST_FF("require that (array) PWAND and RISE WAND gives the same hits", + VespaParallelArrayWandFactory(500), DotProductRiseWandFactory(500)) +{ + checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 1, 0); +} + +TEST_FF("require that (heap) PWAND and RISE WAND gives the same hits", + VespaParallelHeapWandFactory(500), DotProductRiseWandFactory(500)) +{ + checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 1, 0); +} + +TEST_FF("require that (array) PWAND and RISE WAND gives the same hits with filtering and skipping", + VespaParallelArrayWandFactory(500), DotProductRiseWandFactory(500)) +{ + checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 123, 5); +} + +TEST_FF("require that (heap) PWAND and RISE WAND gives the same hits with filtering and skipping", + VespaParallelHeapWandFactory(500), DotProductRiseWandFactory(500)) +{ + checkWandHits<ParallelWeakAndSearch, DotProductRiseWand>(f1, f2, 123, 5); +} + + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/weak_and_heap/.gitignore b/searchlib/src/tests/queryeval/weak_and_heap/.gitignore new file mode 100644 index 00000000000..b10f1cb370d --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_heap/.gitignore @@ -0,0 +1 @@ +searchlib_weak_and_heap_test_app diff --git a/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt new file mode 100644 index 00000000000..cacf4987aff --- /dev/null +++ 
b/searchlib/src/tests/queryeval/weak_and_heap/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_weak_and_heap_test_app + SOURCES + weak_and_heap_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_weak_and_heap_test_app COMMAND searchlib_weak_and_heap_test_app) diff --git a/searchlib/src/tests/queryeval/weak_and_heap/DESC b/searchlib/src/tests/queryeval/weak_and_heap/DESC new file mode 100644 index 00000000000..447bfc21e7c --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_heap/DESC @@ -0,0 +1 @@ +weak_and_heap test. Take a look at weak_and_heap_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/weak_and_heap/FILES b/searchlib/src/tests/queryeval/weak_and_heap/FILES new file mode 100644 index 00000000000..05d3f4c5df0 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_heap/FILES @@ -0,0 +1 @@ +weak_and_heap_test.cpp diff --git a/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp b/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp new file mode 100644 index 00000000000..ee44abf2b27 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_heap/weak_and_heap_test.cpp @@ -0,0 +1,101 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/queryeval/wand/weak_and_heap.h> + +using namespace search::queryeval; +typedef wand::score_t score_t; + +struct Scores : public std::vector<score_t> { + Scores &s(score_t val) { + push_back(val); + return *this; + } +}; + +void +adjust(WeakAndHeap &heap, const Scores &scores) +{ + Scores tmp = scores; + heap.adjust(&tmp[0], &tmp[0] + tmp.size()); +} + +void +assertScores(const Scores &exp, SharedWeakAndPriorityQueue &heap) +{ + ASSERT_EQUAL(exp.size(), heap.getScores().size()); + for (size_t i = 0; i < exp.size(); ++i) { + score_t front = heap.getScores().front(); + EXPECT_EQUAL(exp[i], front); + heap.getScores().pop_front(); + } +} + +struct NullFixture { + SharedWeakAndPriorityQueue h; + NullFixture() : h(0) {} +}; + +struct EmptyFixture { + SharedWeakAndPriorityQueue h; + EmptyFixture() : h(4) {} +}; + +struct FilledFixture { + SharedWeakAndPriorityQueue h; + FilledFixture() : h(4) { + adjust(h, Scores().s(3).s(5).s(7).s(9)); + EXPECT_EQUAL(3, h.getMinScore()); + } +}; + +TEST_F("require that SharedWeakAndPriorityQueue with 0 size gives max threshold", NullFixture) +{ + EXPECT_EQUAL(std::numeric_limits<score_t>::max(), f.h.getMinScore()); + adjust(f.h, Scores().s(100)); + EXPECT_EQUAL(std::numeric_limits<score_t>::max(), f.h.getMinScore()); +} + +TEST_F("require that SharedWeakAndPriorityQueue can be filled one-by-one", EmptyFixture) +{ + adjust(f.h, Scores().s(4)); + EXPECT_EQUAL(0, f.h.getMinScore()); + adjust(f.h, Scores().s(3)); + EXPECT_EQUAL(0, f.h.getMinScore()); + adjust(f.h, Scores().s(2)); + EXPECT_EQUAL(0, f.h.getMinScore()); + adjust(f.h, Scores().s(1)); + EXPECT_EQUAL(1, f.h.getMinScore()); + assertScores(Scores().s(1).s(2).s(3).s(4), f.h); +} + +TEST_F("require that SharedWeakAndPriorityQueue can be filled all-at-once", EmptyFixture) +{ + adjust(f.h, Scores().s(4).s(3).s(2).s(1)); + EXPECT_EQUAL(1, f.h.getMinScore()); + 
assertScores(Scores().s(1).s(2).s(3).s(4), f.h); +} + +TEST_F("require that SharedWeakAndPriorityQueue can be adjusted one-by-one", FilledFixture) +{ + adjust(f.h, Scores().s(2)); + EXPECT_EQUAL(3, f.h.getMinScore()); + adjust(f.h, Scores().s(3)); + EXPECT_EQUAL(3, f.h.getMinScore()); + adjust(f.h, Scores().s(6)); + EXPECT_EQUAL(5, f.h.getMinScore()); + adjust(f.h, Scores().s(8)); + EXPECT_EQUAL(6, f.h.getMinScore()); + adjust(f.h, Scores().s(4)); + EXPECT_EQUAL(6, f.h.getMinScore()); + assertScores(Scores().s(6).s(7).s(8).s(9), f.h); +} + +TEST_F("require that SharedWeakAndPriorityQueue can be adjusted all-at-once", FilledFixture) +{ + adjust(f.h, Scores().s(2).s(3).s(6).s(8).s(4)); + EXPECT_EQUAL(6, f.h.getMinScore()); + assertScores(Scores().s(6).s(7).s(8).s(9), f.h); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore b/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore new file mode 100644 index 00000000000..18fa7afeed4 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_scorers/.gitignore @@ -0,0 +1 @@ +searchlib_weak_and_scorers_test_app diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt b/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt new file mode 100644 index 00000000000..74a37c8fce8 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_scorers/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_weak_and_scorers_test_app + SOURCES + weak_and_scorers_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_weak_and_scorers_test_app COMMAND searchlib_weak_and_scorers_test_app) diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/DESC b/searchlib/src/tests/queryeval/weak_and_scorers/DESC new file mode 100644 index 00000000000..ceaf1028aae --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_scorers/DESC @@ -0,0 +1 @@ +weak_and_scorers test. Take a look at weak_and_scorers_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/FILES b/searchlib/src/tests/queryeval/weak_and_scorers/FILES new file mode 100644 index 00000000000..7f3b71a9f34 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_scorers/FILES @@ -0,0 +1 @@ +weak_and_scorers_test.cpp diff --git a/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp new file mode 100644 index 00000000000..2dec1762c27 --- /dev/null +++ b/searchlib/src/tests/queryeval/weak_and_scorers/weak_and_scorers_test.cpp @@ -0,0 +1,67 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/wand/wand_parts.h> + +using namespace search::queryeval; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldMatchDataPosition; + +typedef wand::Term Term; + +struct TestIterator : public SearchIterator +{ + MinMaxPostingInfo _info; + int32_t _termWeight; + bool _useInfo; + TermFieldMatchData _tfmd; + uint32_t _unpackDocId; + + typedef std::unique_ptr<TestIterator> UP; + TestIterator(int32_t maxWeight, int32_t termWeight, bool useInfo) + : _info(0, maxWeight), + _termWeight(termWeight), + _useInfo(useInfo), + _unpackDocId(0) + {} + virtual void doSeek(uint32_t docId) { + (void) docId; + } + virtual void doUnpack(uint32_t docId) { + _unpackDocId = docId; + _tfmd.appendPosition(TermFieldMatchDataPosition(0, 0, _termWeight, 1)); + } + virtual const PostingInfo *getPostingInfo() const { + return (_useInfo ? 
&_info : NULL); + } + static UP create(int32_t maxWeight, int32_t termWeight, bool useInfo) { + return UP(new TestIterator(maxWeight, termWeight, useInfo)); + } +}; + +TEST("require that DotProductScorer calculates max score") +{ + TestIterator::UP itr = TestIterator::create(10, 0, true); + Term term(itr.get(), 5, 0); + EXPECT_EQUAL(50, wand::DotProductScorer::calculateMaxScore(term)); +} + +TEST("require that DotProductScorer uses default max weight when not available in search iterator") +{ + TestIterator::UP itr = TestIterator::create(10, 0, false); + Term term(itr.get(), 5, 0); + int64_t exp = (int64_t)5 * std::numeric_limits<int32_t>::max(); + EXPECT_EQUAL(exp, wand::DotProductScorer::calculateMaxScore(term)); +} + +TEST("require that DotProductScorer calculates term score") +{ + TestIterator::UP itr = TestIterator::create(0, 7, false); + Term term(itr.get(), 5, 0, &itr->_tfmd); + EXPECT_EQUAL(35, wand::DotProductScorer::calculateScore(term, 11)); + EXPECT_EQUAL(11u, itr->_unpackDocId); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/queryeval/weighted_set_term/.gitignore b/searchlib/src/tests/queryeval/weighted_set_term/.gitignore new file mode 100644 index 00000000000..ab8cbb5bd5a --- /dev/null +++ b/searchlib/src/tests/queryeval/weighted_set_term/.gitignore @@ -0,0 +1 @@ +searchlib_weighted_set_term_test_app diff --git a/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt b/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt new file mode 100644 index 00000000000..4083762d115 --- /dev/null +++ b/searchlib/src/tests/queryeval/weighted_set_term/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_weighted_set_term_test_app + SOURCES + weighted_set_term_test.cpp + DEPENDS + searchlib + searchlib_test +) +vespa_add_test(NAME searchlib_weighted_set_term_test_app COMMAND searchlib_weighted_set_term_test_app) diff --git a/searchlib/src/tests/queryeval/weighted_set_term/DESC b/searchlib/src/tests/queryeval/weighted_set_term/DESC new file mode 100644 index 00000000000..040554bdd0e --- /dev/null +++ b/searchlib/src/tests/queryeval/weighted_set_term/DESC @@ -0,0 +1 @@ +weighted_set_term test. Take a look at weighted_set_term_test.cpp for details. diff --git a/searchlib/src/tests/queryeval/weighted_set_term/FILES b/searchlib/src/tests/queryeval/weighted_set_term/FILES new file mode 100644 index 00000000000..9912bc9a4a2 --- /dev/null +++ b/searchlib/src/tests/queryeval/weighted_set_term/FILES @@ -0,0 +1 @@ +weighted_set_term_test.cpp diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp new file mode 100644 index 00000000000..7436913b642 --- /dev/null +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("weighted_set_term_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> + +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/test/document_weight_attribute_helper.h> +#include <memory> +#include <string> +#include <map> + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using search::test::InitRangeVerifier; +using search::test::DocumentWeightAttributeHelper; + +namespace { + +void setupFakeSearchable(FakeSearchable &fake) { + for (size_t docid = 1; docid < 10; ++docid) { + std::string token1 = vespalib::make_string("%zu", docid); + std::string token2 = vespalib::make_string("1%zu", docid); + std::string token3 = vespalib::make_string("2%zu", docid); + + fake.addResult("field", token1, FakeResult().doc(docid)); + fake.addResult("multi-field", token1, FakeResult().doc(docid)); + fake.addResult("multi-field", token2, FakeResult().doc(docid)); + fake.addResult("multi-field", token3, FakeResult().doc(docid)); + } +} + +struct WS { + static const uint32_t fieldId = 42; + MatchDataLayout layout; + TermFieldHandle handle; + std::vector<std::pair<std::string, uint32_t> > tokens; + + WS() : layout(), handle(layout.allocTermField(fieldId)), tokens() { + MatchData::UP tmp = layout.createMatchData(); + ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); + } + 
+ WS &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode() const { + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const { + FakeRequestContext requestContext; + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + FakeRequestContext requestContext; + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + sb->initFullRange(); + FakeResult result; + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + TermFieldMatchData &data = *md->resolveTermField(handle); + FieldPositionsIterator itr = data.getIterator(); + for (; itr.valid(); itr.next()) { + result.elem(itr.getElementId()); + result.weight(itr.getElementWeight()); + result.pos(itr.getPosition()); + } + } + } + return result; + } +}; + +struct MockSearch : public SearchIterator { + int seekCnt; + int _initial; + MockSearch(uint32_t initial) : 
SearchIterator(), seekCnt(0), _initial(initial) { } + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + setDocId(_initial); + } + virtual void doSeek(uint32_t) { + ++seekCnt; + setAtEnd(); + } + virtual void doUnpack(uint32_t) {} +}; + +struct MockFixture { + MockSearch *mock; + TermFieldMatchData tfmd; + std::unique_ptr<SearchIterator> search; + MockFixture(uint32_t initial) : mock(0), tfmd(), search() { + std::vector<SearchIterator*> children; + std::vector<int32_t> weights; + mock = new MockSearch(initial); + children.push_back(mock); + weights.push_back(1); + search.reset(WeightedSetTermSearch::create(children, tfmd, weights)); + } +}; + +} // namespace <unnamed> + +TEST("testSimple") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + EXPECT_TRUE(ws.isGenericSearch(index, "field", true)); + EXPECT_TRUE(ws.isGenericSearch(index, "field", false)); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); + + EXPECT_EQUAL(expect, ws.search(index, "field", true)); + EXPECT_EQUAL(expect, ws.search(index, "field", false)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST("testMulti") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS().add("7", 70).add("5", 50).add("3", 30) + .add("15", 150).add("13", 130) + .add("23", 230).add("100", 1000); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); + 
EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); + + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(1)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(0, mock->seekCnt); +} + +TEST_F("test Eager Matching Child", MockFixture(5)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(3)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(search.seek(5)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(!search.seek(7)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(1, mock->seekCnt); +} + +TEST("verify initRange with search iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + using DocIds = InitRangeVerifier::DocIds; + std::vector<DocIds> split_lists(num_children); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + split_lists[i % num_children].push_back(full_list[i]); + } + bool strict = true; + std::vector<SearchIterator*> children; + for (size_t i = 0; i < num_children; ++i) { + children.push_back(ir.createIterator(split_lists[i], strict).release()); + } + TermFieldMatchData tfmd; + std::vector<int32_t> weights(num_children, 1); + SearchIterator::UP itr(WeightedSetTermSearch::create(children, tfmd, weights)); + ir.verify(*itr); +} + +TEST("verify initRange with document weight iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + DocumentWeightAttributeHelper helper; + helper.add_docs(ir.getDocIdLimit()); + auto full_list = 
ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + helper.set_doc(full_list[i], i % num_children, 1); + } + TermFieldMatchData tfmd; + std::vector<int32_t> weights(num_children, 1); + std::vector<DocumentWeightIterator> children; + for (size_t i = 0; i < num_children; ++i) { + auto dict_entry = helper.dwa().lookup(vespalib::make_string("%zu", i).c_str()); + helper.dwa().create(dict_entry.posting_idx, children); + } + SearchIterator::UP itr(WeightedSetTermSearch::create(tfmd, weights, std::move(children))); + ir.verify(*itr); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore new file mode 100644 index 00000000000..88c86c1720e --- /dev/null +++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/.gitignore @@ -0,0 +1 @@ +searchlib_feature_name_extractor_test_app diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt new file mode 100644 index 00000000000..b1b81efd840 --- /dev/null +++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_feature_name_extractor_test_app + SOURCES + feature_name_extractor_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_feature_name_extractor_test_app COMMAND searchlib_feature_name_extractor_test_app) diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES b/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES new file mode 100644 index 00000000000..6f6f6c1df43 --- /dev/null +++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/FILES @@ -0,0 +1 @@ +feature_name_extractor_test.cpp diff --git a/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp new file mode 100644 index 00000000000..12ce67a586a --- /dev/null +++ b/searchlib/src/tests/rankingexpression/feature_name_extractor/feature_name_extractor_test.cpp @@ -0,0 +1,79 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/features/rankingexpression/feature_name_extractor.h> + +using search::features::rankingexpression::FeatureNameExtractor; + +void verify_extract(const vespalib::string &input, + const vespalib::string &expect_symbol, + const vespalib::string &expect_after) +{ + FeatureNameExtractor extractor; + const char *pos_in = input.data(); + const char *end_in = input.data() + input.size(); + vespalib::string symbol_out; + const char *pos_out = nullptr; + extractor.extract_symbol(pos_in, end_in, pos_out, symbol_out); + ASSERT_TRUE(pos_out != nullptr); + vespalib::string after(pos_out, end_in); + EXPECT_EQUAL(expect_symbol, symbol_out); + EXPECT_EQUAL(expect_after, after); +} + +TEST("require that basic names are extracted correctly") { + TEST_DO(verify_extract("foo+", "foo", "+")); + TEST_DO(verify_extract("foo.out+", "foo.out", "+")); + TEST_DO(verify_extract("foo(p1,p2)+", "foo(p1,p2)", "+")); + TEST_DO(verify_extract("foo(p1,p2).out+", "foo(p1,p2).out", "+")); +} + +TEST("require that special characters are allowed in prefix and suffix") { + TEST_DO(verify_extract("_@$+", "_@$", "+")); + TEST_DO(verify_extract("_@$.$@_+", "_@$.$@_", "+")); + TEST_DO(verify_extract("_@$(p1,p2)+", "_@$(p1,p2)", "+")); + TEST_DO(verify_extract("_@$(p1,p2).$@_+", "_@$(p1,p2).$@_", "+")); +} + +TEST("require that dot is only allowed in suffix") { + TEST_DO(verify_extract("foo.bar+", "foo.bar", "+")); + TEST_DO(verify_extract("foo.bar.out+", "foo.bar.out", "+")); + TEST_DO(verify_extract("foo.bar(p1,p2)+", "foo.bar", "(p1,p2)+")); + TEST_DO(verify_extract("foo.bar(p1,p2).out+", "foo.bar", "(p1,p2).out+")); + TEST_DO(verify_extract("foo(p1,p2).out.bar+", "foo(p1,p2).out.bar", "+")); +} + +TEST("require that parameters can be nested") { + TEST_DO(verify_extract("foo(p1(a,b),p2(c,d(e,f))).out+", "foo(p1(a,b),p2(c,d(e,f))).out", "+")); +} + +TEST("require that space is allowed among parameters") { + TEST_DO(verify_extract("foo( 
p1 ( a , b ) ).out+", "foo( p1 ( a , b ) ).out", "+")); +} + +TEST("require that space is now allowed outside parameters") { + TEST_DO(verify_extract("foo +", "foo", " +")); + TEST_DO(verify_extract("foo . out+", "foo", " . out+")); + TEST_DO(verify_extract("foo. out+", "foo.", " out+")); + TEST_DO(verify_extract("foo (p1,p2)+", "foo", " (p1,p2)+")); + TEST_DO(verify_extract("foo(p1,p2) +", "foo(p1,p2)", " +")); + TEST_DO(verify_extract("foo(p1,p2) .out+", "foo(p1,p2)", " .out+")); + TEST_DO(verify_extract("foo(p1,p2).out +", "foo(p1,p2).out", " +")); +} + +TEST("require that parameters can be scientific numbers") { + TEST_DO(verify_extract("foo(1.3E+3,-1.9e-10).out+", "foo(1.3E+3,-1.9e-10).out", "+")); +} + +TEST("require that quoted parenthesis are not counted") { + TEST_DO(verify_extract("foo(a,b,\")\").out+", "foo(a,b,\")\").out", "+")); +} + +TEST("require that escaped quotes does not unquote") { + TEST_DO(verify_extract("foo(a,b,\"\\\")\").out+", "foo(a,b,\"\\\")\").out", "+")); +} + +TEST("require that escaped escape does not hinder unquote") { + TEST_DO(verify_extract("foo(a,b,\"\\\\\")\").out+", "foo(a,b,\"\\\\\")", "\").out+")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/rankingexpression/rankingexpressionlist b/searchlib/src/tests/rankingexpression/rankingexpressionlist new file mode 100644 index 00000000000..2ff1350025b --- /dev/null +++ b/searchlib/src/tests/rankingexpression/rankingexpressionlist @@ -0,0 +1,160 @@ +# This file is a list of semicolon separated strings. The first string is the expression to be parsed, whereas all +# following strings are allowed ways to print the parsed expression. If no alternatives are given, the expression can be +# printed as the original. Note that all strings are trimmed before they are parsed / compared. 
+ 1 +1.0; 1.0; 1 +1e1; 1e1; 10 +1e-1; 1e-1; 0.1 +1.0e1; 1.0e1; 10 +1.0e-1; 1.0e-1; 0.1 +-1; -1 +1 + -1; 1 + -1; 1 - 1 +-1 + 1; -1 + 1 +tan(10) +1 + 1 + 1 + 2 + 1 - 2 + 1 * 2 + 1 / 2 + 1 + 2 - 3 + 1 + 2 - 3 * 4 + 1 + 2 - 3 * 4 / 5 +1+2-3*4/5; 1 + 2 - 3 * 4 / 5 +(1) +(1)+ 2; (1) + 2 +(1)+(2); (1) + (2) +(1)+(2)-3; (1) + (2) - 3 +(1)+(2)-(3); (1) + (2) - (3) +(1)+(2)-(3)*4; (1) + (2) - (3) * 4 +(1)+(2)-(3)*(4); (1) + (2) - (3) * (4) +(1)+(2)-(3)*(4)/5; (1) + (2) - (3) * (4) / 5 +(1)+(2)-(3)*(4)/(5); (1) + (2) - (3) * (4) / (5) + 1 +(2)-(3)*(4)/(5); 1 + (2) - (3) * (4) / (5) + 1 + 2 -(3)*(4)/(5); 1 + 2 - (3) * (4) / (5) + 1 + 2 - 3 *(4)/(5); 1 + 2 - 3 * (4) / (5) + 1 + 2 - 3 * 4 /(5); 1 + 2 - 3 * 4 / (5) + 1 + 2 - 3 * 4 / 5 ; 1 + 2 - 3 * 4 / 5 +(1 + 2) +(1 + 2)- 3; (1 + 2) - 3 +(1 + 2 - 3) +(1 + 2 - 3)* 4; (1 + 2 - 3) * 4 +(1 + 2 - 3 * 4) +(1 + 2 - 3 * 4)/ 5; (1 + 2 - 3 * 4) / 5 +(1 + 2 - 3 * 4 / 5) + 1 +(2 - 3 * 4 / 5); 1 + (2 - 3 * 4 / 5) + 1 + 2 -(3 * 4 / 5); 1 + 2 - (3 * 4 / 5) + 1 + 2 - 3 *(4 / 5); 1 + 2 - 3 * (4 / 5) +1+2-3*(4/5); 1 + 2 - 3 * (4 / 5) +log(1) +log( 1 ); log(1) +log( 1 + 2 ); log(1 + 2) +log( 1 + 2 - 3 ); log(1 + 2 - 3) +log( 1 + 2 - 3 * 4 ); log(1 + 2 - 3 * 4) +log( 1 + 2 - 3 * 4 / 5 ); log(1 + 2 - 3 * 4 / 5) +log((1 + 2)- 3 * 4 / 5 ); log((1 + 2) - 3 * 4 / 5) +log( 1 +(2 - 3)* 4 / 5 ); log(1 + (2 - 3) * 4 / 5) +log( 1 + 2 -(3 * 4)/ 5 ); log(1 + 2 - (3 * 4) / 5) +log( 1 + 2 - 3 *(4 / 5)); log(1 + 2 - 3 * (4 / 5)) +log(1+2-3*4/5); log(1 + 2 - 3 * 4 / 5) +""; "" +"foo" +"foo\"" +(1+"foo"); (1 + "foo") +if("foo" == "bar", 1, 2); if ("foo" == "bar", 1, 2) +cosh(1); cosh(1) +cosh (1); cosh(1) +cosh ( 1 ); cosh(1) +cosh ( foo ); cosh(foo) +cosh ( foo.out ); cosh(foo.out) +cosh ( foo ( bar ) . 
out ); cosh(foo(bar).out) +sin(10) +cos(10) +tan(10) +acos(10) +asin(10) +atan(10) +cosh(10) +sinh(10) +tanh(10) +exp(10) +log(10) +log10(10) +sqrt(10) +ceil(10) +fabs(10) +floor(10) +atan2(10, 20); atan2(10,20) +ldexp(10, 20); ldexp(10,20) +pow(10, 20); pow(10,20) +fmod(10, 20); fmod(10,20) +min(0, 1); min(0,1) +max(1, 0); max(1,0) +if(1<2,3,4); if (1 < 2, 3, 4) +if(1>2,3,4); if (1 > 2, 3, 4) +if(1==2,3,4); if (1 == 2, 3, 4) +if(1~=2,3,4); if (1 ~= 2, 3, 4) +if(1<=2,3,4); if (1 <= 2, 3, 4) +if(1>=2,3,4); if (1 >= 2, 3, 4) +if(1>=2,3,4,0.3); if (1 >= 2, 3, 4, 0.3) +if(1>=2,3,4,0.5); if (1 >= 2, 3, 4, 0.5) +if (1 < 2, 3, 4); if (1 < 2, 3, 4) +if (1+2 < 3, 4, 5); if (1 + 2 < 3, 4, 5) +if (1 < 2+3, 4, 5); if (1 < 2 + 3, 4, 5) +if (1 < 2, 3+4, 5); if (1 < 2, 3 + 4, 5) +if (1 < 2, 3, 4+5); if (1 < 2, 3, 4 + 5) +if (foo in [bar], 6, 9); if (foo in [bar], 6, 9) +if (foo in [bar,baz], 6, 9); if (foo in [bar, baz], 6, 9) +if (foo in [bar,baz,cox], 6, 9); if (foo in [bar, baz, cox], 6, 9) +if (foo in [bar], 6, 9) +if (foo in [bar, baz], 6, 9) +if (foo in [bar, baz, cox], 6, 9) +if (foo in [ bar ], 6, 9); if (foo in [bar], 6, 9) +if (foo in [ bar, baz ], 6, 9); if (foo in [bar, baz], 6, 9) +if (foo in [ bar, baz, cox ], 6, 9); if (foo in [bar, baz, cox], 6, 9) +feature; feature +fe@ture; fe@ture +featur@; featur@ +fe$ture; fe$ture +featur$; featur$ +feature.out; feature.out +feature .out; feature.out +feature . out; feature.out +feature.out.out; feature.out.out +feature.if +feature.in +feature(arg1); feature(arg1) +feature (arg1); feature(arg1) +feature ( arg1); feature(arg1) +feature ( arg1 ); feature(arg1) +feature(arg1,arg2); feature(arg1,arg2) +feature(arg1 ,arg2); feature(arg1,arg2) +feature(arg1 , arg2); feature(arg1,arg2) +feature(arg1 , arg2).out; feature(arg1,arg2).out +feature(arg1 , arg2) . 
out; feature(arg1,arg2).out +feature("\",difficult","\")arguments\\").out +feature(arg1,arg2).out; feature(arg1,arg2).out +feature(if) +feature(in) +feature(cos) +feature("cos(1,2)") +feature(cos,sin,tan,cosh,sinh,tanh,acos,asin,atan,exp,log10,log,sqrt,ceil,fabs,floor) +feature(cos,"sin(1,2)",3) +rankingExpression(foo@92c9e83e1b665d2c.fe5dbbcea5ce7e29).rankingScript +rankingExpression(foo@92c9e83e1b665d2c.2e5dbbcea5ce7e29).rankingScript +mysum ( mysum(4, 4), value( 4 ), value(4) ); mysum(mysum(4,4),value(4),value(4)) +"\\" +"\"" +"\f" +"\female" +"\n" +"\nude" +"\r" +"fa\rt" +"\t" +"fe\tish" +"\x10081977" +"10\x081977" +"1008\x1977" +"100819\x77" +if(1.09999~=1.1,2,3); if (1.09999 ~= 1.1, 2, 3) diff --git a/searchlib/src/tests/ranksetup/.gitignore b/searchlib/src/tests/ranksetup/.gitignore new file mode 100644 index 00000000000..754597f65f8 --- /dev/null +++ b/searchlib/src/tests/ranksetup/.gitignore @@ -0,0 +1,5 @@ +.depend +Makefile +ranksetup_test +/.gdbinit +searchlib_ranksetup_test_app diff --git a/searchlib/src/tests/ranksetup/CMakeLists.txt b/searchlib/src/tests/ranksetup/CMakeLists.txt new file mode 100644 index 00000000000..712f1ffefa4 --- /dev/null +++ b/searchlib/src/tests/ranksetup/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_ranksetup_test_app + SOURCES + ranksetup_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_ranksetup_test_app COMMAND searchlib_ranksetup_test_app) diff --git a/searchlib/src/tests/ranksetup/DESC b/searchlib/src/tests/ranksetup/DESC new file mode 100644 index 00000000000..37f7cc6f2c5 --- /dev/null +++ b/searchlib/src/tests/ranksetup/DESC @@ -0,0 +1 @@ +ranksetup test. Take a look at ranksetup.cpp for details. 
diff --git a/searchlib/src/tests/ranksetup/FILES b/searchlib/src/tests/ranksetup/FILES new file mode 100644 index 00000000000..f1fce1d28ff --- /dev/null +++ b/searchlib/src/tests/ranksetup/FILES @@ -0,0 +1 @@ +ranksetup.cpp diff --git a/searchlib/src/tests/ranksetup/ranksetup_test.cpp b/searchlib/src/tests/ranksetup/ranksetup_test.cpp new file mode 100644 index 00000000000..aee04ef4cb7 --- /dev/null +++ b/searchlib/src/tests/ranksetup/ranksetup_test.cpp @@ -0,0 +1,922 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("ranksetup_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <map> +#include <string> +#include <vector> + +#include <vespa/searchlib/common/feature.h> + +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.h> +#include <vespa/searchlib/attribute/attributevector.hpp> + +#include <vespa/searchlib/fef/blueprint.h> +#include <vespa/searchlib/fef/blueprintfactory.h> +#include <vespa/searchlib/fef/featureexecutor.h> +#include <vespa/searchlib/fef/featurenamebuilder.h> +#include <vespa/searchlib/fef/idumpfeaturevisitor.h> +#include <vespa/searchlib/fef/indexproperties.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/rank_program.h> +#include <vespa/searchlib/fef/ranksetup.h> +#include <vespa/searchlib/fef/utils.h> + +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/queryenvironment.h> +#include <vespa/searchlib/fef/test/rankresult.h> + +#include <vespa/searchlib/features/rankingexpressionfeature.h> +#include <vespa/searchlib/features/setup.h> +#include <vespa/searchlib/features/valuefeature.h> +#include <vespa/searchlib/fef/test/plugin/chain.h> +#include <vespa/searchlib/fef/test/plugin/double.h> +#include 
<vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/searchlib/fef/test/plugin/staticrank.h> +#include <vespa/searchlib/fef/test/plugin/sum.h> +#include <vespa/searchlib/fef/test/plugin/cfgvalue.h> +#include <vespa/searchlib/fef/test/dummy_dependency_handler.h> + +using namespace search::fef; +using namespace search::features; +using namespace search::fef::test; +using search::feature_t; + +typedef FeatureNameBuilder FNB; + +//----------------------------------------------------------------------------- +// DumpFeatureVisitor +//----------------------------------------------------------------------------- +class DumpFeatureVisitor : public IDumpFeatureVisitor +{ +public: + DumpFeatureVisitor() {} + virtual void visitDumpFeature(const vespalib::string & name) { + std::cout << "dump feature: " << name << std::endl; + } +}; + + +//----------------------------------------------------------------------------- +// RankEnvironment +//----------------------------------------------------------------------------- +class RankEnvironment +{ +private: + const BlueprintFactory & _factory; + const IIndexEnvironment & _indexEnv; + const IQueryEnvironment & _queryEnv; + +public: + RankEnvironment(const BlueprintFactory & bfactory, + const IIndexEnvironment & indexEnv, const IQueryEnvironment & queryEnv) : + _factory(bfactory), _indexEnv(indexEnv), _queryEnv(queryEnv) {} + + const BlueprintFactory & factory() const { return _factory; } + const IIndexEnvironment & indexEnvironment() const { return _indexEnv; } + const IQueryEnvironment & queryEnvironment() const { return _queryEnv; } +}; + + +//----------------------------------------------------------------------------- +// RankExecutor +//----------------------------------------------------------------------------- +class RankExecutor +{ +private: + vespalib::string _initRank; + vespalib::string _finalRank; + const RankEnvironment & _rankEnv; + MatchDataLayout _layout; + std::unique_ptr<RankSetup> _rs; + RankProgram::UP 
_firstPhaseProgram; + RankProgram::UP _secondPhaseProgram; + +public: + RankExecutor(const vespalib::string &initRank, + const vespalib::string &finalRank, const RankEnvironment &rankEnv) : + _initRank(initRank), _finalRank(finalRank), _rankEnv(rankEnv), _layout(), + _rs(), _firstPhaseProgram(), _secondPhaseProgram() {} + bool setup(); + RankResult execute(uint32_t docId = 0); +}; + +bool +RankExecutor::setup() +{ + _rs = std::unique_ptr<RankSetup>(new RankSetup(_rankEnv.factory(), _rankEnv.indexEnvironment())); + if (_initRank.empty()) { + return false; + } + _rs->setFirstPhaseRank(_initRank); + + if (!_finalRank.empty()) { + _rs->setSecondPhaseRank(_finalRank); + } + + if (!_rs->compile()) { + return false; + } + + _firstPhaseProgram = _rs->create_first_phase_program(); + _firstPhaseProgram->setup(_layout, _rankEnv.queryEnvironment()); + if (!_finalRank.empty()) { + _secondPhaseProgram = _rs->create_second_phase_program(); + _secondPhaseProgram->setup(_layout, _rankEnv.queryEnvironment()); + } + return true; +} + +RankResult +RankExecutor::execute(uint32_t docId) +{ + RankResult result; + _firstPhaseProgram->run(docId); + result.addScore(_initRank, *Utils::getScoreFeature(*_firstPhaseProgram)); + + if (_secondPhaseProgram.get() != nullptr) { + _secondPhaseProgram->run(docId); + result.addScore(_finalRank, *Utils::getScoreFeature(*_secondPhaseProgram)); + } + + return result; +} + + +//----------------------------------------------------------------------------- +// FeatureDumper +//----------------------------------------------------------------------------- +class FeatureDumper +{ +private: + const RankEnvironment & _rankEnv; + RankSetup _setup; + MatchDataLayout _layout; + RankProgram::UP _rankProgram; + +public: + FeatureDumper(const RankEnvironment & rankEnv) : + _rankEnv(rankEnv), + _setup(_rankEnv.factory(), _rankEnv.indexEnvironment()), + _layout(), + _rankProgram() {} + void addDumpFeature(const vespalib::string &name); + void configure(); + bool setup(); 
+ RankResult dump(); +}; + +void +FeatureDumper::addDumpFeature(const vespalib::string &name) +{ + _setup.addDumpFeature(name); +} + +void +FeatureDumper::configure() +{ + _setup.configure(); +} + +bool +FeatureDumper::setup() +{ + if (!_setup.compile()) { + return false; + } + + _rankProgram = _setup.create_dump_program(); + _rankProgram->setup(_layout, _rankEnv.queryEnvironment()); + return true; +} + +RankResult +FeatureDumper::dump() +{ + _rankProgram->run(1); + std::map<vespalib::string, feature_t> features = Utils::getSeedFeatures(*_rankProgram); + RankResult retval; + for (auto itr = features.begin(); itr != features.end(); ++itr) { + retval.addScore(itr->first, itr->second); + } + return retval; +} + + +//----------------------------------------------------------------------------- +// RankSetupTest +//----------------------------------------------------------------------------- +class RankSetupTest : public vespalib::TestApp +{ +private: + BlueprintFactory _factory; + search::AttributeManager _manager; + IndexEnvironment _indexEnv; + QueryEnvironment _queryEnv; + RankEnvironment _rankEnv; + DumpFeatureVisitor _visitor; + + void testValueBlueprint(); + void testDoubleBlueprint(); + void testSumBlueprint(); + void testStaticRankBlueprint(); + void testChainBlueprint(); + void testCfgValueBlueprint(); + void testCompilation(); + void testRankSetup(); + bool testExecution(const vespalib::string & initRank, feature_t initScore, + const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0); + bool testExecution(const RankEnvironment &rankEnv, + const vespalib::string & initRank, feature_t initScore, + const vespalib::string & finalRank = "", feature_t finalScore = 0.0f, uint32_t docId = 0); + void testExecution(); + void testFeatureDump(); + + void checkFeatures(std::map<vespalib::string, feature_t> &exp, std::map<vespalib::string, feature_t> &actual); + void testFeatureNormalization(); + +public: + RankSetupTest(); + int Main(); 
+}; + + +void +RankSetupTest::testValueBlueprint() +{ + ValueBlueprint prototype; + prototype.visitDumpFeatures(_indexEnv, _visitor); + { // basic test + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setName("value"); + EXPECT_EQUAL(bp->getName(), "value"); + std::vector<vespalib::string> params; + params.push_back("5.5"); + params.push_back("10.5"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 0u); + EXPECT_EQUAL(deps.output.size(), 2u); + EXPECT_EQUAL(deps.output[0], "0"); + EXPECT_EQUAL(deps.output[1], "1"); + + FeatureExecutor::LP fe = bp->createExecutor(_queryEnv); + ValueExecutor * vfe = static_cast<ValueExecutor *>(fe.get()); + EXPECT_EQUAL(vfe->getValues().size(), 2u); + EXPECT_EQUAL(vfe->getValues()[0], 5.5f); + EXPECT_EQUAL(vfe->getValues()[1], 10.5f); + } + { // invalid params + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + EXPECT_TRUE(!bp->setup(_indexEnv, params)); + } +} + +void +RankSetupTest::testDoubleBlueprint() +{ + DoubleBlueprint prototype; + prototype.visitDumpFeatures(_indexEnv, _visitor); + { // basic test + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + params.push_back("value(5.5).0"); + params.push_back("value(10.5).0"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 2u); + EXPECT_EQUAL(deps.input[0], "value(5.5).0"); + EXPECT_EQUAL(deps.input[1], "value(10.5).0"); + EXPECT_EQUAL(deps.output.size(), 2u); + EXPECT_EQUAL(deps.output[0], "0"); + EXPECT_EQUAL(deps.output[1], "1"); + } +} + +void +RankSetupTest::testSumBlueprint() +{ + SumBlueprint prototype; + prototype.visitDumpFeatures(_indexEnv, _visitor); + { // basic test + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + params.push_back("value(5.5, 
10.5).0"); + params.push_back("value(5.5, 10.5).1"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 2u); + EXPECT_EQUAL(deps.input[0], "value(5.5, 10.5).0"); + EXPECT_EQUAL(deps.input[1], "value(5.5, 10.5).1"); + EXPECT_EQUAL(deps.output.size(), 1u); + EXPECT_EQUAL(deps.output[0], "out"); + } +} + +void +RankSetupTest::testStaticRankBlueprint() +{ + StaticRankBlueprint prototype; + { // basic test + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + params.push_back("sr1"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 0u); + EXPECT_EQUAL(deps.output.size(), 1u); + EXPECT_EQUAL(deps.output[0], "out"); + } + { // invalid params + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + EXPECT_TRUE(!bp->setup(_indexEnv, params)); + params.push_back("sr1"); + params.push_back("sr2"); + EXPECT_TRUE(!bp->setup(_indexEnv, params)); + } +} + +void +RankSetupTest::testChainBlueprint() +{ + ChainBlueprint prototype; + { // chaining + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + params.push_back("basic"); + params.push_back("2"); + params.push_back("4"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 1u); + EXPECT_EQUAL(deps.input[0], "chain(basic,1,4)"); + } + { // leaf node + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + params.push_back("basic"); + params.push_back("1"); + params.push_back("4"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 1u); + EXPECT_EQUAL(deps.input[0], "value(4)"); + } + { // cycle + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + 
params.push_back("cycle"); + params.push_back("1"); + params.push_back("4"); + EXPECT_TRUE(bp->setup(_indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 1u); + EXPECT_EQUAL(deps.input[0], "chain(cycle,4,4)"); + } + { // invalid params + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + std::vector<vespalib::string> params; + EXPECT_TRUE(!bp->setup(_indexEnv, params)); + params.push_back("basic"); + params.push_back("0"); + params.push_back("4"); + EXPECT_TRUE(!bp->setup(_indexEnv, params)); + } +} + +void +RankSetupTest::testCfgValueBlueprint() +{ + CfgValueBlueprint prototype; + IndexEnvironment indexEnv; + indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0"); + indexEnv.getProperties().add("test_cfgvalue(foo).value", "2.0"); + indexEnv.getProperties().add("test_cfgvalue(foo).value", "3.0"); + + { // basic test + Blueprint::UP bp = prototype.createInstance(); + DummyDependencyHandler deps(*bp); + bp->setName("test_cfgvalue(foo)"); + std::vector<vespalib::string> params; + params.push_back("foo"); + + EXPECT_TRUE(bp->setup(indexEnv, params)); + EXPECT_EQUAL(deps.input.size(), 0u); + EXPECT_EQUAL(deps.output.size(), 3u); + EXPECT_EQUAL(deps.output[0], "0"); + EXPECT_EQUAL(deps.output[1], "1"); + EXPECT_EQUAL(deps.output[2], "2"); + + FeatureExecutor::LP fe = bp->createExecutor(_queryEnv); + ValueExecutor *vfe = static_cast<ValueExecutor *>(fe.get()); + EXPECT_EQUAL(vfe->getValues().size(), 3u); + EXPECT_EQUAL(vfe->getValues()[0], 1.0f); + EXPECT_EQUAL(vfe->getValues()[1], 2.0f); + EXPECT_EQUAL(vfe->getValues()[2], 3.0f); + } +} + + +void +RankSetupTest::testCompilation() +{ + { // unknown blueprint + RankSetup rs(_factory, _indexEnv); + rs.setFirstPhaseRank("unknown"); + EXPECT_TRUE(!rs.compile()); + } + { // unknown output for initial rank + RankSetup rs(_factory, _indexEnv); + rs.setFirstPhaseRank("value(2).1"); + EXPECT_TRUE(!rs.compile()); + } + { // unknown output for dependency + RankSetup rs(_factory, 
_indexEnv); + rs.setFirstPhaseRank(FNB().baseName("mysum").parameter("value(2).1").buildName()); + EXPECT_TRUE(!rs.compile()); + } + { // illegal input parameters + RankSetup rs(_factory, _indexEnv); + rs.setFirstPhaseRank("value.0"); + EXPECT_TRUE(!rs.compile()); + } + { // illegal feature name + RankSetup rs(_factory, _indexEnv); + rs.setFirstPhaseRank("value(2)."); + EXPECT_TRUE(!rs.compile()); + } + { // almost too deep dependency graph + RankSetup rs(_factory, _indexEnv); + std::ostringstream oss; + oss << "chain(basic," << (BlueprintResolver::MAX_DEP_DEPTH - 1) << ",4)"; // gives tree height == MAX_DEP_DEPTH + rs.setFirstPhaseRank(oss.str()); + EXPECT_TRUE(rs.compile()); + } + { // too deep dependency graph + RankSetup rs(_factory, _indexEnv); + std::ostringstream oss; + oss << "chain(basic," << BlueprintResolver::MAX_DEP_DEPTH << ",4)"; // gives tree height == MAX_DEP_DEPTH + 1 + rs.setFirstPhaseRank(oss.str()); + EXPECT_TRUE(!rs.compile()); + } + { // cycle + RankSetup rs(_factory, _indexEnv); + // c(c,4,2) -> c(c,3,2) -> c(c,2,2) -> c(c,1,2) -> c(c,2,2) + rs.setFirstPhaseRank("chain(cycle,4,2)"); + EXPECT_TRUE(!rs.compile()); + } +} + +void RankSetupTest::testRankSetup() +{ + using namespace search::fef::indexproperties; + IndexEnvironment env; + env.getProperties().add(rank::FirstPhase::NAME, "firstphase"); + env.getProperties().add(rank::SecondPhase::NAME, "secondphase"); + env.getProperties().add(dump::Feature::NAME, "foo"); + env.getProperties().add(dump::Feature::NAME, "bar"); + env.getProperties().add(matching::NumThreadsPerSearch::NAME, "3"); + env.getProperties().add(matchphase::DegradationAttribute::NAME, "mystaticrankattr"); + env.getProperties().add(matchphase::DegradationAscendingOrder::NAME, "true"); + env.getProperties().add(matchphase::DegradationMaxHits::NAME, "12345"); + env.getProperties().add(matchphase::DegradationMaxFilterCoverage::NAME, "0.19"); + env.getProperties().add(matchphase::DegradationSamplePercentage::NAME, "0.9"); + 
env.getProperties().add(matchphase::DegradationPostFilterMultiplier::NAME, "0.7"); + env.getProperties().add(matchphase::DiversityAttribute::NAME, "mycategoryattr"); + env.getProperties().add(matchphase::DiversityMinGroups::NAME, "37"); + env.getProperties().add(matchphase::DiversityCutoffFactor::NAME, "7.1"); + env.getProperties().add(matchphase::DiversityCutoffStrategy::NAME, "strict"); + env.getProperties().add(hitcollector::HeapSize::NAME, "50"); + env.getProperties().add(hitcollector::ArraySize::NAME, "60"); + env.getProperties().add(hitcollector::EstimatePoint::NAME, "70"); + env.getProperties().add(hitcollector::EstimateLimit::NAME, "80"); + env.getProperties().add(hitcollector::RankScoreDropLimit::NAME, "90.5"); + + RankSetup rs(_factory, env); + rs.configure(); + EXPECT_EQUAL(rs.getFirstPhaseRank(), vespalib::string("firstphase")); + EXPECT_EQUAL(rs.getSecondPhaseRank(), vespalib::string("secondphase")); + ASSERT_TRUE(rs.getDumpFeatures().size() == 2); + EXPECT_EQUAL(rs.getDumpFeatures()[0], vespalib::string("foo")); + EXPECT_EQUAL(rs.getDumpFeatures()[1], vespalib::string("bar")); + EXPECT_EQUAL(rs.getNumThreadsPerSearch(), 3u); + EXPECT_EQUAL(rs.getDegradationAttribute(), "mystaticrankattr"); + EXPECT_EQUAL(rs.isDegradationOrderAscending(), true); + EXPECT_EQUAL(rs.getDegradationMaxHits(), 12345u); + EXPECT_EQUAL(rs.getDegradationSamplePercentage(), 0.9); + EXPECT_EQUAL(rs.getDegradationMaxFilterCoverage(), 0.19); + EXPECT_EQUAL(rs.getDegradationPostFilterMultiplier(), 0.7); + EXPECT_EQUAL(rs.getDiversityAttribute(), "mycategoryattr"); + EXPECT_EQUAL(rs.getDiversityMinGroups(), 37u); + EXPECT_EQUAL(rs.getDiversityCutoffFactor(), 7.1); + EXPECT_EQUAL(rs.getDiversityCutoffStrategy(), "strict"); + EXPECT_EQUAL(rs.getHeapSize(), 50u); + EXPECT_EQUAL(rs.getArraySize(), 60u); + EXPECT_EQUAL(rs.getEstimatePoint(), 70u); + EXPECT_EQUAL(rs.getEstimateLimit(), 80u); + EXPECT_EQUAL(rs.getRankScoreDropLimit(), 90.5); +} + +bool +RankSetupTest::testExecution(const 
vespalib::string & initRank, feature_t initScore, + const vespalib::string & finalRank, feature_t finalScore, uint32_t docId) +{ + return testExecution(_rankEnv, initRank, initScore, finalRank, finalScore, docId); +} + +bool +RankSetupTest::testExecution(const RankEnvironment &rankEnv, const vespalib::string & initRank, feature_t initScore, + const vespalib::string & finalRank, feature_t finalScore, uint32_t docId) +{ + bool ok = true; + RankExecutor re(initRank, finalRank, rankEnv); + ok = ok && re.setup(); + EXPECT_TRUE(ok); + RankResult exp; + exp.addScore(initRank, initScore); + if (finalRank != "") { + exp.addScore(finalRank, finalScore); + } + RankResult rs = re.execute(docId); + ok = ok && (exp == rs); + EXPECT_EQUAL(exp, rs); + return ok; +} + +void +RankSetupTest::testExecution() +{ + { // value executor + vespalib::string v = FNB().baseName("value").parameter("5.5").parameter("10.5").buildName(); + EXPECT_TRUE(testExecution(v + ".0", 5.5f)); + EXPECT_TRUE(testExecution(v + ".0", 5.5f, v + ".1", 10.5f)); + EXPECT_TRUE(testExecution(v, 5.5f)); + } + { // double executor + vespalib::string d1 = FNB().baseName("double").parameter("value(2).0").parameter("value(8).0").buildName(); + vespalib::string d2 = FNB().baseName("double").parameter("value(2)").parameter("value(8)").buildName(); + EXPECT_TRUE(testExecution(d1 + ".0", 4.0f)); + EXPECT_TRUE(testExecution(d1 + ".0", 4.0f, d1 + ".1", 16.0f)); + EXPECT_TRUE(testExecution(d2, 4.0f)); + } + { // sum executor + vespalib::string s1 = FNB().baseName("mysum").parameter("value(2).0").parameter("value(4).0").output("out").buildName(); + vespalib::string s2 = FNB().baseName("mysum").parameter("value(2)").parameter("value(4)").buildName(); + EXPECT_TRUE(testExecution(s1, 6.0f)); + EXPECT_TRUE(testExecution(s2, 6.0f)); + } + { // static rank executor + vespalib::string sr1 = "staticrank(staticrank1)"; + vespalib::string sr2 = "staticrank(staticrank2)"; + for (uint32_t i = 0; i < 5; ++i) { + 
EXPECT_TRUE(testExecution(sr1, static_cast<feature_t>(i + 100), + sr2, static_cast<feature_t>(i + 200), i)); + } + } + { // test topologic sorting + vespalib::string v1 = "value(2)"; + vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName(); + vespalib::string d2 = FNB().baseName("double").parameter(d1).buildName(); + + { + vespalib::string s1 = FNB().baseName("mysum").parameter(v1).parameter(d1).parameter(d2).buildName(); + EXPECT_TRUE(testExecution(s1, 14.0f)); + } + { + vespalib::string s1 = FNB().baseName("mysum").parameter(d2).parameter(d1).parameter(v1).buildName(); + EXPECT_TRUE(testExecution(s1, 14.0f)); + } + } + { // output used by more than one + vespalib::string v1 = "value(2)"; + vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName(); + vespalib::string d2 = FNB().baseName("double").parameter(v1).buildName(); + vespalib::string s1 = FNB().baseName("mysum").parameter(d1).parameter(d2).buildName(); + EXPECT_TRUE(testExecution(s1, 8.0f)); + } + { // output not shared between phases + vespalib::string v1 = "value(2)"; + vespalib::string v2 = "value(8)"; + vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName(); + vespalib::string d2 = FNB().baseName("double").parameter(v2).buildName(); + EXPECT_TRUE(testExecution(d1, 4.0f, d2, 16.0f)); + } + { // output shared between phases + vespalib::string v1 = "value(2)"; + vespalib::string v2 = "value(8)"; + vespalib::string v3 = "value(32)"; + vespalib::string d1 = FNB().baseName("double").parameter(v1).buildName(); + vespalib::string d2 = FNB().baseName("double").parameter(v2).buildName(); + vespalib::string d3 = FNB().baseName("double").parameter(v3).buildName(); + vespalib::string s1 = FNB().baseName("mysum").parameter(d1).parameter(d2).buildName(); + vespalib::string s2 = FNB().baseName("mysum").parameter(d2).parameter(d3).buildName(); + EXPECT_TRUE(testExecution(s1, 20.0f, s2, 80.0f)); + } + { // max dependency depth + uint32_t maxDepth = 
BlueprintResolver::MAX_DEP_DEPTH; + std::ostringstream oss; + oss << "chain(basic," << (maxDepth - 1) << ",4)"; // gives tree height == MAX_DEP_DEPTH; + EXPECT_TRUE(testExecution(oss.str(), 4.0f)); + } + { + IndexEnvironment indexEnv; + indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0"); + indexEnv.getProperties().add("test_cfgvalue(foo).value", "2.0"); + indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0"); + + vespalib::string s = FNB().baseName("mysum") + .parameter("test_cfgvalue(foo).0") + .parameter("test_cfgvalue(foo).1") + .buildName(); + + EXPECT_TRUE(testExecution(RankEnvironment(_factory, indexEnv, _queryEnv), + s, 3.0f, "test_cfgvalue(bar).0", 5.0f)); + } +} + +void +RankSetupTest::testFeatureDump() +{ + { + FeatureDumper dumper(_rankEnv); + dumper.addDumpFeature("value(2)"); + dumper.addDumpFeature("value(4)"); + dumper.addDumpFeature("double(value(4))"); + dumper.addDumpFeature("double(value(8))"); + dumper.addDumpFeature("mysum(value(4),value(16))"); + dumper.addDumpFeature("mysum(double(value(8)),double(value(32)))"); + EXPECT_TRUE(dumper.setup()); + + RankResult exp; + exp.addScore("value(2)", 2.0f); + exp.addScore("value(4)", 4.0f); + exp.addScore(FNB().baseName("double").parameter("value(4)").buildName(), 8.0f); + exp.addScore(FNB().baseName("double").parameter("value(8)").buildName(), 16.0f); + exp.addScore(FNB().baseName("mysum").parameter("value(4)").parameter("value(16)").buildName(), 20.0f); + exp.addScore(FNB().baseName("mysum"). + parameter(FNB().baseName("double").parameter("value(8)").buildName()). + parameter(FNB().baseName("double").parameter("value(32)").buildName()). 
+ buildName(), 80.0f); + EXPECT_EQUAL(exp, dumper.dump()); + } + { + FeatureDumper dumper(_rankEnv); + dumper.addDumpFeature("value(50)"); + dumper.addDumpFeature("value(100)"); + EXPECT_TRUE(dumper.setup()); + RankResult exp; + exp.addScore("value(50)", 50.0f); + exp.addScore("value(100)", 100.0f); + EXPECT_EQUAL(exp, dumper.dump()); + } + { + FeatureDumper dumper(_rankEnv); + dumper.addDumpFeature(FNB().baseName("rankingExpression").parameter("if(4<2,3,4)").buildName()); + EXPECT_TRUE(dumper.setup()); + RankResult exp; + exp.addScore(FNB().baseName("rankingExpression").parameter("if(4<2,3,4)").buildName(), 4.0f); + EXPECT_EQUAL(exp, dumper.dump()); + } + + { + FeatureDumper dumper(_rankEnv); + dumper.addDumpFeature(FNB().baseName("rankingExpression").parameter("if(mysum(value(12),value(10))>2,3,4)").buildName()); + EXPECT_TRUE(dumper.setup()); + RankResult exp; + exp.addScore(FNB().baseName("rankingExpression").parameter("if(mysum(value(12),value(10))>2,3,4)").buildName(), 3.0f); + EXPECT_EQUAL(exp, dumper.dump()); + } + { // dump features indicated by visitation + IndexEnvironment indexEnv; + indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0"); + indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0"); + indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(foo)"); + indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(bar)"); + indexEnv.getProperties().add(indexproperties::rank::FirstPhase::NAME, ""); + indexEnv.getProperties().add(indexproperties::rank::SecondPhase::NAME, ""); + + RankEnvironment rankEnv(_factory, indexEnv, _queryEnv); + FeatureDumper dumper(rankEnv); + dumper.configure(); + EXPECT_TRUE(dumper.setup()); + RankResult exp; + exp.addScore("test_cfgvalue(foo)", 1.0); + exp.addScore("test_cfgvalue(bar)", 5.0); + EXPECT_EQUAL(exp, dumper.dump()); + } + { // ignore features indicated by visitation + IndexEnvironment indexEnv; + indexEnv.getProperties().add("test_cfgvalue(foo).value", "1.0"); + 
indexEnv.getProperties().add("test_cfgvalue(bar).value", "5.0"); + indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(foo)"); + indexEnv.getProperties().add("test_cfgvalue.dump", "test_cfgvalue(bar)"); + indexEnv.getProperties().add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true"); + indexEnv.getProperties().add(indexproperties::dump::Feature::NAME, "test_cfgvalue(foo)"); + indexEnv.getProperties().add(indexproperties::rank::FirstPhase::NAME, ""); + indexEnv.getProperties().add(indexproperties::rank::SecondPhase::NAME, ""); + + RankEnvironment rankEnv(_factory, indexEnv, _queryEnv); + FeatureDumper dumper(rankEnv); + dumper.configure(); + EXPECT_TRUE(dumper.setup()); + RankResult exp; + exp.addScore("test_cfgvalue(foo)", 1.0); + EXPECT_EQUAL(exp, dumper.dump()); + } +} + +void +RankSetupTest::checkFeatures(std::map<vespalib::string, feature_t> &exp, std::map<vespalib::string, feature_t> &actual) +{ + typedef std::map<vespalib::string, feature_t>::const_iterator ITR; + if (!EXPECT_EQUAL(exp.size(), actual.size())) { + return; + } + ITR exp_itr = exp.begin(); + ITR exp_end = exp.end(); + ITR actual_itr = actual.begin(); + ITR actual_end = actual.end(); + for (; exp_itr != exp_end && actual_itr != actual_end; ++exp_itr, ++actual_itr) { + EXPECT_EQUAL(exp_itr->first, actual_itr->first); + EXPECT_APPROX(exp_itr->second, actual_itr->second, 0.001); + } + EXPECT_EQUAL(exp_itr == exp_end, actual_itr == actual_end); +} + +void +RankSetupTest::testFeatureNormalization() +{ + BlueprintFactory factory; + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + factory.addPrototype(Blueprint::SP(new SumBlueprint())); + + IndexEnvironment idxEnv; + RankSetup rankSetup(factory, idxEnv); + + rankSetup.setFirstPhaseRank(" mysum ( value ( 1 ) , value ( 1 ) ) "); + rankSetup.setSecondPhaseRank(" mysum ( value ( 2 ) , value ( 2 ) ) "); + rankSetup.addSummaryFeature(" mysum ( value ( 5 ) , value ( 5 ) ) "); + rankSetup.addSummaryFeature(" mysum ( \"value( 
5 )\" , \"value( 5 )\" ) "); + rankSetup.addDumpFeature(" mysum ( value ( 10 ) , value ( 10 ) ) "); + rankSetup.addDumpFeature(" mysum ( \"value( 10 )\" , \"value( 10 )\" ) "); + + ASSERT_TRUE(rankSetup.compile()); + + { // RANK context + MatchDataLayout layout; + QueryEnvironment queryEnv; + RankProgram::UP firstPhaseProgram = rankSetup.create_first_phase_program(); + RankProgram::UP secondPhaseProgram = rankSetup.create_second_phase_program(); + RankProgram::UP summaryProgram = rankSetup.create_summary_program(); + firstPhaseProgram->setup(layout, queryEnv); + secondPhaseProgram->setup(layout, queryEnv); + summaryProgram->setup(layout, queryEnv); + + firstPhaseProgram->run(1); + EXPECT_APPROX(2.0, *Utils::getScoreFeature(*firstPhaseProgram), 0.001); + secondPhaseProgram->run(1); + EXPECT_APPROX(4.0, *Utils::getScoreFeature(*secondPhaseProgram), 0.001); + summaryProgram->run(1); + + { // rank seed features + std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*summaryProgram); + std::map<vespalib::string, feature_t> exp; + exp["mysum(value(5),value(5))"] = 10.0; + exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0; + TEST_DO(checkFeatures(exp, actual)); + } + { // all rank features (1. phase) + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*firstPhaseProgram); + std::map<vespalib::string, feature_t> exp; + exp["value(1)"] = 1.0; + exp["value(1).0"] = 1.0; + exp["mysum(value(1),value(1))"] = 2.0; + exp["mysum(value(1),value(1)).out"] = 2.0; + TEST_DO(checkFeatures(exp, actual)); + } + { // all rank features (2. 
phase) + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*secondPhaseProgram); + std::map<vespalib::string, feature_t> exp; + exp["value(2)"] = 2.0; + exp["value(2).0"] = 2.0; + exp["mysum(value(2),value(2))"] = 4.0; + exp["mysum(value(2),value(2)).out"] = 4.0; + TEST_DO(checkFeatures(exp, actual)); + } + { // all rank features (summary) + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*summaryProgram); + std::map<vespalib::string, feature_t> exp; + exp["value(5)"] = 5.0; + exp["value(5).0"] = 5.0; + exp["mysum(value(5),value(5))"] = 10.0; + exp["mysum(value(5),value(5)).out"] = 10.0; + exp["mysum(\"value( 5 )\",\"value( 5 )\")"] = 10.0; + exp["mysum(\"value( 5 )\",\"value( 5 )\").out"] = 10.0; + TEST_DO(checkFeatures(exp, actual)); + } + } + + { // DUMP context + MatchDataLayout layout; + QueryEnvironment queryEnv; + RankProgram::UP rankProgram = rankSetup.create_dump_program(); + rankProgram->setup(layout, queryEnv); + rankProgram->run(1); + + { // dump seed features + std::map<vespalib::string, feature_t> actual = Utils::getSeedFeatures(*rankProgram); + std::map<vespalib::string, feature_t> exp; + exp["mysum(value(10),value(10))"] = 20.0; + exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0; + TEST_DO(checkFeatures(exp, actual)); + } + + { // all dump features + std::map<vespalib::string, feature_t> actual = Utils::getAllFeatures(*rankProgram); + std::map<vespalib::string, feature_t> exp; + + exp["value(10)"] = 10.0; + exp["value(10).0"] = 10.0; + + exp["mysum(value(10),value(10))"] = 20.0; + exp["mysum(value(10),value(10)).out"] = 20.0; + + exp["mysum(\"value( 10 )\",\"value( 10 )\")"] = 20.0; + exp["mysum(\"value( 10 )\",\"value( 10 )\").out"] = 20.0; + + TEST_DO(checkFeatures(exp, actual)); + } + } +} + + +RankSetupTest::RankSetupTest() : + _factory(), + _manager(), + _indexEnv(), + _queryEnv(), + _rankEnv(_factory, _indexEnv, _queryEnv), + _visitor() +{ + // register blueprints + setup_fef_test_plugin(_factory); + 
_factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + _factory.addPrototype(Blueprint::SP(new RankingExpressionBlueprint())); + + // setup an original attribute manager with two attributes + search::attribute::Config cfg(search::attribute::BasicType::INT32, + search::attribute::CollectionType::SINGLE); + search::AttributeVector::SP av1 = + search::AttributeFactory::createAttribute("staticrank1", cfg); + search::AttributeVector::SP av2 = + search::AttributeFactory::createAttribute("staticrank2", cfg); + av1->addDocs(5); + av2->addDocs(5); + for (uint32_t i = 0; i < 5; ++i) { + (static_cast<search::IntegerAttribute *>(av1.get()))->update(i, i + 100); + (static_cast<search::IntegerAttribute *>(av2.get()))->update(i, i + 200); + } + av1->commit(); + av2->commit(); + _manager.add(av1); + _manager.add(av2); + + // set the index environment + _queryEnv.setIndexEnv(&_indexEnv); + + // set the manager + _queryEnv.overrideAttributeManager(&_manager); +} + + +int +RankSetupTest::Main() +{ + TEST_INIT("ranksetup_test"); + + testValueBlueprint(); + testDoubleBlueprint(); + testSumBlueprint(); + testStaticRankBlueprint(); + testChainBlueprint(); + testCfgValueBlueprint(); + + testCompilation(); + testRankSetup(); + testExecution(); + testFeatureDump(); + testFeatureNormalization(); + + TEST_DONE(); +} + +TEST_APPHOOK(RankSetupTest); diff --git a/searchlib/src/tests/ranksetup/verify_feature/.gitignore b/searchlib/src/tests/ranksetup/verify_feature/.gitignore new file mode 100644 index 00000000000..69a39cd13f2 --- /dev/null +++ b/searchlib/src/tests/ranksetup/verify_feature/.gitignore @@ -0,0 +1 @@ +searchlib_verify_feature_test_app diff --git a/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt b/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt new file mode 100644 index 00000000000..8ffd79fe327 --- /dev/null +++ b/searchlib/src/tests/ranksetup/verify_feature/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. 
Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_verify_feature_test_app + SOURCES + verify_feature_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_verify_feature_test_app COMMAND searchlib_verify_feature_test_app) diff --git a/searchlib/src/tests/ranksetup/verify_feature/FILES b/searchlib/src/tests/ranksetup/verify_feature/FILES new file mode 100644 index 00000000000..652373e33da --- /dev/null +++ b/searchlib/src/tests/ranksetup/verify_feature/FILES @@ -0,0 +1 @@ +verify_feature_test.cpp diff --git a/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp b/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp new file mode 100644 index 00000000000..1e49cd4aae6 --- /dev/null +++ b/searchlib/src/tests/ranksetup/verify_feature/verify_feature_test.cpp @@ -0,0 +1,58 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/fef/test/plugin/setup.h> +#include <vespa/searchlib/features/valuefeature.h> + +using namespace search::features; +using namespace search::fef::test; +using namespace search::fef; + +struct RankFixture { + BlueprintFactory factory; + IndexEnvironment indexEnv; + + RankFixture() : factory(), indexEnv() { + setup_fef_test_plugin(factory); + factory.addPrototype(Blueprint::SP(new ValueBlueprint())); + } + + bool verify(const std::string &feature) const { + return verifyFeature(factory, indexEnv, feature, "feature verification test"); + } +}; + +TEST_F("verify valid rank feature", RankFixture) { + EXPECT_TRUE(f1.verify("value(1, 2, 3).0")); + EXPECT_TRUE(f1.verify("value(1, 2, 3).1")); + EXPECT_TRUE(f1.verify("value(1, 2, 3).2")); +} + +TEST_F("verify unknown feature", RankFixture) { + EXPECT_FALSE(f1.verify("unknown")); +} + +TEST_F("verify unknown output", RankFixture) { + EXPECT_FALSE(f1.verify("value(1, 2, 3).3")); +} + +TEST_F("verify illegal input parameters", RankFixture) { + EXPECT_FALSE(f1.verify("value.0")); +} + +TEST_F("verify illegal feature name", RankFixture) { + EXPECT_FALSE(f1.verify("value(2).")); +} + +TEST_F("verify too deep dependency graph", RankFixture) { + EXPECT_TRUE(f1.verify("chain(basic, 63, 4)")); + EXPECT_FALSE(f1.verify("chain(basic, 64, 4)")); +} + +TEST_F("verify dependency cycle", RankFixture) { + EXPECT_FALSE(f1.verify("chain(cycle, 4, 2)")); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/sha1/.gitignore b/searchlib/src/tests/sha1/.gitignore new file mode 100644 index 00000000000..e69de29bb2d --- /dev/null +++ b/searchlib/src/tests/sha1/.gitignore diff --git a/searchlib/src/tests/sort/.gitignore b/searchlib/src/tests/sort/.gitignore new file mode 100644 index 00000000000..7207ff4596d --- /dev/null +++ 
b/searchlib/src/tests/sort/.gitignore @@ -0,0 +1,8 @@ +.depend +Makefile +sort_test +uca_stress +/sortbenchmark +searchlib_sort_test_app +searchlib_sortbenchmark_app +searchlib_uca_stress_app diff --git a/searchlib/src/tests/sort/CMakeLists.txt b/searchlib/src/tests/sort/CMakeLists.txt new file mode 100644 index 00000000000..1830952bffd --- /dev/null +++ b/searchlib/src/tests/sort/CMakeLists.txt @@ -0,0 +1,22 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_sortbenchmark_app + SOURCES + sortbenchmark.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_sortbenchmark_app COMMAND searchlib_sortbenchmark_app BENCHMARK) +vespa_add_executable(searchlib_sort_test_app + SOURCES + sort_test.cpp + DEPENDS + searchlib +) +#vespa_add_test(NAME searchlib_sort_test_app COMMAND searchlib_sort_test_app) +vespa_add_executable(searchlib_uca_stress_app + SOURCES + uca.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_uca_stress_app COMMAND searchlib_uca_stress_app BENCHMARK) diff --git a/searchlib/src/tests/sort/DESC b/searchlib/src/tests/sort/DESC new file mode 100644 index 00000000000..ad8ab11f5ba --- /dev/null +++ b/searchlib/src/tests/sort/DESC @@ -0,0 +1 @@ +Testing templatized radixsort. 
diff --git a/searchlib/src/tests/sort/FILES b/searchlib/src/tests/sort/FILES new file mode 100644 index 00000000000..e2ef9d3c1ab --- /dev/null +++ b/searchlib/src/tests/sort/FILES @@ -0,0 +1 @@ +sort.cpp diff --git a/searchlib/src/tests/sort/javaorder.zh b/searchlib/src/tests/sort/javaorder.zh new file mode 100644 index 00000000000..0d29efc99bd --- /dev/null +++ b/searchlib/src/tests/sort/javaorder.zh @@ -0,0 +1,158 @@ + + + +30雜誌30雜誌 +asiatwnewsasiatwnews +AZ時尚旅遊AZ時尚旅遊 +bobo小天才養成誌bobo小天才養成誌 +Career職場情報誌Career職場情報誌 +CheersCheers雜誌 +EMBAEMBA雜誌 +ETtodayETtoday +FASHION QUEEN時尚女王FASHION QUEEN時尚女王 +iLOOKiLOOK電影雜誌 +men&#39;s uno男人誌men&#39;s uno男人誌 +Money 錢Money 錢 +NOWnewsNOWnews +NOWnews今日新聞網 +PAR表演藝術PAR表演藝術雜誌 +Press Association ImagesPress Association Images +Smart智富月刊Smart智富月刊 +Taipei WalkerTaipei Walker +TSNATSNA +TVBSTVBS +Yahoo! Taiwan Specials without layoutYahoo! Taiwan Specials without layout +Yahoo奇摩video.yahoo.com(勿用) +Yahoo奇摩Yahoo奇摩(爆新聞) +Yahoo奇摩Yahoo奇摩(新聞) +Yahoo奇摩Yahoo奇摩(影音) +Yahoo奇摩新聞Yahoo奇摩新聞(報氣象) +YourNewsYourNews +Y特別企畫Y特別企畫 +愛爾達愛爾達 +愛爾達愛爾達電視 +財訊快報財訊快報 +財訊快報財訊快報季刊 +財訊快報季刊財訊快報季刊 +財訊快報季刊財訊快報季刊 +財訊雙週刊財訊雙週刊 +常春月刊常春月刊 +朝鮮日報朝鮮日報 +達志達志 +達志達志 +達志達志 +大家健康雜誌大家健康雜誌 +大師輕鬆讀大師輕鬆讀 +大台灣旅遊網大台灣旅遊網 +東森新聞東森新聞 +東森新聞東森新聞 +東星東星 +俄羅斯新聞網俄羅斯新聞網 +法新社法新社 +非凡新聞非凡新聞 +非凡新聞節目非凡新聞節目 +富爾特消費新聞富爾特消費新聞 +公共電視公共電視 +公視公視 +古美術古美術 +管理雜誌管理雜誌 +光華雜誌台灣光華雜誌 +廣編特輯廣編特輯 +廣告雜誌廣告雜誌 +國際商情雙周刊國際商情雙周刊 +哈佛商業評論哈佛商業評論 +韓國朝鮮日報韓國朝鮮日報 +韓國中央日報韓國中央日報 +韓國中央日報韓國中央日報 +韓星網韓星網 +韓星網韓星網 +華人健康網華人健康網 +華視華視 +華視華視 +華視華視 +環境資訊中心環境資訊中心 +健康醫療網健康醫療網 +健康醫療網健康醫療網 +講義雜誌講義雜誌 +教育廣播電台國立教育廣播電台 +今藝術今藝術 +今周刊今周刊 +今周刊今周刊 +經理人經理人月刊 +鉅亨網鉅亨網 +軍聞社軍聞社 +卡優新聞網卡優新聞網 +康健雜誌康健雜誌 +科學人科學人雜誌 +客家電視客家電視台 +酷搜圖聞酷搜圖聞 +理財周刊理財周刊 +麗台運動報麗台運動報 +聯合文學聯合文學 +聯合新聞網聯合新聞網 +路透社路透社 +路透社路透社 +旅遊經旅遊經 +羅開Golf 頻道羅開Golf 頻道 +媽媽寶寶媽媽寶寶 +美麗佳人美麗佳人雜誌 +美聯社美聯社 +美通社美通社 +民視民視 +民視民視 +明報周刊明報周刊 +男人幫男人幫 +能力雜誌能力雜誌 +年代新聞年代新聞 +年代新聞年代新聞 +年代新聞年代新聞 +年代新聞年代新聞 +紐約時報中文網.紐約時報中文網. +紐約時報中文網.紐約時報中文網. 
/**
 * Non-owning view of a NUL-terminated C string plus a cursor used while
 * radix sorting.
 *
 * A default-constructed instance holds a null pointer; both functors below
 * treat that as the empty string.
 */
struct LoadedStrings
{
    LoadedStrings(const char * v=nullptr) : _value(v), _currRadix(_value) { }

    /**
     * Radix extractor: yields the byte under the cursor and advances the
     * cursor, stopping at the terminating NUL (which is returned repeatedly
     * once reached).
     */
    class ValueRadix
    {
    public:
        char operator () (LoadedStrings & x) const {
            if (x._currRadix == nullptr) {
                return 0;  // null value behaves like ""
            }
            unsigned char c(*x._currRadix);
            if (c) {
                x._currRadix++;
            }
            return c;
        }
    };

    /**
     * Strict weak ordering on the full string value (strcmp order).
     *
     * std::binary_function was deprecated in C++11 and removed in C++17, so
     * the member typedefs it used to supply are declared directly instead.
     */
    class ValueCompare {
    public:
        typedef LoadedStrings first_argument_type;
        typedef LoadedStrings second_argument_type;
        typedef bool result_type;

        bool operator() (const LoadedStrings & x, const LoadedStrings & y) const {
            const char * lhs = (x._value != nullptr) ? x._value : "";
            const char * rhs = (y._value != nullptr) ? y._value : "";
            return strcmp(lhs, rhs) < 0;
        }
    };
    const char * _value;      // start of the string; not owned
    const char * _currRadix;  // current radix position within _value
};
uint64_t operator () (T v) const { return v ^ (std::numeric_limits<T>::max() + 1); } +}; + +template <typename T> +void Test::testSignedIntegerSort() +{ + search::NumericRadixSorter<T, true> S; + S(NULL, 0); + + Array<T> array1(1); + array1[0] = 1567; + S(&array1[0], 1); + ASSERT_TRUE(array1[0] == 1567); + + unsigned int N(0x100000); + Array<T> array(N); + unsigned seed(1); + for(size_t i(0); i < N; i++) { + T v = rand_r(&seed); + array[i] = (i%2) ? v : -v; + } + S(&array[0], N); + for (size_t i(1); i < N; i++) { + ASSERT_TRUE(array[i] >= array[i-1]); + } +} + +void Test::testStringSort() +{ + Array<LoadedStrings> array1(1); + + unsigned int N(0x1000); + Array<LoadedStrings> loaded(N); + std::vector<uint32_t> radixScratchPad(N); + search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, static_cast<LoadedStrings *>(NULL), 0, &radixScratchPad[0], 0); + + array1[0] = LoadedStrings("a"); + search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &array1[0], 1, &radixScratchPad[0], 0); + ASSERT_TRUE(strcmp(array1[0]._value, "a") == 0); + + loaded[0] = LoadedStrings("a"); + for(size_t i(1); i < N; i++) { + loaded[i] = LoadedStrings(""); + } + + search::radix_sort(LoadedStrings::ValueRadix(), LoadedStrings::ValueCompare(), search::AlwaysEof<LoadedStrings>(), 1, &loaded[0], N, &radixScratchPad[0], 0); + LoadedStrings::ValueCompare vc; + for(size_t i(1); i < N; i++) { + ASSERT_TRUE( ! vc(loaded[i], loaded[i-1])); + } +} + +void Test::testStringCaseInsensitiveSort() +{ +} + +void Test::testSortSpec() +{ + { + SortSpec sortspec("-name"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! 
sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() == NULL); + } + + { + SortSpec sortspec("-lowercase(name)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<LowercaseConverter *>(sortspec[0]._converter.get()) != NULL); + } + + { + SortSpec sortspec("-uca(name,nn_no)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,PRIMARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,SECONDARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,TERTIARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,QUATERNARY)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! 
sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,nn_no,IDENTICAL)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,zh)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + SortSpec sortspec("-uca(name,finnes_ikke)"); + EXPECT_EQUAL(sortspec.size(), 1u); + EXPECT_EQUAL(sortspec[0]._field, "name"); + EXPECT_TRUE( ! sortspec[0]._ascending); + EXPECT_TRUE(sortspec[0]._converter.get() != NULL); + EXPECT_TRUE(dynamic_cast<UcaConverter *>(sortspec[0]._converter.get()) != NULL); + } + { + try { + SortSpec sortspec("-uca(name,nn_no,NTERTIARY)"); + EXPECT_TRUE(false); + } catch (const std::runtime_error & e) { + EXPECT_TRUE(true); + EXPECT_TRUE(strcmp(e.what(), "Illegal uca collation strength : NTERTIARY") == 0); + } + } +} + +void Test::testSameAsJavaOrder() +{ + std::vector<vespalib::string> javaOrder; + std::ifstream is("javaorder.zh"); + while (!is.eof()) { + std::string line; + getline(is, line); + if (!is.eof()) { + javaOrder.push_back(line); + } + } + EXPECT_EQUAL(158u, javaOrder.size()); + search::common::UcaConverter uca("zh", "PRIMARY"); + vespalib::ConstBufferRef fkey = uca.convert(vespalib::ConstBufferRef(javaOrder[0].c_str(), javaOrder[0].size())); + vespalib::string prev(fkey.c_str(), fkey.size()); + for (size_t i(1); i < javaOrder.size(); i++) { + vespalib::ConstBufferRef key = uca.convert(vespalib::ConstBufferRef(javaOrder[i].c_str(), 
javaOrder[i].size())); + vespalib::HexDump dump(key.c_str(), key.size()); + vespalib::string current(key.c_str(), key.size()); + UErrorCode status(U_ZERO_ERROR); + UCollationResult cr = uca.getCollator().compareUTF8(javaOrder[i-1].c_str(), javaOrder[i].c_str(), status); + std::cout << std::setw(3) << i << ": " << status << "(" << u_errorName(status) << ") - " << cr << " '" << dump << "' : '" << javaOrder[i] << "'" << std::endl; + EXPECT_TRUE(prev <= current); + EXPECT_TRUE(U_SUCCESS(status)); + EXPECT_TRUE(cr == UCOL_LESS || cr == UCOL_EQUAL); + prev = current; + } +} + + +TEST_APPHOOK(Test); + +int Test::Main() +{ + TEST_INIT("sort_test"); + + testUnsignedIntegerSort(); + testSignedIntegerSort<int32_t>(); + testSignedIntegerSort<int64_t>(); + testStringSort(); + testStringCaseInsensitiveSort(); + testSortSpec(); + testIcu(); + testSameAsJavaOrder(); + + TEST_DONE(); +} diff --git a/searchlib/src/tests/sort/sortbenchmark.cpp b/searchlib/src/tests/sort/sortbenchmark.cpp new file mode 100644 index 00000000000..1309cf57d5d --- /dev/null +++ b/searchlib/src/tests/sort/sortbenchmark.cpp @@ -0,0 +1,115 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/sort.h> +#include <vespa/vespalib/util/array.h> +#include <vector> + +LOG_SETUP("sort_test"); + +using vespalib::Array; +using vespalib::ConstBufferRef; + +class Test : public vespalib::TestApp +{ +public: + typedef std::vector<uint32_t> V; + std::vector< std::vector<uint32_t> > _data; + int Main(); + void generateVectors(size_t numVectors, size_t values); + V merge(); + void twoWayMerge(); + V cat() const; +}; + +void Test::generateVectors(size_t numVectors, size_t values) +{ + _data.resize(numVectors); + for (size_t j(0); j < numVectors; j++) { + V & v(_data[j]); + v.resize(values); + for (size_t i(0); i < values; i++) { + v[i] = i; + } + } +} + +Test::V Test::merge() +{ + twoWayMerge(); + return _data[0]; +} + +void Test::twoWayMerge() +{ + std::vector<V> n((_data.size()+1)/2); + + for ( size_t i(0), m(_data.size()/2); i < m; i++) { + const V & a = _data[i*2 + 0]; + const V & b = _data[i*2 + 1]; + n[i].resize(a.size() + b.size()); + std::merge(a.begin(), a.end(), b.begin(), b.end(), n[i].begin()); + } + if (_data.size()%2) { + n[n.size()-1].swap(_data[_data.size() - 1]); + } + _data.swap(n); + if (_data.size() > 1) { + twoWayMerge(); + } +} + +Test::V Test::cat() const +{ + size_t sum(0); + for (size_t i(0), m(_data.size()); i < m; i++) { + sum += _data[i].size(); + } + V c; + c.reserve(sum); + for (size_t i(0), m(_data.size()); i < m; i++) { + const V & v(_data[i]); + c.insert(c.end(), v.begin(), v.end()); + } + + return c; +} + +TEST_APPHOOK(Test); + +int Test::Main() +{ + TEST_INIT("sortbenchmark"); + size_t numVectors(11); + size_t values(10000000); + vespalib::string type("radix"); + if (_argc > 1) { + values = strtol(_argv[1], NULL, 0); + if (_argc > 2) { + numVectors = strtol(_argv[2], NULL, 0); + if (_argc > 2) { + type = _argv[3]; + } + } + } + + printf("Start with %ld vectors with %ld values and type '%s'(radix, 
qsort, merge)\n", numVectors, values, type.c_str()); + generateVectors(numVectors, values); + printf("Start cat\n"); + V v = cat(); + printf("Cat %ld values\n", v.size()); + if (type == "merge") { + V m = merge(); + printf("Merged %ld values\n", m.size()); + } else if (type == "qsort") { + std::sort(v.begin(), v.end()); + printf("sorted %ld value with std::sort\n", v.size()); + } else { + search::NumericRadixSorter<uint32_t, true> S; + S(&v[0], v.size()); + printf("sorted %ld value with radix::sort\n", v.size()); + } + + TEST_DONE(); +} diff --git a/searchlib/src/tests/sort/uca.cpp b/searchlib/src/tests/sort/uca.cpp new file mode 100644 index 00000000000..b9225c94a66 --- /dev/null +++ b/searchlib/src/tests/sort/uca.cpp @@ -0,0 +1,121 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/common/sort.h> +#include <vespa/searchlib/common/sortspec.h> +#include <vespa/searchlib/common/converters.h> +#include <vespa/vespalib/util/array.h> +#include <memory> +#include <string> +#include <vector> +#include <stdexcept> +#include <unicode/ustring.h> + +LOG_SETUP("uca_stress"); + +using icu::Collator; + +class Test : public vespalib::TestApp +{ +public: + int Main(); + void testFromDat(); +}; + + +void Test::testFromDat() +{ + size_t badnesses = 0; + + std::string startMark("abc"); + std::string midMark("def"); + std::string endMark("ghi"); + + UErrorCode status = U_ZERO_ERROR; + auto coll = std::unique_ptr<Collator>(Collator::createInstance(icu::Locale("en"), status)); + + coll->setStrength(Collator::PRIMARY); + + std::vector<uint16_t> u16buffer(100); + std::vector<uint8_t> u8buffer(10); + + int fd = open("sort-blobs.dat", O_RDONLY); + char sbuf[4]; + + int num=0; + + uint32_t atleast = 0; + + while (read(fd, sbuf, 4) == 4) { + if (startMark == sbuf) { + uint32_t len = 0; + int r 
= read(fd, &len, 4); + + EXPECT_EQUAL(4, r); + r = read(fd, sbuf, 4); + EXPECT_EQUAL(4, r); + EXPECT_EQUAL(midMark, sbuf); + + if (u16buffer.size() < len) { + u16buffer.resize(len); + } + r = read(fd, &u16buffer[0], len*2); + EXPECT_EQUAL((int)len*2, r); + + r = read(fd, sbuf, 4); + EXPECT_EQUAL(4, r); + EXPECT_EQUAL(endMark, sbuf); + + uint32_t wanted = coll->getSortKey(&u16buffer[0], len, NULL, 0); + + EXPECT_TRUE(wanted > 0); + EXPECT_TRUE(wanted >= len); + EXPECT_TRUE(wanted < len*6); + + if (wanted + 20 > u8buffer.size()) { + u8buffer.resize(wanted+20); + } + + for (uint32_t pretend = 1; pretend < wanted+8; ++pretend) { + memset(&u8buffer[0], 0x99, u8buffer.size()); + uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], pretend); + EXPECT_EQUAL(wanted, got); + + if (u8buffer[pretend+1] != 0x99) { + printf("wrote 2 bytes too far: wanted space %d, pretend allocated %d, last good=%02x, bad=%02x %02x\n", + wanted, pretend, u8buffer[pretend-1], + u8buffer[pretend], u8buffer[pretend+1]); + } else if (u8buffer[pretend] != 0x99) { + ++badnesses; + if (wanted > atleast) { + atleast = wanted; + printf("wrote 1 byte too far: wanted space %d, pretend allocated %d, last good=%02x, bad=%02x\n", + wanted, pretend, u8buffer[pretend-1], u8buffer[pretend]); + } + } + } + + memset(&u8buffer[0], 0x99, u8buffer.size()); + uint32_t got = coll->getSortKey(&u16buffer[0], len, &u8buffer[0], u8buffer.size()); + EXPECT_EQUAL(wanted, got); + + EXPECT_EQUAL('\0', u8buffer[got-1]); + EXPECT_EQUAL((uint8_t)0x99, u8buffer[got]); + } + if (++num >= 10000) { + TEST_FLUSH(); + num=0; + } + } + EXPECT_EQUAL(0u, badnesses); +} + +TEST_APPHOOK(Test); + +int Test::Main() +{ + TEST_INIT("uca_stress"); + testFromDat(); + TEST_DONE(); +} diff --git a/searchlib/src/tests/sortresults/.gitignore b/searchlib/src/tests/sortresults/.gitignore new file mode 100644 index 00000000000..1b191671559 --- /dev/null +++ b/searchlib/src/tests/sortresults/.gitignore @@ -0,0 +1,7 @@ +*.core +.depend 
/**
 * Build a 32-bit pseudo-random number from two calls to random(): the low
 * 16 bits of the first call become the upper half, the low 16 bits of the
 * second call become the lower half.
 */
unsigned int
myrandom()
{
    // separate statements keep the two random() calls in a fixed order
    const unsigned int upperHalf = random() & 0xffff;
    const unsigned int lowerHalf = random() & 0xffff;
    return (upperHalf << 16) + lowerHalf;
}
rank(%d)\n", + i, i - 1); + ok = false; + break; + } + if (i >= ntop && + array[i]._rankValue > minmax) { + printf("ERROR: rank(%d) > rank(%d)\n", + i, ntop - 1); + ok = false; + break; + } + } + delete [] array; + printf("CASE %03d: [%d/%d] %s\n", caseNum, ntop, n, + (ok)? "PASS" : "FAIL"); + return ok; +} + + +int +main(int argc, char **argv) +{ + (void) argc; + (void) argv; + + bool ok = true; + unsigned int caseNum = 0; + unsigned int i; + + ok &= test_sort(++caseNum, 1, 1); + for (i = 0; i < 5; i++) { + ok &= test_sort(++caseNum, 2, 2); + } + for (i = 0; i < 5; i++) { + ok &= test_sort(++caseNum, 50, 50); + } + for (i = 0; i < 5; i++) { + ok &= test_sort(++caseNum, 50000, 1); + ok &= test_sort(++caseNum, 50000, 500); + ok &= test_sort(++caseNum, 50000, 1000); + ok &= test_sort(++caseNum, 50000, 2000); + ok &= test_sort(++caseNum, 50000, 5000); + ok &= test_sort(++caseNum, 50000, 10000); + ok &= test_sort(++caseNum, 50000, 50000); + } + printf("CONCLUSION: TEST %s\n", (ok)? "PASSED" : "FAILED"); + return (ok)? 0 : 1; +} diff --git a/searchlib/src/tests/sortspec/.gitignore b/searchlib/src/tests/sortspec/.gitignore new file mode 100644 index 00000000000..dff7336208a --- /dev/null +++ b/searchlib/src/tests/sortspec/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +multilevelsort_test +searchlib_multilevelsort_test_app diff --git a/searchlib/src/tests/sortspec/CMakeLists.txt b/searchlib/src/tests/sortspec/CMakeLists.txt new file mode 100644 index 00000000000..a436bb744c5 --- /dev/null +++ b/searchlib/src/tests/sortspec/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_multilevelsort_test_app + SOURCES + multilevelsort.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_multilevelsort_test_app COMMAND searchlib_multilevelsort_test_app) diff --git a/searchlib/src/tests/sortspec/multilevelsort.cpp b/searchlib/src/tests/sortspec/multilevelsort.cpp new file mode 100644 index 00000000000..f151bfaf132 --- /dev/null +++ b/searchlib/src/tests/sortspec/multilevelsort.cpp @@ -0,0 +1,413 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("multilevelsort_test"); +#include <vespa/searchlib/common/sortresults.h> +#include <vespa/searchlib/attribute/attribute.h> +#include <vespa/searchlib/attribute/attributeguard.h> +#include <vespa/searchlib/attribute/integerbase.h> +#include <vespa/searchlib/attribute/floatbase.h> +#include <vespa/searchlib/attribute/stringbase.h> +#include <vespa/searchlib/attribute/attributefactory.h> +#include <vespa/searchlib/attribute/attributevector.hpp> +#include <vespa/vespalib/testkit/testapp.h> +#include <map> +#include <sstream> + +using namespace search; + +typedef FastS_SortSpec::VectorRef VectorRef; +typedef IntegerAttributeTemplate<uint8_t> Uint8; +typedef IntegerAttributeTemplate<int8_t> Int8; +typedef IntegerAttributeTemplate<uint16_t> Uint16; +typedef IntegerAttributeTemplate<int16_t> Int16; +typedef IntegerAttributeTemplate<uint32_t> Uint32; +typedef IntegerAttributeTemplate<int32_t> Int32; +typedef IntegerAttributeTemplate<uint64_t> Uint64; +typedef IntegerAttributeTemplate<int64_t> Int64; +typedef FloatingPointAttributeTemplate<float> Float; +typedef FloatingPointAttributeTemplate<double> Double; +typedef std::map<std::string, AttributeVector::SP > VectorMap; +typedef AttributeVector::SP AttributePtr; +using search::attribute::Config; +using search::attribute::BasicType; +using search::attribute::CollectionType; + +class 
MultilevelSortTest : public vespalib::TestApp +{ +public: + enum AttrType { + INT8, + INT16, + INT32, + INT64, + FLOAT, + DOUBLE, + STRING, + RANK, + DOCID, + NONE + }; + struct Spec { + Spec() : _name("unknown"), _type(NONE), _asc(true) {} + Spec(const std::string &name, AttrType type) : _name(name), _type(type), _asc(true) {} + Spec(const std::string &name, AttrType type, bool asc) : _name(name), _type(type), _asc(asc) {} + std::string _name; + AttrType _type; + bool _asc; + }; +private: + int _sortMethod; + template<typename T> + T getRandomValue() { + T min = std::numeric_limits<T>::min(); + T max = std::numeric_limits<T>::max(); + return min + static_cast<T>((max - min) * (((float)rand() / (float)RAND_MAX))); + } + template<typename T> + void fill(IntegerAttribute *attr, uint32_t size, uint32_t unique = 0); + template<typename T> + void fill(FloatingPointAttribute *attr, uint32_t size, uint32_t unique = 0); + void fill(StringAttribute *attr, uint32_t size, const std::vector<std::string> &values); + template<typename T, typename V> + int compareTemplate(T *vector, uint32_t a, uint32_t b); + int compare(AttributeVector *vector, AttrType type, + uint32_t a, uint32_t b); + void sortAndCheck(const std::vector<Spec> &spec, uint32_t num, + uint32_t unique, const std::vector<std::string> &strValues); + void testSortMethod(int method); +public: + MultilevelSortTest() : _sortMethod(0) { srand(time(NULL)); } + int Main(); +}; + +template<typename T> +void MultilevelSortTest::fill(IntegerAttribute *attr, uint32_t size, + uint32_t unique) +{ + ASSERT_TRUE(attr->addDocs(size)); + std::vector<T> values; + for (uint32_t j = 0; j < unique; ++j) { + if (j % 2 == 0) { + values.push_back(std::numeric_limits<T>::min() + static_cast<T>(j)); + } else { + values.push_back(std::numeric_limits<T>::max() - static_cast<T>(j)); + } + } + for (uint32_t i = 0; i < size; ++i) { + if (unique == 0) { + attr->update(i, getRandomValue<T>()); + } else { + uint32_t idx = rand() % values.size(); + 
attr->update(i, values[idx]); + } + } +} + +template<typename T> +void MultilevelSortTest::fill(FloatingPointAttribute *attr, uint32_t size, + uint32_t unique) +{ + ASSERT_TRUE(attr->addDocs(size)); + std::vector<T> values; + for (uint32_t j = 0; j < unique; ++j) { + if (j % 2 == 0) { + values.push_back(std::numeric_limits<T>::min() + static_cast<T>(j)); + } else { + values.push_back(std::numeric_limits<T>::max() - static_cast<T>(j)); + } + } + for (uint32_t i = 0; i < size; ++i) { + if (unique == 0) { + attr->update(i, getRandomValue<T>()); + } else { + uint32_t idx = rand() % values.size(); + //LOG(info, "fill vector<%s>::doc<%d> = %f (idx=%d)", attr->getName().c_str(), i, values[idx], idx); + attr->update(i, values[idx]); + } + } +} + +void +MultilevelSortTest::fill(StringAttribute *attr, uint32_t size, + const std::vector<std::string> &values) +{ + ASSERT_TRUE(attr->addDocs(size)); + for (uint32_t i = 0; i < size; ++i) { + if (values.empty()) { + uint32_t len = 1 + static_cast<uint32_t>(127 * (((float)rand() / (float)RAND_MAX))); + std::string value; + for (uint32_t j = 0; j < len; ++j) { + char c = 'a' + static_cast<char>(('Z' - 'a') * (((float)rand() / (float)RAND_MAX))); + value.append(1, c); + } + attr->update(i, value.c_str()); + } else { + uint32_t idx = rand() % values.size(); + //LOG(info, "fill vector<%s>::doc<%d> = %s (idx=%d)", attr->getName().c_str(), + // i, values[idx].c_str(), idx); + attr->update(i, values[idx].c_str()); + } + } +} + +template<typename T, typename V> +int +MultilevelSortTest::compareTemplate(T *vector, uint32_t a, uint32_t b) +{ + V va; + V vb; + vector->getAll(a, &va, 1); + vector->getAll(b, &vb, 1); + if (va == vb) { + return 0; + } else if (va < vb) { + return -1; + } + return 1; +} + +int +MultilevelSortTest::compare(AttributeVector *vector, AttrType type, + uint32_t a, uint32_t b) +{ + if (type == INT8) { + return compareTemplate<Int8, int8_t>(static_cast<Int8*>(vector), a, b); + } else if (type == INT16) { + return 
compareTemplate<Int16, int16_t>(static_cast<Int16*>(vector), a, b); + } else if (type == INT32) { + return compareTemplate<Int32, int32_t>(static_cast<Int32*>(vector), a, b); + } else if (type == INT64) { + return compareTemplate<Int64, int64_t>(static_cast<Int64*>(vector), a, b); + } else if (type == FLOAT) { + return compareTemplate<Float, float>(static_cast<Float*>(vector), a, b); + } else if (type == DOUBLE) { + return compareTemplate<Double, double>(static_cast<Double*>(vector), a, b); + } else if (type == STRING) { + StringAttribute *vString = static_cast<StringAttribute*>(vector); + const char *va = vString->get(a); + const char *vb = vString->get(b); + std::string sa(va); + std::string sb(vb); + if (sa == sb) { + return 0; + } else if (sa < sb) { + return -1; + } + return 1; + } else { + ASSERT_TRUE(false); + return 0; + } +} + +void +MultilevelSortTest::sortAndCheck(const std::vector<Spec> &spec, uint32_t num, + uint32_t unique, const std::vector<std::string> &strValues) +{ + VectorMap vec; + // generate attribute vectors + for (uint32_t i = 0; i < spec.size(); ++i) { + std::string name = spec[i]._name; + AttrType type = spec[i]._type; + if (type == INT8) { + Config cfg(BasicType::INT8, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + fill<int8_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique); + } else if (type == INT16) { + Config cfg(BasicType::INT16, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + fill<int16_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique); + } else if (type == INT32) { + Config cfg(BasicType::INT32, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + fill<int32_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique); + } else if (type == INT64) { + Config cfg(BasicType::INT64, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + 
fill<int64_t>(static_cast<IntegerAttribute *>(vec[name].get()), num, unique); + } else if (type == FLOAT) { + Config cfg(BasicType::FLOAT, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + fill<float>(static_cast<FloatingPointAttribute *>(vec[name].get()), num, unique); + } else if (type == DOUBLE) { + Config cfg(BasicType::DOUBLE, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + fill<double>(static_cast<FloatingPointAttribute *>(vec[name].get()), num, unique); + } else if (type == STRING) { + Config cfg(BasicType::STRING, CollectionType::SINGLE); + vec[name] = AttributeFactory::createAttribute(name, cfg); + fill(static_cast<StringAttribute *>(vec[name].get()), num, strValues); + } + if (vec[name].get() != NULL) + vec[name]->commit(); + } + + RankedHit *hits = new RankedHit[num]; + for (uint32_t i = 0; i < num; ++i) { + hits[i]._docId = i; + hits[i]._rankValue = getRandomValue<uint32_t>(); + } + + vespalib::Clock clock; + vespalib::Doom doom(clock, std::numeric_limits<long>::max()); + FastS_SortSpec sorter(doom, _sortMethod); + // init sorter with sort data + for(uint32_t i = 0; i < spec.size(); ++i) { + AttributeGuard ag; + if (spec[i]._type == RANK) { + sorter._vectors.push_back + (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_RANK : + FastS_SortSpec::DESC_RANK, NULL, NULL)); + } else if (spec[i]._type == DOCID) { + sorter._vectors.push_back + (VectorRef(spec[i]._asc ? FastS_SortSpec::ASC_DOCID : + FastS_SortSpec::DESC_DOCID, NULL, NULL)); + } else { + const search::attribute::IAttributeVector * v = vec[spec[i]._name].get(); + sorter._vectors.push_back + (VectorRef(spec[i]._asc ? 
FastS_SortSpec::ASC_VECTOR : + FastS_SortSpec::DESC_VECTOR, v, NULL)); + } + } + + FastOS_Time timer; + timer.SetNow(); + sorter.sortResults(hits, num, num); + LOG(info, "sort time = %f ms", timer.MilliSecsToNow()); + + uint32_t *offsets = new uint32_t[num + 1]; + char *buf = new char[sorter.getSortDataSize(0, num)]; + sorter.copySortData(0, num, offsets, buf); + + // check results + for (uint32_t i = 0; i < num - 1; ++i) { + for (uint32_t j = 0; j < spec.size(); ++j) { + int cmp = 0; + if (spec[j]._type == RANK) { + if (hits[i]._rankValue < hits[i+1]._rankValue) { + cmp = -1; + } else if (hits[i]._rankValue > hits[i+1]._rankValue) { + cmp = 1; + } + } else if (spec[j]._type == DOCID) { + if (hits[i]._docId < hits[i+1]._docId) { + cmp = -1; + } else if (hits[i]._docId > hits[i+1]._docId) { + cmp = 1; + } + } else { + AttributeVector *av = vec[spec[j]._name].get(); + cmp = compare(av, spec[j]._type, + hits[i]._docId, hits[i+1]._docId); + } + if (spec[j]._asc) { + EXPECT_TRUE(cmp <= 0); + if (cmp < 0) { + break; + } + } else { + EXPECT_TRUE(cmp >= 0); + if (cmp > 0) { + break; + } + } + } + // check binary sort data + uint32_t minLen = std::min(sorter._sortDataArray[i]._len, + sorter._sortDataArray[i+1]._len); + int cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[i]._idx, + &sorter._binarySortData[0] + sorter._sortDataArray[i+1]._idx, + minLen); + EXPECT_TRUE(cmp <= 0); + EXPECT_TRUE(sorter._sortDataArray[i]._len == (offsets[i+1] - offsets[i])); + cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[i]._idx, + buf + offsets[i], sorter._sortDataArray[i]._len); + EXPECT_TRUE(cmp == 0); + } + EXPECT_TRUE(sorter._sortDataArray[num-1]._len == (offsets[num] - offsets[num-1])); + int cmp = memcmp(&sorter._binarySortData[0] + sorter._sortDataArray[num-1]._idx, + buf + offsets[num-1], sorter._sortDataArray[num-1]._len); + EXPECT_TRUE(cmp == 0); + + delete [] hits; + delete [] offsets; + delete [] buf; +} + +int +MultilevelSortTest::Main() +{ + 
TEST_INIT("multilevelsort_test"); + + testSortMethod(0); + testSortMethod(1); + testSortMethod(2); + + TEST_DONE(); +} + +void MultilevelSortTest::testSortMethod(int method) +{ + _sortMethod = method; + { + std::vector<Spec> spec; + spec.push_back(Spec("int8", INT8)); + spec.push_back(Spec("int16", INT16)); + spec.push_back(Spec("int32", INT32)); + spec.push_back(Spec("int64", INT64)); + spec.push_back(Spec("float", FLOAT)); + spec.push_back(Spec("double", DOUBLE)); + spec.push_back(Spec("string", STRING)); + spec.push_back(Spec("rank", RANK)); + spec.push_back(Spec("docid", DOCID)); + + std::vector<std::string> strValues; + strValues.push_back("applications"); + strValues.push_back("places"); + strValues.push_back("system"); + strValues.push_back("vespa search core"); + + srand(12345); + sortAndCheck(spec, 5000, 4, strValues); + srand(time(NULL)); + sortAndCheck(spec, 5000, 4, strValues); + + strValues.push_back("multilevelsort"); + strValues.push_back("trondheim"); + strValues.push_back("ubuntu"); + strValues.push_back("fastserver4"); + + srand(56789); + sortAndCheck(spec, 5000, 8, strValues); + srand(time(NULL)); + sortAndCheck(spec, 5000, 8, strValues); + } + { + std::vector<std::string> none; + uint32_t num = 50; + sortAndCheck(std::vector<Spec>(1, Spec("int8", INT8, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("int16", INT16, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("int32", INT32, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("int64", INT64, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("float", FLOAT, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("double", DOUBLE, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("string", STRING, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("rank", RANK, true)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("docid", DOCID, true)), num, 0, none); + + sortAndCheck(std::vector<Spec>(1, 
Spec("int8", INT8, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("int16", INT16, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("int32", INT32, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("int64", INT64, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("float", FLOAT, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("double", DOUBLE, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("string", STRING, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("rank", RANK, false)), num, 0, none); + sortAndCheck(std::vector<Spec>(1, Spec("docid", DOCID, false)), num, 0, none); + } + +} + + +TEST_APPHOOK(MultilevelSortTest); diff --git a/searchlib/src/tests/stackdumpiterator/.gitignore b/searchlib/src/tests/stackdumpiterator/.gitignore new file mode 100644 index 00000000000..6f8239c6cc1 --- /dev/null +++ b/searchlib/src/tests/stackdumpiterator/.gitignore @@ -0,0 +1,7 @@ +*.core +.depend +Makefile +core +core.* +stackdumpiteratortest +searchlib_stackdumpiteratortest_app diff --git a/searchlib/src/tests/stackdumpiterator/CMakeLists.txt b/searchlib/src/tests/stackdumpiterator/CMakeLists.txt new file mode 100644 index 00000000000..dceeaad3f26 --- /dev/null +++ b/searchlib/src/tests/stackdumpiterator/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_stackdumpiteratortest_app + SOURCES + stackdumpiteratortest.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stackdumpiteratortest_app COMMAND searchlib_stackdumpiteratortest_app - *1000) diff --git a/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp new file mode 100644 index 00000000000..19ce69550f7 --- /dev/null +++ b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.cpp @@ -0,0 +1,316 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/parsequery/simplequerystack.h> +#include <vespa/searchlib/parsequery/stackdumpiterator.h> +#include "stackdumpiteratortest.h" + +#define NUMTESTS 5 + +int +StackDumpIteratorTest::Main() +{ + int doTest[NUMTESTS]; + int low, high, accnum, num; + int indicator; + bool verify = false; + int multiplier = 1; + bool failed = false; + + if (_argc == 1) { + Usage(_argv[0]); + return 1; + } + + // default initialize to not run any tests. 
+ for (int n = 0; n < NUMTESTS; n++) + doTest[n] = 0; + + // parse the command line arguments + for (int i = 1; i < _argc; i++) { + low = 0; + high = NUMTESTS - 1; + char *p = _argv[i]; + + // Check if a multiplier is specified + if (*p == '*') { + p++; + accnum = 0; + while (*p != '\0') { + num = *p - '0'; + accnum = accnum * 10 + num; + p++; + } + multiplier = accnum; + continue; + } + + // Default is to run the tests specified, unless the first char is '/' + indicator = 1; + if (*p == '/') { + p++; + indicator = 0; + } + + // Find the first number + accnum = 0; + while (*p != '-' && *p != '\0') { + num = *p - '0'; + accnum = accnum * 10 + num; + p++; + } + if (accnum >= NUMTESTS) + continue; + low = accnum; + // Check for range operator + if (*p == '-') { + p++; + // Find the second number + accnum = 0; + while (*p != '\0') { + num = *p - '0'; + accnum = accnum * 10 + num; + p++; + } + if (accnum > 0) + high = accnum < NUMTESTS ? accnum : NUMTESTS-1; + } else + high = low; + + // Indicate the runrequest for the desired range. + for (int j = low; j <= high; j++) + doTest[j] = indicator; + } + + // Remove unused tests. + // doTest[1] = 0; + + // Remember time + if (multiplier > 1) { + printf("Running all tests %d times.\n", multiplier); + verify = false; + } else { + verify = true; + } + + int testCnt = 0; + + FastOS_Time timer; + timer.SetNow(); + + // Actually run the tests that we wanted. + for (int j = 0; j < multiplier; j++) + for (int k = 0; k < NUMTESTS; k++) + if (doTest[k] == 1) { + if (!RunTest(k, verify)) + failed = true; + testCnt++; + } + + // Print time taken + double timeTaken = timer.MilliSecsToNow(); + + printf("Time taken : %f ms\n", timeTaken); + printf("Number of tests run: %d\n", testCnt); + double avgTestPrMSec = static_cast<double>(testCnt) / timeTaken; + printf("Tests pr Sec: %f\n", avgTestPrMSec * 1000.0); + + return failed ? 
1 : 0; +} + +#define ITERATOR_NOERROR 0x0 +#define ITERATOR_ERROR_WRONG_NUM 0x1 +#define ITERATOR_ERROR_WRONG_TYPE 0x2 +#define ITERATOR_ERROR_WRONG_ARITY 0x4 +#define ITERATOR_ERROR_WRONG_INDEX 0x10 +#define ITERATOR_ERROR_WRONG_TERM 0x20 +#define ITERATOR_ERROR_WRONG_GETINDEX 0x40 +#define ITERATOR_ERROR_WRONG_GETTERM 0x80 +#define ITERATOR_ERROR_WRONG_SIZE 0x100 + +bool +StackDumpIteratorTest::ShowResult(int testNo, + search::SimpleQueryStackDumpIterator &actual, + search::SimpleQueryStack &correct, + unsigned int expected) +{ + unsigned int results = 0; + const char *idx_ptr; + const char *term_ptr; + size_t idx_len; + size_t term_len; + + int num = 0; + + search::ParseItem *item; + + printf("%03d: ", testNo); + + while (actual.next()) { + actual.getIndexName(&idx_ptr, &idx_len); + actual.getTerm(&term_ptr, &term_len); + +#if 0 + printf("StackItem #%d: %d %d '%.*s:%.*s'\n", + actual.getNum(), + actual.getType(), + actual.getArity(), + idx_len, idx_ptr, + term_len, term_ptr); +#endif + + item = correct.Pop(); + + if (num++ != actual.getNum()) { + results |= ITERATOR_ERROR_WRONG_NUM; + delete item; + break; + } + if (item->Type() != actual.getType()) { + results |= ITERATOR_ERROR_WRONG_TYPE; + delete item; + break; + } + if (item->_arity != actual.getArity()) { + results |= ITERATOR_ERROR_WRONG_ARITY; + delete item; + break; + } + if (strncmp(item->_indexName.c_str(), idx_ptr, idx_len) != 0) { + results |= ITERATOR_ERROR_WRONG_INDEX; + delete item; + break; + } + if (strncmp(item->_term.c_str(), term_ptr, term_len) != 0) { + results |= ITERATOR_ERROR_WRONG_TERM; + delete item; + break; + } + + delete item; + } + if (correct.GetSize() != 0) results |= ITERATOR_ERROR_WRONG_SIZE; + + if (results == expected) + printf("ok\n"); + else + printf("fail. exp: %x, result: %x\n", expected, results); + + return results == expected; +} + +/** + * + * @param testno The test to run. + * @param verify Verify the result of the test. 
+ */ +bool +StackDumpIteratorTest::RunTest(int testno, bool verify) +{ + search::SimpleQueryStack stack; + search::RawBuf buf(32768); + + switch (testno) { + case 0: + { + // Simple term query + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foobar")); + + stack.AppendBuffer(&buf); + search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + + if (verify) + return ShowResult(testno, si, stack, ITERATOR_NOERROR); + break; + } + + case 1: + { + // multi term query + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "xyzzy")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "bar", "baz")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 2)); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 2)); + + stack.AppendBuffer(&buf); + search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + + if (verify) + return ShowResult(testno, si, stack, ITERATOR_NOERROR); + break; + } + + case 2: + { + // all stack items + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_NUMTERM, "foo", "[0;22]")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_PREFIXTERM, "bar", "baz")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_PHRASE, 3, "bar")); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_OR, 2)); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_AND, 3)); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_RANK, 5)); + stack.Push(new search::ParseItem(search::ParseItem::ITEM_NOT, 3)); + + stack.AppendBuffer(&buf); + search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + + if (verify) + return ShowResult(testno, si, stack, ITERATOR_NOERROR); + break; + } + 
+ + case 3: + { + // malicious type in buffer + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar")); + stack.AppendBuffer(&buf); + *buf.GetWritableDrainPos(0) = 0x1e; + search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + if (verify) + return ShowResult(testno, si, stack, ITERATOR_ERROR_WRONG_SIZE); + break; + } + + case 4: + { + // malicious length in buffer + stack.Push(new search::ParseItem(search::ParseItem::ITEM_TERM, "foo", "foobar")); + stack.AppendBuffer(&buf); + *buf.GetWritableDrainPos(1) = 0xba; + search::SimpleQueryStackDumpIterator si(vespalib::stringref(buf.GetDrainPos(), buf.GetUsedLen())); + if (verify) + return ShowResult(testno, si, stack, ITERATOR_ERROR_WRONG_SIZE); + break; + } + + + default: + { + printf("%03d: no such test\n", testno); + } + } + + return true; +} + +void +StackDumpIteratorTest::Usage(char *progname) +{ + printf("%s {testnospec}+\n\ + Where testnospec is:\n\ + num: single test\n\ + num-num: inclusive range (open range permitted)\n",progname); + printf("There are tests from %d to %d\n\n", 0, NUMTESTS-1); +} + +int +main(int argc, char** argv) +{ + StackDumpIteratorTest tester; + return tester.Entry(argc, argv); +} + diff --git a/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h new file mode 100644 index 00000000000..63d41005932 --- /dev/null +++ b/searchlib/src/tests/stackdumpiterator/stackdumpiteratortest.h @@ -0,0 +1,17 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#pragma once + +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/parsequery/stackdumpiterator.h> + +class StackDumpIteratorTest : public FastOS_Application +{ + int Main(); + void Usage(char *progname); + bool ShowResult(int testNo, search::SimpleQueryStackDumpIterator &actual, search::SimpleQueryStack &correct, unsigned int expected); + bool RunTest(int i, bool verify); +}; + diff --git a/searchlib/src/tests/stackdumpiterator/testowner.ATS b/searchlib/src/tests/stackdumpiterator/testowner.ATS new file mode 100644 index 00000000000..6d03b0836a4 --- /dev/null +++ b/searchlib/src/tests/stackdumpiterator/testowner.ATS @@ -0,0 +1 @@ +vlarsen diff --git a/searchlib/src/tests/stringenum/.gitignore b/searchlib/src/tests/stringenum/.gitignore new file mode 100644 index 00000000000..7a2f1dd659f --- /dev/null +++ b/searchlib/src/tests/stringenum/.gitignore @@ -0,0 +1,8 @@ +*.core +.depend +Makefile +core +core.* +stringenum +tmp.enum +searchlib_stringenum_test_app diff --git a/searchlib/src/tests/stringenum/CMakeLists.txt b/searchlib/src/tests/stringenum/CMakeLists.txt new file mode 100644 index 00000000000..b59e739be47 --- /dev/null +++ b/searchlib/src/tests/stringenum/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_stringenum_test_app + SOURCES + stringenum_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_stringenum_test_app COMMAND searchlib_stringenum_test_app) diff --git a/searchlib/src/tests/stringenum/stringenum_test.cpp b/searchlib/src/tests/stringenum/stringenum_test.cpp new file mode 100644 index 00000000000..8c6ef64dbe3 --- /dev/null +++ b/searchlib/src/tests/stringenum/stringenum_test.cpp @@ -0,0 +1,147 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +// Copyright (C) 2001-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("stringenum"); +#include <vespa/fastlib/io/bufferedfile.h> +#include <vespa/searchlib/util/stringenum.h> + + +#include <vespa/vespalib/testkit/testapp.h> + +using namespace vespalib; + +class MyApp : public vespalib::TestApp +{ +public: + void CheckLookup( search::util::StringEnum *strEnum, const char *str, int value); + int Main(); + + MyApp(void) {} +}; + + +void +MyApp::CheckLookup( search::util::StringEnum *strEnum, const char *str, int value) +{ + EXPECT_EQUAL(0, strcmp(str, strEnum->Lookup(value))); + EXPECT_EQUAL(value, strEnum->Lookup(str)); +} + + +int +MyApp::Main() +{ + TEST_INIT("stringenum_test"); + + search::util::StringEnum enum1; + search::util::StringEnum enum2; + + // check number of entries + EXPECT_EQUAL(enum1.GetNumEntries(), 0u); + EXPECT_EQUAL(enum2.GetNumEntries(), 0u); + + // check add non-duplicates + EXPECT_EQUAL(enum1.Add("zero"), 0); + EXPECT_EQUAL(enum1.Add("one"), 1); + EXPECT_EQUAL(enum1.Add("two"), 2); + EXPECT_EQUAL(enum1.Add("three"), 3); + EXPECT_EQUAL(enum1.Add("four"), 4); + EXPECT_EQUAL(enum1.Add("five"), 5); + EXPECT_EQUAL(enum1.Add("six"), 6); + EXPECT_EQUAL(enum1.Add("seven"), 7); + EXPECT_EQUAL(enum1.Add("eight"), 8); + EXPECT_EQUAL(enum1.Add("nine"), 9); + + // check add duplicates + EXPECT_EQUAL(enum1.Add("four"), 4); + EXPECT_EQUAL(enum1.Add("eight"), 8); + EXPECT_EQUAL(enum1.Add("six"), 6); + EXPECT_EQUAL(enum1.Add("seven"), 7); + EXPECT_EQUAL(enum1.Add("one"), 1); + EXPECT_EQUAL(enum1.Add("nine"), 9); + EXPECT_EQUAL(enum1.Add("five"), 5); + EXPECT_EQUAL(enum1.Add("zero"), 0); + EXPECT_EQUAL(enum1.Add("two"), 2); + EXPECT_EQUAL(enum1.Add("three"), 3); + + // check add non-duplicate + EXPECT_EQUAL(enum1.Add("ten"), 10); + + // check mapping and reverse mapping + EXPECT_EQUAL(enum1.GetNumEntries(), 11u); + 
TEST_DO(CheckLookup(&enum1, "zero", 0)); + TEST_DO(CheckLookup(&enum1, "one", 1)); + TEST_DO(CheckLookup(&enum1, "two", 2)); + TEST_DO(CheckLookup(&enum1, "three", 3)); + TEST_DO(CheckLookup(&enum1, "four", 4)); + TEST_DO(CheckLookup(&enum1, "five", 5)); + TEST_DO(CheckLookup(&enum1, "six", 6)); + TEST_DO(CheckLookup(&enum1, "seven", 7)); + TEST_DO(CheckLookup(&enum1, "eight", 8)); + TEST_DO(CheckLookup(&enum1, "nine", 9)); + TEST_DO(CheckLookup(&enum1, "ten", 10)); + + TEST_FLUSH(); + + // save/load + EXPECT_TRUE(enum1.Save("tmp.enum")); + EXPECT_TRUE(enum2.Load("tmp.enum")); + + // check mapping and reverse mapping + EXPECT_EQUAL(enum2.GetNumEntries(), 11u); + TEST_DO(CheckLookup(&enum2, "zero", 0)); + TEST_DO(CheckLookup(&enum2, "one", 1)); + TEST_DO(CheckLookup(&enum2, "two", 2)); + TEST_DO(CheckLookup(&enum2, "three", 3)); + TEST_DO(CheckLookup(&enum2, "four", 4)); + TEST_DO(CheckLookup(&enum2, "five", 5)); + TEST_DO(CheckLookup(&enum2, "six", 6)); + TEST_DO(CheckLookup(&enum2, "seven", 7)); + TEST_DO(CheckLookup(&enum2, "eight", 8)); + TEST_DO(CheckLookup(&enum2, "nine", 9)); + TEST_DO(CheckLookup(&enum2, "ten", 10)); + + // add garbage + enum2.Add("sfsdffgdfh"); + enum2.Add("sf24dfsgg3"); + enum2.Add("sfwertfgdh"); + enum2.Add("sfewrgtsfh"); + enum2.Add("sfgdsdgdfh"); + + TEST_FLUSH(); + + // reload + EXPECT_TRUE(enum2.Load("tmp.enum")); + + // check garbage lost + EXPECT_EQUAL(enum2.GetNumEntries(), 11u); + EXPECT_EQUAL(-1, enum2.Lookup("sfewrgtsfh")); + // check mapping and reverse mapping + TEST_DO(CheckLookup(&enum2, "zero", 0)); + TEST_DO(CheckLookup(&enum2, "one", 1)); + TEST_DO(CheckLookup(&enum2, "two", 2)); + TEST_DO(CheckLookup(&enum2, "three", 3)); + TEST_DO(CheckLookup(&enum2, "four", 4)); + TEST_DO(CheckLookup(&enum2, "five", 5)); + TEST_DO(CheckLookup(&enum2, "six", 6)); + TEST_DO(CheckLookup(&enum2, "seven", 7)); + TEST_DO(CheckLookup(&enum2, "eight", 8)); + TEST_DO(CheckLookup(&enum2, "nine", 9)); + TEST_DO(CheckLookup(&enum2, "ten", 10)); + 
+ // clear + enum1.Clear(); + enum2.Clear(); + + // check number of entries + EXPECT_EQUAL(enum1.GetNumEntries(), 0u); + EXPECT_EQUAL(enum2.GetNumEntries(), 0u); + + TEST_DONE(); +} + +TEST_APPHOOK(MyApp); diff --git a/searchlib/src/tests/transactionlog/.gitignore b/searchlib/src/tests/transactionlog/.gitignore new file mode 100644 index 00000000000..a7bdcf0397d --- /dev/null +++ b/searchlib/src/tests/transactionlog/.gitignore @@ -0,0 +1,7 @@ +.depend +Makefile +translogserver_test +translogclient_test + +searchlib_translogclient_test_app +searchlib_translogserver_test_app diff --git a/searchlib/src/tests/transactionlog/CMakeLists.txt b/searchlib/src/tests/transactionlog/CMakeLists.txt new file mode 100644 index 00000000000..545c81ba45f --- /dev/null +++ b/searchlib/src/tests/transactionlog/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_translogserver_test_app + SOURCES + translogserver_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_translogserver_test_app COMMAND searchlib_translogserver_test_app) +vespa_add_executable(searchlib_translogclient_test_app + SOURCES + translogclient_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_translogclient_test_app COMMAND sh translogclient_test.sh) diff --git a/searchlib/src/tests/transactionlog/DESC b/searchlib/src/tests/transactionlog/DESC new file mode 100644 index 00000000000..db53d59fb6c --- /dev/null +++ b/searchlib/src/tests/transactionlog/DESC @@ -0,0 +1 @@ +This is a test of the tls/tlc interface. 
diff --git a/searchlib/src/tests/transactionlog/FILES b/searchlib/src/tests/transactionlog/FILES new file mode 100644 index 00000000000..babcf181807 --- /dev/null +++ b/searchlib/src/tests/transactionlog/FILES @@ -0,0 +1,2 @@ +translogclient.cpp +translogserver.cpp diff --git a/searchlib/src/tests/transactionlog/translogclient_test.cpp b/searchlib/src/tests/transactionlog/translogclient_test.cpp new file mode 100644 index 00000000000..775654d23fc --- /dev/null +++ b/searchlib/src/tests/transactionlog/translogclient_test.cpp @@ -0,0 +1,926 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/transactionlog/translogclient.h> +#include <vespa/searchlib/transactionlog/translogserver.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/objects/identifiable.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <vespa/log/log.h> +#include <map> +LOG_SETUP("translogclient_test"); + +using namespace search; +using namespace transactionlog; +using namespace document; +using namespace vespalib; +using search::index::DummyFileHeaderContext; + +vespalib::string myhex(const void * b, size_t sz) +{ + static const char * hextab="0123456789ABCDEF"; + const unsigned char * c = static_cast<const unsigned char *>(b); + vespalib::string s; + s.reserve(sz*2); + for (size_t i=0; i < sz; i++) { + s += hextab[c[i] >> 4]; + s += hextab[c[i] & 0x0f]; + } + return s; +} + +class Test : public vespalib::TestApp +{ +public: + int Main(); +private: + bool createDomainTest(TransLogClient & tls, const vespalib::string & name, size_t preExistingDomains=0); + TransLogClient::Session::UP openDomainTest(TransLogClient & tls, const vespalib::string & name); + bool fillDomainTest(TransLogClient::Session * s1, const vespalib::string & name); + void fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries); + void 
fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries, size_t entrySize); + uint32_t countFiles(const vespalib::string &dir); + void checkFilledDomainTest(const TransLogClient::Session::UP &s1, size_t numEntries); + bool visitDomainTest(TransLogClient & tls, TransLogClient::Session * s1, const vespalib::string & name); + bool subscribeDomainTest(TransLogClient & tls, const vespalib::string & name); + bool partialUpdateTest(); + bool test1(); + bool testRemove(); + void createAndFillDomain(const vespalib::string & name, DomainPart::Crc crcMethod, size_t preExistingDomains); + void verifyDomain(const vespalib::string & name); + void testCrcVersions(); + bool test2(); + void testMany(); + void testErase(); + void testSync(); + void testTruncateOnShortRead(); + void testTruncateOnVersionMismatch(); +}; + +TEST_APPHOOK(Test); + +class CallBackTest : public TransLogClient::Subscriber::Callback +{ +private: + virtual RPC::Result receive(const Packet & packet); + virtual void inSync() { _inSync = true; } + virtual void eof() { _eof = true; } + typedef std::map<SerialNum, ByteBuffer> PacketMap; + PacketMap _packetMap; +public: + CallBackTest() : _inSync(false), _eof(false) { } + size_t size() const { return _packetMap.size(); } + bool hasSerial(SerialNum n) const { return (_packetMap.find(n) != _packetMap.end()); } + void clear() { _inSync = false; _eof = false; _packetMap.clear(); } + const ByteBuffer & packet(SerialNum n) { return (_packetMap.find(n)->second); } + + bool _inSync; + bool _eof; +}; + +RPC::Result CallBackTest::receive(const Packet & p) +{ + vespalib::nbostream h(p.getHandle().c_str(), p.getHandle().size(), true); + LOG(info,"CallBackTest::receive (%zu, %zu, %zu)(%s)", h.rp(), h.size(), h.capacity(), myhex(h.peek(), h.size()).c_str()); + while(h.size() > 0) { + Packet::Entry e; + e.deserialize(h); + LOG(info,"CallBackTest::receive (%zu, %zu, %zu)(%s)", h.rp(), h.size(), h.capacity(), myhex(e.data().c_str(), 
e.data().size()).c_str()); + _packetMap[e.serial()] = ByteBuffer(e.data().c_str(), e.data().size()); + } + return RPC::OK; +} + +class CallBackManyTest : public TransLogClient::Subscriber::Callback // verifies that entries arrive with consecutive serials and consecutive payload values +{ +private: + virtual RPC::Result receive(const Packet & packet); + virtual void inSync() { _inSync = true; } + virtual void eof() { _eof = true; } +public: + CallBackManyTest(size_t start) : _inSync(false), _eof(false), _count(start), _value(start) { } + void clear() { _inSync = false; _eof = false; _count = 0; _value = 0; } + bool _inSync; + bool _eof; + size_t _count; // one less than the serial expected for the next entry + size_t _value; // payload value expected in the next entry +}; + +RPC::Result CallBackManyTest::receive(const Packet & p) // asserts serial and payload of every entry in the packet +{ + nbostream h(p.getHandle().c_str(), p.getHandle().size(), true); + for(;h.size() > 0; _count++, _value++) { + Packet::Entry e; + e.deserialize(h); + assert(e.data().size() == sizeof(size_t)); + size_t v(0); memcpy(&v, e.data().c_str(), sizeof(v)); // copy the bytes out instead of dereferencing a possibly unaligned size_t pointer + assert(_count+1 == e.serial()); + assert(v == _value); + (void) v; + } + return RPC::OK; +} + +class CallBackUpdate : public TransLogClient::Subscriber::Callback +{ +public: + typedef std::map<SerialNum, Identifiable *> PacketMap; +private: + virtual RPC::Result receive(const Packet & packet); + virtual void inSync() { _inSync = true; } + virtual void eof() { _eof = true; } + PacketMap _packetMap; +public: + CallBackUpdate() : _inSync(false), _eof(false) { } + virtual ~CallBackUpdate() { while (_packetMap.begin() != _packetMap.end()) { delete _packetMap.begin()->second; _packetMap.erase(_packetMap.begin()); } } + bool hasSerial(SerialNum n) const { return (_packetMap.find(n) != _packetMap.end()); } + const PacketMap & map() const { return _packetMap; } + bool _inSync; + bool _eof; +}; + + +RPC::Result CallBackUpdate::receive(const Packet & packet) +{ + nbostream h(packet.getHandle().c_str(), packet.getHandle().size(), true); + while (h.size() > 0) { + Packet::Entry e; + e.deserialize(h); + const vespalib::Identifiable::RuntimeClass * cl(vespalib::Identifiable::classFromId(e.type())); + if (cl) { 
+ vespalib::Identifiable * obj(cl->create()); + if (obj->inherits(Identifiable::classId)) { + Identifiable * ser = static_cast<Identifiable *>(obj); + nbostream is(e.data().c_str(), e.data().size()); + try { + is >> *ser; + } catch (std::exception & ex) { + LOG(warning, "Failed deserializing (%" PRId64 ", %s) bb(%zu, %zu, %zu)=%s what=%s", e.serial(), cl->name(), is.rp(), is.size(), is.capacity(), myhex(is.peek(), is.size()).c_str(), ex.what()); + assert(false); + return RPC::ERROR; + } + assert(is.state() == nbostream::ok); + assert(is.size() == 0); + _packetMap[e.serial()] = ser; + } else { + LOG(warning, "Packet::Entry(%" PRId64 ", %s) is not a Identifiable", e.serial(), cl->name()); + } + } else { + LOG(warning, "Packet::Entry(%" PRId64 ", %d) is not recognized by vespalib::Identifiable", e.serial(), e.type()); + } + } + return RPC::OK; +} + +class CallBackStatsTest : public TransLogClient::Session::Callback +{ +private: + virtual RPC::Result receive(const Packet & packet); + virtual void inSync() { _inSync = true; } + virtual void eof() { _eof = true; } +public: + CallBackStatsTest() : _inSync(false), _eof(false), + _count(0), _inOrder(0), + _firstSerial(0), _lastSerial(0), + _prevSerial(0) { } + void clear() { _inSync = false; _eof = false; _count = 0; _inOrder = 0; + _firstSerial = 0; _lastSerial = 0; _inOrder = 0; } + bool _inSync; + bool _eof; + uint64_t _count; + uint64_t _inOrder; // increase when next entry is one above previous + SerialNum _firstSerial; + SerialNum _lastSerial; + SerialNum _prevSerial; +}; + +RPC::Result CallBackStatsTest::receive(const Packet & p) +{ + nbostream h(p.getHandle().c_str(), p.getHandle().size(), true); + for(;h.size() > 0; ++_count) { + Packet::Entry e; + e.deserialize(h); + SerialNum s = e.serial(); + if (_count == 0) { + _firstSerial = s; + _lastSerial = s; + } + if (s == _prevSerial + 1) { + ++_inOrder; + } + _prevSerial = s; + if (_firstSerial > s) { + _firstSerial = s; + } + if (_lastSerial < s) { + _lastSerial = s; 
+ } + } + return RPC::OK; +} + +#define CID_TestIdentifiable 0x5762314 + +class TestIdentifiable : public Identifiable +{ +public: + DECLARE_IDENTIFIABLE(TestIdentifiable); + TestIdentifiable() { } +}; + +IMPLEMENT_IDENTIFIABLE(TestIdentifiable, Identifiable); + +bool Test::partialUpdateTest() +{ + bool retval(false); + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls("tcp/localhost:18377"); + + TransLogClient::Session::UP s1 = openDomainTest(tls, "test1"); + TransLogClient::Session & session = *s1; + + TestIdentifiable du; + + nbostream os; + os << du; + + vespalib::ConstBufferRef bb(os.c_str(), os.size()); + LOG(info, "DU : %s", myhex(bb.c_str(), bb.size()).c_str()); + Packet::Entry e(7, du.getClass().id(), bb); + Packet pa; + pa.add(e); + pa.close(); + ASSERT_TRUE(session.commit(vespalib::ConstBufferRef(pa.getHandle().c_str(), pa.getHandle().size()))); + + CallBackUpdate ca; + TransLogClient::Visitor::UP visitor = tls.createVisitor("test1", ca); + ASSERT_TRUE(visitor.get()); + ASSERT_TRUE( visitor->visit(5, 7) ); + for (size_t i(0); ! ca._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); } + ASSERT_TRUE( ! ca._inSync ); + ASSERT_TRUE( ca._eof ); + ASSERT_TRUE( ca.map().size() == 1); + ASSERT_TRUE( ca.hasSerial(7) ); + + CallBackUpdate ca1; + TransLogClient::Visitor::UP visitor1 = tls.createVisitor("test1", ca1); + ASSERT_TRUE(visitor1.get()); + ASSERT_TRUE( visitor1->visit(4, 5) ); + for (size_t i(0); ! ca1._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); } + ASSERT_TRUE( ! ca1._inSync ); + ASSERT_TRUE( ca1._eof ); + ASSERT_TRUE( ca1.map().size() == 0); + + CallBackUpdate ca2; + TransLogClient::Visitor::UP visitor2 = tls.createVisitor("test1", ca2); + ASSERT_TRUE(visitor2.get()); + ASSERT_TRUE( visitor2->visit(5, 6) ); + for (size_t i(0); ! ca2._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); } + ASSERT_TRUE( ! 
ca2._inSync ); + ASSERT_TRUE( ca2._eof ); + ASSERT_TRUE( ca2.map().size() == 0); + + CallBackUpdate ca3; + TransLogClient::Visitor::UP visitor3 = tls.createVisitor("test1", ca3); + ASSERT_TRUE(visitor3.get()); + ASSERT_TRUE( visitor3->visit(5, 1000) ); + for (size_t i(0); ! ca3._eof && (i < 1000); i++ ) { FastOS_Thread::Sleep(10); } + ASSERT_TRUE( ! ca3._inSync ); + ASSERT_TRUE( ca3._eof ); + ASSERT_TRUE( ca3.map().size() == 1); + ASSERT_TRUE( ca3.hasSerial(7) ); + + return retval; +} + +bool Test::createDomainTest(TransLogClient & tls, const vespalib::string & name, size_t preExistingDomains) +{ + bool retval(true); + std::vector<vespalib::string> dir; + tls.listDomains(dir); + EXPECT_EQUAL (dir.size(), preExistingDomains); + TransLogClient::Session::UP s1 = tls.open(name); + ASSERT_TRUE (s1.get() == NULL); + retval = tls.create(name); + ASSERT_TRUE (retval); + dir.clear(); + tls.listDomains(dir); + EXPECT_EQUAL (dir.size(), preExistingDomains+1); +// ASSERT_TRUE (dir[0] == name); + return retval; +} + +TransLogClient::Session::UP Test::openDomainTest(TransLogClient & tls, const vespalib::string & name) +{ + TransLogClient::Session::UP s1 = tls.open(name); + ASSERT_TRUE (s1.get() != NULL); + return s1; +} + +bool Test::fillDomainTest(TransLogClient::Session * s1, const vespalib::string & name) +{ + bool retval(true); + Packet::Entry e1(1, 1, vespalib::ConstBufferRef("Content in buffer A", 20)); + Packet::Entry e2(2, 2, vespalib::ConstBufferRef("Content in buffer B", 20)); + Packet::Entry e3(3, 1, vespalib::ConstBufferRef("Content in buffer C", 20)); + + Packet a; + ASSERT_TRUE (a.add(e1)); + Packet b; + ASSERT_TRUE (b.add(e2)); + ASSERT_TRUE (b.add(e3)); + ASSERT_TRUE (!b.add(e1)); + a.close(); + b.close(); + ASSERT_TRUE (s1->commit(vespalib::ConstBufferRef(a.getHandle().c_str(), a.getHandle().size()))); + ASSERT_TRUE (s1->commit(vespalib::ConstBufferRef(b.getHandle().c_str(), b.getHandle().size()))); + try { + 
s1->commit(vespalib::ConstBufferRef(a.getHandle().c_str(), a.getHandle().size())); + ASSERT_TRUE(false); + } catch (const std::exception & e) { + EXPECT_EQUAL(vespalib::string("commit failed with code -2. server says: Exception during commit on " + name + " : Incomming serial number(1) must be bigger than the last one (3)."), e.what()); + } + EXPECT_EQUAL(a.size(), 1u); + EXPECT_EQUAL(a.range().from(), 1u); + EXPECT_EQUAL(a.range().to(), 1u); + EXPECT_EQUAL(b.size(), 2u); + EXPECT_EQUAL(b.range().from(), 2u); + EXPECT_EQUAL(b.range().to(), 3u); + EXPECT_TRUE(a.merge(b)); + EXPECT_EQUAL(a.size(), 3u); + EXPECT_EQUAL(a.range().from(), 1u); + EXPECT_EQUAL(a.range().to(), 3u); + + Packet::Entry e; + vespalib::nbostream h(a.getHandle().c_str(), a.getHandle().size()); + e.deserialize(h); + e.deserialize(h); + e.deserialize(h); + EXPECT_EQUAL(h.size(), 0u); + + return retval; +} + +void Test::fillDomainTest(TransLogClient::Session * s1, size_t numPackets, size_t numEntries) +{ + size_t value(0); + for(size_t i=0; i < numPackets; i++) { + std::unique_ptr<Packet> p(new Packet()); + for(size_t j=0; j < numEntries; j++, value++) { + Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&value, sizeof(value))); + if ( ! p->add(e) ) { + p->close(); + ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size()))); + p.reset(new Packet()); + ASSERT_TRUE(p->add(e)); + } + } + p->close(); + ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size()))); + } +} + + +void +Test::fillDomainTest(TransLogClient::Session * s1, + size_t numPackets, size_t numEntries, + size_t entrySize) +{ + size_t value(0); + std::vector<char> entryBuffer(entrySize); + for(size_t i=0; i < numPackets; i++) { + std::unique_ptr<Packet> p(new Packet()); + for(size_t j=0; j < numEntries; j++, value++) { + Packet::Entry e(value+1, j+1, vespalib::ConstBufferRef((const char *)&entryBuffer[0], entryBuffer.size())); + if ( ! 
p->add(e) ) { + p->close(); + ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size()))); + p.reset(new Packet()); + ASSERT_TRUE(p->add(e)); + } + } + p->close(); + ASSERT_TRUE(s1->commit(vespalib::ConstBufferRef(p->getHandle().c_str(), p->getHandle().size()))); + } +} + + +uint32_t +Test::countFiles(const vespalib::string &dir) +{ + uint32_t res = 0; + FastOS_DirectoryScan dirScan(dir.c_str()); + while (dirScan.ReadNext()) { + const char *ename = dirScan.GetName(); + if (strcmp(ename, ".") == 0 || + strcmp(ename, "..") == 0) + continue; + ++res; + } + return res; +} + + +void +Test::checkFilledDomainTest(const TransLogClient::Session::UP &s1, + size_t numEntries) +{ + SerialNum b(0), e(0); + size_t c(0); + EXPECT_TRUE(s1->status(b, e, c)); + EXPECT_EQUAL(b, 1u); + EXPECT_EQUAL(e, numEntries); + EXPECT_EQUAL(c, numEntries); +} + + +bool Test::visitDomainTest(TransLogClient & tls, TransLogClient::Session * s1, const vespalib::string & name) +{ + bool retval(true); + + SerialNum b(0), e(0); + size_t c(0); + EXPECT_TRUE(s1->status(b, e, c)); + EXPECT_EQUAL(b, 1u); + EXPECT_EQUAL(e, 3u); + EXPECT_EQUAL(c, 3u); + + CallBackTest ca; + TransLogClient::Visitor::UP visitor = tls.createVisitor(name, ca); + ASSERT_TRUE(visitor.get()); + EXPECT_TRUE( visitor->visit(0, 1) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } + EXPECT_TRUE( ! ca._inSync ); + EXPECT_TRUE( ca._eof ); + EXPECT_TRUE( ! ca.hasSerial(0) ); + EXPECT_TRUE( ca.hasSerial(1) ); + EXPECT_TRUE( ! ca.hasSerial(2) ); + ca.clear(); + + visitor = tls.createVisitor(name, ca); + ASSERT_TRUE(visitor.get()); + EXPECT_TRUE( visitor->visit(1, 2) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } + EXPECT_TRUE( ! ca._inSync ); + EXPECT_TRUE( ca._eof ); + EXPECT_TRUE( ! ca.hasSerial(0) ); + EXPECT_TRUE( ! ca.hasSerial(1) ); + EXPECT_TRUE( ca.hasSerial(2) ); + EXPECT_TRUE( ! 
ca.hasSerial(3) ); + ca.clear(); + + visitor = tls.createVisitor(name, ca); + ASSERT_TRUE(visitor.get()); // ASSERT like the other visits in this function: visitor is dereferenced on the next line + EXPECT_TRUE( visitor->visit(0, 3) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } + EXPECT_TRUE( ! ca._inSync ); + EXPECT_TRUE( ca._eof ); + EXPECT_TRUE( ! ca.hasSerial(0) ); + EXPECT_TRUE( ca.hasSerial(1) ); + EXPECT_TRUE( ca.hasSerial(2) ); + EXPECT_TRUE( ca.hasSerial(3) ); + ca.clear(); + + visitor = tls.createVisitor(name, ca); + ASSERT_TRUE(visitor.get()); + EXPECT_TRUE( visitor->visit(2, 3) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } + EXPECT_TRUE( ! ca._inSync ); + EXPECT_TRUE( ca._eof ); + EXPECT_TRUE( ! ca.hasSerial(0) ); + EXPECT_TRUE( !ca.hasSerial(1) ); + EXPECT_TRUE( !ca.hasSerial(2) ); + EXPECT_TRUE( ca.hasSerial(3) ); + ca.clear(); + + return retval; +} + +bool Test::subscribeDomainTest(TransLogClient & tls, const vespalib::string & name) // subscribes from serial 0 and expects serials 1, 2 and 3 once the subscriber is in sync +{ + bool retval(true); + CallBackTest ca; + TransLogClient::Subscriber::UP subscriber = tls.createSubscriber(name, ca); + ASSERT_TRUE(subscriber.get()); + ASSERT_TRUE( subscriber->subscribe(0) ); + for (size_t i(0); ! ca._inSync && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } // poll in 10 ms steps until the in-sync callback fires or the iteration cap is hit + ASSERT_TRUE( ca._inSync ); + ASSERT_TRUE( ! ca.hasSerial(0) ); + ASSERT_TRUE( ! 
ca._eof ); + ASSERT_TRUE( ca.hasSerial(1) ); + ASSERT_TRUE( ca.hasSerial(2) ); + ASSERT_TRUE( ca.hasSerial(3) ); + return retval; +} + +bool Test::test1() +{ + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls("tcp/localhost:18377"); + + vespalib::string name("test1"); + createDomainTest(tls, name); + TransLogClient::Session::UP s1 = openDomainTest(tls, name); + fillDomainTest(s1.get(), name); + visitDomainTest(tls, s1.get(), name); + subscribeDomainTest(tls, name); + return true; +} + +void Test::createAndFillDomain(const vespalib::string & name, DomainPart::Crc crcMethod, size_t preExistingDomains) +{ + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test13", 18377, ".", fileHeaderContext, 0x10000, false, 4, crcMethod); + TransLogClient tls("tcp/localhost:18377"); + + createDomainTest(tls, name, preExistingDomains); + TransLogClient::Session::UP s1 = openDomainTest(tls, name); + fillDomainTest(s1.get(), name); +} + +void Test::verifyDomain(const vespalib::string & name) +{ + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test13", 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls("tcp/localhost:18377"); + TransLogClient::Session::UP s1 = openDomainTest(tls, name); + visitDomainTest(tls, s1.get(), name); +} + +void Test::testCrcVersions() +{ + createAndFillDomain("ccitt_crc32", DomainPart::ccitt_crc32, 0); + createAndFillDomain("xxh64", DomainPart::xxh64, 1); + + verifyDomain("ccitt_crc32"); + verifyDomain("xxh64"); +} + +bool Test::testRemove() +{ + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("testremove", 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls("tcp/localhost:18377"); + + vespalib::string name("test-delete"); + createDomainTest(tls, name); + TransLogClient::Session::UP s1 = openDomainTest(tls, name); + fillDomainTest(s1.get(), name); + visitDomainTest(tls, s1.get(), name); + 
subscribeDomainTest(tls, name); + ASSERT_TRUE(tls.remove(name)); + + return true; +} + +bool Test::test2() +{ + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test7", 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls("tcp/localhost:18377"); + + vespalib::string name("test1"); + TransLogClient::Session::UP s1 = openDomainTest(tls, name); + visitDomainTest(tls, s1.get(), name); + subscribeDomainTest(tls, name); + return true; +} + +namespace { + +void +assertVisitStats(TransLogClient &tls, const vespalib::string &domain, + SerialNum visitStart, SerialNum visitEnd, + SerialNum expFirstSerial, SerialNum expLastSerial, + uint64_t expCount, uint64_t expInOrder) +{ + CallBackStatsTest ca; + TransLogClient::Visitor::UP visitor = tls.createVisitor(domain, ca); + ASSERT_TRUE(visitor.get()); + ASSERT_TRUE( visitor->visit(visitStart, visitEnd) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { + FastOS_Thread::Sleep(10); + } + ASSERT_TRUE(!ca._inSync); + ASSERT_TRUE(ca._eof); + EXPECT_EQUAL(expFirstSerial, ca._firstSerial); + EXPECT_EQUAL(expLastSerial, ca._lastSerial); + EXPECT_EQUAL(expCount, ca._count); + EXPECT_EQUAL(expInOrder, ca._inOrder); +} + +void +assertStatus(TransLogClient::Session &s, + SerialNum expFirstSerial, SerialNum expLastSerial, + uint64_t expCount) +{ + SerialNum b(0), e(0); + size_t c(0); + EXPECT_TRUE(s.status(b, e, c)); + EXPECT_EQUAL(expFirstSerial, b); + EXPECT_EQUAL(expLastSerial, e); + EXPECT_EQUAL(expCount, c); +} + +} + + +void Test::testMany() +{ + const unsigned int NUM_PACKETS = 1000; + const unsigned int NUM_ENTRIES = 100; + const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; + { + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test8", 18377, ".", fileHeaderContext, 0x80000); + TransLogClient tls("tcp/localhost:18377"); + + createDomainTest(tls, "many", 0); + TransLogClient::Session::UP s1 = openDomainTest(tls, "many"); + fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES); + 
SerialNum b(0), e(0); + size_t c(0); + EXPECT_TRUE(s1->status(b, e, c)); + EXPECT_EQUAL(b, 1u); + EXPECT_EQUAL(e, TOTAL_NUM_ENTRIES); + EXPECT_EQUAL(c, TOTAL_NUM_ENTRIES); + CallBackManyTest ca(2); + TransLogClient::Visitor::UP visitor = tls.createVisitor("many", ca); + ASSERT_TRUE(visitor.get()); + ASSERT_TRUE( visitor->visit(2, TOTAL_NUM_ENTRIES) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } + ASSERT_TRUE( ! ca._inSync ); + ASSERT_TRUE( ca._eof ); + EXPECT_EQUAL(ca._count, TOTAL_NUM_ENTRIES); + EXPECT_EQUAL(ca._value, TOTAL_NUM_ENTRIES); + } + { + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test8", 18377, ".", fileHeaderContext, 0x1000000); + TransLogClient tls("tcp/localhost:18377"); + + TransLogClient::Session::UP s1 = openDomainTest(tls, "many"); + SerialNum b(0), e(0); + size_t c(0); + EXPECT_TRUE(s1->status(b, e, c)); + EXPECT_EQUAL(b, 1u); + EXPECT_EQUAL(e, TOTAL_NUM_ENTRIES); + EXPECT_EQUAL(c, TOTAL_NUM_ENTRIES); + CallBackManyTest ca(2); + TransLogClient::Visitor::UP visitor = tls.createVisitor("many", ca); + ASSERT_TRUE(visitor.get()); + ASSERT_TRUE( visitor->visit(2, TOTAL_NUM_ENTRIES) ); + for (size_t i(0); ! ca._eof && (i < 60000); i++ ) { FastOS_Thread::Sleep(10); } + ASSERT_TRUE( ! 
ca._inSync ); + ASSERT_TRUE( ca._eof ); + EXPECT_EQUAL(ca._count, TOTAL_NUM_ENTRIES); + EXPECT_EQUAL(ca._value, TOTAL_NUM_ENTRIES); + } +} + +void Test::testErase() +{ + const unsigned int NUM_PACKETS = 1000; + const unsigned int NUM_ENTRIES = 100; + const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; + { + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test12", 18377, ".", fileHeaderContext, 0x80000); + TransLogClient tls("tcp/localhost:18377"); + + createDomainTest(tls, "erase", 0); + TransLogClient::Session::UP s1 = openDomainTest(tls, "erase"); + fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES); + } + { + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test12", 18377, ".", fileHeaderContext, 0x1000000); + TransLogClient tls("tcp/localhost:18377"); + + TransLogClient::Session::UP s1 = openDomainTest(tls, "erase"); + + // Before erase + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + 3, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES -2, TOTAL_NUM_ENTRIES - 3)); + DomainStats domainStats = tlss.getDomainStats(); + DomainInfo domainInfo = domainStats["erase"]; + size_t numParts = domainInfo.parts.size(); + LOG(info, "%zu parts", numParts); + for (uint32_t partId = 0; partId < numParts; ++partId) { + const PartInfo &part = domainInfo.parts[partId]; + LOG(info, + "part %u from %" PRIu64 " to %" PRIu64 ", " + "count %zu, numBytes %zu", + partId, + (uint64_t) part.range.from(), (uint64_t) part.range.to(), + part.count, part.byteSize); + } + ASSERT_LESS_EQUAL(2u, numParts); + // Erase everything before second to last domainpart file + SerialNum eraseSerial = domainInfo.parts[numParts - 2].range.from(); + s1->erase(eraseSerial); + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + eraseSerial, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES + 1 - eraseSerial, + TOTAL_NUM_ENTRIES - eraseSerial)); + TEST_DO(assertStatus(*s1, eraseSerial, TOTAL_NUM_ENTRIES, + domainInfo.parts[numParts - 2].count + + 
domainInfo.parts[numParts - 1].count)); + // No apparent effect of erasing just first entry in 2nd to last part + s1->erase(eraseSerial + 1); + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + eraseSerial, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES + 1 - eraseSerial, + TOTAL_NUM_ENTRIES - eraseSerial)); + TEST_DO(assertStatus(*s1, eraseSerial + 1, TOTAL_NUM_ENTRIES, + domainInfo.parts[numParts - 2].count + + domainInfo.parts[numParts - 1].count)); + // No apparent effect of erasing almost all of 2nd to last part + SerialNum eraseSerial2 = domainInfo.parts[numParts - 2].range.to(); + s1->erase(eraseSerial2); + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + eraseSerial, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES + 1 - eraseSerial, + TOTAL_NUM_ENTRIES - eraseSerial)); + TEST_DO(assertStatus(*s1, eraseSerial2, TOTAL_NUM_ENTRIES, + domainInfo.parts[numParts - 2].count + + domainInfo.parts[numParts - 1].count)); + // Erase everything before last domainpart file + eraseSerial = domainInfo.parts[numParts - 1].range.from(); + s1->erase(eraseSerial); + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + eraseSerial, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES + 1 - eraseSerial, + TOTAL_NUM_ENTRIES - eraseSerial)); + TEST_DO(assertStatus(*s1, eraseSerial, TOTAL_NUM_ENTRIES, + domainInfo.parts[numParts - 1].count)); + // No apparent effect of erasing just first entry in last part + s1->erase(eraseSerial + 1); + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + eraseSerial, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES + 1 - eraseSerial, + TOTAL_NUM_ENTRIES - eraseSerial)); + TEST_DO(assertStatus(*s1, eraseSerial + 1, TOTAL_NUM_ENTRIES, + domainInfo.parts[numParts - 1].count)); + // No apparent effect of erasing almost all of last part + eraseSerial2 = domainInfo.parts[numParts - 1].range.to(); + s1->erase(eraseSerial2); + TEST_DO(assertVisitStats(tls, "erase", 2, TOTAL_NUM_ENTRIES, + eraseSerial, TOTAL_NUM_ENTRIES, + TOTAL_NUM_ENTRIES + 1 - 
eraseSerial, + TOTAL_NUM_ENTRIES - eraseSerial)); + TEST_DO(assertStatus(*s1, eraseSerial2, TOTAL_NUM_ENTRIES, + domainInfo.parts[numParts - 1].count)); + } +} + + +void +Test::testSync() +{ + const unsigned int NUM_PACKETS = 3; + const unsigned int NUM_ENTRIES = 4; + const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; + + DummyFileHeaderContext fileHeaderContext; + TransLogServer tlss("test9", 18377, ".", fileHeaderContext, 0x1000000); + TransLogClient tls("tcp/localhost:18377"); + + createDomainTest(tls, "sync", 0); + TransLogClient::Session::UP s1 = openDomainTest(tls, "sync"); + fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES); + + SerialNum syncedTo(0); + + EXPECT_TRUE(s1->sync(2, syncedTo)); + EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES); +} + + +void +Test::testTruncateOnVersionMismatch() +{ + const unsigned int NUM_PACKETS = 3; + const unsigned int NUM_ENTRIES = 4; + const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; + + uint64_t fromOld(0), toOld(0); + size_t countOld(0); + DummyFileHeaderContext fileHeaderContext; + { + TransLogServer tlss("test11", 18377, ".", fileHeaderContext, 0x1000000); + TransLogClient tls("tcp/localhost:18377"); + + createDomainTest(tls, "sync", 0); + TransLogClient::Session::UP s1 = openDomainTest(tls, "sync"); + fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES); + EXPECT_TRUE(s1->status(fromOld, toOld, countOld)); + SerialNum syncedTo(0); + + EXPECT_TRUE(s1->sync(2, syncedTo)); + EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES); + } + FastOS_File f("test11/sync/sync-0000000000000000"); + EXPECT_TRUE(f.OpenWriteOnlyExisting()); + EXPECT_TRUE(f.SetPosition(f.GetSize())); + + char tmp[100]; + memset(tmp, 0, sizeof(tmp)); + EXPECT_EQUAL(static_cast<ssize_t>(sizeof(tmp)), f.Write2(tmp, sizeof(tmp))); + EXPECT_TRUE(f.Close()); + { + TransLogServer tlss("test11", 18377, ".", fileHeaderContext, 0x1000000); + TransLogClient tls("tcp/localhost:18377"); + TransLogClient::Session::UP s1 = openDomainTest(tls, "sync"); + 
uint64_t from(0), to(0); + size_t count(0); + EXPECT_TRUE(s1->status(from, to, count)); + ASSERT_EQUAL(fromOld, from); + ASSERT_EQUAL(toOld, to); + ASSERT_EQUAL(countOld, count); + } +} + +void +Test::testTruncateOnShortRead() +{ + const unsigned int NUM_PACKETS = 17; + const unsigned int NUM_ENTRIES = 1; + const unsigned int TOTAL_NUM_ENTRIES = NUM_PACKETS * NUM_ENTRIES; + const unsigned int ENTRYSIZE = 4080; + vespalib::string topdir("test10"); + vespalib::string domain("truncate"); + vespalib::string dir(topdir + "/" + domain); + vespalib::string tlsspec("tcp/localhost:18377"); + + DummyFileHeaderContext fileHeaderContext; + { + TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls(tlsspec); + + createDomainTest(tls, domain, 0); + TransLogClient::Session::UP s1 = openDomainTest(tls, domain); + fillDomainTest(s1.get(), NUM_PACKETS, NUM_ENTRIES, ENTRYSIZE); + + SerialNum syncedTo(0); + + EXPECT_TRUE(s1->sync(TOTAL_NUM_ENTRIES, syncedTo)); + EXPECT_EQUAL(syncedTo, TOTAL_NUM_ENTRIES); + } + { + EXPECT_EQUAL(2u, countFiles(dir)); + } + { + TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls(tlsspec); + TransLogClient::Session::UP s1 = openDomainTest(tls, domain); + checkFilledDomainTest(s1, TOTAL_NUM_ENTRIES); + } + { + EXPECT_EQUAL(2u, countFiles(dir)); + } + { + vespalib::string filename(dir + "/truncate-0000000000000017"); + FastOS_File trfile(filename.c_str()); + EXPECT_TRUE(trfile.OpenReadWrite(NULL)); + trfile.SetSize(trfile.getSize() - 1); + trfile.Close(); + } + { + TransLogServer tlss(topdir, 18377, ".", fileHeaderContext, 0x10000); + TransLogClient tls(tlsspec); + TransLogClient::Session::UP s1 = openDomainTest(tls, domain); + checkFilledDomainTest(s1, TOTAL_NUM_ENTRIES - 1); + } + { + EXPECT_EQUAL(2u, countFiles(dir)); + } +} + + +int Test::Main() +{ + TEST_INIT("translogclient_test"); + + if (_argc > 0) { + DummyFileHeaderContext::setCreator(_argv[0]); + } + test1(); + test2(); + 
testMany(); + testErase(); + partialUpdateTest(); + + testRemove(); + + testSync(); + + testTruncateOnShortRead(); + testTruncateOnVersionMismatch(); + + testCrcVersions(); + + TEST_DONE(); +} diff --git a/searchlib/src/tests/transactionlog/translogclient_test.sh b/searchlib/src/tests/transactionlog/translogclient_test.sh new file mode 100755 index 00000000000..bf7ddab1fcf --- /dev/null +++ b/searchlib/src/tests/transactionlog/translogclient_test.sh @@ -0,0 +1,4 @@ +#!/bin/bash +rm -rf test7 test8 test9 test10 test11 test12 test13 testremove +$VALGRIND ./searchlib_translogclient_test_app +rm -rf test7 test8 test9 test10 test11 test12 test13 testremove diff --git a/searchlib/src/tests/transactionlog/translogserver_test.cpp b/searchlib/src/tests/transactionlog/translogserver_test.cpp new file mode 100644 index 00000000000..fa03e4f5291 --- /dev/null +++ b/searchlib/src/tests/transactionlog/translogserver_test.cpp @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/transactionlog/translogserver.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> + +using search::index::DummyFileHeaderContext; +using search::transactionlog::TransLogServer; + + +int main(int argc, char *argv[]) +{ + if ((argc > 1) && (argv[0] != NULL)) { + DummyFileHeaderContext::setCreator(argv[0]); + } + DummyFileHeaderContext fileHeaderContext; + TransLogServer tls("test7", 18377, ".", fileHeaderContext, 0x10000); + sleep(60); + return 0; +} diff --git a/searchlib/src/tests/transactionlogstress/.gitignore b/searchlib/src/tests/transactionlogstress/.gitignore new file mode 100644 index 00000000000..5913613b455 --- /dev/null +++ b/searchlib/src/tests/transactionlogstress/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +server +translogstress diff --git a/searchlib/src/tests/transactionlogstress/CMakeLists.txt b/searchlib/src/tests/transactionlogstress/CMakeLists.txt new file mode 100644 index 00000000000..c91b0e34b40 --- /dev/null +++ b/searchlib/src/tests/transactionlogstress/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_translogstress_app + SOURCES + translogstress.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_translogstress_app COMMAND searchlib_translogstress_app BENCHMARK) diff --git a/searchlib/src/tests/transactionlogstress/DESC b/searchlib/src/tests/transactionlogstress/DESC new file mode 100644 index 00000000000..47dac6a9342 --- /dev/null +++ b/searchlib/src/tests/transactionlogstress/DESC @@ -0,0 +1 @@ +This is a stress test of the transaction log server. 
diff --git a/searchlib/src/tests/transactionlogstress/FILES b/searchlib/src/tests/transactionlogstress/FILES new file mode 100644 index 00000000000..68cc8402652 --- /dev/null +++ b/searchlib/src/tests/transactionlogstress/FILES @@ -0,0 +1 @@ +translogstress.cpp diff --git a/searchlib/src/tests/transactionlogstress/translogstress.cpp b/searchlib/src/tests/transactionlogstress/translogstress.cpp new file mode 100644 index 00000000000..1c51c81e633 --- /dev/null +++ b/searchlib/src/tests/transactionlogstress/translogstress.cpp @@ -0,0 +1,875 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +#include <vespa/vespalib/util/buffer.h> +#include <vespa/searchlib/transactionlog/translogserver.h> +#include <vespa/searchlib/transactionlog/translogclient.h> +#include <vespa/searchlib/util/rand48.h> +#include <vespa/searchlib/util/runnable.h> +#include <vespa/searchlib/index/dummyfileheadercontext.h> +#include <iostream> +#include <stdexcept> + +LOG_SETUP("translogstress"); + +using document::ByteBuffer; +using search::Runnable; +using vespalib::Monitor; +using vespalib::MonitorGuard; +using std::shared_ptr; +using vespalib::make_string; +using vespalib::ConstBufferRef; +using search::index::DummyFileHeaderContext; + +namespace search { +namespace transactionlog { + +using ClientSession = TransLogClient::Session; +using Subscriber = TransLogClient::Subscriber; +using Visitor = TransLogClient::Visitor; + +//----------------------------------------------------------------------------- +// BufferGenerator +//----------------------------------------------------------------------------- +class BufferGenerator +{ +private: + Rand48 _rnd; + uint32_t _minStrLen; + uint32_t _maxStrLen; + +public: + BufferGenerator() : + _rnd(), _minStrLen(0), _maxStrLen(0) {} + BufferGenerator(uint32_t minStrLen, uint32_t maxStrLen) : + _rnd(), _minStrLen(minStrLen), 
_maxStrLen(maxStrLen) {} + void setSeed(long seed) { _rnd.srand48(seed); } + ByteBuffer getRandomBuffer(); +}; + +ByteBuffer +BufferGenerator::getRandomBuffer() +{ + size_t len = _minStrLen + _rnd.lrand48() % (_maxStrLen - _minStrLen); + std::string str; + for (size_t i = 0; i < len; ++i) { + char c = 'a' + _rnd.lrand48() % ('z' - 'a' + 1); + str.push_back(c); + } + ByteBuffer buf(str.size() + 1); + buf.putBytes(str.c_str(), str.size() + 1); + buf.flip(); + return buf; +} + + +//----------------------------------------------------------------------------- +// EntryGenerator +//----------------------------------------------------------------------------- +class EntryGenerator +{ +private: + Rand48 _rnd; + long _baseSeed; + BufferGenerator _bufferGenerator; + const std::vector<document::ByteBuffer> * _buffers; + ByteBuffer _lastGeneratedBuffer; + +public: + EntryGenerator(long baseSeed, const BufferGenerator & bufferGenerator) : + _rnd(), _baseSeed(baseSeed), _bufferGenerator(bufferGenerator), _buffers(NULL), + _lastGeneratedBuffer() {} + EntryGenerator(const EntryGenerator & rhs) : + _rnd(), _baseSeed(rhs._baseSeed), _bufferGenerator(rhs._bufferGenerator), + _buffers(rhs._buffers), _lastGeneratedBuffer(rhs._lastGeneratedBuffer) {} + EntryGenerator & operator=(const EntryGenerator & rhs) { + _rnd = rhs._rnd; + _baseSeed = rhs._baseSeed; + _bufferGenerator = rhs._bufferGenerator; + _buffers = rhs._buffers; + return *this; + }; + SerialNum getRandomSerialNum(SerialNum begin, SerialNum end); + Packet::Entry getRandomEntry(SerialNum num); + Rand48 & getRnd() { return _rnd; } + void setBuffers(const std::vector<ByteBuffer> & buffers) { + _buffers = &buffers; + } +}; + +SerialNum +EntryGenerator::getRandomSerialNum(SerialNum begin, SerialNum end) +{ + // return random number in range [begin, end] + assert(begin <= end); + if (begin == end) { + return SerialNum(begin); + } else { + return SerialNum(begin + _rnd.lrand48() % (end - begin + 1)); + } +} + +Packet::Entry 
+EntryGenerator::getRandomEntry(SerialNum num) +{ + _rnd.srand48(_baseSeed + num); + if (_buffers != NULL) { + size_t i = _rnd.lrand48() % _buffers->size(); + const ByteBuffer& buffer = (*_buffers)[i]; + return Packet::Entry(num, 1024, ConstBufferRef(buffer.getBuffer(), buffer.getLength())); + } else { + _bufferGenerator.setSeed(_baseSeed + num); + _lastGeneratedBuffer = _bufferGenerator.getRandomBuffer(); + return Packet::Entry(num, 1024, ConstBufferRef(_lastGeneratedBuffer.getBuffer(), _lastGeneratedBuffer.getLength())); + } +} + + +//----------------------------------------------------------------------------- +// EntryComparator +//----------------------------------------------------------------------------- +class EntryComparator +{ +public: + static bool cmp(const Packet::Entry & lhs, const Packet::Entry & rhs) { + if (lhs.serial() != rhs.serial()) { + return false; + } + if (lhs.type() != rhs.type()) { + return false; + } + if (lhs.data().size() != rhs.data().size()) { + return false; + } + if (memcmp(lhs.data().c_str(), rhs.data().c_str(), lhs.data().size()) != 0) { + return false; + } + return true; + } +}; + + +//----------------------------------------------------------------------------- +// EntryPrinter +//----------------------------------------------------------------------------- +class EntryPrinter +{ +public: + static std::string toStr(const Packet::Entry & e) { + std::stringstream ss; + ss << "Entry(serial(" << e.serial() << "), type(" << e.type() << "), bufferSize(" << + e.data().size() << "), buffer("; + for (size_t i = 0; i < e.data().size() - 1; ++i) { + ss << e.data().c_str()[i]; + } + ss << ")"; + return ss.str(); + } +}; + + +//----------------------------------------------------------------------------- +// PacketPrinter +//----------------------------------------------------------------------------- +class PacketPrinter +{ +public: + static std::string toStr(const Packet & p) { + std::stringstream ss; + ss << "Packet(entries(" << 
p.size() << "), range([" << p.range().from() << ", " << p.range().to() + << "]), bytes(" << p.getHandle().size() << "))"; + return ss.str(); + } +}; + + +//----------------------------------------------------------------------------- +// FeederThread +//----------------------------------------------------------------------------- +class FeederThread : public Runnable +{ +private: + std::string _tlsSpec; + std::string _domain; + TransLogClient _client; + std::unique_ptr<ClientSession> _session; + EntryGenerator _generator; + uint32_t _feedRate; + Packet _packet; + SerialNum _current; + SerialNum _lastCommited; + FastOS_Time _timer; + + void commitPacket(); + bool addEntry(const Packet::Entry & e); + +public: + FeederThread(const std::string & tlsSpec, const std::string & domain, + const EntryGenerator & generator, uint32_t feedRate, size_t packetSize) : + _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec), _session(), + _generator(generator), _feedRate(feedRate), _packet(packetSize), _current(1), _lastCommited(1), _timer() {} + virtual void doRun(); + SerialNumRange getRange() const { return SerialNumRange(1, _lastCommited); } +}; + +void +FeederThread::commitPacket() +{ + _packet.close(); + const vespalib::nbostream& stream = _packet.getHandle(); + if (!_session->commit(ConstBufferRef(stream.c_str(), stream.size()))) { + throw std::runtime_error(vespalib::make_string + ("FeederThread: Failed commiting %s", PacketPrinter::toStr(_packet).c_str())); + } else { + LOG(info, "FeederThread: commited %s", PacketPrinter::toStr(_packet).c_str()); + } + _packet.clear(); + _lastCommited = _current - 1; +} + +bool +FeederThread::addEntry(const Packet::Entry & e) +{ + //LOG(info, "FeederThread: add %s", EntryPrinter::toStr(e).c_str()); + return _packet.add(e); +} + +void +FeederThread::doRun() +{ + _session = _client.open(_domain); + if (_session.get() == NULL) { + throw std::runtime_error(vespalib::make_string("FeederThread: Could not open session to %s", _tlsSpec.c_str())); 
+ } + + while (!_done) { + if (_feedRate != 0) { + _timer.SetNow(); + for (uint32_t i = 0; i < _feedRate; ++i) { + Packet::Entry entry = _generator.getRandomEntry(_current++); + if (!addEntry(entry)) { + commitPacket(); + if (!addEntry(entry)) { + throw std::runtime_error(vespalib::make_string + ("FeederThread: Could not add %s", EntryPrinter::toStr(entry).c_str())); + } + } + } + commitPacket(); + + uint64_t milliSecsUsed = static_cast<uint64_t>(_timer.MilliSecsToNow()); + if (milliSecsUsed < 1000) { + //LOG(info, "FeederThread: sleep %u ms", 1000 - milliSecsUsed); + FastOS_Thread::Sleep(1000 - milliSecsUsed); + } else { + LOG(info, "FeederThread: max throughput"); + } + } else { + Packet::Entry entry = _generator.getRandomEntry(_current++); + if (!addEntry(entry)) { + commitPacket(); + if (!addEntry(entry)) { + throw std::runtime_error(vespalib::make_string + ("FeederThread: Could not add %s", EntryPrinter::toStr(entry).c_str())); + } + } + } + } +} + + +//----------------------------------------------------------------------------- +// Agent +//----------------------------------------------------------------------------- +class Agent : public ClientSession::Callback +{ +protected: + std::string _tlsSpec; + std::string _domain; + TransLogClient _client; + EntryGenerator _generator; + std::string _name; + uint32_t _id; + bool _validate; + +public: + Agent(const std::string & tlsSpec, const std::string & domain, + const EntryGenerator & generator, const std::string & name, uint32_t id, bool validate) : + ClientSession::Callback(), + _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec), + _generator(generator), _name(name), _id(id), _validate(validate) {} + virtual ~Agent() {} + virtual RPC::Result receive(const Packet & packet) = 0; + virtual void inSync() {} + virtual void eof() {} + virtual void failed() {} +}; + + +//----------------------------------------------------------------------------- +// SubscriberAgent 
+//----------------------------------------------------------------------------- +class SubscriberAgent : public Agent +{ +private: + std::unique_ptr<Subscriber> _subscriber; + SerialNum _from; + SerialNum _next; + Monitor _monitor; + + SerialNum getNext() { + MonitorGuard guard(_monitor); + return _next++; + } + +public: + SubscriberAgent(const std::string & tlsSpec, const std::string & domain, + const EntryGenerator & generator, SerialNum from, uint32_t id, bool validate) : + Agent(tlsSpec, domain, generator, "SubscriberAgent", id, validate), + _subscriber(), _from(from), _next(from + 1) {} + virtual ~SubscriberAgent() {} + void start(); + void stop(); + SerialNum getExpectedNext() const { + MonitorGuard guard(_monitor); + return _next; + } + SerialNumRange getRange() const { return SerialNumRange(_from, _next - 1); } + virtual RPC::Result receive(const Packet & packet); +}; + +void +SubscriberAgent::start() +{ + _subscriber = _client.createSubscriber(_domain, *this); + if (_subscriber.get() == NULL) { + throw std::runtime_error(vespalib::make_string + ("SubscriberAgent[%u]: Could not open subscriber to %s", _id, _tlsSpec.c_str())); + } + if (!_subscriber->subscribe(_from)) { + throw std::runtime_error(vespalib::make_string + ("SubscriberAgent[%u]: Could not subscribe to %s from serialnumber %" PRIu64, + _id, _tlsSpec.c_str(), _from)); + } +} + +void +SubscriberAgent::stop() +{ + _subscriber.reset(); +} + +RPC::Result +SubscriberAgent::receive(const Packet & packet) +{ + auto handle = packet.getHandle(); + while (handle.size() > 0) { + Packet::Entry entry; + entry.deserialize(handle); + Packet::Entry expected = _generator.getRandomEntry(getNext()); + if (_validate) { + if (!EntryComparator::cmp(entry, expected)) { + throw std::runtime_error(vespalib::make_string + ("SubscriberAgent[%u]: Got %s, expected %s", _id, + EntryPrinter::toStr(entry).c_str(), + EntryPrinter::toStr(expected).c_str())); + } + } + } + LOG(info, "SubscriberAgent[%u]: received %s", _id, 
PacketPrinter::toStr(packet).c_str()); + + return RPC::OK; +} + + +//----------------------------------------------------------------------------- +// VisitorAgent +//----------------------------------------------------------------------------- +class VisitorAgent : public Agent +{ +private: + enum State { + IDLE, RUNNING, FINISHED + }; + std::unique_ptr<Visitor> _visitor; + SerialNum _from; + SerialNum _to; + SerialNum _next; + bool _running; + State _state; + Monitor _monitor; + + void setState(State newState) { + MonitorGuard guard(_monitor); + //LOG(info, "VisitorAgent[%u]: setState(%s)", _id, newState == IDLE ? "idle" : + // (newState == RUNNING ? "running" : "finished")); + _state = newState; + } + SerialNum getNext(); + +public: + VisitorAgent(const std::string & tlsSpec, const std::string & domain, + const EntryGenerator & generator, uint32_t id, bool validate) : + Agent(tlsSpec, domain, generator, "VisitorAgent", id, validate), + _visitor(), _from(0), _to(0), _next(0), _state(IDLE) {} + virtual ~VisitorAgent() {} + void start(SerialNum from, SerialNum to); + void setIdle(); + bool idle() { + MonitorGuard guard(_monitor); + return _state == IDLE; + } + bool running() { + MonitorGuard guard(_monitor); + return _state == RUNNING; + } + bool finished() { + MonitorGuard guard(_monitor); + return _state == FINISHED; + } + std::string getState() { + MonitorGuard guard(_monitor); + if (_state == IDLE) { + return std::string("idle"); + } else if (_state == FINISHED) { + return std::string("finished"); + } else { + return std::string("running"); + } + } + SerialNum getFrom() { return _from; } + virtual RPC::Result receive(const Packet & packet); + virtual void eof() { + LOG(info, "VisitorAgent[%u]: eof", _id); + setState(FINISHED); + } +}; + +SerialNum +VisitorAgent::getNext() +{ + SerialNum retval = _next++; + if (retval > _to) { + throw std::runtime_error(make_string("VisitorAgent[%u]: SerialNum (%" PRIu64 ") outside " + "expected range <%" PRIu64 ", %" PRIu64 
"]", _id, + retval, _from, _to)); + } + return retval; +} + +void +VisitorAgent::start(SerialNum from, SerialNum to) +{ + assert(idle()); + LOG(info, "VisitorAgent[%u]: start<%" PRIu64 ", %" PRIu64 "]", _id, from, to); + _from = from; + _to = to; + _next = from + 1; + _visitor = _client.createVisitor(_domain, *this); + if (_visitor.get() == NULL) { + throw std::runtime_error(vespalib::make_string + ("VisitorAgent[%u]: Could not open visitor to %s", _id, _tlsSpec.c_str())); + } + setState(RUNNING); + if (!_visitor->visit(_from, _to)) { + throw std::runtime_error(vespalib::make_string + ("VisitorAgent[%u]: Could not visit from %s with range <%" PRIu64 ", %" PRIu64 "]", + _id, _tlsSpec.c_str(), _from, _to)); + } +} + +void +VisitorAgent::setIdle() +{ + assert(finished()); + _visitor.reset(); + setState(IDLE); +} + +RPC::Result +VisitorAgent::receive(const Packet & packet) +{ + auto handle = packet.getHandle(); + while (handle.size() > 0) { + Packet::Entry entry; + entry.deserialize(handle); + Packet::Entry expected = _generator.getRandomEntry(getNext()); + if (_validate) { + if (!EntryComparator::cmp(entry, expected)) { + throw std::runtime_error(vespalib::make_string + ("VisitorAgent[%u]: Got %s, expected %s", _id, + EntryPrinter::toStr(entry).c_str(), + EntryPrinter::toStr(expected).c_str())); + } + } + } + + if (_next > _to + 1) { + throw std::runtime_error(vespalib::make_string + ("VisitorAgent[%u]: Visited range <%" PRIu64 ", %" PRIu64 "], expected " + "range <%" PRIu64 "u, %" PRIu64 "]", _id, + _from, _next - 1, _from, _to)); + } + + return RPC::OK; +} + + +//----------------------------------------------------------------------------- +// ControllerThread +//----------------------------------------------------------------------------- +class ControllerThread : public Runnable +{ +private: + std::string _tlsSpec; + std::string _domain; + TransLogClient _client; + std::unique_ptr<ClientSession> _session; + EntryGenerator _generator; + 
std::vector<std::shared_ptr<SubscriberAgent> > _subscribers; + std::vector<std::shared_ptr<VisitorAgent> > _visitors; + std::vector<std::shared_ptr<VisitorAgent> > _rndVisitors; + uint64_t _visitorInterval; // in milliseconds + uint64_t _pruneInterval; // in milliseconds + FastOS_Time _pruneTimer; + SerialNum _begin; + SerialNum _end; + size_t _count; + + void getStatus(); + void makeRandomVisitorVector(); + +public: + ControllerThread(const std::string & tlsSpec, const std::string & domain, + const EntryGenerator & generator, uint32_t numSubscribers, uint32_t numVisitors, + uint64_t visitorInterval, uint64_t pruneInterval) : + _tlsSpec(tlsSpec), _domain(domain), _client(tlsSpec.c_str()), _session(), + _generator(generator), _subscribers(), _visitors(), _rndVisitors(), _visitorInterval(visitorInterval), + _pruneInterval(pruneInterval), _pruneTimer(), _begin(0), _end(0), _count(0) + { + for (uint32_t i = 0; i < numSubscribers; ++i) { + _subscribers.push_back(std::shared_ptr<SubscriberAgent> + (new SubscriberAgent(tlsSpec, domain, generator, 0, i, true))); + } + + for (uint32_t i = 0; i < numVisitors; ++i) { + _visitors.push_back(std::shared_ptr<VisitorAgent>(new VisitorAgent(tlsSpec, domain, generator, i, true))); + } + } + void startSubscribers(); + uint32_t runningVisitors(); + std::vector<std::shared_ptr<SubscriberAgent> > & getSubscribers() { return _subscribers; } + std::vector<std::shared_ptr<VisitorAgent> > & getVisitors() { return _visitors; } + virtual void doRun(); + +}; + +void +ControllerThread::getStatus() +{ + if (!_session->status(_begin, _end, _count)) { + throw std::runtime_error(vespalib::make_string("ControllerThread: Could not get status from %s", _tlsSpec.c_str())); + } +} + +void +ControllerThread::makeRandomVisitorVector() +{ + std::vector<std::shared_ptr<VisitorAgent> > tmp(_visitors); + _rndVisitors.clear(); + while (tmp.size() > 0) { + size_t i = _generator.getRnd().lrand48() % tmp.size(); + _rndVisitors.push_back(tmp[i]); + 
tmp.erase(tmp.begin() + i); + } +} + +void +ControllerThread::startSubscribers() +{ + for (size_t i = 0; i < _subscribers.size(); ++i) { + _subscribers[i]->start(); + } +} + +void +ControllerThread::doRun() +{ + _session = _client.open(_domain); + if (_session.get() == NULL) { + throw std::runtime_error(vespalib::make_string("ControllerThread: Could not open session to %s", _tlsSpec.c_str())); + } + + _pruneTimer.SetNow(); + while (!_done) { + // set finished visitors as idle + for (size_t i = 0; i < _visitors.size(); ++i) { + if (_visitors[i]->finished()) { + _visitors[i]->setIdle(); + } + } + // find idle visitor + makeRandomVisitorVector(); + for (size_t i = 0; i < _rndVisitors.size(); ++i) { + if (_rndVisitors[i]->idle()) { + getStatus(); + SerialNum from = _generator.getRandomSerialNum(_begin, _end) - 1; + SerialNum to = _generator.getRandomSerialNum(from + 1, _end); + _rndVisitors[i]->start(from, to); + break; + } + } + // prune transaction log server + if (_pruneTimer.MilliSecsToNow() > _pruneInterval) { + getStatus(); + SerialNum safePrune = _end; + for (size_t i = 0; i < _visitors.size(); ++i) { + if (_visitors[i]->running() && _visitors[i]->getFrom() < safePrune) { + safePrune = _visitors[i]->getFrom(); + } + } + for (size_t i = 0; i < _subscribers.size(); ++i) { + SerialNum next = _subscribers[i]->getExpectedNext(); + if (next < safePrune) { + safePrune = next; + } + } + LOG(info, "ControllerThread: status: begin(%" PRIu64 "), end(%" PRIu64 "), count(%zu)", _begin, _end, _count); + LOG(info, "ControllerThread: prune [%" PRIu64 ", %" PRIu64 ">", _begin, safePrune); + if (!_session->erase(safePrune)) { + throw std::runtime_error(vespalib::make_string("ControllerThread: Could not erase up to %" PRIu64, safePrune)); + } + _pruneTimer.SetNow(); + } + FastOS_Thread::Sleep(_visitorInterval); + } +} + + +//----------------------------------------------------------------------------- +// TransLogStress 
+//----------------------------------------------------------------------------- +class TransLogStress : public FastOS_Application +{ +private: + class Config { + public: + uint64_t domainPartSize; + size_t packetSize; + + uint64_t stressTime; + uint32_t feedRate; + uint32_t numSubscribers; + uint32_t numVisitors; + uint64_t visitorInterval; + uint64_t pruneInterval; + + uint32_t numPreGeneratedBuffers; + uint32_t minStrLen; + uint32_t maxStrLen; + long baseSeed; + + Config() : + domainPartSize(0), packetSize(0), stressTime(0), feedRate(0), numSubscribers(0), + numVisitors(0), visitorInterval(0), pruneInterval(0), minStrLen(0), maxStrLen(0), baseSeed(0) {} + }; + + Config _cfg; + + void printConfig(); + void usage(); + +public: + int Main(); +}; + +void +TransLogStress::printConfig() +{ + std::cout << "######## Config ########" << std::endl; + std::cout << "stressTime: " << _cfg.stressTime / 1000 << " s" << std::endl; + std::cout << "feedRate: " << _cfg.feedRate << " per/sec" << std::endl; + std::cout << "numSubscribers: " << _cfg.numSubscribers << std::endl; + std::cout << "numVisitors: " << _cfg.numVisitors << std::endl; + std::cout << "visitorInterval: " << _cfg.visitorInterval << " ms" << std::endl; + std::cout << "pruneInterval: " << _cfg.pruneInterval / 1000 << " s" << std::endl; + std::cout << "numPreGeneratedBuffers: " << _cfg.numPreGeneratedBuffers << std::endl; + std::cout << "minStrLen: " << _cfg.minStrLen << std::endl; + std::cout << "maxStrLen: " << _cfg.maxStrLen << std::endl; + std::cout << "baseSeed: " << _cfg.baseSeed << std::endl; + std::cout << "domainPartSize: " << _cfg.domainPartSize << " bytes" << std::endl; + std::cout << "packetSize: " << _cfg.packetSize << " bytes" << std::endl; +} + +void +TransLogStress::usage() +{ + std::cout << "usage: translogstress [-t stressTime(s)] [-f feedRate] [-s numSubscribers]" << std::endl; + std::cout << " [-v numVisitors] [-c visitorInterval(ms)] [-e pruneInterval(s)]" << std::endl; + std::cout << " [-g 
numPreGeneratedBuffers] [-i minStrLen] [-a maxStrLen] [-b baseSeed]" << std::endl; + std::cout << " [-d domainPartSize] [-p packetSize]" << std::endl; +} + +int +TransLogStress::Main() +{ + std::string tlsSpec("tcp/localhost:17897"); + std::string domain("translogstress"); + _cfg.domainPartSize = 8000000; // ~8MB + _cfg.packetSize = 0x10000; + + _cfg.stressTime = 1000 * 60; + _cfg.feedRate = 10000; + _cfg.numSubscribers = 1; + _cfg.numVisitors = 1; + _cfg.visitorInterval = 1000 * 1; + _cfg.pruneInterval = 1000 * 12; + + _cfg.numPreGeneratedBuffers = 0; + _cfg.minStrLen = 40; + _cfg.maxStrLen = 80; + _cfg.baseSeed = 100; + + uint64_t sleepTime = 4000; + + int idx = 1; + char opt; + const char * arg; + bool optError = false; + while ((opt = GetOpt("d:p:t:f:s:v:c:e:g:i:a:b:h", arg, idx)) != -1) { + switch (opt) { + case 'd': + _cfg.domainPartSize = atol(arg); + break; + case 'p': + _cfg.packetSize = atol(arg); + break; + case 't': + _cfg.stressTime = 1000 * atol(arg); + break; + case 'f': + _cfg.feedRate = atoi(arg); + break; + case 's': + _cfg.numSubscribers = atoi(arg); + break; + case 'v': + _cfg.numVisitors = atoi(arg); + break; + case 'c': + _cfg.visitorInterval = atol(arg); + break; + case 'e': + _cfg.pruneInterval = 1000 * atol(arg); + break; + case 'g': + _cfg.numPreGeneratedBuffers = atoi(arg); + break; + case 'i': + _cfg.minStrLen = atoi(arg); + break; + case 'a': + _cfg.maxStrLen = atoi(arg); + break; + case 'b': + _cfg.baseSeed = atol(arg); + break; + case 'h': + usage(); + return -1; + default: + optError = true; + break; + } + } + + printConfig(); + FastOS_Thread::Sleep(sleepTime); + + if (_argc != idx || optError) { + usage(); + return -1; + } + + // start transaction log server + DummyFileHeaderContext fileHeaderContext; + TransLogServer tls("server", 17897, ".", fileHeaderContext, _cfg.domainPartSize); + TransLogClient client(tlsSpec); + client.create(domain); + + FastOS_ThreadPool threadPool(256000); + + BufferGenerator 
bufferGenerator(_cfg.minStrLen, _cfg.maxStrLen); + bufferGenerator.setSeed(_cfg.baseSeed); + std::vector<ByteBuffer> buffers; + for (uint32_t i = 0; i < _cfg.numPreGeneratedBuffers; ++i) { + buffers.push_back(bufferGenerator.getRandomBuffer()); + } + EntryGenerator generator(_cfg.baseSeed, bufferGenerator); + if (buffers.size() > 0) { + generator.setBuffers(buffers); + } + + + // start feeder and controller + FeederThread feeder(tlsSpec, domain, generator, _cfg.feedRate, _cfg.packetSize); + threadPool.NewThread(&feeder); + + FastOS_Thread::Sleep(sleepTime); + + ControllerThread controller(tlsSpec, domain, generator, _cfg.numSubscribers, _cfg.numVisitors, + _cfg.visitorInterval, _cfg.pruneInterval); + threadPool.NewThread(&controller); + + // start subscribers + controller.startSubscribers(); + + // stop feeder and controller + FastOS_Thread::Sleep(_cfg.stressTime); + printConfig(); + LOG(info, "Stop feeder..."); + feeder.stop(); + feeder.join(); + std::cout << "<feeder>" << std::endl; + std::cout << " <from>" << feeder.getRange().from() << "</from>" << std::endl; + std::cout << " <to>" << feeder.getRange().to() << "</to>" << std::endl; + std::cout << " <rate>" << 1000 * (feeder.getRange().to() - feeder.getRange().from()) / (sleepTime + _cfg.stressTime) + << "</rate>" << std::endl; + std::cout << "</feeder>" << std::endl; + + LOG(info, "Stop controller..."); + controller.stop(); + controller.join(); + + FastOS_Thread::Sleep(sleepTime); + std::vector<std::shared_ptr<VisitorAgent> > & visitors = controller.getVisitors(); + for (size_t i = 0; i < visitors.size(); ++i) { + std::cout << "<visitor id='" << i << "'>" << std::endl; + std::cout << "<state>" << visitors[i]->getState() << "</state>" << std::endl; + std::cout << "</visitor>" << std::endl; + } + + // stop subscribers + LOG(info, "Stop subscribers..."); + std::vector<std::shared_ptr<SubscriberAgent> > & subscribers = controller.getSubscribers(); + for (size_t i = 0; i < subscribers.size(); ++i) { + 
subscribers[i]->stop(); + std::cout << "<subscriber id='" << i << "'>" << std::endl; + std::cout << " <from>" << subscribers[i]->getRange().from() << "</from>" << std::endl; + std::cout << " <to>" << subscribers[i]->getRange().to() << "</to>" << std::endl; + std::cout << "</subscriber>" << std::endl; + } + + threadPool.Close(); + + return 0; +} + +} +} + +int main(int argc, char ** argv) +{ + search::transactionlog::TransLogStress myApp; + return myApp.Entry(argc, argv); +} diff --git a/searchlib/src/tests/true/.gitignore b/searchlib/src/tests/true/.gitignore new file mode 100644 index 00000000000..9ab22a26cdc --- /dev/null +++ b/searchlib/src/tests/true/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +true_test +searchlib_true_test_app diff --git a/searchlib/src/tests/true/CMakeLists.txt b/searchlib/src/tests/true/CMakeLists.txt new file mode 100644 index 00000000000..f7c80db8f92 --- /dev/null +++ b/searchlib/src/tests/true/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_true_test_app + SOURCES + true.cpp + DEPENDS +) +vespa_add_test(NAME searchlib_true_test_app COMMAND searchlib_true_test_app) diff --git a/searchlib/src/tests/true/DESC b/searchlib/src/tests/true/DESC new file mode 100644 index 00000000000..55b708ce96a --- /dev/null +++ b/searchlib/src/tests/true/DESC @@ -0,0 +1 @@ +A very simple test. A good starting point for writing new tests. diff --git a/searchlib/src/tests/true/FILES b/searchlib/src/tests/true/FILES new file mode 100644 index 00000000000..7d324c5824d --- /dev/null +++ b/searchlib/src/tests/true/FILES @@ -0,0 +1 @@ +true.cpp diff --git a/searchlib/src/tests/true/true.cpp b/searchlib/src/tests/true/true.cpp new file mode 100644 index 00000000000..d052f3ca4e4 --- /dev/null +++ b/searchlib/src/tests/true/true.cpp @@ -0,0 +1,15 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. 
See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("true_test"); +#include <vespa/vespalib/testkit/testapp.h> + +TEST_SETUP(Test) + +int +Test::Main() +{ + TEST_INIT("true_test"); + EXPECT_TRUE(true); + TEST_DONE(); +} diff --git a/searchlib/src/tests/url/.gitignore b/searchlib/src/tests/url/.gitignore new file mode 100644 index 00000000000..6d6a20d8270 --- /dev/null +++ b/searchlib/src/tests/url/.gitignore @@ -0,0 +1,7 @@ +*.core +.depend +Makefile +core +core.* +testurl +searchlib_testurl_app diff --git a/searchlib/src/tests/url/CMakeLists.txt b/searchlib/src/tests/url/CMakeLists.txt new file mode 100644 index 00000000000..aa52f3d8374 --- /dev/null +++ b/searchlib/src/tests/url/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_testurl_app + SOURCES + testurl.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_testurl_app COMMAND sh dotest.sh) diff --git a/searchlib/src/tests/url/dotest.sh b/searchlib/src/tests/url/dotest.sh new file mode 100755 index 00000000000..f7ac1fd1e69 --- /dev/null +++ b/searchlib/src/tests/url/dotest.sh @@ -0,0 +1,13 @@ +#!/bin/sh +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +# Run test +echo "Testing the FastS_URL class..." +$VALGRIND ./searchlib_testurl_app +if [ $? -eq 0 ]; then + echo "SUCCESS: Test on FastS_URL passed!" +else + echo "FAILURE: Test on FastS_URL failed!" + exit 1 +fi +exit 0 diff --git a/searchlib/src/tests/url/testurl.cpp b/searchlib/src/tests/url/testurl.cpp new file mode 100644 index 00000000000..4ed28453890 --- /dev/null +++ b/searchlib/src/tests/url/testurl.cpp @@ -0,0 +1,750 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+// Copyright (C) 2000-2003 Fast Search & Transfer ASA +// Copyright (C) 2003 Overture Services Norway AS + +#include <vespa/fastos/fastos.h> +#include <vespa/searchlib/util/url.h> + + +// needed to resolve external symbol from httpd.h on AIX +void FastS_block_usr2() {} + + +static search::util::URL GlobalURL; + +static bool +CheckString(const char *name, + const unsigned char *test1, + const unsigned char *test2) +{ + assert(test1 != NULL); + assert(test2 != NULL); + + if (strcmp((const char*)test1, (const char*)test2)!=0) { + printf("FAILED: %s: '%s' != '%s'!\n", name, test1, test2); + GlobalURL.Dump(); + + return false; + } + return true; +} + +static bool +CheckInt(const char *name, + int test1, + int test2) +{ + if (test1 != test2) { + printf("FAILED: %s: %d != %d!\n", name, test1, test2); + GlobalURL.Dump(); + + return false; + } + return true; +} + +const char * +GetTokenString(search::util::URL &url) +{ + static char tokenbuffer[1000]; + + const unsigned char *token; + search::util::URL::URL_CONTEXT ctx; + + tokenbuffer[0] = '\0'; + + while ((token = url.GetToken(ctx)) != NULL) { + if (tokenbuffer[0] != '\0') + strcat(tokenbuffer, ","); + strcat(tokenbuffer, url.ContextName(ctx)); + strcat(tokenbuffer, ":"); + strcat(tokenbuffer, (const char*)token); + } + + return tokenbuffer; +} + + +static bool +CheckURL(const char *url, + const char *scheme, + const char *host, + const char *domain, + const char *siteowner, + const char *tld, + const char *maintld, + const char */* tldregion */, + const char *port, + const char *path, + int pathdepth, + const char *filename, + const char *extension, + const char *params, + const char *query, + const char *fragment, + const char *address, + const char *tokens, + int verbose=0) +{ + if (verbose>0) + printf("Checking with URL: '%s'\n", url); + + GlobalURL.SetURL((const unsigned char *)url); + + if (verbose>0) + GlobalURL.Dump(); + // GlobalURL.Dump(); + + return + CheckString("URL", (const unsigned char *)url, 
GlobalURL.GetURL()) && + CheckString("urltype", (const unsigned char *)scheme, + GlobalURL.GetScheme()) && + CheckString("host", (const unsigned char *)host, + GlobalURL.GetHost()) && + CheckString("domain", (const unsigned char *)domain, + GlobalURL.GetDomain()) && + CheckString("siteowner", (const unsigned char *)siteowner, + GlobalURL.GetSiteOwner()) && + CheckString("tld", (const unsigned char *)tld, + GlobalURL.GetTLD()) && + CheckString("maintld", (const unsigned char *)maintld, + GlobalURL.GetMainTLD()) && +#if 0 + CheckString("tldregion", (const unsigned char *)tldregion, + GlobalURL.GetTLDRegion()) && +#endif + CheckString("port", (const unsigned char *)port, + GlobalURL.GetPort()) && + CheckString("path", (const unsigned char *)path, + GlobalURL.GetPath()) && + CheckInt("pathdepth", pathdepth, + GlobalURL.GetPathDepth()) && + CheckString("filename", (const unsigned char *)filename, + GlobalURL.GetFilename()) && + CheckString("extension", (const unsigned char *)extension, + GlobalURL.GetExtension()) && + CheckString("params", (const unsigned char *)params, + GlobalURL.GetParams()) && + CheckString("query", (const unsigned char *)query, + GlobalURL.GetQuery()) && + CheckString("fragment", (const unsigned char *)fragment, + GlobalURL.GetFragment()) && + CheckString("address", (const unsigned char *)address, + GlobalURL.GetAddress()) && + CheckString("TOKENS", (const unsigned char *)tokens, + (const unsigned char*)GetTokenString(GlobalURL)); +} + + +int main(int, char **) +{ + bool success = true; + + success = success && + CheckURL("", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "");// Tokenstring + success = success && + CheckURL(".", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // 
maintld + "", // tldregion + "", // port + ".", // path + 1, // pathdepth + ".", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "");// Tokenstring + success = success && + CheckURL("..", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "..", // path + 1, // pathdepth + "..", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "");// Tokenstring + success = success && + CheckURL("CHANGES_2.0a", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "CHANGES_2.0a", // path + 1, // pathdepth + "CHANGES_2.0a", // filename + "0a", // extension + "", // params + "", // query + "", // fragment + "", // address + "FILENAME:CHANGES_2,EXTENSION:0a");// Tokenstring + success = success && + CheckURL("patches/patch-cvs-1.9.10", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "patches/patch-cvs-1.9.10", // path + 2, // pathdepth + "patch-cvs-1.9.10", // filename + "10", // extension + "", // params + "", // query + "", // fragment + "", // address + "PATH:patches,FILENAME:patch-cvs-1,FILENAME:9,EXTENSION:10");// Tokenstring + success = success && + CheckURL("http:patches/patch-ssh-1.2.14", // URL + "http", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "patches/patch-ssh-1.2.14", // path + 2, // pathdepth + "patch-ssh-1.2.14", // filename + "14", // extension + "", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,PATH:patches,FILENAME:patch-ssh-1,FILENAME:2,EXTENSION:14");// Tokenstring + success = success && + CheckURL("http://180.uninett.no/servlet/online.Bransje", // URL + "http", // scheme + "180.uninett.no", // host + 
"uninett.no", // domain + "uninett", // siteowner + "no", // tld + "no", // maintld + "europe", // tldregion + "", // port + "/servlet/online.Bransje", // path + 2, // pathdepth + "online.Bransje", // filename + "Bransje", // extension + "", // query + "", // params + "", // fragment + "", // address + "SCHEME:http,HOST:180,DOMAIN:uninett,MAINTLD:no,PATH:servlet,FILENAME:online,EXTENSION:Bransje");// Tokenstring + success = success && + CheckURL("Bilder.gif/rule11.GIF", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "Bilder.gif/rule11.GIF", // path + 2, // pathdepth + "rule11.GIF", // filename + "GIF", // extension + "", // params + "", // query + "", // fragment + "", // address + "PATH:Bilder,PATH:gif,FILENAME:rule11,EXTENSION:GIF");// Tokenstring + success = success && + CheckURL("bilder/meny/Buer/bue_o.GIF", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "bilder/meny/Buer/bue_o.GIF", // path + 4, // pathdepth + "bue_o.GIF", // filename + "GIF", // extension + "", // params + "", // query + "", // fragment + "", // address + "PATH:bilder,PATH:meny,PATH:Buer,FILENAME:bue_o,EXTENSION:GIF");// Tokenstring + success = success && + CheckURL("./fakadm/grafikk/indus_bilde.JPG", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "./fakadm/grafikk/indus_bilde.JPG", // path + 4, // pathdepth + "indus_bilde.JPG", // filename + "JPG", // extension + "", // params + "", // query + "", // fragment + "", // address + "PATH:fakadm,PATH:grafikk,FILENAME:indus_bilde,EXTENSION:JPG");// Tokenstring + success = success && + CheckURL("linux-2.0.35.tar.bz2", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "linux-2.0.35.tar.bz2", // path 
+ 1, // pathdepth + "linux-2.0.35.tar.bz2", // filename + "bz2", // extension + "", // params + "", // query + "", // fragment + "", // address + "FILENAME:linux-2,FILENAME:0,FILENAME:35,FILENAME:tar,EXTENSION:bz2");// Tokenstring + success = success && + CheckURL("http://www.underdusken.no", // URL + "http", // scheme + "www.underdusken.no", // host + "underdusken.no", // domain + "underdusken", // siteowner + "no", // tld + "no", // maintld + "europe", // tldregion + "", // port + "", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring + success = success && + CheckURL("http://www.underdusken.no/?page=dusker/html/0008/Uholdbar.html", // URL + "http", // scheme + "www.underdusken.no", // host + "underdusken.no", // domain + "underdusken", // siteowner + "no", // tld + "no", // maintld + "europe", // tldregion + "", // port + "/", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "page=dusker/html/0008/Uholdbar.html", // query + "", // fragment + "", // address + "SCHEME:http,HOST:www,DOMAIN:underdusken,MAINTLD:no,QUERY:page,QUERY:dusker,QUERY:html,QUERY:0008,QUERY:Uholdbar,QUERY:html");// Tokenstring + success = success && + CheckURL("http://www.uni-karlsruhe.de/~ig25/ssh-faq/", // URL + "http", // scheme + "www.uni-karlsruhe.de", // host + "uni-karlsruhe.de", // domain + "uni-karlsruhe", // siteowner + "de", // tld + "de", // maintld + "", // tldregion + "", // port + "/~ig25/ssh-faq/", // path + 2, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,HOST:www,DOMAIN:uni-karlsruhe,MAINTLD:de,PATH:ig25,PATH:ssh-faq");// Tokenstring + success = success && + CheckURL("java/", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + 
"java/", // path + 1, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "PATH:java");// Tokenstring + success = success && + CheckURL("javascript:OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // URL + "javascript", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "OpenWindow('/survey/faq.html', 'Issues', 'width=635,height=400,toolbars=no,location=no,menubar=yes,status=no,resizable=yes,scrollbars=yes", // address + "SCHEME:javascript,ADDRESS:OpenWindow,ADDRESS:survey,ADDRESS:faq,ADDRESS:html,ADDRESS:Issues,ADDRESS:width,ADDRESS:635,ADDRESS:height,ADDRESS:400,ADDRESS:toolbars,ADDRESS:no,ADDRESS:location,ADDRESS:no,ADDRESS:menubar,ADDRESS:yes,ADDRESS:status,ADDRESS:no,ADDRESS:resizable,ADDRESS:yes,ADDRESS:scrollbars,ADDRESS:yes");// Tokenstring + success = success && + CheckURL("mailto: dmf-post@medisin.ntnu.no", // URL + "mailto", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + " dmf-post@medisin.ntnu.no", // address + "SCHEME:mailto,ADDRESS:dmf-post,ADDRESS:medisin,ADDRESS:ntnu,ADDRESS:no");// Tokenstring + success = success && + CheckURL("mailto:%20Harald%20Danielsen@energy.sintef.no", // URL + "mailto", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "%20Harald%20Danielsen@energy.sintef.no", // address + 
"SCHEME:mailto,ADDRESS:20Harald,ADDRESS:20Danielsen,ADDRESS:energy,ADDRESS:sintef,ADDRESS:no");// Tokenstring + success = success && + CheckURL("www.underdusken.no", // URL + "", // scheme + "www.underdusken.no", // host + "underdusken.no", // domain + "underdusken", // siteowner + "no", // tld + "no", // maintld + "europe", // tldregion + "", // port + "", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "HOST:www,DOMAIN:underdusken,MAINTLD:no");// Tokenstring + success = success && + CheckURL("~janie/", // URL + "", // scheme + "", // host + "", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "~janie/", // path + 1, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "PATH:janie");// Tokenstring + success = success && + CheckURL("https://dette.er.en:2020/~janie/index.htm?param1=q¶m2=r", // URL + "https", // scheme + "dette.er.en", // host + "er.en", // domain + "er", // siteowner + "en", // tld + "en", // maintld + "", // tldregion + "2020", // port + "/~janie/index.htm", // path + 2, // pathdepth + "index.htm", // filename + "htm", // extension + "", // params + "param1=q¶m2=r", // query + "", // fragment + "", // address + "SCHEME:https,HOST:dette,DOMAIN:er,MAINTLD:en,PORT:2020,PATH:janie,FILENAME:index,EXTENSION:htm,QUERY:param1,QUERY:q,QUERY:param2,QUERY:r");// Tokenstring +#if 0 + success = success && + CheckURL("http://www.sony.co.uk/", // URL + "http", // scheme + "www.sony.co.uk", // host + "sony.co.uk", // domain + "sony", // siteowner + "co.uk", // tld + "uk", // maintld + "unitedkingdom", // tldregion + "", // port + "/", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,HOST:www,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring + success = success && + 
CheckURL("http://sony.co.uk/", // URL + "http", // scheme + "sony.co.uk", // host + "sony.co.uk", // domain + "sony", // siteowner + "co.uk", // tld + "uk", // maintld + "unitedkingdom", // tldregion + "", // port + "/", // path + 0, // pathdepth + "", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,DOMAIN:sony,DOMAIN:co,MAINTLD:uk");// Tokenstring +#endif + // Test fixes for bugs reported in cvs commit: + // tegge 2000/10/27 22:42:59 CEST + success = success && + CheckURL("http://somehost.somedomain/this!is!it/boom", // URL + "http", // scheme + "somehost.somedomain", // host + "somehost.somedomain", // domain + "somehost", // siteowner + "somedomain", // tld + "somedomain", // maintld + "", // tldregion + "", // port + "/this!is!it/boom", // path + 2, // pathdepth + "boom", // filename + "", // extension + "", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,DOMAIN:somehost,MAINTLD:somedomain,PATH:this,PATH:is,PATH:it,FILENAME:boom");// Tokenstring + success = success && + CheckURL("http://test.com/index.htm?p1=q%20test&p2=r%10d", // URL + "http", // scheme + "test.com", // host + "test.com", // domain + "test", // siteowner + "com", // tld + "com", // maintld + "northamerica", // tldregion + "", // port + "/index.htm", // path + 1, // pathdepth + "index.htm", // filename + "htm", // extension + "", // params + "p1=q%20test&p2=r%10d", // query + "", // fragment + "", // address + "SCHEME:http,DOMAIN:test,MAINTLD:com,FILENAME:index,EXTENSION:htm,QUERY:p1,QUERY:q,QUERY:20test,QUERY:p2,QUERY:r,QUERY:10d");// Tokenstring + + // Test bugs found 2001/06/25 + success = success && + CheckURL("http://arthur/qm/images/qm1.gif", // URL + "http", // scheme + "arthur", // host + "arthur", // domain + "", // siteowner + "", // tld + "", // maintld + "", // tldregion + "", // port + "/qm/images/qm1.gif", // path + 3, // pathdepth + "qm1.gif", // filename + "gif", // extension + "", // 
params + "", // query + "", // address + "", // fragment + "SCHEME:http,MAINTLD:arthur,PATH:qm,PATH:images,FILENAME:qm1,EXTENSION:gif");// Tokenstring + + // Test Orjan's hypothesis 2003/02/17 + success = success && + CheckURL("http://foo.com/ui;.gif", // URL + "http", // scheme + "foo.com", // host + "foo.com", // domain + "foo", // siteowner + "com", // tld + "com", // maintld + "northamerica", // tldregion + "", // port + "/ui;.gif", // path + 1, // pathdepth + "ui", // filename + "", // extension + ".gif", // params + "", // query + "", // address + "", // fragment + "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring + + // Test Orjan's hypothesis 2003/02/17 + success = success && + CheckURL("http://foo.com/ui;.gif", // URL + "http", // scheme + "foo.com", // host + "foo.com", // domain + "foo", // siteowner + "com", // tld + "com", // maintld + "northamerica", // tldregion + "", // port + "/ui;.gif", // path + 1, // pathdepth + "ui", // filename + "", // extension + ".gif", // params + "", // query + "", // address + "", // fragment + "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:gif");// Tokenstring + + // Verify params handling + success = success && + CheckURL("http://foo.com/ui;par1=1/par2=2", // URL + "http", // scheme + "foo.com", // host + "foo.com", // domain + "foo", // siteowner + "com", // tld + "com", // maintld + "northamerica", // tldregion + "", // port + "/ui;par1=1/par2=2", // path + 1, // pathdepth + "ui", // filename + "", // extension + "par1=1/par2=2", // params + "", // query + "", // fragment + "", // address + "SCHEME:http,DOMAIN:foo,MAINTLD:com,FILENAME:ui,PARAMS:par1,PARAMS:1,PARAMS:par2,PARAMS:2");// Tokenstring + + // Verify synthetic url + success = success && + CheckURL("http://www.foo.no:8080/path/filename.ext;par1=hello/par2=world?query=test#fragment", // URL + "http", // scheme + "www.foo.no", // host + "foo.no", // domain + "foo", // siteowner + "no", // tld + "no", // maintld + "europe", // 
tldregion + "8080", // port + "/path/filename.ext;par1=hello/par2=world", // path + 2, // pathdepth + "filename.ext", // filename + "ext", // extension + "par1=hello/par2=world", // params + "query=test", // query + "fragment", // fragment + "", // address + "SCHEME:http,HOST:www,DOMAIN:foo,MAINTLD:no,PORT:8080,PATH:path,FILENAME:filename,EXTENSION:ext,PARAMS:par1,PARAMS:hello,PARAMS:par2,PARAMS:world,QUERY:query,QUERY:test,FRAGMENT:fragment");// Tokenstring + + // '&' should be allowed in path according to RFC 1738, 2068 og 2396 + success = success && + CheckURL("http://canonsarang.com/zboard/data/gallery04/HU&BANG.jpg", // URL + "http", // scheme + "canonsarang.com", // host + "canonsarang.com", // domain + "canonsarang", // siteowner + "com", // tld + "com", // maintld + "northamerica", // tldregion + "", // port + "/zboard/data/gallery04/HU&BANG.jpg", // path + 4, // pathdepth + "HU&BANG.jpg", // filename + "jpg", // extension + "", // params + "", // query + "", // address + "", // fragment + "SCHEME:http,DOMAIN:canonsarang,MAINTLD:com,PATH:zboard,PATH:data,PATH:gallery04,FILENAME:HU,FILENAME:BANG,EXTENSION:jpg");// Tokenstring + + return !success; +} diff --git a/searchlib/src/tests/util/.gitignore b/searchlib/src/tests/util/.gitignore new file mode 100644 index 00000000000..a0da2dd3333 --- /dev/null +++ b/searchlib/src/tests/util/.gitignore @@ -0,0 +1,4 @@ +.depend +Makefile +*_test +searchlib_rawbuf_test_app diff --git a/searchlib/src/tests/util/CMakeLists.txt b/searchlib/src/tests/util/CMakeLists.txt new file mode 100644 index 00000000000..43fec57c182 --- /dev/null +++ b/searchlib/src/tests/util/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_rawbuf_test_app + SOURCES + rawbuf_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_rawbuf_test_app COMMAND searchlib_rawbuf_test_app) diff --git a/searchlib/src/tests/util/bufferwriter/.gitignore b/searchlib/src/tests/util/bufferwriter/.gitignore new file mode 100644 index 00000000000..171db45593c --- /dev/null +++ b/searchlib/src/tests/util/bufferwriter/.gitignore @@ -0,0 +1,3 @@ +bufferwriter_bm +searchlib_bufferwriter_test_app +searchlib_bufferwriter_bm_app diff --git a/searchlib/src/tests/util/bufferwriter/CMakeLists.txt b/searchlib/src/tests/util/bufferwriter/CMakeLists.txt new file mode 100644 index 00000000000..a57749ff853 --- /dev/null +++ b/searchlib/src/tests/util/bufferwriter/CMakeLists.txt @@ -0,0 +1,16 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_bufferwriter_test_app + SOURCES + bufferwriter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bufferwriter_test_app COMMAND searchlib_bufferwriter_test_app) +vespa_add_executable(searchlib_bufferwriter_bm_app + SOURCES + work.cpp + bm.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_bufferwriter_bm_app COMMAND searchlib_bufferwriter_bm_app BENCHMARK) diff --git a/searchlib/src/tests/util/bufferwriter/bm.cpp b/searchlib/src/tests/util/bufferwriter/bm.cpp new file mode 100644 index 00000000000..228ce4adcdc --- /dev/null +++ b/searchlib/src/tests/util/bufferwriter/bm.cpp @@ -0,0 +1,95 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bufferwriter_bm"); +#include <vespa/vespalib/testkit/testapp.h> +#include <iostream> +#include "work.h" +#include <vespa/searchlib/util/drainingbufferwriter.h> + +using search::DrainingBufferWriter; + +double getTime() { return fastos::TimeStamp(fastos::ClockSystem::now()).sec(); } + +constexpr size_t million = 1000000; + +enum class WorkFuncDispatch +{ + DIRECT, + LAMBDA, + FUNCTOR, + FUNCTOR2 +}; + + +template <typename T> +void +callWork(size_t size, WorkFuncDispatch dispatch) +{ + std::vector<T> foo; + DrainingBufferWriter writer; + foo.resize(size); + std::cout << "will write " << size << " elements of size " << sizeof(T) << + std::endl; + double before = getTime(); + switch (dispatch) { + case WorkFuncDispatch::DIRECT: + work(foo, writer); + break; + case WorkFuncDispatch::LAMBDA: + workLambda(foo, writer); + break; + case WorkFuncDispatch::FUNCTOR: + workFunctor(foo, writer); + break; + case WorkFuncDispatch::FUNCTOR2: + workFunctor2(foo, writer); + break; + default: + abort(); + } + double after = getTime(); + double delta = (after - before); + double writeSpeed = writer.getBytesWritten() / delta; + EXPECT_GREATER(writeSpeed, 1000); + std::cout << "written is " << writer.getBytesWritten() << std::endl; + std::cout << "time used is " << (delta * 1000.0) << " ms" << std::endl; + std::cout << "write speed is " << writeSpeed << std::endl; +} + + +void +callWorks(WorkFuncDispatch dispatch) +{ + callWork<char>(million * 1000, dispatch); + callWork<short>(million * 500, dispatch); + callWork<int>(million * 250, dispatch); + callWork<long>(million * 125, dispatch); +} + +TEST("simple bufferwriter speed test") +{ + callWorks(WorkFuncDispatch::DIRECT); +} + +TEST("lambda func bufferwriter speed test") +{ + callWorks(WorkFuncDispatch::LAMBDA); +} + +TEST("functor bufferwriter speed test") +{ + callWorks(WorkFuncDispatch::FUNCTOR); +} + +TEST("functor2 bufferwriter speed test") +{ + 
callWorks(WorkFuncDispatch::FUNCTOR2); +} + + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp b/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp new file mode 100644 index 00000000000..95e4db95e03 --- /dev/null +++ b/searchlib/src/tests/util/bufferwriter/bufferwriter_test.cpp @@ -0,0 +1,158 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("bufferwriter_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/util/bufferwriter.h> +#include <vespa/searchlib/util/drainingbufferwriter.h> +#include <vespa/searchlib/util/rand48.h> + +namespace search +{ + +namespace +{ + +class StoreBufferWriter : public BufferWriter +{ + std::vector<char> _buf; + std::vector<std::unique_ptr<std::vector<char> > > _bufs; + size_t _bytesWritten; + uint32_t _incompleteBuffers; +public: + static constexpr size_t BUFFER_SIZE = 262144; + + StoreBufferWriter(); + + virtual ~StoreBufferWriter() = default; + + virtual void flush() override; + + size_t getBytesWritten() const { return _bytesWritten; } + + std::vector<char> getSingleBuffer() const; + +}; + + +StoreBufferWriter::StoreBufferWriter() + : BufferWriter(), + _buf(), + _bytesWritten(0), + _incompleteBuffers(0) +{ + _buf.resize(BUFFER_SIZE); + setup(&_buf[0], _buf.size()); +} + + +void +StoreBufferWriter::flush() { + assert(_incompleteBuffers == 0); // all previous buffers must have been full + size_t nowLen = usedLen(); + if (nowLen != _buf.size()) { + // buffer is not full, only allowed for last buffer + ++_incompleteBuffers; + } + if (nowLen == 0) { + return; // empty buffer + } + _bufs.emplace_back(std::make_unique<std::vector<char>>()); + _bufs.back()->resize(BUFFER_SIZE); + _buf.resize(nowLen); + _bufs.back()->swap(_buf); + _bytesWritten += nowLen; + 
setup(&_buf[0], _buf.size()); +} + + +std::vector<char> +StoreBufferWriter::getSingleBuffer() const +{ + std::vector<char> res; + size_t needSize = 0; + for (const auto &buf : _bufs) { + needSize += buf->size(); + } + res.reserve(needSize); + for (const auto &buf : _bufs) { + res.insert(res.end(), buf->cbegin(), buf->cend()); + } + return std::move(res); +} + +} + + +TEST("Test that bufferwriter works with no writes") +{ + DrainingBufferWriter writer; + writer.flush(); + EXPECT_EQUAL(0u, writer.getBytesWritten()); +} + +TEST("Test that bufferwriter works with single byte write") +{ + DrainingBufferWriter writer; + char a = 4; + writer.write(&a, sizeof(a)); + writer.flush(); + EXPECT_EQUAL(1u, writer.getBytesWritten()); +} + +TEST("Test that bufferwriter works with multiple writes") +{ + DrainingBufferWriter writer; + char a = 4; + int16_t b = 5; + int32_t c = 6; + writer.write(&a, sizeof(a)); + writer.write(&b, sizeof(b)); + writer.write(&c, sizeof(c)); + writer.flush(); + EXPECT_EQUAL(7u, writer.getBytesWritten()); +} + + +TEST("Test that bufferwriter works with long writes") +{ + std::vector<char> a; + const size_t mysize = 10000000; + const size_t drainerBufferSize = DrainingBufferWriter::BUFFER_SIZE; + EXPECT_GREATER(mysize, drainerBufferSize); + a.resize(mysize); + DrainingBufferWriter writer; + writer.write(&a[0], a.size()); + writer.flush(); + EXPECT_EQUAL(a.size(), writer.getBytesWritten()); +} + + +TEST("Test that bufferwriter passes on written data") +{ + std::vector<int> a; + const size_t mysize = 25000000; + const size_t drainerBufferSize = DrainingBufferWriter::BUFFER_SIZE; + EXPECT_GREATER(mysize * sizeof(int), drainerBufferSize); + a.reserve(mysize); + search::Rand48 rnd; + for (uint32_t i = 0; i < mysize; ++i) { + a.emplace_back(rnd.lrand48()); + } + StoreBufferWriter writer; + writer.write(&a[0], a.size() * sizeof(int)); + writer.flush(); + EXPECT_EQUAL(a.size() * sizeof(int), writer.getBytesWritten()); + std::vector<char> written = 
writer.getSingleBuffer(); + EXPECT_EQUAL(a.size() * sizeof(int), written.size()); + EXPECT_TRUE(memcmp(&a[0], &written[0], written.size()) == 0); +} + +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/util/bufferwriter/work.cpp b/searchlib/src/tests/util/bufferwriter/work.cpp new file mode 100644 index 00000000000..9835b28970e --- /dev/null +++ b/searchlib/src/tests/util/bufferwriter/work.cpp @@ -0,0 +1,93 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include "work.h" +#include <vespa/searchlib/util/bufferwriter.h> + +namespace search +{ + +template <class T> +class WriteFunctor +{ + BufferWriter &_writer; +public: + WriteFunctor(BufferWriter &writer) + : _writer(writer) + { + } + + void operator()(const T &val) { _writer.write(&val, sizeof(val)); } +}; + +template <class T> +class WriteFunctor2 +{ + BufferWriter &_writer; +public: + WriteFunctor2(BufferWriter &writer) + : _writer(writer) + { + } + + void operator()(const T &val) __attribute((noinline)) + { _writer.write(&val, sizeof(val)); } +}; + +template <class T, class Func> +void workLoop(const std::vector<T> &v, Func &&func) +{ + for (const auto &val : v) { + func(val); + } +} + +template <class T> +void work(const std::vector<T> &v, BufferWriter &writer) +{ + for (const auto &val : v) { + writer.write(&val, sizeof(val)); + } + writer.flush(); +} + +template <class T> +void workLambda(const std::vector<T> &v, BufferWriter &writer) +{ + workLoop<T>(v, + [&writer](const T &val) { writer.write(&val, sizeof(val)); }); + writer.flush(); +} + +template <class T> +void workFunctor(const std::vector<T> &v, BufferWriter &writer) +{ + workLoop<T>(v, WriteFunctor<T>(writer)); + writer.flush(); +} + +template <class T> +void workFunctor2(const std::vector<T> &v, BufferWriter &writer) +{ + workLoop<T>(v, WriteFunctor2<T>(writer)); + writer.flush(); +} + +template void work(const 
std::vector<char> &v, BufferWriter &writer); +template void work(const std::vector<short> &v, BufferWriter &writer); +template void work(const std::vector<int> &v, BufferWriter &writer); +template void work(const std::vector<long> &v, BufferWriter &writer); +template void workLambda(const std::vector<char> &v, BufferWriter &writer); +template void workLambda(const std::vector<short> &v, BufferWriter &writer); +template void workLambda(const std::vector<int> &v, BufferWriter &writer); +template void workLambda(const std::vector<long> &v, BufferWriter &writer); +template void workFunctor(const std::vector<char> &v, BufferWriter &writer); +template void workFunctor(const std::vector<short> &v, BufferWriter &writer); +template void workFunctor(const std::vector<int> &v, BufferWriter &writer); +template void workFunctor(const std::vector<long> &v, BufferWriter &writer); +template void workFunctor2(const std::vector<char> &v, BufferWriter &writer); +template void workFunctor2(const std::vector<short> &v, BufferWriter &writer); +template void workFunctor2(const std::vector<int> &v, BufferWriter &writer); +template void workFunctor2(const std::vector<long> &v, BufferWriter &writer); + +} // namespace search diff --git a/searchlib/src/tests/util/bufferwriter/work.h b/searchlib/src/tests/util/bufferwriter/work.h new file mode 100644 index 00000000000..49a1cb8017d --- /dev/null +++ b/searchlib/src/tests/util/bufferwriter/work.h @@ -0,0 +1,19 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +namespace search { + +class BufferWriter; + +template <class T> +using WorkFunc = void (*)(const std::vector<T> &v, BufferWriter &writer); +template <class T> +void work(const std::vector<T> &v, BufferWriter &writer); +template <class T> +void workLambda(const std::vector<T> &v, BufferWriter &writer); +template <class T> +void workFunctor(const std::vector<T> &v, BufferWriter &writer); +template <class T> +void workFunctor2(const std::vector<T> &v, BufferWriter &writer); + +} // namespace search + diff --git a/searchlib/src/tests/util/ioerrorhandler/.gitignore b/searchlib/src/tests/util/ioerrorhandler/.gitignore new file mode 100644 index 00000000000..2f5c2e77191 --- /dev/null +++ b/searchlib/src/tests/util/ioerrorhandler/.gitignore @@ -0,0 +1 @@ +searchlib_ioerrorhandler_test_app diff --git a/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt b/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt new file mode 100644 index 00000000000..92d6ab30d72 --- /dev/null +++ b/searchlib/src/tests/util/ioerrorhandler/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_ioerrorhandler_test_app + SOURCES + ioerrorhandler_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_ioerrorhandler_test_app COMMAND searchlib_ioerrorhandler_test_app) diff --git a/searchlib/src/tests/util/ioerrorhandler/DESC b/searchlib/src/tests/util/ioerrorhandler/DESC new file mode 100644 index 00000000000..3328798b936 --- /dev/null +++ b/searchlib/src/tests/util/ioerrorhandler/DESC @@ -0,0 +1 @@ +IOErrorHandler test. Take a look at ioerrorhandler_test.cpp for details. 
diff --git a/searchlib/src/tests/util/ioerrorhandler/FILES b/searchlib/src/tests/util/ioerrorhandler/FILES new file mode 100644 index 00000000000..2447a10e991 --- /dev/null +++ b/searchlib/src/tests/util/ioerrorhandler/FILES @@ -0,0 +1 @@ +ioerrorhandler_test.cpp diff --git a/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp b/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp new file mode 100644 index 00000000000..bcfaa5f8ac8 --- /dev/null +++ b/searchlib/src/tests/util/ioerrorhandler/ioerrorhandler_test.cpp @@ -0,0 +1,358 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("ioerrorhandler_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/util/statefile.h> +#include <vespa/searchlib/util/ioerrorhandler.h> +#include <atomic> +#include <iostream> +#include <fstream> +#include <string> +#include <setjmp.h> +#include <dlfcn.h> +#include <vespa/searchlib/test/statefile.h> +#include <vespa/searchlib/test/statestring.h> + +extern "C" +{ + +ssize_t read(int fd, void *buf, size_t count); +ssize_t write(int fd, const void *buf, size_t count); +ssize_t pread(int fd, void *buf, size_t count, off_t offset); +ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset); + + +} + +using ReadFunc = ssize_t (*)(int fd, void *buf, size_t count); +using WriteFunc = ssize_t (*)(int fd, const void *buf, size_t count); +using PreadFunc = ssize_t (*)(int fd, void *buf, size_t count, off_t offset); +using PwriteFunc = ssize_t (*)(int fd, const void *buf, size_t count, + off_t offset); + +using namespace search::test::statefile; +using namespace search::test::statestring; + +namespace +{ + +ReadFunc libc_read; +WriteFunc libc_write; +PreadFunc libc_pread; +PwriteFunc libc_pwrite; + +} + +int injectErrno; +std::atomic<int> injectreadErrnoTrigger; 
+std::atomic<int> injectpreadErrnoTrigger; +std::atomic<int> injectwriteErrnoTrigger; +std::atomic<int> injectpwriteErrnoTrigger; + +ssize_t read(int fd, void *buf, size_t count) +{ + if (--injectreadErrnoTrigger == 0) { + errno = injectErrno; + return -1; + } + if (!libc_read) { + libc_read = reinterpret_cast<ReadFunc>(dlsym(RTLD_NEXT, "read")); + } + return libc_read(fd, buf, count); +} + +ssize_t write(int fd, const void *buf, size_t count) +{ + if (--injectwriteErrnoTrigger == 0) { + errno = injectErrno; + return -1; + } + if (!libc_write) { + libc_write = reinterpret_cast<WriteFunc>(dlsym(RTLD_NEXT, "write")); + } + return libc_write(fd, buf, count); +} + +ssize_t pread(int fd, void *buf, size_t count, off_t offset) +{ + if (--injectpreadErrnoTrigger == 0) { + errno = injectErrno; + return -1; + } + if (!libc_pread) { + libc_pread = reinterpret_cast<PreadFunc>(dlsym(RTLD_NEXT, "pread")); + } + return libc_pread(fd, buf, count, offset); +} + + +ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) +{ + if (--injectpwriteErrnoTrigger == 0) { + errno = injectErrno; + return -1; + } + if (!libc_pwrite) { + libc_pwrite = reinterpret_cast<PwriteFunc>(dlsym(RTLD_NEXT, "pwrite")); + } + return libc_pwrite(fd, buf, count, offset); +} + + + +namespace search +{ + +const char *testStringBase = "This is a test\n"; + +using strvec = std::vector<vespalib::string>; + +namespace +{ + +bool +assertHistory(std::vector<vespalib::string> &exp, + std::vector<vespalib::string> &act) +{ + if (!EXPECT_EQUAL(exp.size(), act.size())) { + return false; + } + for (size_t i = 0; i < exp.size(); ++i) { + if (!EXPECT_EQUAL(exp[i], act[i])) { + return false; + } + } + return true; +} + + +class Fixture +{ +public: + + std::unique_ptr<StateFile> sf; + std::unique_ptr<FastOS_File> file; + char buf[8192]; + char *testString; + + Fixture(); + + void openFile(); + + void openFileDIO(); + + void writeTestString(); +}; + + +Fixture::Fixture() + : sf(), + file() +{ + 
unlink("testfile"); + StateFile::erase("state"); + sf.reset(new StateFile("state")); + testString = &buf[0]; + int off = reinterpret_cast<unsigned long>(testString) & 4095; + if (off != 0) { + testString += 4096 - off; + } + assert(testString + strlen(testStringBase) < &buf[0] + sizeof(buf)); + strcpy(testString, testStringBase); +} + + +void +Fixture::openFile() +{ + file.reset(new FastOS_File); + file->OpenReadWrite("testfile"); +} + +void +Fixture::openFileDIO() +{ + file.reset(new FastOS_File); + file->EnableDirectIO(); + file->OpenReadWrite("testfile"); +} + +void +Fixture::writeTestString() +{ + file->WriteBuf(testString, strlen(testString)); + file->SetPosition(0); +} + + +} + + +TEST("Test that ioerror handler can be instantated") +{ + StateFile::erase("state"); + StateFile sf("state"); + IOErrorHandler ioeh(&sf); + EXPECT_FALSE(ioeh.fired()); +} + + +TEST_F("Test that ioerror handler can process read error", Fixture) +{ + IOErrorHandler ioeh(f.sf.get()); + EXPECT_FALSE(ioeh.fired()); + f.openFile(); + f.writeTestString(); + uint64_t fileSize = f.file->GetSize(); + EXPECT_EQUAL(strlen(f.testString), fileSize); + char buf[1024]; + assert(fileSize <= sizeof(buf)); + try { + f.file->SetPosition(0); + injectErrno = EIO; + injectreadErrnoTrigger = 1; + f.file->ReadBuf(buf, fileSize); + LOG(error, "Should never get here"); + abort(); + } catch (std::runtime_error &e) { + LOG(info, "Caught std::runtime_error exception: %s", e.what()); + EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr); + } + EXPECT_TRUE(ioeh.fired()); + { + vespalib::string act = readState(*f.sf); + normalizeTimestamp(act); + vespalib::string exp = "state=down ts=0.0 operation=read " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"; + EXPECT_EQUAL(exp, act); + } + { + strvec exp({ "state=down ts=0.0 operation=read " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"}); + std::vector<vespalib::string> act(readHistory("state.history")); + normalizeTimestamps(act); + 
TEST_DO(assertHistory(exp, act)); + } +} + +TEST_F("Test that ioerror handler can process pread error", Fixture) +{ + IOErrorHandler ioeh(f.sf.get()); + EXPECT_FALSE(ioeh.fired()); + f.openFile(); + f.writeTestString(); + uint64_t fileSize = f.file->GetSize(); + EXPECT_EQUAL(strlen(f.testString), fileSize); + char buf[1024]; + assert(fileSize <= sizeof(buf)); + try { + f.file->SetPosition(0); + injectErrno = EIO; + injectpreadErrnoTrigger = 1; + f.file->ReadBuf(buf, fileSize, 0); + LOG(error, "Should never get here"); + abort(); + } catch (std::runtime_error &e) { + LOG(info, "Caught std::runtime_error exception: %s", e.what()); + EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr); + } + EXPECT_TRUE(ioeh.fired()); + { + vespalib::string act = readState(*f.sf); + normalizeTimestamp(act); + vespalib::string exp = "state=down ts=0.0 operation=read " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"; + EXPECT_EQUAL(exp, act); + } + { + strvec exp({ "state=down ts=0.0 operation=read " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"}); + std::vector<vespalib::string> act(readHistory("state.history")); + normalizeTimestamps(act); + TEST_DO(assertHistory(exp, act)); + } +} + +TEST_F("Test that ioerror handler can process write error", Fixture) +{ + IOErrorHandler ioeh(f.sf.get()); + EXPECT_FALSE(ioeh.fired()); + f.openFile(); + try { + injectErrno = EIO; + injectwriteErrnoTrigger = 1; + f.writeTestString(); + LOG(error, "Should never get here"); + abort(); + } catch (std::runtime_error &e) { + LOG(info, "Caught std::runtime_error exception: %s", e.what()); + EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr); + } + EXPECT_TRUE(ioeh.fired()); + { + vespalib::string act = readState(*f.sf); + normalizeTimestamp(act); + vespalib::string exp = "state=down ts=0.0 operation=write " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"; + EXPECT_EQUAL(exp, act); + } + { + strvec exp({ "state=down ts=0.0 operation=write " + "file=testfile 
error=5 offset=0 len=15 " + "rlen=-1\n"}); + std::vector<vespalib::string> act(readHistory("state.history")); + normalizeTimestamps(act); + TEST_DO(assertHistory(exp, act)); + } +} + + +TEST_F("Test that ioerror handler can process pwrite error", Fixture) +{ + IOErrorHandler ioeh(f.sf.get()); + EXPECT_FALSE(ioeh.fired()); + f.openFileDIO(); + try { + injectErrno = EIO; + injectpwriteErrnoTrigger = 1; + f.writeTestString(); + LOG(error, "Should never get here"); + abort(); + } catch (std::runtime_error &e) { + LOG(info, "Caught std::runtime_error exception: %s", e.what()); + EXPECT_TRUE(strstr(e.what(), "Input/output error") != nullptr); + } + EXPECT_TRUE(ioeh.fired()); + { + vespalib::string act = readState(*f.sf); + normalizeTimestamp(act); + vespalib::string exp = "state=down ts=0.0 operation=write " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"; + EXPECT_EQUAL(exp, act); + } + { + strvec exp({ "state=down ts=0.0 operation=write " + "file=testfile error=5 offset=0 len=15 " + "rlen=-1\n"}); + std::vector<vespalib::string> act(readHistory("state.history")); + normalizeTimestamps(act); + TEST_DO(assertHistory(exp, act)); + } +} + +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); + search::StateFile::erase("state"); + unlink("testfile"); +} diff --git a/searchlib/src/tests/util/rawbuf_test.cpp b/searchlib/src/tests/util/rawbuf_test.cpp new file mode 100644 index 00000000000..e9dc139bda5 --- /dev/null +++ b/searchlib/src/tests/util/rawbuf_test.cpp @@ -0,0 +1,198 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for rawbuf. 
+ +#include <vespa/log/log.h> +LOG_SETUP("rawbuf_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchlib/util/rawbuf.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/testkit/testapp.h> + +using vespalib::string; +using namespace search; + +namespace { + +string getString(const RawBuf &buf) { + return string(buf.GetDrainPos(), buf.GetUsedLen()); +} + +TEST("require that rawbuf can append text") { + RawBuf buf(10); + buf += "foo"; + buf += "bar"; + EXPECT_EQUAL("foobar", getString(buf)); +} + +TEST("require that rawbuf expands when appended beyond size") { + RawBuf buf(4); + buf += "foo"; + EXPECT_EQUAL(1u, buf.GetFreeLen()); + buf += "bar"; + EXPECT_EQUAL(2u, buf.GetFreeLen()); + EXPECT_EQUAL("foobar", getString(buf)); +} + +TEST("require that a rawbuf can be appended to another") { + RawBuf buf1(10); + RawBuf buf2(10); + buf1 += "foo"; + buf2 += "bar"; + buf1 += buf2; + EXPECT_EQUAL("foobar", getString(buf1)); +} + +TEST("require that rawbufs can be tested for equality") { + RawBuf buf1(10); + RawBuf buf2(10); + buf1 += "foo"; + buf2 += "bar"; + EXPECT_TRUE(buf1 == buf1); + EXPECT_FALSE(buf1 == buf2); +} + +template <typename T> +void checkAddNum(void (RawBuf::*addNum)(T, size_t, char), size_t num, + size_t fieldw, char fill, const string &expected) { + RawBuf buf(10); + (buf.*addNum)(num, fieldw, fill); + EXPECT_EQUAL(expected, getString(buf)); +} + +TEST("require that rawbuf can add numbers in decimal") { + checkAddNum(&RawBuf::addNum, 0, 4, 'x', "xxx0"); + checkAddNum(&RawBuf::addNum, 42, 4, '0', "0042"); + checkAddNum(&RawBuf::addNum, 12345678901234, 4, '0', "12345678901234"); + checkAddNum(&RawBuf::addNum, -1, 4, '0', "18446744073709551615"); + + checkAddNum(&RawBuf::addNum32, 0, 4, 'x', "xxx0"); + checkAddNum(&RawBuf::addNum32, 42, 4, '0', "0042"); + checkAddNum(&RawBuf::addNum32, 1234567890, 4, '0', "1234567890"); + checkAddNum(&RawBuf::addNum32, -1, 0, '0', "-1"); + checkAddNum(&RawBuf::addNum32, -1, 4, '0', "00-1"); + 
+ checkAddNum(&RawBuf::addNum64, 0, 4, 'x', "xxx0"); + checkAddNum(&RawBuf::addNum64, 42, 4, '0', "0042"); + checkAddNum(&RawBuf::addNum64, 12345678901234, 4, '0', "12345678901234"); + checkAddNum(&RawBuf::addNum64, -1, 0, '0', "-1"); + checkAddNum(&RawBuf::addNum64, -1, 4, '0', "00-1"); +} + +TEST("require that rawbuf can add hitrank") { + RawBuf buf(10); + buf.addHitRank(HitRank(4.2)); + EXPECT_EQUAL("4.2", getString(buf)); +} + +TEST("require that rawbuf can add signedhitrank") { + RawBuf buf(10); + buf.addHitRank(SignedHitRank(-4.213)); + EXPECT_EQUAL("-4.213", getString(buf)); +} + +TEST("require that rawbuf can append data of known length") { + RawBuf buf(10); + const string data("foo bar baz qux quux"); + buf.append(data.data(), data.size()); + EXPECT_EQUAL(data, getString(buf)); +} + +TEST("require that rawbuf can be truncated shorter and longer") { + RawBuf buf(10); + buf += "foobarbaz"; + buf.truncate(3); + buf += "qux"; + buf.truncate(9); + EXPECT_EQUAL("fooquxbaz", getString(buf)); +} + +TEST("require that prealloc makes enough room") { + RawBuf buf(10); + buf += "foo"; + EXPECT_EQUAL(7u, buf.GetFreeLen()); + buf.preAlloc(100); + EXPECT_EQUAL("foo", getString(buf)); + EXPECT_LESS_EQUAL(100u, buf.GetFreeLen()); +} + +TEST("require that rawbuf can read from file") { + FastOS_File file("mytemporaryfile"); + file.OpenReadWrite(); + file.Write2("barbaz", 6); + file.SetPosition(0); + + RawBuf buf(10); + buf += "foo"; + buf.readFile(file, 3); + EXPECT_EQUAL("foobar", getString(buf)); + buf.readFile(file, 100); + EXPECT_EQUAL("foobarbaz", getString(buf)); + + file.Close(); + file.Delete(); +} + +TEST("require that compact discards drained data") { + RawBuf buf(10); + buf += "foobar"; + buf.Drain(3); + buf.Compact(); + buf.Fill(3); + EXPECT_EQUAL("barbar", getString(buf)); +} + +TEST("require that reusing a buffer that has grown 4x will alloc new buffer") { + RawBuf buf(10); + buf.preAlloc(100); + EXPECT_LESS_EQUAL(100u, buf.GetFreeLen()); + buf.Reuse(); + 
EXPECT_EQUAL(10u, buf.GetFreeLen()); +} + +TEST("require that various length and position information can be found.") { + RawBuf buf(30); + buf += "foo bar baz qux quux corge"; + buf.Drain(7); + EXPECT_EQUAL(7u, buf.GetDrainLen()); + EXPECT_EQUAL(19u, buf.GetUsedLen()); + EXPECT_EQUAL(26u, buf.GetUsedAndDrainLen()); + EXPECT_EQUAL(4u, buf.GetFreeLen()); +} + +TEST("require that rawbuf can 'putToInet' 16-bit numbers") { + RawBuf buf(1); + buf.Put16ToInet(0x1234); + EXPECT_EQUAL(2, buf.GetFillPos() - buf.GetDrainPos()); + EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff); + EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff); +} + +TEST("require that rawbuf can 'putToInet' 32-bit numbers") { + RawBuf buf(1); + buf.PutToInet(0x12345678); + EXPECT_EQUAL(4, buf.GetFillPos() - buf.GetDrainPos()); + EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff); + EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff); + EXPECT_EQUAL(0x56, (int) buf.GetDrainPos()[2] & 0xff); + EXPECT_EQUAL(0x78, (int) buf.GetDrainPos()[3] & 0xff); +} + +TEST("require that rawbuf can 'putToInet' 64-bit numbers") { + RawBuf buf(1); + buf.Put64ToInet(0x123456789abcdef0ULL); + EXPECT_EQUAL(8, buf.GetFillPos() - buf.GetDrainPos()); + EXPECT_EQUAL(0x12, (int) buf.GetDrainPos()[0] & 0xff); + EXPECT_EQUAL(0x34, (int) buf.GetDrainPos()[1] & 0xff); + EXPECT_EQUAL(0x56, (int) buf.GetDrainPos()[2] & 0xff); + EXPECT_EQUAL(0x78, (int) buf.GetDrainPos()[3] & 0xff); + EXPECT_EQUAL(0x9a, (int) buf.GetDrainPos()[4] & 0xff); + EXPECT_EQUAL(0xbc, (int) buf.GetDrainPos()[5] & 0xff); + EXPECT_EQUAL(0xde, (int) buf.GetDrainPos()[6] & 0xff); + EXPECT_EQUAL(0xf0, (int) buf.GetDrainPos()[7] & 0xff); +} + + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/util/searchable_stats/.gitignore b/searchlib/src/tests/util/searchable_stats/.gitignore new file mode 100644 index 00000000000..08720bdefb5 --- /dev/null +++ b/searchlib/src/tests/util/searchable_stats/.gitignore @@ -0,0 +1,4 @@ 
+/.depend +/Makefile +/*_test +searchlib_searchable_stats_test_app diff --git a/searchlib/src/tests/util/searchable_stats/CMakeLists.txt b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt new file mode 100644 index 00000000000..3bc0fbe9c6b --- /dev/null +++ b/searchlib/src/tests/util/searchable_stats/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_searchable_stats_test_app + SOURCES + searchable_stats_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_searchable_stats_test_app COMMAND searchlib_searchable_stats_test_app) diff --git a/searchlib/src/tests/util/searchable_stats/DESC b/searchlib/src/tests/util/searchable_stats/DESC new file mode 100644 index 00000000000..b8127b278f8 --- /dev/null +++ b/searchlib/src/tests/util/searchable_stats/DESC @@ -0,0 +1 @@ +searchable_stats test. Take a look at searchable_stats_test.cpp for details. diff --git a/searchlib/src/tests/util/searchable_stats/FILES b/searchlib/src/tests/util/searchable_stats/FILES new file mode 100644 index 00000000000..94e6ce7f4df --- /dev/null +++ b/searchlib/src/tests/util/searchable_stats/FILES @@ -0,0 +1 @@ +searchable_stats_test.cpp diff --git a/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp new file mode 100644 index 00000000000..83aba794824 --- /dev/null +++ b/searchlib/src/tests/util/searchable_stats/searchable_stats_test.cpp @@ -0,0 +1,42 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("searchable_stats_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/util/searchable_stats.h> + +using namespace search; + +class Test : public vespalib::TestApp { +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("searchable_stats_test"); + { + SearchableStats stats; + EXPECT_EQUAL(0u, stats.memoryUsage()); + EXPECT_EQUAL(0u, stats.docsInMemory()); + EXPECT_EQUAL(0u, stats.sizeOnDisk()); + { + SearchableStats rhs; + EXPECT_EQUAL(&rhs.memoryUsage(100), &rhs); + EXPECT_EQUAL(&rhs.docsInMemory(10), &rhs); + EXPECT_EQUAL(&rhs.sizeOnDisk(1000), &rhs); + EXPECT_EQUAL(&stats.add(rhs), &stats); + } + EXPECT_EQUAL(100u, stats.memoryUsage()); + EXPECT_EQUAL(10u, stats.docsInMemory()); + EXPECT_EQUAL(1000u, stats.sizeOnDisk()); + EXPECT_EQUAL(&stats.add(SearchableStats().memoryUsage(100).docsInMemory(10).sizeOnDisk(1000)), &stats); + EXPECT_EQUAL(200u, stats.memoryUsage()); + EXPECT_EQUAL(20u, stats.docsInMemory()); + EXPECT_EQUAL(2000u, stats.sizeOnDisk()); + } + TEST_DONE(); +} + +TEST_APPHOOK(Test); diff --git a/searchlib/src/tests/util/sigbushandler/.gitignore b/searchlib/src/tests/util/sigbushandler/.gitignore new file mode 100644 index 00000000000..ab5a59f3296 --- /dev/null +++ b/searchlib/src/tests/util/sigbushandler/.gitignore @@ -0,0 +1 @@ +searchlib_sigbushandler_test_app diff --git a/searchlib/src/tests/util/sigbushandler/CMakeLists.txt b/searchlib/src/tests/util/sigbushandler/CMakeLists.txt new file mode 100644 index 00000000000..a55e1e354c7 --- /dev/null +++ b/searchlib/src/tests/util/sigbushandler/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_sigbushandler_test_app + SOURCES + sigbushandler_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_sigbushandler_test_app COMMAND searchlib_sigbushandler_test_app) diff --git a/searchlib/src/tests/util/sigbushandler/DESC b/searchlib/src/tests/util/sigbushandler/DESC new file mode 100644 index 00000000000..03ee94c85ba --- /dev/null +++ b/searchlib/src/tests/util/sigbushandler/DESC @@ -0,0 +1 @@ +SigBusHandler test. Take a look at sigbushandler_test.cpp for details. diff --git a/searchlib/src/tests/util/sigbushandler/FILES b/searchlib/src/tests/util/sigbushandler/FILES new file mode 100644 index 00000000000..0998cd8a784 --- /dev/null +++ b/searchlib/src/tests/util/sigbushandler/FILES @@ -0,0 +1 @@ +sigbushandler_test.cpp diff --git a/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp b/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp new file mode 100644 index 00000000000..af657420575 --- /dev/null +++ b/searchlib/src/tests/util/sigbushandler/sigbushandler_test.cpp @@ -0,0 +1,131 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("sigbushandler_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/util/statefile.h> +#include <vespa/searchlib/util/sigbushandler.h> +#include <atomic> +#include <iostream> +#include <fstream> +#include <string> +#include <vespa/searchlib/test/statefile.h> +#include <vespa/searchlib/test/statestring.h> + +using namespace search::test::statefile; +using namespace search::test::statestring; + +namespace search +{ + +using strvec = std::vector<vespalib::string>; + +namespace +{ + +bool +assertHistory(std::vector<vespalib::string> &exp, + std::vector<vespalib::string> &act) +{ + if (!EXPECT_EQUAL(exp.size(), act.size())) { + return false; + } + for (size_t i = 0; i < exp.size(); ++i) { + if (!EXPECT_EQUAL(exp[i], act[i])) { + return false; + } + } + return true; +} + +} + + +TEST("Test that sigbus handler can be instantated") +{ + StateFile::erase("state"); + StateFile sf("state"); + SigBusHandler sbh(&sf); + EXPECT_FALSE(sbh.fired()); +} + + +TEST("Test that sigbus handler can trap synthetic sigbus") +{ + StateFile::erase("state"); + StateFile sf("state"); + SigBusHandler sbh(&sf); + EXPECT_FALSE(sbh.fired()); + sigjmp_buf sjb; + if (sigsetjmp(sjb, 1) == 0) { + sbh.setUnwind(&sjb); + kill(getpid(), SIGBUS); + LOG(error, "Should never get here"); + abort(); + } + EXPECT_TRUE(sbh.fired()); + { + vespalib::string act = readState(sf); + normalizeTimestamp(act); + EXPECT_EQUAL("state=down ts=0.0 operation=sigbus errno=0 code=0\n", + act); + } + { + strvec exp({"state=down ts=0.0 operation=sigbus errno=0 code=0\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + normalizeTimestamps(act); + TEST_DO(assertHistory(exp, act)); + } +} + +TEST("Test that sigbus handler can trap normal sigbus") +{ + StateFile::erase("state"); + StateFile sf("state"); + SigBusHandler sbh(&sf); + EXPECT_FALSE(sbh.fired()); + + int 
fd = open("mmapfile", O_CREAT | O_TRUNC | O_RDWR, 0644); + assert(fd >= 0); + void *mmapres = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + assert(mmapres != nullptr); + assert(mmapres != reinterpret_cast<void *>(-1l)); + char *p = reinterpret_cast<char *>(mmapres) + 42; + volatile char r = 0; + sigjmp_buf sjb; + if (sigsetjmp(sjb, 1) == 0) { + sbh.setUnwind(&sjb); + r = *p; + LOG(error, "Should never get here"); + abort(); + } + EXPECT_TRUE(sbh.fired()); + EXPECT_TRUE(r == '\0'); + { + vespalib::string act = readState(sf); + vespalib::string exp ="state=down ts=0.0 operation=sigbus errno=0 " + "code=2 addr=0x0000000000000000\n"; + normalizeAddr(exp, p); + normalizeTimestamp(act); + EXPECT_EQUAL(exp, act); + } + { + strvec exp({"state=down ts=0.0 operation=sigbus errno=0 code=2 " + "addr=0x0000000000000000\n" }); + normalizeAddrs(exp, p); + std::vector<vespalib::string> act(readHistory("state.history")); + normalizeTimestamps(act); + TEST_DO(assertHistory(exp, act)); + } +} + +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); + search::StateFile::erase("state"); + unlink("mmapfile"); +} diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore b/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore new file mode 100644 index 00000000000..51a916d8333 --- /dev/null +++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/.gitignore @@ -0,0 +1 @@ +searchlib_slime_output_raw_buf_adapter_test_app diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt b/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt new file mode 100644 index 00000000000..0735511fe7a --- /dev/null +++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchlib_slime_output_raw_buf_adapter_test_app + SOURCES + slime_output_raw_buf_adapter_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_slime_output_raw_buf_adapter_test_app COMMAND searchlib_slime_output_raw_buf_adapter_test_app) diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES b/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES new file mode 100644 index 00000000000..5870aa61349 --- /dev/null +++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/FILES @@ -0,0 +1 @@ +slime_output_raw_buf_adapter_test.cpp diff --git a/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp b/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp new file mode 100644 index 00000000000..5d48520a92c --- /dev/null +++ b/searchlib/src/tests/util/slime_output_raw_buf_adapter/slime_output_raw_buf_adapter_test.cpp @@ -0,0 +1,25 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> +#include <vespa/vespalib/data/slime/slime.h> + +using namespace search; +using namespace vespalib::slime::convenience; + +TEST("use slime with rawbuf") { + RawBuf buffer(4096); + Slime src; + Slime dst; + { + Cursor &c = src.setObject(); + c.setLong("foo", 5); + c.setString("bar", "text"); + } + EXPECT_NOT_EQUAL(src, dst); + SlimeOutputRawBufAdapter adapter(buffer); + vespalib::slime::BinaryFormat::encode(src, adapter); + vespalib::slime::BinaryFormat::decode(Memory(buffer.GetDrainPos(), buffer.GetUsedLen()), dst); + EXPECT_EQUAL(src, dst); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/tests/util/statebuf/.gitignore b/searchlib/src/tests/util/statebuf/.gitignore new file mode 100644 index 00000000000..270347c1d6b --- /dev/null +++ b/searchlib/src/tests/util/statebuf/.gitignore @@ -0,0 +1 @@ +searchlib_statebuf_test_app diff --git a/searchlib/src/tests/util/statebuf/CMakeLists.txt b/searchlib/src/tests/util/statebuf/CMakeLists.txt new file mode 100644 index 00000000000..0b24cd1552b --- /dev/null +++ b/searchlib/src/tests/util/statebuf/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_statebuf_test_app + SOURCES + statebuf_test.cpp + DEPENDS + searchlib +) +vespa_add_test(NAME searchlib_statebuf_test_app COMMAND searchlib_statebuf_test_app) diff --git a/searchlib/src/tests/util/statebuf/DESC b/searchlib/src/tests/util/statebuf/DESC new file mode 100644 index 00000000000..6368f32bcc6 --- /dev/null +++ b/searchlib/src/tests/util/statebuf/DESC @@ -0,0 +1 @@ +statefile test. Take a look at statefile_test.cpp for details. 
diff --git a/searchlib/src/tests/util/statebuf/FILES b/searchlib/src/tests/util/statebuf/FILES new file mode 100644 index 00000000000..c651bb2758f --- /dev/null +++ b/searchlib/src/tests/util/statebuf/FILES @@ -0,0 +1 @@ +statefile_test.cpp diff --git a/searchlib/src/tests/util/statebuf/statebuf_test.cpp b/searchlib/src/tests/util/statebuf/statebuf_test.cpp new file mode 100644 index 00000000000..952412b9eb6 --- /dev/null +++ b/searchlib/src/tests/util/statebuf/statebuf_test.cpp @@ -0,0 +1,109 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("statebuf_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/util/statebuf.h> +#include <string> + +namespace search +{ + +namespace +{ + + +} + +class Fixture : public StateBuf +{ + char _buf[1024]; + +public: + Fixture() + : StateBuf(_buf, sizeof(_buf)) + { + } +}; + +TEST_F("single character can be appended to stream", Fixture) +{ + f << 'H' << 'e' << 'l' << 'l' << 'o'; + EXPECT_EQUAL("Hello", f.str()); +} + + +TEST_F("strings can be appended to stream", Fixture) +{ + f << "Hello world"; + EXPECT_EQUAL("Hello world", f.str()); +} + +TEST_F("quoted strings can be appended to stream", Fixture) +{ + f.appendQuoted("This is a quoting test, \\ \" \n oops"); + EXPECT_EQUAL("\"This is a quoting test, \\\\ \\\" \\n oops\"", f.str()); +} + +TEST_F("keys can be appended to stream", Fixture) +{ + (f.appendKey("foo") << "fooval").appendKey("bar") << "barval"; + EXPECT_EQUAL("foo=fooval bar=barval", f.str()); +} + + +TEST_F("integers can be appended to stream", Fixture) +{ + f << (UINT64_C(1) << 63) << " " << -42l << " " << 0l; + EXPECT_EQUAL("9223372036854775808 -42 0", f.str()); +} + +TEST_F("struct timespec can be appended to stream", Fixture) +{ + struct timespec ts; + ts.tv_sec = 15; + ts.tv_nsec = 256; + f << ts; + 
EXPECT_EQUAL("15.000000256", f.str()); +} + +TEST_F("timestamp can be appended to stream", Fixture) +{ + struct timespec ts; + ts.tv_sec = 16; + ts.tv_nsec = 257; + f.appendTimestamp(ts); + EXPECT_EQUAL("ts=16.000000257", f.str()); +} + + +TEST_F("hexadecimal numbers can be appended to stream", Fixture) +{ + (f.appendHex(0xdeadbeefcafebabeul) << " ").appendHex(0x123456789abcdef0ul); + EXPECT_EQUAL("0xdeadbeefcafebabe 0x123456789abcdef0", f.str()); + +} + +TEST_F("pointer address can be appended to stream", Fixture) +{ + f.appendAddr(nullptr); + f.appendAddr(reinterpret_cast<void *>(0x12345ul)); + EXPECT_EQUAL("addr=0x0000000000000000 addr=0x0000000000012345", f.str()); +} + + +TEST_F("base and size methods can be called on stream", Fixture) +{ + f << "Hello world\n"; + std::string s(f.base(), f.base() + f.size()); + EXPECT_EQUAL("Hello world\n", s); +} + +} + + +TEST_MAIN() +{ + TEST_RUN_ALL(); +} diff --git a/searchlib/src/tests/util/statefile/.gitignore b/searchlib/src/tests/util/statefile/.gitignore new file mode 100644 index 00000000000..504b7431a7a --- /dev/null +++ b/searchlib/src/tests/util/statefile/.gitignore @@ -0,0 +1 @@ +searchlib_statefile_test_app diff --git a/searchlib/src/tests/util/statefile/CMakeLists.txt b/searchlib/src/tests/util/statefile/CMakeLists.txt new file mode 100644 index 00000000000..b90b87fce7d --- /dev/null +++ b/searchlib/src/tests/util/statefile/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_statefile_test_app + SOURCES + statefile_test.cpp + DEPENDS + searchlib_test + searchlib +) +vespa_add_test(NAME searchlib_statefile_test_app COMMAND searchlib_statefile_test_app) diff --git a/searchlib/src/tests/util/statefile/DESC b/searchlib/src/tests/util/statefile/DESC new file mode 100644 index 00000000000..6368f32bcc6 --- /dev/null +++ b/searchlib/src/tests/util/statefile/DESC @@ -0,0 +1 @@ +statefile test. 
Take a look at statefile_test.cpp for details. diff --git a/searchlib/src/tests/util/statefile/FILES b/searchlib/src/tests/util/statefile/FILES new file mode 100644 index 00000000000..c651bb2758f --- /dev/null +++ b/searchlib/src/tests/util/statefile/FILES @@ -0,0 +1 @@ +statefile_test.cpp diff --git a/searchlib/src/tests/util/statefile/statefile_test.cpp b/searchlib/src/tests/util/statefile/statefile_test.cpp new file mode 100644 index 00000000000..583d21e1cec --- /dev/null +++ b/searchlib/src/tests/util/statefile/statefile_test.cpp @@ -0,0 +1,294 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("statefile_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/searchlib/util/statefile.h> +#include <atomic> +#include <iostream> +#include <fstream> +#include <string> +#include <vespa/searchlib/test/statefile.h> + + +using namespace search::test::statefile; + +namespace search +{ + +namespace +{ + +bool +hasFile(const char *name) +{ + return access(name, R_OK | W_OK) == 0; +} + + +void +addState(StateFile &sf, const char *buf) +{ + size_t bufLen = strlen(buf); + sf.addState(buf, bufLen, false); +} + +void +addSignalState(StateFile &sf, const char *buf) +{ + size_t bufLen = strlen(buf); + sf.addState(buf, bufLen, true); +} + + +bool +assertHistory(std::vector<vespalib::string> &exp, + std::vector<vespalib::string> &act) +{ + if (!EXPECT_EQUAL(exp.size(), act.size())) { + return false; + } + for (size_t i = 0; i < exp.size(); ++i) { + if (!EXPECT_EQUAL(exp[i], act[i])) { + return false; + } + } + return true; +} + + +int64_t +getSize(const char *name) +{ + struct stat stbuf; + if (stat(name, &stbuf) != 0) + return 0; + return stbuf.st_size; +} + + +void +setSize(const char *name, int64_t newSize) +{ + int truncRes = truncate(name, newSize); + assert(truncRes == 0); +} + + 
+} + + +TEST("Test lock free atomic int used by async signal safe lock primitive") +{ + std::atomic<int> f; + ASSERT_TRUE(f.is_lock_free()); +} + + +TEST("Test that statefile can be created") +{ + StateFile::erase("state"); + EXPECT_FALSE(hasFile("state")); + EXPECT_FALSE(hasFile("state.history")); + StateFile sf("state"); + EXPECT_TRUE(hasFile("state")); + EXPECT_TRUE(hasFile("state.history")); + EXPECT_EQUAL(0, sf.getGen()); + StateFile::erase("state"); + EXPECT_FALSE(hasFile("state")); + EXPECT_FALSE(hasFile("state.history")); + StateFile::erase("state"); + EXPECT_FALSE(hasFile("state")); + EXPECT_FALSE(hasFile("state.history")); +} + + +TEST("Test that statefile can add event") +{ + StateFile::erase("state"); + StateFile sf("state"); + + addState(sf, "Hello world\n"); + vespalib::string check = readState(sf); + EXPECT_EQUAL("Hello world\n", check); + EXPECT_EQUAL(1, sf.getGen()); +} + +TEST("Test that history is appended to") +{ + StateFile::erase("state"); + StateFile sf("state"); + + addState(sf, "Hello world\n"); + addState(sf, "Foo bar\n"); + vespalib::string check = readState(sf); + EXPECT_EQUAL("Foo bar\n", check); + EXPECT_EQUAL(2, sf.getGen()); + { + std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + TEST_DO(assertHistory(exp, act)); + } +} + + +TEST("Test that truncated history is truncated at event boundary") +{ + StateFile::erase("state"); + int64_t histSize = 1; + { + StateFile sf("state"); + addState(sf, "Hello world\n"); + addState(sf, "Foo bar\n"); + EXPECT_EQUAL(2, sf.getGen()); + histSize = getSize("state.history"); + EXPECT_EQUAL(20, histSize); + addState(sf, "zap\n"); + EXPECT_EQUAL(3, sf.getGen()); + } + // Lose 2 last events in history + setSize("state.history", histSize - 1); + // Last event is restored to history from main state file + StateFile sf("state"); + vespalib::string check = readState(sf); + EXPECT_EQUAL("zap\n", check); + EXPECT_EQUAL(0, 
sf.getGen()); + { + std::vector<vespalib::string> exp({ "Hello world\n", "zap\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + TEST_DO(assertHistory(exp, act)); + } +} + + +TEST("Test that async signal safe path adds event") +{ + StateFile::erase("state"); + StateFile sf("state"); + + addSignalState(sf, "Hello world\n"); + addSignalState(sf, "Foo bar\n"); + vespalib::string check = readState(sf); + EXPECT_EQUAL("Foo bar\n", check); + EXPECT_EQUAL(2, sf.getGen()); + { + std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + TEST_DO(assertHistory(exp, act)); + } +} + + +TEST("Test that state file can be restored from history") +{ + StateFile::erase("state"); + { + StateFile sf("state"); + addState(sf, "Hello world\n"); + addState(sf, "Foo bar\n"); + EXPECT_EQUAL(2, sf.getGen()); + } + // Lose event in main state file + setSize("state", 0); + EXPECT_EQUAL(0, getSize("state")); + // Last event is restored to history from main state file + StateFile sf("state"); + EXPECT_NOT_EQUAL(0, getSize("state")); + vespalib::string check = readState(sf); + EXPECT_EQUAL("Foo bar\n", check); + { + std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + TEST_DO(assertHistory(exp, act)); + } +} + + +TEST("Test that different entry is added to history") +{ + StateFile::erase("state"); + { + StateFile sf("state"); + addState(sf, "Hello world\n"); + EXPECT_EQUAL(1, sf.getGen()); + } + // Write changed entry to main state file + { + std::ofstream of("state"); + of << "zap\n"; + } + // Add changed event to history + StateFile sf("state"); + EXPECT_NOT_EQUAL(0, getSize("state")); + vespalib::string check = readState(sf); + EXPECT_EQUAL("zap\n", check); + { + std::vector<vespalib::string> exp({ "Hello world\n", "zap\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + 
TEST_DO(assertHistory(exp, act)); + } +} + + +TEST("Test that state history stops at NUL byte") +{ + StateFile::erase("state"); + { + StateFile sf("state"); + addState(sf, "Hello world\n"); + addState(sf, "Foo bar\n"); + EXPECT_EQUAL(2, sf.getGen()); + } + // Corrupt history state file + { + char buf[1]; + buf[0] = '\0'; + std::ofstream of("state.history"); + of.write(&buf[0], 1); + } + StateFile sf("state"); + vespalib::string check = readState(sf); + EXPECT_EQUAL("Foo bar\n", check); + { + std::vector<vespalib::string> exp({ "Foo bar\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + TEST_DO(assertHistory(exp, act)); + } + +} + +TEST("Test that main state stops at NUL byte") +{ + StateFile::erase("state"); + { + StateFile sf("state"); + addState(sf, "Hello world\n"); + addState(sf, "Foo bar\n"); + EXPECT_EQUAL(2, sf.getGen()); + } + // Corrupt history state file + { + char buf[10]; + strcpy(buf, "zap"); + std::ofstream of("state"); + of.write(&buf[0], strlen(buf) + 1); + } + StateFile sf("state"); + vespalib::string check = readState(sf); + EXPECT_EQUAL("Foo bar\n", check); + { + std::vector<vespalib::string> exp({ "Hello world\n", "Foo bar\n" }); + std::vector<vespalib::string> act(readHistory("state.history")); + TEST_DO(assertHistory(exp, act)); + } + +} + +} + +TEST_MAIN() +{ + TEST_RUN_ALL(); + search::StateFile::erase("state"); +} |