diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp |
Publish
Diffstat (limited to 'searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp')
-rw-r--r-- | searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp new file mode 100644 index 00000000000..7436913b642 --- /dev/null +++ b/searchlib/src/tests/queryeval/weighted_set_term/weighted_set_term_test.cpp @@ -0,0 +1,240 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("weighted_set_term_test"); +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> + +#include <vespa/searchlib/fef/fef.h> +#include <vespa/searchlib/query/tree/simplequery.h> +#include <vespa/searchlib/queryeval/field_spec.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/fake_result.h> +#include <vespa/searchlib/queryeval/fake_searchable.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/weighted_set_term_search.h> +#include <vespa/searchlib/test/initrange.h> +#include <vespa/searchlib/test/document_weight_attribute_helper.h> +#include <memory> +#include <string> +#include <map> + +using namespace search; +using namespace search::query; +using namespace search::fef; +using namespace search::queryeval; +using search::test::InitRangeVerifier; +using search::test::DocumentWeightAttributeHelper; + +namespace { + +void setupFakeSearchable(FakeSearchable &fake) { + for (size_t docid = 1; docid < 10; ++docid) { + std::string token1 = vespalib::make_string("%zu", docid); + std::string token2 = vespalib::make_string("1%zu", docid); + std::string token3 = vespalib::make_string("2%zu", docid); + + fake.addResult("field", token1, FakeResult().doc(docid)); + fake.addResult("multi-field", token1, FakeResult().doc(docid)); + fake.addResult("multi-field", token2, FakeResult().doc(docid)); + fake.addResult("multi-field", token3, FakeResult().doc(docid)); + } +} + +struct WS { + static const uint32_t fieldId = 42; + MatchDataLayout layout; + TermFieldHandle handle; + std::vector<std::pair<std::string, uint32_t> > tokens; + + WS() : layout(), handle(layout.allocTermField(fieldId)), tokens() { + MatchData::UP tmp = layout.createMatchData(); + ASSERT_TRUE(tmp->resolveTermField(handle)->getFieldId() == fieldId); + } + + WS &add(const std::string &token, uint32_t weight) { + tokens.push_back(std::make_pair(token, weight)); + return *this; + } + + Node::UP createNode() const { + SimpleWeightedSetTerm *node = new SimpleWeightedSetTerm("view", 0, Weight(0)); + for (size_t i = 0; i < tokens.size(); ++i) { + node->append(Node::UP(new SimpleStringTerm(tokens[i].first, "view", 0, Weight(tokens[i].second)))); + } + return Node::UP(node); + } + + bool isGenericSearch(Searchable &searchable, const std::string &field, bool strict) const { + FakeRequestContext requestContext; + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + return (dynamic_cast<WeightedSetTermSearch*>(sb.get()) != 0); + } + + FakeResult search(Searchable &searchable, const std::string &field, bool strict) const { + FakeRequestContext requestContext; + MatchData::UP md = layout.createMatchData(); + Node::UP node = createNode(); + FieldSpecList fields = FieldSpecList().add(FieldSpec(field, fieldId, handle)); + queryeval::Blueprint::UP bp = searchable.createBlueprint(requestContext, fields, *node); + bp->fetchPostings(strict); + SearchIterator::UP sb = bp->createSearch(*md, strict); + sb->initFullRange(); + FakeResult result; + for (uint32_t docId = 1; docId < 10; ++docId) { + if (sb->seek(docId)) { + sb->unpack(docId); + result.doc(docId); + TermFieldMatchData &data = *md->resolveTermField(handle); + FieldPositionsIterator itr = data.getIterator(); + for (; itr.valid(); itr.next()) { + result.elem(itr.getElementId()); + result.weight(itr.getElementWeight()); + result.pos(itr.getPosition()); + } + } + } + return result; + } +}; + +struct MockSearch : public SearchIterator { + int seekCnt; + int _initial; + MockSearch(uint32_t initial) : SearchIterator(), seekCnt(0), _initial(initial) { } + void initRange(uint32_t begin, uint32_t end) override { + SearchIterator::initRange(begin, end); + setDocId(_initial); + } + virtual void doSeek(uint32_t) { + ++seekCnt; + setAtEnd(); + } + virtual void doUnpack(uint32_t) {} +}; + +struct MockFixture { + MockSearch *mock; + TermFieldMatchData tfmd; + std::unique_ptr<SearchIterator> search; + MockFixture(uint32_t initial) : mock(0), tfmd(), search() { + std::vector<SearchIterator*> children; + std::vector<int32_t> weights; + mock = new MockSearch(initial); + children.push_back(mock); + weights.push_back(1); + search.reset(WeightedSetTermSearch::create(children, tfmd, weights)); + } +}; + +} // namespace <unnamed> + +TEST("testSimple") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS().add("7", 70).add("5", 50).add("3", 30).add("100", 1000); + EXPECT_TRUE(ws.isGenericSearch(index, "field", true)); + EXPECT_TRUE(ws.isGenericSearch(index, "field", false)); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); + + EXPECT_EQUAL(expect, ws.search(index, "field", true)); + EXPECT_EQUAL(expect, ws.search(index, "field", false)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST("testMulti") { + FakeSearchable index; + setupFakeSearchable(index); + FakeResult expect = FakeResult() + .doc(3).elem(0).weight(230).pos(0).elem(0).weight(130).pos(0).elem(0).weight(30).pos(0) + .doc(5).elem(0).weight(150).pos(0).elem(0).weight(50).pos(0) + .doc(7).elem(0).weight(70).pos(0); + WS ws = WS().add("7", 70).add("5", 50).add("3", 30) + .add("15", 150).add("13", 130) + .add("23", 230).add("100", 1000); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", true)); + EXPECT_TRUE(ws.isGenericSearch(index, "multi-field", false)); + + EXPECT_EQUAL(expect, ws.search(index, "multi-field", true)); + EXPECT_EQUAL(expect, ws.search(index, "multi-field", false)); +} + +TEST_F("test Eager Empty Child", MockFixture(search::endDocId)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(1)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(0, mock->seekCnt); +} + +TEST_F("test Eager Matching Child", MockFixture(5)) { + MockSearch *mock = f1.mock; + SearchIterator &search = *f1.search; + search.initFullRange(); + EXPECT_EQUAL(search.beginId(), search.getDocId()); + EXPECT_TRUE(!search.seek(3)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(search.seek(5)); + EXPECT_EQUAL(5u, search.getDocId()); + EXPECT_EQUAL(0, mock->seekCnt); + EXPECT_TRUE(!search.seek(7)); + EXPECT_TRUE(search.isAtEnd()); + EXPECT_EQUAL(1, mock->seekCnt); +} + +TEST("verify initRange with search iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + using DocIds = InitRangeVerifier::DocIds; + std::vector<DocIds> split_lists(num_children); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + split_lists[i % num_children].push_back(full_list[i]); + } + bool strict = true; + std::vector<SearchIterator*> children; + for (size_t i = 0; i < num_children; ++i) { + children.push_back(ir.createIterator(split_lists[i], strict).release()); + } + TermFieldMatchData tfmd; + std::vector<int32_t> weights(num_children, 1); + SearchIterator::UP itr(WeightedSetTermSearch::create(children, tfmd, weights)); + ir.verify(*itr); +} + +TEST("verify initRange with document weight iterator children") { + const size_t num_children = 7; + InitRangeVerifier ir; + DocumentWeightAttributeHelper helper; + helper.add_docs(ir.getDocIdLimit()); + auto full_list = ir.getExpectedDocIds(); + for (size_t i = 0; i < full_list.size(); ++i) { + helper.set_doc(full_list[i], i % num_children, 1); + } + TermFieldMatchData tfmd; + std::vector<int32_t> weights(num_children, 1); + std::vector<DocumentWeightIterator> children; + for (size_t i = 0; i < num_children; ++i) { + auto dict_entry = helper.dwa().lookup(vespalib::make_string("%zu", i).c_str()); + helper.dwa().create(dict_entry.posting_idx, children); + } + SearchIterator::UP itr(WeightedSetTermSearch::create(tfmd, weights, std::move(children))); + ir.verify(*itr); +} + +TEST_MAIN() { TEST_RUN_ALL(); } |