diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests/fef/phrasesplitter |
Publish
Diffstat (limited to 'searchlib/src/tests/fef/phrasesplitter')
6 files changed, 349 insertions, 0 deletions
diff --git a/searchlib/src/tests/fef/phrasesplitter/.gitignore b/searchlib/src/tests/fef/phrasesplitter/.gitignore
new file mode 100644
index 00000000000..418f9961840
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+benchmark
+phrasesplitter_test
+searchlib_phrasesplitter_test_app
+searchlib_benchmark_app
diff --git a/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt
new file mode 100644
index 00000000000..aa16f3e0a0d
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_phrasesplitter_test_app
+    SOURCES
+    phrasesplitter_test.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_phrasesplitter_test_app COMMAND searchlib_phrasesplitter_test_app)
+vespa_add_executable(searchlib_benchmark_app
+    SOURCES
+    benchmark.cpp
+    DEPENDS
+    searchlib
+)
+vespa_add_test(NAME searchlib_benchmark_app COMMAND searchlib_benchmark_app BENCHMARK)
diff --git a/searchlib/src/tests/fef/phrasesplitter/DESC b/searchlib/src/tests/fef/phrasesplitter/DESC
new file mode 100644
index 00000000000..fba49bdb8c0
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/DESC
@@ -0,0 +1 @@
+phrasesplitter test. Take a look at phrasesplitter.cpp for details.
diff --git a/searchlib/src/tests/fef/phrasesplitter/FILES b/searchlib/src/tests/fef/phrasesplitter/FILES
new file mode 100644
index 00000000000..be37941d0c8
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/FILES
@@ -0,0 +1 @@
+phrasesplitter.cpp
diff --git a/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp
new file mode 100644
index 00000000000..ca90b1de261
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("phrasesplitter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <iomanip>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+namespace search {
+namespace fef {
+
+class Benchmark : public vespalib::TestApp
+{
+private:
+    FastOS_Time _timer;
+    double _sample;
+
+    void start() { _timer.SetNow(); }
+    void sample() { _sample = _timer.MilliSecsToNow(); }
+    void run(size_t numRuns, size_t numPositions);
+
+public:
+    Benchmark() : _timer(), _sample(0) {}
+    int Main();
+};
+
+void
+Benchmark::run(size_t numRuns, size_t numPositions)
+{
+    test::QueryEnvironment qe;
+    std::vector<SimpleTermData> &terms = qe.getTerms();
+    MatchDataLayout mdl;
+    terms.push_back(SimpleTermData());
+    terms.back().setUniqueId(1);
+    terms.back().setPhraseLength(3); // phrase with 3 terms
+    terms.back().addField(0).setHandle(mdl.allocTermField(0));
+    MatchData::UP md = mdl.createMatchData();
+    TermFieldMatchData *tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle());
+    for (size_t i = 0; i < numPositions; ++i) {
+        tmd->appendPosition(TermFieldMatchDataPosition(0, i, 0, numPositions));
+    }
+
+    PhraseSplitter ps(qe, 0);
+
+    std::cout << "Start benchmark with numRuns(" << numRuns << ") and numPositions(" << numPositions << ")" << std::endl;
+
+    start();
+
+    for (size_t i = 0; i < numRuns; ++i) {
+        ps.update(*md);
+    }
+
+    sample();
+}
+
+int
+Benchmark::Main()
+{
+
+    TEST_INIT("benchmark");
+
+    if (_argc != 3) {
+        std::cout << "Must specify <numRuns> and <numPositions>" << std::endl;
+        return 0;
+    }
+
+    size_t numRuns = strtoull(_argv[1], NULL, 10);
+    size_t numPositions = strtoull(_argv[2], NULL, 10);
+
+    run(numRuns, numPositions);
+
+    std::cout << "TET: " << _sample << " (ms)" << std::endl;
+    std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / numRuns << " (ms)" << std::endl;
+
+    TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::Benchmark);
diff --git a/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp
new file mode 100644
index 00000000000..0fa6f27022e
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp
@@ -0,0 +1,242 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("phrasesplitter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+namespace search {
+namespace fef {
+
+class PhraseSplitterTest : public vespalib::TestApp
+{
+private:
+    void assertTermData(const ITermData * td, uint32_t uniqueId, uint32_t numTerms,
+                        uint32_t fieldId, uint32_t termHandle);
+    void testCopyTermFieldMatchData();
+    void testSplitter();
+    void testSplitterUpdate();
+
+public:
+    int Main();
+};
+
+void
+PhraseSplitterTest::assertTermData(const ITermData *td, uint32_t uniqueId, uint32_t numTerms,
+                                   uint32_t fieldId, uint32_t tfHandle)
+{
+    // fprintf(stderr, "checking uid=%d numterms=%d field=%d handle=%d\n", uniqueId, numTerms, fieldId, tfHandle);
+    EXPECT_EQUAL(uniqueId, td->getUniqueId());
+    EXPECT_EQUAL(numTerms, td->getPhraseLength());
+    EXPECT_EQUAL(tfHandle, td->lookupField(fieldId)->getHandle());
+}
+
+void
+PhraseSplitterTest::testCopyTermFieldMatchData()
+{
+    TermFieldMatchData src;
+    src.reset(1);
+    src.appendPosition(TermFieldMatchDataPosition(0, 5, 0, 1000));
+    src.appendPosition(TermFieldMatchDataPosition(0, 15, 0, 1000));
+
+    SimpleTermData td;
+    TermFieldMatchData dst;
+    dst.reset(0);
+    // dst.setTermData(&td);
+    dst.appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000));
+    {
+        FieldPositionsIterator itr = dst.getIterator();
+        EXPECT_EQUAL(itr.getPosition(), 10u);
+        itr.next();
+        ASSERT_TRUE(!itr.valid());
+    }
+
+    PhraseSplitter::copyTermFieldMatchData(dst, src, 2);
+
+    EXPECT_EQUAL(dst.getDocId(), 1u);
+    {
+        TermFieldMatchData::PositionsIterator itr = dst.begin();
+        EXPECT_EQUAL(itr->getPosition(), 7u);
+        ++itr;
+        EXPECT_EQUAL(itr->getPosition(), 17u);
+        ++itr;
+        ASSERT_TRUE(itr == dst.end());
+    }
+    {
+        FieldPositionsIterator itr = dst.getIterator();
+        EXPECT_EQUAL(itr.getPosition(), 7u);
+        itr.next();
+        EXPECT_EQUAL(itr.getPosition(), 17u);
+        itr.next();
+        ASSERT_TRUE(!itr.valid());
+    }
+}
+
+void
+PhraseSplitterTest::testSplitter()
+{
+    { // single term
+        test::QueryEnvironment qe;
+        std::vector<SimpleTermData> &terms = qe.getTerms();
+        MatchDataLayout mdl;
+        terms.push_back(SimpleTermData());
+        terms.back().addField(0).setHandle(mdl.allocTermField(0));
+        MatchData::UP md = mdl.createMatchData();
+        PhraseSplitter ps(qe, 0);
+        ASSERT_TRUE(ps.getNumTerms() == 1);
+        ps.update(*md);
+        // check that nothing is served from the splitter
+        EXPECT_EQUAL(ps.getTerm(0), &terms[0]);
+        TermFieldHandle handle = terms[0].lookupField(0)->getHandle();
+        EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+    }
+    { // single phrase
+        test::QueryEnvironment qe;
+        std::vector<SimpleTermData> & terms = qe.getTerms();
+        MatchDataLayout mdl;
+        terms.push_back(SimpleTermData());
+        terms.back().setUniqueId(1);
+        terms.back().setPhraseLength(3);
+        terms.back().addField(0).setHandle(mdl.allocTermField(0));
+        terms.back().addField(7).setHandle(mdl.allocTermField(7));
+        MatchData::UP md = mdl.createMatchData();
+        PhraseSplitter ps(qe, 7);
+        ASSERT_TRUE(ps.getNumTerms() == 3);
+        ps.update(*md);
+        // check that all is served from the splitter
+        for (size_t i = 0; i < 3; ++i) {
+            // fprintf(stderr, "checking term %d\n", (int)i);
+            const ITermData *td = ps.getTerm(i);
+            EXPECT_NOT_EQUAL(td, &terms[0]);
+            EXPECT_NOT_EQUAL(td->lookupField(7), (ITermFieldData *)0);
+            EXPECT_EQUAL(td->lookupField(0), (ITermFieldData *)0);
+            TEST_DO(assertTermData(td, 1, 1, 7, i + 4)); // skipHandles = 4
+            EXPECT_NOT_EQUAL(td->lookupField(7)->getHandle(),
+                             terms[0].lookupField(7)->getHandle());
+            EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(7)->getHandle()),
+                             md->resolveTermField(terms[0].lookupField(7)->getHandle()));
+        }
+    }
+    { // combination
+        test::QueryEnvironment qe;
+        std::vector<SimpleTermData> &terms = qe.getTerms();
+        MatchDataLayout mdl;
+        for (size_t i = 0; i < 3; ++i) {
+            terms.push_back(SimpleTermData());
+            terms.back().setUniqueId(i);
+            terms.back().setPhraseLength(1);
+            terms.back().addField(4).setHandle(mdl.allocTermField(4));
+            terms.back().addField(7).setHandle(mdl.allocTermField(7));
+            // fprintf(stderr, "setup B term %p #f %zd\n", &terms.back(), terms.back().numFields());
+        }
+        terms[1].setPhraseLength(3);
+        MatchData::UP md = mdl.createMatchData();
+        PhraseSplitter ps(qe, 4);
+        ASSERT_TRUE(ps.getNumTerms() == 5);
+        ps.update(*md);
+        { // first term
+            // fprintf(stderr, "first term\n");
+            EXPECT_EQUAL(ps.getTerm(0), &terms[0]);
+            TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 4, 0));
+            TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 7, 1));
+
+            TermFieldHandle handle = terms[0].lookupField(4)->getHandle();
+            EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+            handle = terms[0].lookupField(7)->getHandle();
+            EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+        }
+        for (size_t i = 0; i < 3; ++i) { // phrase
+            // fprintf(stderr, "phrase term %zd\n", i);
+            const ITermData *td = ps.getTerm(i + 1);
+            EXPECT_NOT_EQUAL(td, &terms[1]);
+            TEST_DO(assertTermData(td, 1, 1, 4, i + 11)); // skipHandles == 11
+            EXPECT_EQUAL(td->lookupField(7), (ITermFieldData *)0);
+            EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(4)->getHandle()),
+                             md->resolveTermField(terms[1].lookupField(4)->getHandle()));
+        }
+        { // last term
+            // fprintf(stderr, "last term\n");
+            EXPECT_EQUAL(ps.getTerm(4), &terms[2]);
+            TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 4, 4));
+            TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 7, 5));
+
+            // fprintf(stderr, "inspect term %p #f %zd\n", &terms[2], terms[2].numFields());
+            fflush(stderr);
+            TermFieldHandle handle = terms[2].lookupField(4)->getHandle();
+            EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+        }
+    }
+}
+
+void
+PhraseSplitterTest::testSplitterUpdate()
+{
+    {
+        test::QueryEnvironment qe;
+        std::vector<SimpleTermData> &terms = qe.getTerms();
+        MatchDataLayout mdl;
+        for (size_t i = 0; i < 3; ++i) {
+            terms.push_back(SimpleTermData());
+            terms.back().setUniqueId(i);
+            terms.back().setPhraseLength(1);
+            terms.back().addField(0).setHandle(mdl.allocTermField(0));
+        }
+        terms[0].setPhraseLength(2);
+        terms[2].setPhraseLength(2);
+        MatchData::UP md = mdl.createMatchData();
+        PhraseSplitter ps(qe, 0);
+        ASSERT_TRUE(ps.getNumTerms() == 5);
+        { // first phrase
+            TermFieldMatchData * tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle());
+            tmd->appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000));
+        }
+        { // first term
+            TermFieldMatchData * tmd = md->resolveTermField(terms[1].lookupField(0)->getHandle());
+            tmd->appendPosition(TermFieldMatchDataPosition(0, 20, 0, 1000));
+        }
+        { // second phrase
+            TermFieldMatchData * tmd = md->resolveTermField(terms[2].lookupField(0)->getHandle());
+            tmd->appendPosition(TermFieldMatchDataPosition(0, 30, 0, 1000));
+        }
+        ps.update(*md);
+        for (size_t i = 0; i < 2; ++i) { // first phrase
+            const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i)->lookupField(0)->getHandle());
+            TermFieldMatchData::PositionsIterator itr = tmd->begin();
+            EXPECT_EQUAL((itr++)->getPosition(), 10 + i);
+            ASSERT_TRUE(itr == tmd->end());
+        }
+        { // first term
+            TermFieldMatchData * tmd = md->resolveTermField(ps.getTerm(2)->lookupField(0)->getHandle());
+            TermFieldMatchData::PositionsIterator itr = tmd->begin();
+            EXPECT_EQUAL((itr++)->getPosition(), 20u);
+            ASSERT_TRUE(itr == tmd->end());
+        }
+        for (size_t i = 0; i < 2; ++i) { // second phrase
+            const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i + 3)->lookupField(0)->getHandle());
+            TermFieldMatchData::PositionsIterator itr = tmd->begin();
+            EXPECT_EQUAL((itr++)->getPosition(), 30 + i);
+            ASSERT_TRUE(itr == tmd->end());
+        }
+    }
+}
+
+int
+PhraseSplitterTest::Main()
+{
+
+    TEST_INIT("phrasesplitter_test");
+
+    testCopyTermFieldMatchData();
+    testSplitter();
+    testSplitterUpdate();
+
+    TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::PhraseSplitterTest);