aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/tests/fef/phrasesplitter
diff options
context:
space:
mode:
author Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
committer Jon Bratseth <bratseth@yahoo-inc.com> 2016-06-15 23:09:44 +0200
commit 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch)
tree 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchlib/src/tests/fef/phrasesplitter
Publish
Diffstat (limited to 'searchlib/src/tests/fef/phrasesplitter')
-rw-r--r-- searchlib/src/tests/fef/phrasesplitter/.gitignore | 6
-rw-r--r-- searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt | 15
-rw-r--r-- searchlib/src/tests/fef/phrasesplitter/DESC | 1
-rw-r--r-- searchlib/src/tests/fef/phrasesplitter/FILES | 1
-rw-r--r-- searchlib/src/tests/fef/phrasesplitter/benchmark.cpp | 84
-rw-r--r-- searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp | 242
6 files changed, 349 insertions, 0 deletions
diff --git a/searchlib/src/tests/fef/phrasesplitter/.gitignore b/searchlib/src/tests/fef/phrasesplitter/.gitignore
new file mode 100644
index 00000000000..418f9961840
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/.gitignore
@@ -0,0 +1,6 @@
+.depend
+Makefile
+benchmark
+phrasesplitter_test
+searchlib_phrasesplitter_test_app
+searchlib_benchmark_app
diff --git a/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt
new file mode 100644
index 00000000000..aa16f3e0a0d
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/CMakeLists.txt
@@ -0,0 +1,15 @@
+# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_phrasesplitter_test_app
+ SOURCES
+ phrasesplitter_test.cpp
+ DEPENDS
+ searchlib
+) # test executable built from phrasesplitter_test.cpp, depending on searchlib
+vespa_add_test(NAME searchlib_phrasesplitter_test_app COMMAND searchlib_phrasesplitter_test_app) # registers the executable above as a test, run without arguments
+vespa_add_executable(searchlib_benchmark_app
+ SOURCES
+ benchmark.cpp
+ DEPENDS
+ searchlib
+) # benchmark executable built from benchmark.cpp, depending on searchlib
+vespa_add_test(NAME searchlib_benchmark_app COMMAND searchlib_benchmark_app BENCHMARK) # NOTE(review): BENCHMARK presumably keeps this out of the regular test run - confirm against vespa_add_test
diff --git a/searchlib/src/tests/fef/phrasesplitter/DESC b/searchlib/src/tests/fef/phrasesplitter/DESC
new file mode 100644
index 00000000000..fba49bdb8c0
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/DESC
@@ -0,0 +1 @@
+phrasesplitter test. Take a look at phrasesplitter_test.cpp for details.
diff --git a/searchlib/src/tests/fef/phrasesplitter/FILES b/searchlib/src/tests/fef/phrasesplitter/FILES
new file mode 100644
index 00000000000..be37941d0c8
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/FILES
@@ -0,0 +1 @@
+phrasesplitter_test.cpp
diff --git a/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp
new file mode 100644
index 00000000000..ca90b1de261
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/benchmark.cpp
@@ -0,0 +1,84 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("phrasesplitter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <iomanip>
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+namespace search {
+namespace fef {
+
+// Command-line application that times repeated PhraseSplitter::update() calls.
+class Benchmark : public vespalib::TestApp
+{
+public:
+    Benchmark() : _timer(), _sample(0.0) {}
+    int Main(); // expects <numRuns> <numPositions> on the command line
+
+private:
+    FastOS_Time _timer; // reference point set by start()
+    double _sample;     // milliseconds between start() and the last sample()
+    void start() { _timer.SetNow(); }
+    void sample() { _sample = _timer.MilliSecsToNow(); }
+    void run(size_t numRuns, size_t numPositions);
+};
+
+// Sets up a query holding one 3-term phrase on field 0 whose match data carries
+// numPositions positions, then times numRuns PhraseSplitter::update() calls into _sample.
+void
+Benchmark::run(size_t numRuns, size_t numPositions)
+{
+    test::QueryEnvironment queryEnv;
+    std::vector<SimpleTermData> &terms = queryEnv.getTerms();
+    MatchDataLayout layout;
+    terms.push_back(SimpleTermData());
+    SimpleTermData &phrase = terms.back();
+    phrase.setUniqueId(1);
+    phrase.setPhraseLength(3); // phrase with 3 terms
+    phrase.addField(0).setHandle(layout.allocTermField(0));
+
+    MatchData::UP matchData = layout.createMatchData();
+    TermFieldMatchData *phraseMatch = matchData->resolveTermField(terms[0].lookupField(0)->getHandle());
+    for (size_t pos = 0; pos != numPositions; ++pos) {
+        phraseMatch->appendPosition(TermFieldMatchDataPosition(0, pos, 0, numPositions));
+    }
+
+    PhraseSplitter splitter(queryEnv, 0);
+    std::cout << "Start benchmark with numRuns(" << numRuns << ") and numPositions(" << numPositions << ")" << std::endl;
+    start(); // only the update() loop below is timed
+    for (size_t iter = 0; iter != numRuns; ++iter) {
+        splitter.update(*matchData);
+    }
+    sample();
+}
+
+// Entry point. Expects <numRuns> <numPositions>, runs the benchmark and prints the measured
+// time (TET) and that time divided by numRuns (ETPD), both labelled as milliseconds.
+int
+Benchmark::Main()
+{
+    TEST_INIT("benchmark");
+    if (_argc != 3) {
+        std::cout << "Must specify <numRuns> and <numPositions>" << std::endl;
+        return 0;
+    }
+    size_t numRuns = strtoull(_argv[1], NULL, 10);
+    size_t numPositions = strtoull(_argv[2], NULL, 10);
+    if (numRuns == 0) { // also what strtoull() yields for non-numeric input; would make ETPD nan/inf
+        std::cout << "<numRuns> must be a positive integer" << std::endl;
+        return 1;
+    }
+    run(numRuns, numPositions);
+    std::cout << "TET: " << _sample << " (ms)" << std::endl;
+    std::cout << "ETPD: " << std::fixed << std::setprecision(10) << _sample / numRuns << " (ms)" << std::endl;
+    TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::Benchmark);
diff --git a/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp
new file mode 100644
index 00000000000..0fa6f27022e
--- /dev/null
+++ b/searchlib/src/tests/fef/phrasesplitter/phrasesplitter_test.cpp
@@ -0,0 +1,242 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP("phrasesplitter_test");
+#include <vespa/vespalib/testkit/testapp.h>
+
+#include <vespa/searchlib/fef/matchdatalayout.h>
+#include <vespa/searchlib/fef/phrasesplitter.h>
+#include <vespa/searchlib/fef/test/queryenvironment.h>
+
+namespace search {
+namespace fef {
+
+// Test application for PhraseSplitter; Main() runs each test case below once.
+class PhraseSplitterTest : public vespalib::TestApp
+{
+private:
+    // Expects td to report uniqueId and phrase length numTerms, and fieldId to carry handle tfHandle.
+    void assertTermData(const ITermData *td, uint32_t uniqueId, uint32_t numTerms, uint32_t fieldId, uint32_t tfHandle);
+    void testCopyTermFieldMatchData();
+    void testSplitter();
+    void testSplitterUpdate();
+public:
+    int Main();
+};
+
+void
+PhraseSplitterTest::assertTermData(const ITermData *td, uint32_t uniqueId, uint32_t numTerms,
+ uint32_t fieldId, uint32_t tfHandle)
+{ // Expects td to report uniqueId and phrase length numTerms, and fieldId to carry handle tfHandle.
+ // fprintf(stderr, "checking uid=%d numterms=%d field=%d handle=%d\n", uniqueId, numTerms, fieldId, tfHandle);
+ EXPECT_EQUAL(uniqueId, td->getUniqueId());
+ EXPECT_EQUAL(numTerms, td->getPhraseLength());
+ EXPECT_EQUAL(tfHandle, td->lookupField(fieldId)->getHandle()); // lookupField() result is dereferenced unchecked: fieldId must be present on td
+}
+
+// Verifies that PhraseSplitter::copyTermFieldMatchData() overwrites dst with the doc id and
+// positions of src, with every position shifted by the third argument (2 here).
+void
+PhraseSplitterTest::testCopyTermFieldMatchData()
+{
+    TermFieldMatchData src;
+    src.reset(1);
+    src.appendPosition(TermFieldMatchDataPosition(0, 5, 0, 1000));
+    src.appendPosition(TermFieldMatchDataPosition(0, 15, 0, 1000));
+
+    TermFieldMatchData dst;
+    dst.reset(0);
+    dst.appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000));
+    { // before the copy dst holds only its own position
+        FieldPositionsIterator itr = dst.getIterator();
+        EXPECT_EQUAL(itr.getPosition(), 10u);
+        itr.next();
+        ASSERT_TRUE(!itr.valid());
+    }
+
+    PhraseSplitter::copyTermFieldMatchData(dst, src, 2);
+
+    EXPECT_EQUAL(dst.getDocId(), 1u);
+    { // copied positions as seen through begin()/end()
+        TermFieldMatchData::PositionsIterator itr = dst.begin();
+        EXPECT_EQUAL(itr->getPosition(), 7u);
+        ++itr;
+        EXPECT_EQUAL(itr->getPosition(), 17u);
+        ++itr;
+        ASSERT_TRUE(itr == dst.end());
+    }
+    { // the same positions as seen through FieldPositionsIterator
+        FieldPositionsIterator itr = dst.getIterator();
+        EXPECT_EQUAL(itr.getPosition(), 7u);
+        itr.next();
+        EXPECT_EQUAL(itr.getPosition(), 17u);
+        itr.next();
+        ASSERT_TRUE(!itr.valid());
+    }
+}
+
+void
+PhraseSplitterTest::testSplitter()
+{ // Checks which terms and handles PhraseSplitter exposes for a plain term, a phrase, and a mix of both.
+ { // single term
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ terms.push_back(SimpleTermData());
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 0);
+ ASSERT_TRUE(ps.getNumTerms() == 1);
+ ps.update(*md);
+ // check that nothing is served from the splitter
+ EXPECT_EQUAL(ps.getTerm(0), &terms[0]);
+ TermFieldHandle handle = terms[0].lookupField(0)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ }
+ { // single phrase
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> & terms = qe.getTerms();
+ MatchDataLayout mdl;
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(1);
+ terms.back().setPhraseLength(3); // one phrase of length 3: getNumTerms() is expected to be 3 below
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ terms.back().addField(7).setHandle(mdl.allocTermField(7));
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 7); // constructed with 7: the split terms below expose field 7 but not field 0
+ ASSERT_TRUE(ps.getNumTerms() == 3);
+ ps.update(*md);
+ // check that all is served from the splitter
+ for (size_t i = 0; i < 3; ++i) {
+ // fprintf(stderr, "checking term %d\n", (int)i);
+ const ITermData *td = ps.getTerm(i);
+ EXPECT_NOT_EQUAL(td, &terms[0]); // split terms are separate objects, not the original phrase term
+ EXPECT_NOT_EQUAL(td->lookupField(7), (ITermFieldData *)0);
+ EXPECT_EQUAL(td->lookupField(0), (ITermFieldData *)0);
+ TEST_DO(assertTermData(td, 1, 1, 7, i + 4)); // skipHandles = 4
+ EXPECT_NOT_EQUAL(td->lookupField(7)->getHandle(),
+ terms[0].lookupField(7)->getHandle());
+ EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(7)->getHandle()),
+ md->resolveTermField(terms[0].lookupField(7)->getHandle()));
+ }
+ }
+ { // combination
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ for (size_t i = 0; i < 3; ++i) {
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(i);
+ terms.back().setPhraseLength(1);
+ terms.back().addField(4).setHandle(mdl.allocTermField(4));
+ terms.back().addField(7).setHandle(mdl.allocTermField(7));
+ // fprintf(stderr, "setup B term %p #f %zd\n", &terms.back(), terms.back().numFields());
+ }
+ terms[1].setPhraseLength(3); // middle term becomes a phrase of length 3: 1 + 3 + 1 = 5 split terms
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 4);
+ ASSERT_TRUE(ps.getNumTerms() == 5);
+ ps.update(*md);
+ { // first term
+ // fprintf(stderr, "first term\n");
+ EXPECT_EQUAL(ps.getTerm(0), &terms[0]);
+ TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 4, 0));
+ TEST_DO(assertTermData(ps.getTerm(0), 0, 1, 7, 1));
+
+ TermFieldHandle handle = terms[0].lookupField(4)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ handle = terms[0].lookupField(7)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle));
+ }
+ for (size_t i = 0; i < 3; ++i) { // phrase
+ // fprintf(stderr, "phrase term %zd\n", i);
+ const ITermData *td = ps.getTerm(i + 1);
+ EXPECT_NOT_EQUAL(td, &terms[1]);
+ TEST_DO(assertTermData(td, 1, 1, 4, i + 11)); // skipHandles == 11
+ EXPECT_EQUAL(td->lookupField(7), (ITermFieldData *)0); // split phrase terms do not expose field 7
+ EXPECT_NOT_EQUAL(ps.resolveTermField(td->lookupField(4)->getHandle()),
+ md->resolveTermField(terms[1].lookupField(4)->getHandle()));
+ }
+ { // last term
+ // fprintf(stderr, "last term\n");
+ EXPECT_EQUAL(ps.getTerm(4), &terms[2]);
+ TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 4, 4));
+ TEST_DO(assertTermData(ps.getTerm(4), 2, 1, 7, 5));
+
+ // fprintf(stderr, "inspect term %p #f %zd\n", &terms[2], terms[2].numFields());
+ fflush(stderr); // NOTE(review): looks like a leftover from the commented-out debug output - confirm before removing
+ TermFieldHandle handle = terms[2].lookupField(4)->getHandle();
+ EXPECT_EQUAL(ps.resolveTermField(handle), md->resolveTermField(handle)); // NOTE(review): field 7 is not re-checked here, unlike for the first term
+ }
+ }
+}
+
+void
+PhraseSplitterTest::testSplitterUpdate()
+{ // Checks that after update() each split phrase term sees the phrase position plus its index, and the plain term keeps its own position.
+ {
+ test::QueryEnvironment qe;
+ std::vector<SimpleTermData> &terms = qe.getTerms();
+ MatchDataLayout mdl;
+ for (size_t i = 0; i < 3; ++i) {
+ terms.push_back(SimpleTermData());
+ terms.back().setUniqueId(i);
+ terms.back().setPhraseLength(1);
+ terms.back().addField(0).setHandle(mdl.allocTermField(0));
+ }
+ terms[0].setPhraseLength(2); // terms 0 and 2 become phrases of length 2: 2 + 1 + 2 = 5 split terms
+ terms[2].setPhraseLength(2);
+ MatchData::UP md = mdl.createMatchData();
+ PhraseSplitter ps(qe, 0);
+ ASSERT_TRUE(ps.getNumTerms() == 5);
+ { // first phrase
+ TermFieldMatchData * tmd = md->resolveTermField(terms[0].lookupField(0)->getHandle());
+ tmd->appendPosition(TermFieldMatchDataPosition(0, 10, 0, 1000));
+ }
+ { // first term
+ TermFieldMatchData * tmd = md->resolveTermField(terms[1].lookupField(0)->getHandle());
+ tmd->appendPosition(TermFieldMatchDataPosition(0, 20, 0, 1000));
+ }
+ { // second phrase
+ TermFieldMatchData * tmd = md->resolveTermField(terms[2].lookupField(0)->getHandle());
+ tmd->appendPosition(TermFieldMatchDataPosition(0, 30, 0, 1000));
+ }
+ ps.update(*md); // positions were added to md above; the checks below read them back after the update
+ for (size_t i = 0; i < 2; ++i) { // first phrase
+ const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i)->lookupField(0)->getHandle());
+ TermFieldMatchData::PositionsIterator itr = tmd->begin();
+ EXPECT_EQUAL((itr++)->getPosition(), 10 + i); // i-th split term is expected at the phrase position plus i
+ ASSERT_TRUE(itr == tmd->end());
+ }
+ { // first term
+ TermFieldMatchData * tmd = md->resolveTermField(ps.getTerm(2)->lookupField(0)->getHandle()); // NOTE(review): resolved through md rather than ps, unlike the phrase checks - confirm this is intended
+ TermFieldMatchData::PositionsIterator itr = tmd->begin();
+ EXPECT_EQUAL((itr++)->getPosition(), 20u);
+ ASSERT_TRUE(itr == tmd->end());
+ }
+ for (size_t i = 0; i < 2; ++i) { // second phrase
+ const TermFieldMatchData * tmd = ps.resolveTermField(ps.getTerm(i + 3)->lookupField(0)->getHandle());
+ TermFieldMatchData::PositionsIterator itr = tmd->begin();
+ EXPECT_EQUAL((itr++)->getPosition(), 30 + i);
+ ASSERT_TRUE(itr == tmd->end());
+ }
+ }
+}
+
+int
+PhraseSplitterTest::Main()
+{
+ // Runs every PhraseSplitter test case once, in order, between TEST_INIT and TEST_DONE.
+ TEST_INIT("phrasesplitter_test");
+
+ testCopyTermFieldMatchData(); // PhraseSplitter::copyTermFieldMatchData()
+ testSplitter(); // terms and handles exposed after splitting
+ testSplitterUpdate(); // positions seen after update()
+
+ TEST_DONE();
+}
+
+}
+}
+
+TEST_APPHOOK(search::fef::PhraseSplitterTest);