diff options
author | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
---|---|---|
committer | Jon Bratseth <bratseth@yahoo-inc.com> | 2016-06-15 23:09:44 +0200 |
commit | 72231250ed81e10d66bfe70701e64fa5fe50f712 (patch) | |
tree | 2728bba1131a6f6e5bdf95afec7d7ff9358dac50 /searchcore/src/tests/proton/matching |
Publish
Diffstat (limited to 'searchcore/src/tests/proton/matching')
28 files changed, 3985 insertions, 0 deletions
diff --git a/searchcore/src/tests/proton/matching/.cvsignore b/searchcore/src/tests/proton/matching/.cvsignore new file mode 100644 index 00000000000..75b0a127c8f --- /dev/null +++ b/searchcore/src/tests/proton/matching/.cvsignore @@ -0,0 +1,3 @@ +.depend +Makefile +matching_test diff --git a/searchcore/src/tests/proton/matching/.gitignore b/searchcore/src/tests/proton/matching/.gitignore new file mode 100644 index 00000000000..c9789272a35 --- /dev/null +++ b/searchcore/src/tests/proton/matching/.gitignore @@ -0,0 +1,14 @@ +.depend +Makefile +matching_test +query_test +querynodes_test +resolveviewvisitor_test +termdataextractor_test +searchcore_matching_stats_test_app +searchcore_matching_test_app +searchcore_query_test_app +searchcore_querynodes_test_app +searchcore_resolveviewvisitor_test_app +searchcore_sessionmanager_test_app +searchcore_termdataextractor_test_app diff --git a/searchcore/src/tests/proton/matching/CMakeLists.txt b/searchcore/src/tests/proton/matching/CMakeLists.txt new file mode 100644 index 00000000000..8007ff0344d --- /dev/null +++ b/searchcore/src/tests/proton/matching/CMakeLists.txt @@ -0,0 +1,60 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchcore_matching_test_app + SOURCES + matching_test.cpp + DEPENDS + searchcore_server + searchcore_fconfig + searchcore_matching + searchcore_feedoperation + searchcore_documentmetastore + searchcore_bucketdb + searchcore_pcommon + searchcore_grouping + searchcore_util +) +vespa_add_test(NAME searchcore_matching_test_app COMMAND searchcore_matching_test_app) +vespa_add_executable(searchcore_sessionmanager_test_app + SOURCES + sessionmanager_test.cpp + DEPENDS + searchcore_matching + searchcore_grouping +) +vespa_add_test(NAME searchcore_sessionmanager_test_app COMMAND searchcore_sessionmanager_test_app) +vespa_add_executable(searchcore_matching_stats_test_app + SOURCES + matching_stats_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_matching_stats_test_app COMMAND searchcore_matching_stats_test_app) +vespa_add_executable(searchcore_query_test_app + SOURCES + query_test.cpp + DEPENDS + searchcore_server + searchcore_matching +) +vespa_add_test(NAME searchcore_query_test_app COMMAND searchcore_query_test_app) +vespa_add_executable(searchcore_termdataextractor_test_app + SOURCES + termdataextractor_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_termdataextractor_test_app COMMAND searchcore_termdataextractor_test_app) +vespa_add_executable(searchcore_resolveviewvisitor_test_app + SOURCES + resolveviewvisitor_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_resolveviewvisitor_test_app COMMAND searchcore_resolveviewvisitor_test_app) +vespa_add_executable(searchcore_querynodes_test_app + SOURCES + querynodes_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_querynodes_test_app COMMAND searchcore_querynodes_test_app) diff --git a/searchcore/src/tests/proton/matching/DESC b/searchcore/src/tests/proton/matching/DESC new file mode 100644 index 00000000000..435b17f333e --- /dev/null +++ b/searchcore/src/tests/proton/matching/DESC @@ -0,0 +1 @@ 
+matching test. Take a look at matching.cpp for details. diff --git a/searchcore/src/tests/proton/matching/FILES b/searchcore/src/tests/proton/matching/FILES new file mode 100644 index 00000000000..0213f77d899 --- /dev/null +++ b/searchcore/src/tests/proton/matching/FILES @@ -0,0 +1 @@ +matching.cpp diff --git a/searchcore/src/tests/proton/matching/docid_range_scheduler/.gitignore b/searchcore/src/tests/proton/matching/docid_range_scheduler/.gitignore new file mode 100644 index 00000000000..8de390797da --- /dev/null +++ b/searchcore/src/tests/proton/matching/docid_range_scheduler/.gitignore @@ -0,0 +1,3 @@ +/docid_range_scheduler_bench +searchcore_docid_range_scheduler_test_app +searchcore_docid_range_scheduler_bench_app diff --git a/searchcore/src/tests/proton/matching/docid_range_scheduler/CMakeLists.txt b/searchcore/src/tests/proton/matching/docid_range_scheduler/CMakeLists.txt new file mode 100644 index 00000000000..3892ac41b92 --- /dev/null +++ b/searchcore/src/tests/proton/matching/docid_range_scheduler/CMakeLists.txt @@ -0,0 +1,15 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchcore_docid_range_scheduler_test_app + SOURCES + docid_range_scheduler_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_docid_range_scheduler_test_app COMMAND searchcore_docid_range_scheduler_test_app) +vespa_add_executable(searchcore_docid_range_scheduler_bench_app + SOURCES + docid_range_scheduler_bench.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_docid_range_scheduler_bench_app COMMAND searchcore_docid_range_scheduler_bench_app BENCHMARK) diff --git a/searchcore/src/tests/proton/matching/docid_range_scheduler/docid_range_scheduler_bench.cpp b/searchcore/src/tests/proton/matching/docid_range_scheduler/docid_range_scheduler_bench.cpp new file mode 100644 index 00000000000..848743e0f23 --- /dev/null +++ b/searchcore/src/tests/proton/matching/docid_range_scheduler/docid_range_scheduler_bench.cpp @@ -0,0 +1,226 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchcore/proton/matching/docid_range_scheduler.h> +#include <vespa/vespalib/util/benchmark_timer.h> +#include <vespa/vespalib/util/stringfmt.h> + +using namespace proton::matching; +using namespace vespalib; + +//----------------------------------------------------------------------------- + +size_t do_work(size_t cost) __attribute__((noinline)); +size_t do_work(size_t cost) { + size_t result = 0; + size_t loop_cnt = 42; + for (size_t n = 0; n < cost; ++n) { + result += (cost * n); + for (size_t i = 0; i < loop_cnt; ++i) { + result += (cost * n * i); + for (size_t j = 0; j < loop_cnt; ++j) { + result += (cost * n * i * j); + for (size_t k = 0; k < loop_cnt; ++k) { + result += (cost * n * i * j * k); + } + } + } + } + return result; +} + +//----------------------------------------------------------------------------- + +TEST("measure do_work overhead for different cost inputs") { + for (size_t cost: {0, 1, 10, 100, 1000}) { + BenchmarkTimer timer(1.0); + while (timer.has_budget()) { + timer.before(); + (void) do_work(cost); + timer.after(); + } + double min_time_s = timer.min_time(); + fprintf(stderr, "const %zu: %g us\n", cost, min_time_s * 1000.0 * 1000.0); + } +} + +//----------------------------------------------------------------------------- + +struct Work { + typedef std::unique_ptr<Work> UP; + virtual vespalib::string desc() const = 0; + virtual void perform(uint32_t docid) const = 0; + virtual ~Work() {} +}; + +struct UniformWork : public Work { + size_t cost; + UniformWork(size_t cost_in) : cost(cost_in) {} + vespalib::string desc() const override { return make_string("uniform(%zu)", cost); } + void perform(uint32_t) const override { (void) do_work(cost); } +}; + +struct TriangleWork : public Work { + size_t div; + TriangleWork(size_t div_in) : div(div_in) {} + vespalib::string desc() const override { return make_string("triangle(docid/%zu)", div); } + void 
perform(uint32_t docid) const override { (void) do_work(docid/div); } +}; + +struct SpikeWork : public Work { + uint32_t begin; + uint32_t end; + size_t cost; + SpikeWork(uint32_t begin_in, uint32_t end_in, size_t cost_in) + : begin(begin_in), end(end_in), cost(cost_in) {} + vespalib::string desc() const override { return make_string("spike(%u,%u,%zu)", begin, end, cost); } + void perform(uint32_t docid) const override { + if ((docid >= begin) && (docid < end)) { + (void) do_work(cost); + } + } +}; + +struct WorkList { + std::vector<Work::UP> work_list; + WorkList() : work_list() { + work_list.push_back(std::make_unique<UniformWork>(10)); + work_list.push_back(std::make_unique<TriangleWork>(4878)); + work_list.push_back(std::make_unique<SpikeWork>(1, 10001, 100)); + work_list.push_back(std::make_unique<SpikeWork>(1, 1001, 1000)); + work_list.push_back(std::make_unique<SpikeWork>(1, 101, 10000)); + work_list.push_back(std::make_unique<SpikeWork>(1, 11, 100000)); + work_list.push_back(std::make_unique<SpikeWork>(90001, 100001, 100)); + work_list.push_back(std::make_unique<SpikeWork>(99001, 100001, 1000)); + work_list.push_back(std::make_unique<SpikeWork>(99901, 100001, 10000)); + work_list.push_back(std::make_unique<SpikeWork>(99991, 100001, 100000)); + } +}; + +//----------------------------------------------------------------------------- + +struct SchedulerFactory { + typedef std::unique_ptr<SchedulerFactory> UP; + virtual vespalib::string desc() const = 0; + virtual DocidRangeScheduler::UP create(uint32_t docid_limit) const = 0; + virtual ~SchedulerFactory() {} +}; + +struct PartitionSchedulerFactory : public SchedulerFactory { + size_t num_threads; + PartitionSchedulerFactory(size_t num_threads_in) : num_threads(num_threads_in) {} + vespalib::string desc() const override { return make_string("partition(threads:%zu)", num_threads); } + DocidRangeScheduler::UP create(uint32_t docid_limit) const override { + return 
std::make_unique<PartitionDocidRangeScheduler>(num_threads, docid_limit); + } +}; + +struct TaskSchedulerFactory : public SchedulerFactory { + size_t num_threads; + size_t num_tasks; + TaskSchedulerFactory(size_t num_threads_in, size_t num_tasks_in) + : num_threads(num_threads_in), num_tasks(num_tasks_in) {} + vespalib::string desc() const override { return make_string("task(threads:%zu,num_tasks:%zu)", num_threads, num_tasks); } + DocidRangeScheduler::UP create(uint32_t docid_limit) const override { + return std::make_unique<TaskDocidRangeScheduler>(num_threads, num_tasks, docid_limit); + } +}; + +struct AdaptiveSchedulerFactory : public SchedulerFactory { + size_t num_threads; + size_t min_task; + AdaptiveSchedulerFactory(size_t num_threads_in, size_t min_task_in) + : num_threads(num_threads_in), min_task(min_task_in) {} + vespalib::string desc() const override { return make_string("adaptive(threads:%zu,min_task:%zu)", num_threads, min_task); } + DocidRangeScheduler::UP create(uint32_t docid_limit) const override { + return std::make_unique<AdaptiveDocidRangeScheduler>(num_threads, min_task, docid_limit); + } +}; + +struct SchedulerList { + std::vector<SchedulerFactory::UP> factory_list; + SchedulerList(size_t num_threads) : factory_list() { + factory_list.push_back(std::make_unique<PartitionSchedulerFactory>(num_threads)); + factory_list.push_back(std::make_unique<TaskSchedulerFactory>(num_threads, num_threads)); + factory_list.push_back(std::make_unique<TaskSchedulerFactory>(num_threads, 64)); + factory_list.push_back(std::make_unique<TaskSchedulerFactory>(num_threads, 256)); + factory_list.push_back(std::make_unique<TaskSchedulerFactory>(num_threads, 1024)); + factory_list.push_back(std::make_unique<TaskSchedulerFactory>(num_threads, 4096)); + factory_list.push_back(std::make_unique<AdaptiveSchedulerFactory>(num_threads, 1000)); + factory_list.push_back(std::make_unique<AdaptiveSchedulerFactory>(num_threads, 100)); + 
factory_list.push_back(std::make_unique<AdaptiveSchedulerFactory>(num_threads, 10)); + factory_list.push_back(std::make_unique<AdaptiveSchedulerFactory>(num_threads, 1)); + } +}; + +//----------------------------------------------------------------------------- + +void worker(DocidRangeScheduler &scheduler, const Work &work, size_t thread_id) { + IdleObserver observer = scheduler.make_idle_observer(); + if (observer.is_always_zero()) { + for (DocidRange range = scheduler.first_range(thread_id); + !range.empty(); + range = scheduler.next_range(thread_id)) + { + do_work(10); // represents init-range cost + for (uint32_t docid = range.begin; docid < range.end; ++docid) { + work.perform(docid); + } + } + } else { + for (DocidRange range = scheduler.first_range(thread_id); + !range.empty(); + range = scheduler.next_range(thread_id)) + { + do_work(10); // represents init-range cost + for (uint32_t docid = range.begin; docid < range.end; ++docid) { + work.perform(docid); + if (observer.get() > 0) { + range = scheduler.share_range(thread_id, DocidRange(docid, range.end)); + } + } + } + } +} + +//----------------------------------------------------------------------------- + +TEST_MT_FFF("benchmark different combinations of schedulers and work loads", 8, + DocidRangeScheduler::UP(), SchedulerList(num_threads), WorkList()) +{ + if (thread_id == 0) { + fprintf(stderr, "Benchmarking with %zu threads:\n", num_threads); + } + for (size_t scheduler = 0; scheduler < f2.factory_list.size(); ++scheduler) { + for (size_t work = 0; work < f3.work_list.size(); ++work) { + if (thread_id == 0) { + fprintf(stderr, " scheduler: %s, work load: %s ", + f2.factory_list[scheduler]->desc().c_str(), + f3.work_list[work]->desc().c_str()); + } + BenchmarkTimer timer(1.0); + for (size_t i = 0; i < 5; ++i) { + TEST_BARRIER(); + if (thread_id == 0) { + f1 = f2.factory_list[scheduler]->create(100001); + } + TEST_BARRIER(); + timer.before(); + worker(*f1, *f3.work_list[work], thread_id); + 
TEST_BARRIER(); + timer.after(); + if (thread_id == 0) { + fprintf(stderr, "."); + } + } + if (thread_id == 0) { + fprintf(stderr, " real time: %g ms\n", timer.min_time() * 1000.0); + } + } + } +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/docid_range_scheduler/docid_range_scheduler_test.cpp b/searchcore/src/tests/proton/matching/docid_range_scheduler/docid_range_scheduler_test.cpp new file mode 100644 index 00000000000..6716e945a0d --- /dev/null +++ b/searchcore/src/tests/proton/matching/docid_range_scheduler/docid_range_scheduler_test.cpp @@ -0,0 +1,286 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchcore/proton/matching/docid_range_scheduler.h> +#include <chrono> +#include <thread> + +using namespace proton::matching; + +void verify_range(DocidRange a, DocidRange b) { + EXPECT_EQUAL(a.begin, b.begin); + EXPECT_EQUAL(a.end, b.end); +} + +//----------------------------------------------------------------------------- + +TEST("require that default docid range constructor creates and empty range") { + EXPECT_TRUE(DocidRange().empty()); + EXPECT_EQUAL(DocidRange().size(), 0u); +} + +TEST("require that docid range ensures end is not less than begin") { + EXPECT_EQUAL(DocidRange(10, 20).size(), 10u); + EXPECT_TRUE(!DocidRange(10, 20).empty()); + EXPECT_EQUAL(DocidRange(10, 20).begin, 10u); + EXPECT_EQUAL(DocidRange(10, 20).end, 20u); + EXPECT_EQUAL(DocidRange(20, 10).size(), 0u); + EXPECT_TRUE(DocidRange(20, 10).empty()); + EXPECT_EQUAL(DocidRange(20, 10).begin, 20u); + EXPECT_EQUAL(DocidRange(20, 10).end, 20u); +} + +//----------------------------------------------------------------------------- + +TEST("require that default constructed IdleObserver is always zero") { + 
IdleObserver observer; + EXPECT_TRUE(observer.is_always_zero()); + EXPECT_EQUAL(0u, observer.get()); +} + +TEST("require that IdleObserver can observe an atomic size_t value") { + std::atomic<size_t> idle(0); + IdleObserver observer(idle); + EXPECT_TRUE(!observer.is_always_zero()); + EXPECT_EQUAL(0u, observer.get()); + idle = 10; + EXPECT_EQUAL(10u, observer.get()); +} + +//----------------------------------------------------------------------------- + +TEST("require that the docid range splitter can split a docid range") { + DocidRangeSplitter splitter(DocidRange(1, 16), 4); + TEST_DO(verify_range(splitter.get(0), DocidRange(1, 5))); + TEST_DO(verify_range(splitter.get(1), DocidRange(5, 9))); + TEST_DO(verify_range(splitter.get(2), DocidRange(9, 13))); + TEST_DO(verify_range(splitter.get(3), DocidRange(13, 16))); +} + +TEST("require that the docid range splitter can split an empty range") { + DocidRangeSplitter splitter(DocidRange(5, 5), 2); + TEST_DO(verify_range(splitter.get(0), DocidRange(5, 5))); + TEST_DO(verify_range(splitter.get(1), DocidRange(5, 5))); +} + +TEST("require that the docid range splitter can split a range into more parts than values") { + DocidRangeSplitter splitter(DocidRange(1, 4), 4); + TEST_DO(verify_range(splitter.get(0), DocidRange(1, 2))); + TEST_DO(verify_range(splitter.get(1), DocidRange(2, 3))); + TEST_DO(verify_range(splitter.get(2), DocidRange(3, 4))); + TEST_DO(verify_range(splitter.get(3), DocidRange(4, 4))); +} + +TEST("require that the docid range splitter gives empty ranges if accessed with too high index") { + DocidRangeSplitter splitter(DocidRange(1, 4), 3); + TEST_DO(verify_range(splitter.get(0), DocidRange(1, 2))); + TEST_DO(verify_range(splitter.get(1), DocidRange(2, 3))); + TEST_DO(verify_range(splitter.get(2), DocidRange(3, 4))); + TEST_DO(verify_range(splitter.get(3), DocidRange(4, 4))); + TEST_DO(verify_range(splitter.get(100), DocidRange(4, 4))); +} + 
+//----------------------------------------------------------------------------- + +TEST("require that the partition scheduler acts as expected") { + PartitionDocidRangeScheduler scheduler(4, 16); + TEST_DO(verify_range(scheduler.total_span(0), DocidRange(1, 5))); + TEST_DO(verify_range(scheduler.total_span(1), DocidRange(5, 9))); + TEST_DO(verify_range(scheduler.total_span(2), DocidRange(9, 13))); + TEST_DO(verify_range(scheduler.total_span(3), DocidRange(13, 16))); + EXPECT_EQUAL(scheduler.total_size(0), 4u); + EXPECT_EQUAL(scheduler.total_size(1), 4u); + EXPECT_EQUAL(scheduler.total_size(2), 4u); + EXPECT_EQUAL(scheduler.total_size(3), 3u); + EXPECT_EQUAL(scheduler.unassigned_size(), 0u); + TEST_DO(verify_range(scheduler.first_range(0), DocidRange(1, 5))); + TEST_DO(verify_range(scheduler.first_range(1), DocidRange(5, 9))); + TEST_DO(verify_range(scheduler.first_range(2), DocidRange(9, 13))); + TEST_DO(verify_range(scheduler.first_range(3), DocidRange(13, 16))); + TEST_DO(verify_range(scheduler.next_range(0), DocidRange())); + TEST_DO(verify_range(scheduler.next_range(1), DocidRange())); + TEST_DO(verify_range(scheduler.next_range(2), DocidRange())); + TEST_DO(verify_range(scheduler.next_range(3), DocidRange())); +} + +TEST("require that the partition scheduler protects against documents underflow") { + PartitionDocidRangeScheduler scheduler(2, 0); + TEST_DO(verify_range(scheduler.total_span(0), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.total_span(1), DocidRange(1,1))); + EXPECT_EQUAL(scheduler.total_size(0), 0u); + EXPECT_EQUAL(scheduler.total_size(1), 0u); + EXPECT_EQUAL(scheduler.unassigned_size(), 0u); + TEST_DO(verify_range(scheduler.first_range(0), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.first_range(1), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.next_range(0), DocidRange())); + TEST_DO(verify_range(scheduler.next_range(1), DocidRange())); +} + +//----------------------------------------------------------------------------- + 
+TEST("require that the task scheduler acts as expected") { + TaskDocidRangeScheduler scheduler(2, 5, 20); + EXPECT_EQUAL(scheduler.unassigned_size(), 19u); + TEST_DO(verify_range(scheduler.total_span(0), DocidRange(1, 20))); + TEST_DO(verify_range(scheduler.total_span(1), DocidRange(1, 20))); + EXPECT_EQUAL(scheduler.total_size(0), 0u); + EXPECT_EQUAL(scheduler.total_size(1), 0u); + TEST_DO(verify_range(scheduler.first_range(1), DocidRange(1, 5))); + TEST_DO(verify_range(scheduler.first_range(0), DocidRange(5, 9))); + TEST_DO(verify_range(scheduler.next_range(0), DocidRange(9, 13))); + EXPECT_EQUAL(scheduler.unassigned_size(), 7u); + TEST_DO(verify_range(scheduler.next_range(1), DocidRange(13, 17))); + TEST_DO(verify_range(scheduler.next_range(0), DocidRange(17, 20))); + TEST_DO(verify_range(scheduler.next_range(0), DocidRange(20, 20))); + TEST_DO(verify_range(scheduler.next_range(1), DocidRange(20, 20))); + EXPECT_EQUAL(scheduler.total_size(0), 11u); + EXPECT_EQUAL(scheduler.total_size(1), 8u); + EXPECT_EQUAL(scheduler.unassigned_size(), 0u); +} + +TEST("require that the task scheduler protects against documents underflow") { + TaskDocidRangeScheduler scheduler(2, 4, 0); + TEST_DO(verify_range(scheduler.total_span(0), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.total_span(1), DocidRange(1,1))); + EXPECT_EQUAL(scheduler.total_size(0), 0u); + EXPECT_EQUAL(scheduler.total_size(1), 0u); + EXPECT_EQUAL(scheduler.unassigned_size(), 0u); + TEST_DO(verify_range(scheduler.first_range(0), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.first_range(1), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.next_range(0), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.next_range(1), DocidRange(1,1))); +} + +//----------------------------------------------------------------------------- + +TEST("require that the adaptive scheduler starts by dividing the docid space equally") { + AdaptiveDocidRangeScheduler scheduler(4, 1, 16); + 
EXPECT_EQUAL(scheduler.total_size(0), 4u); + EXPECT_EQUAL(scheduler.total_size(1), 4u); + EXPECT_EQUAL(scheduler.total_size(2), 4u); + EXPECT_EQUAL(scheduler.total_size(3), 3u); + EXPECT_EQUAL(scheduler.unassigned_size(), 0u); + TEST_DO(verify_range(scheduler.first_range(0), DocidRange(1, 5))); + TEST_DO(verify_range(scheduler.first_range(1), DocidRange(5, 9))); + TEST_DO(verify_range(scheduler.first_range(2), DocidRange(9, 13))); + TEST_DO(verify_range(scheduler.first_range(3), DocidRange(13, 16))); +} + +TEST("require that the adaptive scheduler reports the full span to all threads") { + AdaptiveDocidRangeScheduler scheduler(3, 1, 16); + TEST_DO(verify_range(scheduler.total_span(0), DocidRange(1,16))); + TEST_DO(verify_range(scheduler.total_span(1), DocidRange(1,16))); + TEST_DO(verify_range(scheduler.total_span(2), DocidRange(1,16))); +} + +TEST_MT_F("require that the adaptive scheduler terminates when all workers request more work", + 4, AdaptiveDocidRangeScheduler(num_threads, 1, 16)) +{ + (void) f1.first_range(thread_id); + DocidRange range = f1.next_range(thread_id); + EXPECT_TRUE(range.empty()); +} + +void wait_idle(const DocidRangeScheduler &scheduler, size_t wanted) { + IdleObserver observer = scheduler.make_idle_observer(); + while (observer.get() != wanted) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } +} + +TEST_MT_F("require that the adaptive scheduler enables threads to share work", + 3, AdaptiveDocidRangeScheduler(num_threads, 1, 28)) +{ + DocidRange range = f1.first_range(thread_id); + if (thread_id == 0) { + TEST_DO(verify_range(range, DocidRange(1,10))); + } else if (thread_id == 1) { + TEST_DO(verify_range(range, DocidRange(10,19))); + } else { + TEST_DO(verify_range(range, DocidRange(19,28))); + } + EXPECT_EQUAL(f1.total_size(thread_id), 9u); + TEST_DO(verify_range(f1.share_range(thread_id, range), range)); + TEST_BARRIER(); + if (thread_id == 0) { + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange(25,28))); + } 
else if (thread_id == 1) { + wait_idle(f1, 1); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange(22,25))); + } else { + wait_idle(f1, 2); + verify_range(f1.share_range(thread_id, range), DocidRange(19,22)); + } + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + EXPECT_EQUAL(f1.total_size(0), 12u); + EXPECT_EQUAL(f1.total_size(1), 12u); + EXPECT_EQUAL(f1.total_size(2), 3u); +} + +TEST("require that the adaptive scheduler protects against documents underflow") { + AdaptiveDocidRangeScheduler scheduler(2, 1, 0); + TEST_DO(verify_range(scheduler.first_range(0), DocidRange(1,1))); + TEST_DO(verify_range(scheduler.first_range(1), DocidRange(1,1))); + EXPECT_EQUAL(scheduler.total_size(0), 0u); + EXPECT_EQUAL(scheduler.total_size(1), 0u); + EXPECT_EQUAL(scheduler.unassigned_size(), 0u); +} + +TEST_MT_F("require that the adaptive scheduler respects the minimal task size", + 2, AdaptiveDocidRangeScheduler(num_threads, 3, 21)) +{ + EXPECT_EQUAL(f1.first_range(thread_id).size(), 10u); + if (thread_id == 0) { + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange(18,21))); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } else { + wait_idle(f1, 1); + // a range with size 5 will not be split + TEST_DO(verify_range(f1.share_range(thread_id, DocidRange(16,21)), DocidRange(16,21))); + // a range with size 6 will be split + TEST_DO(verify_range(f1.share_range(thread_id, DocidRange(15,21)), DocidRange(15,18))); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } +} + +TEST_MT_F("require that the adaptive scheduler will never split a task with size 1", + 2, AdaptiveDocidRangeScheduler(num_threads, 0, 21)) +{ + EXPECT_EQUAL(f1.first_range(thread_id).size(), 10u); + if (thread_id == 0) { + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } else { + IdleObserver observer = f1.make_idle_observer(); + while (observer.get() == 0) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + DocidRange 
small_range = DocidRange(20,21); + verify_range(f1.share_range(thread_id, small_range), small_range); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } +} + +TEST_MT_F("require that the adaptive scheduler can leave idle workers alone due to minimal task size", + 3, AdaptiveDocidRangeScheduler(num_threads, 3, 28)) +{ + EXPECT_EQUAL(f1.first_range(thread_id).size(), 9u); + if (thread_id == 0) { + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } else if (thread_id == 1) { + wait_idle(f1, 1); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange(24,28))); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } else { + wait_idle(f1, 2); + verify_range(f1.share_range(thread_id, DocidRange(20,28)), DocidRange(20,24)); + TEST_DO(verify_range(f1.next_range(thread_id), DocidRange())); + } + EXPECT_EQUAL(f1.total_size(0), 9u); + EXPECT_EQUAL(f1.total_size(1), 13u); + EXPECT_EQUAL(f1.total_size(2), 5u); +} + +//----------------------------------------------------------------------------- + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/.gitignore b/searchcore/src/tests/proton/matching/match_loop_communicator/.gitignore new file mode 100644 index 00000000000..c3797981bab --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/.gitignore @@ -0,0 +1 @@ +searchcore_match_loop_communicator_test_app diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt b/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt new file mode 100644 index 00000000000..513e002f064 --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchcore_match_loop_communicator_test_app + SOURCES + match_loop_communicator_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_match_loop_communicator_test_app COMMAND searchcore_match_loop_communicator_test_app) diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/FILES b/searchcore/src/tests/proton/matching/match_loop_communicator/FILES new file mode 100644 index 00000000000..d2f1096aaa3 --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/FILES @@ -0,0 +1 @@ +match_loop_communicator_test.cpp diff --git a/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp new file mode 100644 index 00000000000..92139a1c027 --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_loop_communicator/match_loop_communicator_test.cpp @@ -0,0 +1,118 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
// Unit tests for MatchLoopCommunicator: coordination of multiple match
// threads when selecting the globally best hits (selectBest), merging
// score/docid ranges (rangeCover) and estimating match frequency.
#include <vespa/fastos/fastos.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchcore/proton/matching/match_loop_communicator.h>
#include <vespa/vespalib/util/box.h>

using namespace proton::matching;

using vespalib::Box;
using vespalib::make_box;

typedef MatchLoopCommunicator::Range Range;
typedef MatchLoopCommunicator::RangePair RangePair;
typedef MatchLoopCommunicator::feature_t feature_t;
typedef MatchLoopCommunicator::Matches Matches;

// Five descending scores per thread id (0..4); ids 0..4 are ordered so that
// thread 0 holds the highest scores. Any other id yields an empty score list.
std::vector<feature_t> makeScores(size_t id) {
    switch (id) {
    case 0: return make_box<feature_t>(5.4, 4.4, 3.4, 2.4, 1.4);
    case 1: return make_box<feature_t>(5.3, 4.3, 3.3, 2.3, 1.3);
    case 2: return make_box<feature_t>(5.2, 4.2, 3.2, 2.2, 1.2);
    case 3: return make_box<feature_t>(5.1, 4.1, 3.1, 2.1, 1.1);
    case 4: return make_box<feature_t>(5.0, 4.0, 3.0, 2.0, 1.0);
    }
    return Box<feature_t>();
}

// A (first, second) range pair per thread id (0..4). Any other id yields a
// pair where low > high, i.e. an invalid range that rangeCover must ignore.
RangePair makeRanges(size_t id) {
    switch (id) {
    case 0: return std::make_pair(Range(5, 5), Range(7, 7));
    case 1: return std::make_pair(Range(2, 2), Range(8, 8));
    case 2: return std::make_pair(Range(3, 3), Range(6, 6));
    case 3: return std::make_pair(Range(1, 1), Range(5, 5));
    case 4: return std::make_pair(Range(4, 4), Range(9, 9));
    }
    return std::make_pair(Range(-50, -60), Range(60, 50));
}

// Single thread, best_size 3: selectBest returns at most 3 accepted hits.
TEST_F("require that selectBest gives appropriate results for single thread", MatchLoopCommunicator(num_threads, 3)) {
    EXPECT_EQUAL(2u, f1.selectBest(make_box<feature_t>(5, 4)));
    EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3)));
    EXPECT_EQUAL(3u, f1.selectBest(make_box<feature_t>(5, 4, 3, 2)));
}

TEST_MT_F("require that selectBest works with no hits", 10, MatchLoopCommunicator(num_threads, 10)) {
    EXPECT_EQUAL(0u, f1.selectBest(Box<feature_t>()));
}

// 5 threads x 5 hits = 25 candidates, budget 13: the 13 globally best are
// spread 3,3,3,2,2 across the threads (thread 0 has the highest scores).
TEST_MT_F("require that selectBest works with too many hits from all threads", 5, MatchLoopCommunicator(num_threads, 13)) {
    if (thread_id < 3) {
        EXPECT_EQUAL(3u, f1.selectBest(makeScores(thread_id)));
    } else {
        EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id)));
    }
}

// Budget 22 of 25 candidates: threads 0 and 1 keep all 5, the rest keep 4.
TEST_MT_F("require that selectBest works with some exhausted threads", 5, MatchLoopCommunicator(num_threads, 22)) {
    if (thread_id < 2) {
        EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)));
    } else {
        EXPECT_EQUAL(4u, f1.selectBest(makeScores(thread_id)));
    }
}

TEST_MT_F("require that selectBest can select all hits from all threads", 5, MatchLoopCommunicator(num_threads, 100)) {
    EXPECT_EQUAL(5u, f1.selectBest(makeScores(thread_id)));
}

// 10 threads but only ids 0..4 produce scores; budget 7 splits 2,2,1,1,1.
TEST_MT_F("require that selectBest works with some empty threads", 10, MatchLoopCommunicator(num_threads, 7)) {
    if (thread_id < 2) {
        EXPECT_EQUAL(2u, f1.selectBest(makeScores(thread_id)));
    } else if (thread_id < 5) {
        EXPECT_EQUAL(1u, f1.selectBest(makeScores(thread_id)));
    } else {
        EXPECT_EQUAL(0u, f1.selectBest(makeScores(thread_id)));
    }
}

TEST_F("require that rangeCover is identity function for single thread", MatchLoopCommunicator(num_threads, 5)) {
    RangePair res = f1.rangeCover(std::make_pair(Range(2, 4), Range(3, 5)));
    EXPECT_EQUAL(2, res.first.low);
    EXPECT_EQUAL(4, res.first.high);
    EXPECT_EQUAL(3, res.second.low);
    EXPECT_EQUAL(5, res.second.high);
}

// The merged result is the union (min low, max high) of all thread ranges.
TEST_MT_F("require that rangeCover can mix ranges from multiple threads", 5, MatchLoopCommunicator(num_threads, 5)) {
    RangePair res = f1.rangeCover(makeRanges(thread_id));
    EXPECT_EQUAL(1, res.first.low);
    EXPECT_EQUAL(5, res.first.high);
    EXPECT_EQUAL(5, res.second.low);
    EXPECT_EQUAL(9, res.second.high);
}

// Threads 5..9 contribute invalid ranges; the merged result is unchanged.
TEST_MT_F("require that invalid ranges are ignored", 10, MatchLoopCommunicator(num_threads, 5)) {
    RangePair res = f1.rangeCover(makeRanges(thread_id));
    EXPECT_EQUAL(1, res.first.low);
    EXPECT_EQUAL(5, res.first.high);
    EXPECT_EQUAL(5, res.second.low);
    EXPECT_EQUAL(9, res.second.high);
}

// makeRanges(10) is invalid for every thread; result is a default Range.
TEST_MT_F("require that only invalid ranges produce default invalid range", 3, MatchLoopCommunicator(num_threads, 5)) {
    RangePair res = f1.rangeCover(makeRanges(10));
    Range expect;
    EXPECT_FALSE(expect.isValid());
    EXPECT_EQUAL(expect.low, res.first.low);
    EXPECT_EQUAL(expect.high, res.first.high);
    EXPECT_EQUAL(expect.low, res.second.low);
    EXPECT_EQUAL(expect.high, res.second.high);
}

// Each thread reports (thread_id hits, thread_id + 10 docs); the estimate is
// the average of the per-thread hit/doc ratios.
TEST_MT_F("require that count_matches will count hits and docs across threads", 4, MatchLoopCommunicator(num_threads, 5)) {
    double freq = (0.0/10.0 + 1.0/11.0 + 2.0/12.0 + 3.0/13.0) / 4.0;
    EXPECT_APPROX(freq, f1.estimate_match_frequency(Matches(thread_id, thread_id + 10)), 0.00001);
}

TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/.gitignore b/searchcore/src/tests/proton/matching/match_phase_limiter/.gitignore new file mode 100644 index 00000000000..69806654ee0 --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/.gitignore @@ -0,0 +1 @@ +searchcore_match_phase_limiter_test_app diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/CMakeLists.txt b/searchcore/src/tests/proton/matching/match_phase_limiter/CMakeLists.txt new file mode 100644 index 00000000000..78c16d1435d --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
# Build and register the match phase limiter unit test.
vespa_add_executable(searchcore_match_phase_limiter_test_app
    SOURCES
    match_phase_limiter_test.cpp
    DEPENDS
    searchcore_matching
)
vespa_add_test(NAME searchcore_match_phase_limiter_test_app COMMAND searchcore_match_phase_limiter_test_app)
diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/FILES b/searchcore/src/tests/proton/matching/match_phase_limiter/FILES new file mode 100644 index 00000000000..776925a0d69 --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/FILES @@ -0,0 +1 @@ +match_phase_limiter_test.cpp diff --git a/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp new file mode 100644 index 00000000000..35757cb43c7 --- /dev/null +++ b/searchcore/src/tests/proton/matching/match_phase_limiter/match_phase_limiter_test.cpp @@ -0,0 +1,361 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// Unit tests for match phase limiting: the MatchPhaseLimitCalculator math,
// the attribute-backed AttributeLimiter, and the end-to-end decision of
// MatchPhaseLimiter to pre-limit, post-limit or not limit a query.
#include <vespa/fastos/fastos.h>
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchcore/proton/matching/match_phase_limiter.h>
#include <vespa/searchlib/queryeval/termasstring.h>
#include <vespa/searchlib/queryeval/andsearchstrict.h>
#include <vespa/searchlib/queryeval/fake_requestcontext.h>

using namespace proton::matching;
using search::queryeval::SearchIterator;
using search::queryeval::Searchable;
using search::queryeval::Blueprint;
using search::queryeval::SimpleLeafBlueprint;
using search::queryeval::FieldSpec;
using search::queryeval::FieldSpecBaseList;
using search::queryeval::AndSearchStrict;
using search::queryeval::termAsString;
using search::queryeval::FakeRequestContext;
using search::fef::TermFieldMatchDataArray;

//-----------------------------------------------------------------------------

// Take ownership of a freshly allocated iterator and initialize it for a
// full docid range before handing it to the limiter under test.
SearchIterator::UP prepare(SearchIterator * search)
{
    search->initFullRange();
    return SearchIterator::UP(search);
}

// Search iterator stub; records the construction parameters and the last
// seek/unpack docids so tests can inspect what the limiter did with it.
struct MockSearch : SearchIterator {
    FieldSpec spec;
    vespalib::string term;
    vespalib::Trinary _strict;
    TermFieldMatchDataArray tfmda;
    bool postings_fetched;
    uint32_t last_seek = beginId();
    uint32_t last_unpack = beginId();
    MockSearch(const vespalib::string &term_in)
        : spec(0, 0, 0), term(term_in), _strict(vespalib::Trinary::True), tfmda(), postings_fetched(false) {}
    MockSearch(const FieldSpec &spec_in, const vespalib::string &term_in, bool strict_in,
               const TermFieldMatchDataArray &tfmda_in, bool postings_fetched_in)
        : spec(spec_in), term(term_in),
          _strict(strict_in ? vespalib::Trinary::True : vespalib::Trinary::False),
          tfmda(tfmda_in),
          postings_fetched(postings_fetched_in) {}
    void doSeek(uint32_t docid) override { last_seek = docid; setDocId(docid); }
    void doUnpack(uint32_t docid) override { last_unpack = docid; }
    vespalib::Trinary is_strict() const override { return _strict; }
    bool strict() const { return (is_strict() == vespalib::Trinary::True); }
};

// Leaf blueprint stub producing MockSearch instances; verifies that the
// strictness used at fetchPostings time matches createLeafSearch.
struct MockBlueprint : SimpleLeafBlueprint {
    FieldSpec spec;
    vespalib::string term;
    bool postings_fetched = false;
    bool postings_strict = false;
    MockBlueprint(const FieldSpec &spec_in, const vespalib::string &term_in)
        : SimpleLeafBlueprint(FieldSpecBaseList().add(spec_in)), spec(spec_in), term(term_in)
    {
        setEstimate(HitEstimate(756, false));
    }
    virtual SearchIterator::UP createLeafSearch(const TermFieldMatchDataArray &tfmda,
                                                bool strict) const override
    {
        if (postings_fetched) {
            EXPECT_EQUAL(postings_strict, strict);
        }
        return SearchIterator::UP(new MockSearch(spec, term, strict, tfmda,
                                                 postings_fetched));
    }
    virtual void fetchPostings(bool strict) override {
        postings_strict = strict;
        postings_fetched = true;
    }
};

// Searchable stub counting how many blueprints were created; the limiter is
// expected to create the limiting blueprint only once and reuse it.
struct MockSearchable : Searchable {
    size_t create_cnt = 0;
    virtual Blueprint::UP createBlueprint(const search::queryeval::IRequestContext & requestContext,
                                          const FieldSpec &field,
                                          const search::query::Node &term) override
    {
        (void) requestContext;
        ++create_cnt;
        return Blueprint::UP(new MockBlueprint(field, termAsString(term)));
    }
};

//-----------------------------------------------------------------------------

TEST("require that match phase limit calculator gives expert values") {
    MatchPhaseLimitCalculator calc(5000, 1, 0.2);
    EXPECT_EQUAL(1000u, calc.sample_hits_per_thread(1));
    EXPECT_EQUAL(100u, calc.sample_hits_per_thread(10));
    EXPECT_EQUAL(10000u, calc.wanted_num_docs(0.5));
    EXPECT_EQUAL(50000u, calc.wanted_num_docs(0.1));
}

TEST("require that match phase limit calculator can estimate hits") {
    MatchPhaseLimitCalculator calc(0, 1, 0.2); // max hits not used
    EXPECT_EQUAL(0u, calc.estimated_hits(0.0, 0));
    EXPECT_EQUAL(0u, calc.estimated_hits(0.0, 1));
    EXPECT_EQUAL(0u, calc.estimated_hits(0.0, 1000));
    EXPECT_EQUAL(1u, calc.estimated_hits(1.0, 1));
    EXPECT_EQUAL(10u, calc.estimated_hits(1.0, 10));
    EXPECT_EQUAL(5u, calc.estimated_hits(0.5, 10));
    EXPECT_EQUAL(500u, calc.estimated_hits(0.5, 1000));
}

TEST("require that match phase limit calculator has lower bound on global sample hits") {
    MatchPhaseLimitCalculator calc(100, 1, 0.2);
    EXPECT_EQUAL(128u, calc.sample_hits_per_thread(1));
    EXPECT_EQUAL(4u, calc.sample_hits_per_thread(32));
}

TEST("require that match phase limit calculator has lower bound on thread sample hits") {
    MatchPhaseLimitCalculator calc(5000, 1, 0.2);
    EXPECT_EQUAL(1u, calc.sample_hits_per_thread(10000));
}

TEST("require that match phase limit calculator has lower bound on wanted hits") {
    MatchPhaseLimitCalculator calc(100, 1, 0.2);
    EXPECT_EQUAL(128u, calc.wanted_num_docs(1.0));
}

TEST("require that match phase limit calculator has upper bound on wanted hits") {
    MatchPhaseLimitCalculator calc(100000000, 1, 0.2);
    EXPECT_EQUAL(0x7fffFFFFu, calc.wanted_num_docs(0.0000001));
}

TEST("require that match phase limit calculator gives sane values with no hits") {
    MatchPhaseLimitCalculator calc(100, 1, 0.2);
    EXPECT_EQUAL(128u, calc.wanted_num_docs(1.0));
    EXPECT_EQUAL(0x7fffFFFFu, calc.wanted_num_docs(0.000000001));
    EXPECT_EQUAL(0x7fffFFFFu, calc.wanted_num_docs(0.000000001));
}

// Pins the exact values relied upon by matching_test.cpp.
TEST("verify numbers used in matching test") {
    MatchPhaseLimitCalculator calc(150, 1, 0.2);
    EXPECT_EQUAL(1u, calc.sample_hits_per_thread(75));
    EXPECT_EQUAL(176u, calc.wanted_num_docs(74.0 / 87.0));
}

TEST("require that max group size is calculated correctly") {
    for (size_t min_groups: std::vector<size_t>({0, 1, 2, 3, 4, 10, 500})) {
        for (size_t wanted_hits: std::vector<size_t>({0, 3, 321, 921})) {
            MatchPhaseLimitCalculator calc(100, min_groups, 0.2);
            if (min_groups == 0) {
                EXPECT_EQUAL(wanted_hits, calc.max_group_size(wanted_hits));
            } else {
                EXPECT_EQUAL((wanted_hits / min_groups), calc.max_group_size(wanted_hits));
            }
        }
    }
}

// Exercises all 8 combinations of descending/strict/diverse and checks the
// synthetic limiter term syntax ("[;;<max>[;<attr>;<groups>;<factor>;<strategy>]]").
TEST("require that the attribute limiter works correctly") {
    FakeRequestContext requestContext;
    for (int i = 0; i <= 7; ++i) {
        bool descending = (i & 1) != 0;
        bool strict = (i & 2) != 0;
        bool diverse = (i & 4) != 0;
        MockSearchable searchable;
        AttributeLimiter limiter(searchable, requestContext, "limiter_attribute", descending, "category", 10.0, AttributeLimiter::LOOSE);
        EXPECT_EQUAL(0u, searchable.create_cnt);
        EXPECT_FALSE(limiter.was_used());
        SearchIterator::UP s1 = limiter.create_search(42, diverse ? 3 : 42, strict);
        EXPECT_TRUE(limiter.was_used());
        EXPECT_EQUAL(1u, searchable.create_cnt);
        SearchIterator::UP s2 = limiter.create_search(42, diverse ? 3 : 42, strict);
        EXPECT_EQUAL(1u, searchable.create_cnt); // blueprint reused, not recreated
        MockSearch *ms = dynamic_cast<MockSearch*>(s1.get());
        ASSERT_TRUE(ms != nullptr);
        EXPECT_EQUAL("limiter_attribute", ms->spec.getName());
        EXPECT_EQUAL(0u, ms->spec.getFieldId());
        EXPECT_EQUAL(0u, ms->spec.getHandle());
        EXPECT_EQUAL(strict, ms->strict());
        EXPECT_TRUE(ms->postings_fetched);
        if (descending) {
            if (diverse) {
                EXPECT_EQUAL("[;;-42;category;3;140;loose]", ms->term);
            } else {
                EXPECT_EQUAL("[;;-42]", ms->term);
            }
        } else {
            if (diverse) {
                EXPECT_EQUAL("[;;42;category;3;140;loose]", ms->term);
            } else {
                EXPECT_EQUAL("[;;42]", ms->term);
            }
        }
        ASSERT_EQUAL(1u, ms->tfmda.size());
        EXPECT_EQUAL(0u, ms->tfmda[0]->getFieldId());
    }
}

TEST("require that no limiter has no behavior") {
    NoMatchPhaseLimiter no_limiter;
    MaybeMatchPhaseLimiter &limiter = no_limiter;
    EXPECT_FALSE(limiter.is_enabled());
    EXPECT_EQUAL(0u, limiter.sample_hits_per_thread(1));
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 1.0, 100000000);
    limiter.updateDocIdSpaceEstimate(1000, 9000);
    EXPECT_EQUAL(std::numeric_limits<size_t>::max(), limiter.getDocIdSpaceEstimate());
    MockSearch *ms = dynamic_cast<MockSearch*>(search.get());
    ASSERT_TRUE(ms != nullptr);
    EXPECT_EQUAL("search", ms->term); // original iterator passed through untouched
    EXPECT_FALSE(limiter.was_limited());
}

TEST("require that the match phase limiter may chose not to limit the query") {
    FakeRequestContext requestContext;
    MockSearchable searchable;
    MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 1000, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
    MaybeMatchPhaseLimiter &limiter = yes_limiter;
    EXPECT_TRUE(limiter.is_enabled());
    EXPECT_EQUAL(20u, limiter.sample_hits_per_thread(10));
    // hit rate 0.005 over 100000 docs is below the limiting threshold
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")),
                                                    0.005, 100000);
    limiter.updateDocIdSpaceEstimate(1000, 9000);
    EXPECT_EQUAL(10000u, limiter.getDocIdSpaceEstimate());
    MockSearch *ms = dynamic_cast<MockSearch*>(search.get());
    ASSERT_TRUE(ms != nullptr);
    EXPECT_EQUAL("search", ms->term);
    EXPECT_FALSE(limiter.was_limited());
}

// Fixture with a limiter configured with a max-filter-coverage of 0.05,
// shared by the two max-filter-coverage tests below.
struct MaxFilterCoverageLimiterFixture {

    FakeRequestContext requestContext;
    MockSearchable searchable;

    MatchPhaseLimiter::UP getMaxFilterCoverageLimiter() {
        MatchPhaseLimiter::UP yes_limiter(new MatchPhaseLimiter(10000, searchable, requestContext, "limiter_attribute", 10000, true, 0.05, 1.0, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE));
        MaybeMatchPhaseLimiter &limiter = *yes_limiter;
        EXPECT_TRUE(limiter.is_enabled());
        EXPECT_EQUAL(1000u, limiter.sample_hits_per_thread(10));
        return yes_limiter;
    }
};

TEST_F("require that the match phase limiter may chose not to limit the query when considering max-filter-coverage", MaxFilterCoverageLimiterFixture) {
    MatchPhaseLimiter::UP limiterUP = f.getMaxFilterCoverageLimiter();
    MaybeMatchPhaseLimiter & limiter = *limiterUP;
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.10, 1900000);
    limiter.updateDocIdSpaceEstimate(1000, 1899000);
    EXPECT_EQUAL(1900000u, limiter.getDocIdSpaceEstimate());
    MockSearch *ms = dynamic_cast<MockSearch *>(search.get());
    ASSERT_TRUE(ms != nullptr);
    EXPECT_EQUAL("search", ms->term);
    EXPECT_FALSE(limiter.was_limited());
}

TEST_F("require that the match phase limiter may chose to limit the query even when considering max-filter-coverage", MaxFilterCoverageLimiterFixture) {
    MatchPhaseLimiter::UP limiterUP = f.getMaxFilterCoverageLimiter();
    MaybeMatchPhaseLimiter & limiter = *limiterUP;
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.10, 2100000);
    limiter.updateDocIdSpaceEstimate(1000, 2099000);
    EXPECT_EQUAL(159684u, limiter.getDocIdSpaceEstimate());
    // limiting wraps the original iterator in a LimitedSearch AND
    LimitedSearch *strict_and = dynamic_cast<LimitedSearch*>(search.get());
    ASSERT_TRUE(strict_and != nullptr);
    const MockSearch *ms1 = dynamic_cast<const MockSearch*>(&strict_and->getFirst());
    ASSERT_TRUE(ms1 != nullptr);
    const MockSearch *ms2 = dynamic_cast<const MockSearch*>(&strict_and->getSecond());
    ASSERT_TRUE(ms2 != nullptr);
    EXPECT_EQUAL("[;;-100000]", ms1->term);
    EXPECT_EQUAL("search", ms2->term);
    EXPECT_TRUE(ms1->strict());
    EXPECT_TRUE(ms2->strict());
    EXPECT_TRUE(limiter.was_limited());
}

TEST("require that the match phase limiter is able to pre-limit the query") {
    FakeRequestContext requestContext;
    MockSearchable searchable;
    MatchPhaseLimiter yes_limiter(10000, searchable, requestContext, "limiter_attribute", 500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
    MaybeMatchPhaseLimiter &limiter = yes_limiter;
    EXPECT_TRUE(limiter.is_enabled());
    EXPECT_EQUAL(12u, limiter.sample_hits_per_thread(10));
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")),
                                                    0.1, 100000);
    limiter.updateDocIdSpaceEstimate(1000, 9000);
    EXPECT_EQUAL(1680u, limiter.getDocIdSpaceEstimate());
    LimitedSearch *strict_and = dynamic_cast<LimitedSearch*>(search.get());
    ASSERT_TRUE(strict_and != nullptr);
    // pre-limit: the limiting term comes first and drives the AND strictly
    const MockSearch *ms1 = dynamic_cast<const MockSearch*>(&strict_and->getFirst());
    ASSERT_TRUE(ms1 != nullptr);
    const MockSearch *ms2 = dynamic_cast<const MockSearch*>(&strict_and->getSecond());
    ASSERT_TRUE(ms2 != nullptr);
    EXPECT_EQUAL("[;;-5000]", ms1->term);
    EXPECT_EQUAL("search", ms2->term);
    EXPECT_TRUE(ms1->strict());
    EXPECT_TRUE(ms2->strict());
    search->seek(100);
    EXPECT_EQUAL(100u, ms1->last_seek);
    EXPECT_EQUAL(100u, ms2->last_seek);
    search->unpack(100);
    EXPECT_EQUAL(0u, ms1->last_unpack); // will not unpack limiting term
    EXPECT_EQUAL(100u, ms2->last_unpack);
    EXPECT_TRUE(limiter.was_limited());
}

TEST("require that the match phase limiter is able to post-limit the query") {
    MockSearchable searchable;
    FakeRequestContext requestContext;
    MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 1500, true, 1.0, 0.2, 1.0, "", 1, 10.0, AttributeLimiter::LOOSE);
    MaybeMatchPhaseLimiter &limiter = yes_limiter;
    EXPECT_TRUE(limiter.is_enabled());
    EXPECT_EQUAL(30u, limiter.sample_hits_per_thread(10));
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.1, 100000);
    limiter.updateDocIdSpaceEstimate(1000, 9000);
    EXPECT_EQUAL(1680u, limiter.getDocIdSpaceEstimate());
    LimitedSearch *strict_and = dynamic_cast<LimitedSearch*>(search.get());
    ASSERT_TRUE(strict_and != nullptr);
    // post-limit: the original query comes first, the limiting term second
    // and non-strict
    const MockSearch *ms1 = dynamic_cast<const MockSearch*>(&strict_and->getFirst());
    ASSERT_TRUE(ms1 != nullptr);
    const MockSearch *ms2 = dynamic_cast<const MockSearch*>(&strict_and->getSecond());
    ASSERT_TRUE(ms2 != nullptr);
    EXPECT_EQUAL("search", ms1->term);
    EXPECT_EQUAL("[;;-15000]", ms2->term);
    EXPECT_TRUE(ms1->strict());
    EXPECT_FALSE(ms2->strict());
    search->seek(100);
    EXPECT_EQUAL(100u, ms1->last_seek);
    EXPECT_EQUAL(100u, ms2->last_seek);
    search->unpack(100);
    EXPECT_EQUAL(100u, ms1->last_unpack);
    EXPECT_EQUAL(0u, ms2->last_unpack); // will not unpack limiting term
    EXPECT_TRUE(limiter.was_limited());
}

// Shared check that the chosen diversity cutoff strategy is reflected in the
// generated limiter term.
void verifyDiversity(AttributeLimiter::DiversityCutoffStrategy strategy)
{
    MockSearchable searchable;
    FakeRequestContext requestContext;
    MatchPhaseLimiter yes_limiter(10000, searchable, requestContext,"limiter_attribute", 500, true, 1.0, 0.2, 1.0, "category", 10, 13.1, strategy);
    MaybeMatchPhaseLimiter &limiter = yes_limiter;
    SearchIterator::UP search = limiter.maybe_limit(prepare(new MockSearch("search")), 0.1, 100000);
    limiter.updateDocIdSpaceEstimate(1000, 9000);
    EXPECT_EQUAL(1680u, limiter.getDocIdSpaceEstimate());
    LimitedSearch *strict_and = dynamic_cast<LimitedSearch*>(search.get());
    ASSERT_TRUE(strict_and != nullptr);
    const MockSearch *ms1 = dynamic_cast<const MockSearch*>(&strict_and->getFirst());
    ASSERT_TRUE(ms1 != nullptr);
    if (strategy == AttributeLimiter::LOOSE) {
        EXPECT_EQUAL("[;;-5000;category;500;131;loose]", ms1->term);
    } else if (strategy == AttributeLimiter::STRICT) {
        EXPECT_EQUAL("[;;-5000;category;500;131;strict]", ms1->term);
    } else {
        ASSERT_TRUE(false);
    }
}

TEST("require that the match phase limiter can use loose diversity") {
    verifyDiversity(AttributeLimiter::LOOSE);
}

TEST("require that the match phase limiter can use strict diversity") {
    verifyDiversity(AttributeLimiter::STRICT);
}

TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchcore/src/tests/proton/matching/matching_stats_test.cpp b/searchcore/src/tests/proton/matching/matching_stats_test.cpp new file mode 100644 index 00000000000..237f283f042 --- /dev/null +++ b/searchcore/src/tests/proton/matching/matching_stats_test.cpp @@ -0,0 +1,151 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
// Unit tests for MatchingStats: accumulation of doc counts, averaged
// timings, and per-partition merging.
#include <vespa/fastos/fastos.h>
#include <vespa/log/log.h>
LOG_SETUP("matching_stats_test");
#include <vespa/vespalib/testkit/test_kit.h>
#include <vespa/searchcore/proton/matching/matching_stats.h>

using namespace proton::matching;

TEST("requireThatDocCountsAddUp") {
    MatchingStats stats;
    EXPECT_EQUAL(0u, stats.docsMatched());
    EXPECT_EQUAL(0u, stats.docsRanked());
    EXPECT_EQUAL(0u, stats.docsReRanked());
    EXPECT_EQUAL(0u, stats.queries());
    EXPECT_EQUAL(0u, stats.limited_queries());
    {
        // the setters return *this to allow chaining
        MatchingStats rhs;
        EXPECT_EQUAL(&rhs.docsMatched(1000), &rhs);
        EXPECT_EQUAL(&rhs.docsRanked(100), &rhs);
        EXPECT_EQUAL(&rhs.docsReRanked(10), &rhs);
        EXPECT_EQUAL(&rhs.queries(2), &rhs);
        EXPECT_EQUAL(&rhs.limited_queries(1), &rhs);
        EXPECT_EQUAL(&stats.add(rhs), &stats);
    }
    EXPECT_EQUAL(1000u, stats.docsMatched());
    EXPECT_EQUAL(100u, stats.docsRanked());
    EXPECT_EQUAL(10u, stats.docsReRanked());
    EXPECT_EQUAL(2u, stats.queries());
    EXPECT_EQUAL(1u, stats.limited_queries());
    // adding the same amounts a second time doubles every counter
    EXPECT_EQUAL(&stats.add(MatchingStats().docsMatched(1000).docsRanked(100)
                            .docsReRanked(10).queries(2).limited_queries(1)), &stats);
    EXPECT_EQUAL(2000u, stats.docsMatched());
    EXPECT_EQUAL(200u, stats.docsRanked());
    EXPECT_EQUAL(20u, stats.docsReRanked());
    EXPECT_EQUAL(4u, stats.queries());
    EXPECT_EQUAL(2u, stats.limited_queries());
}

TEST("requireThatAverageTimesAreRecorded") {
    MatchingStats stats;
    EXPECT_APPROX(0.0, stats.matchTimeAvg(), 0.00001);
    EXPECT_APPROX(0.0, stats.groupingTimeAvg(), 0.00001);
    EXPECT_APPROX(0.0, stats.rerankTimeAvg(), 0.00001);
    EXPECT_APPROX(0.0, stats.queryCollateralTimeAvg(), 0.00001);
    EXPECT_APPROX(0.0, stats.queryLatencyAvg(), 0.00001);
    EXPECT_EQUAL(0u, stats.matchTimeCount());
    EXPECT_EQUAL(0u, stats.groupingTimeCount());
    EXPECT_EQUAL(0u, stats.rerankTimeCount());
    EXPECT_EQUAL(0u, stats.queryCollateralTimeCount());
    EXPECT_EQUAL(0u, stats.queryLatencyCount());
    stats.matchTime(0.01).groupingTime(0.1).rerankTime(0.5).queryCollateralTime(2.0).queryLatency(1.0);
    EXPECT_APPROX(0.01, stats.matchTimeAvg(), 0.00001);
    EXPECT_APPROX(0.1, stats.groupingTimeAvg(), 0.00001);
    EXPECT_APPROX(0.5, stats.rerankTimeAvg(), 0.00001);
    EXPECT_APPROX(2.0, stats.queryCollateralTimeAvg(), 0.00001);
    EXPECT_APPROX(1.0, stats.queryLatencyAvg(), 0.00001);
    // adding a second sample: the average is the mean of both samples
    stats.add(MatchingStats().matchTime(0.03).groupingTime(0.3).rerankTime(1.5).queryCollateralTime(6.0).queryLatency(3.0));
    EXPECT_APPROX(0.02, stats.matchTimeAvg(), 0.00001);
    EXPECT_APPROX(0.2, stats.groupingTimeAvg(), 0.00001);
    EXPECT_APPROX(1.0, stats.rerankTimeAvg(), 0.00001);
    EXPECT_APPROX(4.0, stats.queryCollateralTimeAvg(), 0.00001);
    EXPECT_APPROX(2.0, stats.queryLatencyAvg(), 0.00001);
    stats.add(MatchingStats().matchTime(0.05)
              .groupingTime(0.5)
              .rerankTime(2.5)
              .queryCollateralTime(10.0)
              .queryLatency(5.0));
    // a MatchingStats built with repeated setter calls keeps the last value
    // but presumably still counts as a single sample when added — the final
    // averages below are consistent with 4 samples
    stats.add(MatchingStats().matchTime(0.05).matchTime(0.03)
              .groupingTime(0.5).groupingTime(0.3)
              .rerankTime(2.5).rerankTime(1.5)
              .queryCollateralTime(10.0).queryCollateralTime(6.0)
              .queryLatency(5.0).queryLatency(3.0));
    EXPECT_APPROX(0.03, stats.matchTimeAvg(), 0.00001);
    EXPECT_APPROX(0.3, stats.groupingTimeAvg(), 0.00001);
    EXPECT_APPROX(1.5, stats.rerankTimeAvg(), 0.00001);
    EXPECT_APPROX(6.0, stats.queryCollateralTimeAvg(), 0.00001);
    EXPECT_APPROX(3.0, stats.queryLatencyAvg(), 0.00001);
    EXPECT_EQUAL(4u, stats.matchTimeCount());
    EXPECT_EQUAL(4u, stats.groupingTimeCount());
    EXPECT_EQUAL(4u, stats.rerankTimeCount());
    EXPECT_EQUAL(4u, stats.queryCollateralTimeCount());
    EXPECT_EQUAL(4u, stats.queryLatencyCount());
}

TEST("requireThatPartitionsAreAddedCorrectly") {
    MatchingStats all1;
    EXPECT_EQUAL(0u, all1.docsMatched());
    EXPECT_EQUAL(0u, all1.getNumPartitions());

    MatchingStats::Partition subPart;
    subPart.docsMatched(3).docsRanked(2).docsReRanked(1)
        .active_time(1.0).wait_time(0.5);
    EXPECT_EQUAL(3u, subPart.docsMatched());
    EXPECT_EQUAL(2u, subPart.docsRanked());
    EXPECT_EQUAL(1u, subPart.docsReRanked());
    EXPECT_EQUAL(1.0, subPart.active_time_avg());
    EXPECT_EQUAL(0.5, subPart.wait_time_avg());
    EXPECT_EQUAL(1u, subPart.active_time_count());
    EXPECT_EQUAL(1u, subPart.wait_time_count());

    // merging into partition slot 0 updates both totals and that partition
    all1.merge_partition(subPart, 0);
    EXPECT_EQUAL(3u, all1.docsMatched());
    EXPECT_EQUAL(2u, all1.docsRanked());
    EXPECT_EQUAL(1u, all1.docsReRanked());
    EXPECT_EQUAL(1u, all1.getNumPartitions());
    EXPECT_EQUAL(3u, all1.getPartition(0).docsMatched());
    EXPECT_EQUAL(2u, all1.getPartition(0).docsRanked());
    EXPECT_EQUAL(1u, all1.getPartition(0).docsReRanked());
    EXPECT_EQUAL(1.0, all1.getPartition(0).active_time_avg());
    EXPECT_EQUAL(0.5, all1.getPartition(0).wait_time_avg());
    EXPECT_EQUAL(1u, all1.getPartition(0).active_time_count());
    EXPECT_EQUAL(1u, all1.getPartition(0).wait_time_count());

    all1.merge_partition(subPart, 1);
    EXPECT_EQUAL(6u, all1.docsMatched());
    EXPECT_EQUAL(4u, all1.docsRanked());
    EXPECT_EQUAL(2u, all1.docsReRanked());
    EXPECT_EQUAL(2u, all1.getNumPartitions());
    EXPECT_EQUAL(3u, all1.getPartition(1).docsMatched());
    EXPECT_EQUAL(2u, all1.getPartition(1).docsRanked());
    EXPECT_EQUAL(1u, all1.getPartition(1).docsReRanked());
    EXPECT_EQUAL(1.0, all1.getPartition(1).active_time_avg());
    EXPECT_EQUAL(0.5, all1.getPartition(1).wait_time_avg());
    EXPECT_EQUAL(1u, all1.getPartition(1).active_time_count());
    EXPECT_EQUAL(1u, all1.getPartition(1).wait_time_count());

    // adding a stats object to itself must double counts, keep averages
    all1.add(all1);
    EXPECT_EQUAL(12u, all1.docsMatched());
    EXPECT_EQUAL(8u, all1.docsRanked());
    EXPECT_EQUAL(4u, all1.docsReRanked());
    EXPECT_EQUAL(2u, all1.getNumPartitions());
    EXPECT_EQUAL(6u, all1.getPartition(0).docsMatched());
    EXPECT_EQUAL(4u, all1.getPartition(0).docsRanked());
    EXPECT_EQUAL(2u, all1.getPartition(0).docsReRanked());
    EXPECT_EQUAL(1.0, all1.getPartition(0).active_time_avg());
    EXPECT_EQUAL(0.5, all1.getPartition(0).wait_time_avg());
    EXPECT_EQUAL(2u, all1.getPartition(0).active_time_count());
    EXPECT_EQUAL(2u, all1.getPartition(0).wait_time_count());
    EXPECT_EQUAL(6u, all1.getPartition(1).docsMatched());
    EXPECT_EQUAL(4u, all1.getPartition(1).docsRanked());
    EXPECT_EQUAL(2u, all1.getPartition(1).docsReRanked());
    EXPECT_EQUAL(1.0, all1.getPartition(1).active_time_avg());
    EXPECT_EQUAL(0.5, all1.getPartition(1).wait_time_avg());
    EXPECT_EQUAL(2u, all1.getPartition(1).active_time_count());
    EXPECT_EQUAL(2u, all1.getPartition(1).wait_time_count());
}

TEST_MAIN() {
    TEST_RUN_ALL();
}
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp new file mode 100644 index 00000000000..b650c983be0 --- /dev/null +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -0,0 +1,775 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("matching_test"); +#include <vespa/vespalib/testkit/testapp.h> + +#include <vespa/document/base/globalid.h> +#include <initializer_list> +#include <vespa/searchcommon/attribute/iattributecontext.h> +#include <vespa/searchcore/proton/common/bucketfactory.h> +#include <vespa/searchcore/proton/documentmetastore/documentmetastore.h> +#include <vespa/searchcore/proton/matching/fakesearchcontext.h> +#include <vespa/searchcore/proton/matching/isearchcontext.h> +#include <vespa/searchcore/proton/matching/matcher.h> +#include <vespa/searchcore/proton/matching/querynodes.h> +#include <vespa/searchcore/proton/matching/sessionmanager.h> +#include <vespa/searchcore/proton/matching/viewresolver.h> +#include <vespa/searchlib/aggregation/aggregation.h> +#include <vespa/searchlib/aggregation/grouping.h> +#include <vespa/searchlib/aggregation/perdocexpression.h> +#include <vespa/searchlib/attribute/extendableattributes.h> +#include <vespa/searchlib/common/featureset.h>
+#include <vespa/searchlib/engine/docsumrequest.h> +#include <vespa/searchlib/fef/properties.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/searchlib/queryeval/isourceselector.h> +#include <vespa/vespalib/util/simple_thread_bundle.h> +#include <vespa/searchcore/proton/matching/match_params.h> + +using namespace proton::matching; +using namespace proton; +using namespace search::aggregation; +using namespace search::attribute; +using namespace search::engine; +using namespace search::expression; +using namespace search::fef; +using namespace search::grouping; +using namespace search::index; +using namespace search::query; +using namespace search::queryeval; +using namespace search; +using storage::spi::Timestamp; + +void inject_match_phase_limiting(Properties &setup, const vespalib::string &attribute, size_t max_hits, bool descending) +{ + Properties cfg; + cfg.add(indexproperties::matchphase::DegradationAttribute::NAME, attribute); + cfg.add(indexproperties::matchphase::DegradationAscendingOrder::NAME, descending ? 
"false" : "true"); + cfg.add(indexproperties::matchphase::DegradationMaxHits::NAME, vespalib::make_string("%zu", max_hits)); + setup.import(cfg); +} + +//----------------------------------------------------------------------------- + +const uint32_t NUM_DOCS = 1000; + +//----------------------------------------------------------------------------- + +class MyAttributeContext : public IAttributeContext +{ +private: + typedef std::map<string, IAttributeVector *> Map; + Map _vectors; + +public: + const IAttributeVector *get(const string &name) const { + if (_vectors.find(name) == _vectors.end()) { + return 0; + } + return _vectors.find(name)->second; + } + virtual const IAttributeVector * + getAttribute(const string &name) const { + return get(name); + } + virtual const IAttributeVector * + getAttributeStableEnum(const string &name) const { + return get(name); + } + virtual void + getAttributeList(std::vector<const IAttributeVector *> & list) const { + Map::const_iterator pos = _vectors.begin(); + Map::const_iterator end = _vectors.end(); + for (; pos != end; ++pos) { + list.push_back(pos->second); + } + } + ~MyAttributeContext() { + Map::iterator pos = _vectors.begin(); + Map::iterator end = _vectors.end(); + for (; pos != end; ++pos) { + delete pos->second; + } + } + + //------------------------------------------------------------------------- + + void add(IAttributeVector *attr) { + _vectors[attr->getName()] = attr; + } +}; + +//----------------------------------------------------------------------------- + +struct MyWorld { + Schema schema; + Properties config; + FakeSearchContext searchContext; + MyAttributeContext attributeContext; + SessionManager::SP sessionManager; + DocumentMetaStore metaStore; + MatchingStats matchingStats; + vespalib::Clock clock; + QueryLimiter queryLimiter; + + MyWorld() + : schema(), + config(), + searchContext(), + attributeContext(), + sessionManager(), + metaStore(std::make_shared<BucketDBOwner>()), + matchingStats(), + clock(), + 
queryLimiter() + { + } + + void basicSetup(size_t heapSize=10, size_t arraySize=100) { + // schema + schema.addIndexField(Schema::IndexField("f1", Schema::STRING)); + schema.addIndexField(Schema::IndexField("f2", Schema::STRING)); + schema.addIndexField(Schema::IndexField("tensor_field", Schema::TENSOR)); + schema.addAttributeField(Schema::AttributeField("a1", Schema::INT32)); + schema.addAttributeField(Schema::AttributeField("a2", Schema::INT32)); + schema.addAttributeField(Schema::AttributeField("predicate_field", Schema::BOOLEANTREE)); + + // config + config.add(indexproperties::rank::FirstPhase::NAME, "attribute(a1)"); + config.add(indexproperties::hitcollector::HeapSize::NAME, (vespalib::asciistream() << heapSize).str()); + config.add(indexproperties::hitcollector::ArraySize::NAME, (vespalib::asciistream() << arraySize).str()); + config.add(indexproperties::summary::Feature::NAME, "attribute(a1)"); + config.add(indexproperties::summary::Feature::NAME, "value(100)"); + config.add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true"); + config.add(indexproperties::dump::Feature::NAME, "attribute(a2)"); + + // search context + searchContext.setLimit(NUM_DOCS); + searchContext.addIdx(0).addIdx(1); + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + searchContext.selector().setSource(i, i % 2); // even -> 0 + // odd -> 1 + } + + // attribute context + { + SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("a1"); + AttributeVector::DocId docid; + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + attr->addDoc(docid); + attr->add(i, docid); // value = docid + } + assert(docid + 1 == NUM_DOCS); + attributeContext.add(attr); + } + { + SingleInt32ExtAttribute *attr = new SingleInt32ExtAttribute("a2"); + AttributeVector::DocId docid; + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + attr->addDoc(docid); + attr->add(i * 2, docid); // value = docid * 2 + } + assert(docid + 1 == NUM_DOCS); + attributeContext.add(attr); + } + + // grouping + sessionManager = 
SessionManager::SP(new SessionManager(100)); + + // metaStore + for (uint32_t i = 0; i < NUM_DOCS; ++i) { + document::DocumentId docId(vespalib::make_string("doc::%u", i)); + const document::GlobalId &gid = docId.getGlobalId(); + typedef DocumentMetaStore::Result PutRes; + document::BucketId bucketId(BucketFactory::getBucketId(docId)); + PutRes putRes(metaStore.put(gid, + bucketId, + Timestamp(0u), + i)); + metaStore.setBucketState(bucketId, true); + } + } + + void set_property(const vespalib::string &name, const vespalib::string &value) { + Properties cfg; + cfg.add(name, value); + config.import(cfg); + } + + void setup_match_phase_limiting(const vespalib::string &attribute, size_t max_hits, bool descending) + { + inject_match_phase_limiting(config, attribute, max_hits, descending); + } + + void add_match_phase_limiting_result(const vespalib::string &attribute, size_t want_docs, + bool descending, std::initializer_list<uint32_t> docs) + { + vespalib::string term = vespalib::make_string("[;;%s%zu]", descending ? 
"-" : "", want_docs); + FakeResult result; + for (uint32_t doc: docs) { + result.doc(doc); + } + searchContext.attr().addResult(attribute, term, result); + } + + void setupSecondPhaseRanking() { + Properties cfg; + cfg.add(indexproperties::rank::SecondPhase::NAME, "attribute(a2)"); + cfg.add(indexproperties::hitcollector::HeapSize::NAME, "3"); + config.import(cfg); + } + + void verbose_a1_result(const vespalib::string &term) { + FakeResult result; + for (uint32_t i = 15; i < NUM_DOCS; ++i) { + result.doc(i); + } + searchContext.attr().addResult("a1", term, result); + } + + void basicResults() { + searchContext.idx(0).getFake().addResult("f1", "foo", + FakeResult() + .doc(10).doc(20).doc(30)); + searchContext.idx(0).getFake().addResult( + "f1", "spread", + FakeResult() + .doc(100).doc(200).doc(300).doc(400).doc(500) + .doc(600).doc(700).doc(800).doc(900)); + } + + void setStackDump(Request &request, const vespalib::string &field, + const vespalib::string &term) { + QueryBuilder<ProtonNodeTypes> builder; + builder.addStringTerm(term, field, 1, search::query::Weight(1)); + vespalib::string stack_dump = + StackDumpCreator::create(*builder.build()); + request.stackDump.assign(stack_dump.data(), + stack_dump.data() + stack_dump.size()); + } + + SearchRequest::SP createSimpleRequest(const vespalib::string &field, + const vespalib::string &term) + { + SearchRequest::SP request(new SearchRequest); + request->setTimeout(60 * fastos::TimeStamp::SEC); + setStackDump(*request, field, term); + request->maxhits = 10; + return request; + } + + struct MySearchHandler : ISearchHandler { + Matcher::SP _matcher; + + MySearchHandler(Matcher::SP matcher) : _matcher(matcher) {} + + virtual DocsumReply::UP getDocsums(const DocsumRequest &) + { return DocsumReply::UP(); } + virtual SearchReply::UP match(const ISearchHandler::SP &, + const SearchRequest &, + vespalib::ThreadBundle &) const + { return SearchReply::UP(); } + }; + + double get_first_phase_termwise_limit() { + Matcher 
matcher(schema, config, clock, queryLimiter, 0); + SearchRequest::SP request = createSimpleRequest("f1", "spread"); + search::fef::Properties overrides; + MatchToolsFactory::UP match_tools_factory = matcher.create_match_tools_factory( + *request, searchContext, attributeContext, metaStore, overrides); + MatchTools::UP match_tools = match_tools_factory->createMatchTools(); + RankProgram::UP rank_program = match_tools->first_phase_program(); + return rank_program->match_data().get_termwise_limit(); + } + + SearchReply::UP performSearch(SearchRequest::SP req, size_t threads) { + Matcher::SP matcher(new Matcher(schema, config, clock, queryLimiter, 0)); + SearchSession::OwnershipBundle owned_objects; + owned_objects.search_handler.reset(new MySearchHandler(matcher)); + owned_objects.context.reset(new MatchContext( + IAttributeContext::UP(new MyAttributeContext), + ISearchContext::UP(new FakeSearchContext))); + vespalib::SimpleThreadBundle threadBundle(threads); + SearchReply::UP reply = + matcher->match(*req, threadBundle, searchContext, attributeContext, + *sessionManager, metaStore, + std::move(owned_objects)); + matchingStats.add(matcher->getStats()); + return reply; + } + + DocsumRequest::SP createSimpleDocsumRequest(const vespalib::string & field, + const vespalib::string & term) + { + DocsumRequest::SP request(new DocsumRequest); + setStackDump(*request, field, term); + + // match a subset of basic result + request for a non-hit (not + // sorted on docid) + request->hits.push_back(DocsumRequest::Hit()); + request->hits.back().docid = 30; + request->hits.push_back(DocsumRequest::Hit()); + request->hits.back().docid = 10; + request->hits.push_back(DocsumRequest::Hit()); + request->hits.back().docid = 15; + return request; + } + + std::unique_ptr<FieldInfo> get_field_info(const vespalib::string &field_name) { + Matcher::SP matcher(new Matcher(schema, config, clock, queryLimiter, 0)); + const FieldInfo *field = matcher->get_index_env().getFieldByName(field_name); + if 
(field == nullptr) { + return std::unique_ptr<FieldInfo>(nullptr); + } + return std::make_unique<FieldInfo>(*field); + } + + FeatureSet::SP getSummaryFeatures(DocsumRequest::SP req) { + Matcher matcher(schema, config, clock, queryLimiter, 0); + return matcher.getSummaryFeatures(*req, searchContext, + attributeContext, *sessionManager); + } + + FeatureSet::SP getRankFeatures(DocsumRequest::SP req) { + Matcher matcher(schema, config, clock, queryLimiter, 0); + return matcher.getRankFeatures(*req, searchContext, attributeContext, + *sessionManager); + } + +}; + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- + +void verifyViewResolver(const ViewResolver &resolver) { + { + std::vector<vespalib::string> fields; + EXPECT_TRUE(resolver.resolve("foo", fields)); + ASSERT_TRUE(fields.size() == 2u); + EXPECT_EQUAL("x", fields[0]); + EXPECT_EQUAL("y", fields[1]); + } + { + std::vector<vespalib::string> fields; + EXPECT_TRUE(resolver.resolve("bar", fields)); + ASSERT_TRUE(fields.size() == 1u); + EXPECT_EQUAL("z", fields[0]); + } + { + std::vector<vespalib::string> fields; + EXPECT_TRUE(!resolver.resolve("baz", fields)); + ASSERT_TRUE(fields.size() == 1u); + EXPECT_EQUAL("baz", fields[0]); + } +} + +TEST("require that view resolver can be set up directly") { + ViewResolver resolver; + resolver.add("foo", "x").add("foo", "y").add("bar", "z"); + TEST_DO(verifyViewResolver(resolver)); +} + +TEST("require that view resolver can be set up from schema") { + Schema schema; + Schema::FieldSet foo("foo"); + foo.addField("x").addField("y"); + Schema::FieldSet bar("bar"); + bar.addField("z"); + schema.addFieldSet(foo); + schema.addFieldSet(bar); + ViewResolver resolver = ViewResolver::createFromSchema(schema); + TEST_DO(verifyViewResolver(resolver)); +} + +//----------------------------------------------------------------------------- + +TEST("require that matching is performed 
(multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + SearchReply::UP reply = world.performSearch(request, threads); + EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); + EXPECT_EQUAL(9u, reply->hits.size()); + EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + } +} + +TEST("require that matching also returns hits when only bitvector is used (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(0, 0); + world.verbose_a1_result("all"); + SearchRequest::SP request = world.createSimpleRequest("a1", "all"); + SearchReply::UP reply = world.performSearch(request, threads); + EXPECT_EQUAL(985u, world.matchingStats.docsMatched()); + EXPECT_EQUAL(10u, reply->hits.size()); + EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + } +} + +TEST("require that ranking is performed (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + SearchReply::UP reply = world.performSearch(request, threads); + EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); + EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); + EXPECT_EQUAL(0u, world.matchingStats.docsReRanked()); + ASSERT_TRUE(reply->hits.size() == 9u); + EXPECT_EQUAL(document::DocumentId("doc::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQUAL(900.0, reply->hits[0].metric); + EXPECT_EQUAL(document::DocumentId("doc::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQUAL(800.0, reply->hits[1].metric); + EXPECT_EQUAL(document::DocumentId("doc::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQUAL(700.0, reply->hits[2].metric); + EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_EQUAL(0.0, 
world.matchingStats.rerankTimeAvg()); + } +} + +TEST("require that re-ranking is performed (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.setupSecondPhaseRanking(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + SearchReply::UP reply = world.performSearch(request, threads); + EXPECT_EQUAL(9u, world.matchingStats.docsMatched()); + EXPECT_EQUAL(9u, world.matchingStats.docsRanked()); + EXPECT_EQUAL(3u, world.matchingStats.docsReRanked()); + ASSERT_TRUE(reply->hits.size() == 9u); + EXPECT_EQUAL(document::DocumentId("doc::900").getGlobalId(), reply->hits[0].gid); + EXPECT_EQUAL(1800.0, reply->hits[0].metric); + EXPECT_EQUAL(document::DocumentId("doc::800").getGlobalId(), reply->hits[1].gid); + EXPECT_EQUAL(1600.0, reply->hits[1].metric); + EXPECT_EQUAL(document::DocumentId("doc::700").getGlobalId(), reply->hits[2].gid); + EXPECT_EQUAL(1400.0, reply->hits[2].metric); + EXPECT_EQUAL(document::DocumentId("doc::600").getGlobalId(), reply->hits[3].gid); + EXPECT_EQUAL(600.0, reply->hits[3].metric); + EXPECT_EQUAL(document::DocumentId("doc::500").getGlobalId(), reply->hits[4].gid); + EXPECT_EQUAL(500.0, reply->hits[4].metric); + EXPECT_GREATER(world.matchingStats.matchTimeAvg(), 0.0000001); + EXPECT_GREATER(world.matchingStats.rerankTimeAvg(), 0.0000001); + } +} + +TEST("require that sortspec can be used (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + request->sortSpec = "+a1"; + SearchReply::UP reply = world.performSearch(request, threads); + ASSERT_EQUAL(9u, reply->hits.size()); + EXPECT_EQUAL(document::DocumentId("doc::100").getGlobalId(), reply->hits[0].gid); + EXPECT_EQUAL(0.0, reply->hits[0].metric); + EXPECT_EQUAL(document::DocumentId("doc::200").getGlobalId(), 
reply->hits[1].gid); + EXPECT_EQUAL(0.0, reply->hits[1].metric); + EXPECT_EQUAL(document::DocumentId("doc::300").getGlobalId(), reply->hits[2].gid); + EXPECT_EQUAL(0.0, reply->hits[2].metric); + } +} + +TEST("require that grouping is performed (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + { + vespalib::nbostream buf; + vespalib::NBOSerializer os(buf); + uint32_t n = 1; + os << n; + Grouping grequest = + Grouping() + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("a1")))); + grequest.serialize(os); + request->groupSpec.assign(buf.c_str(), buf.c_str() + buf.size()); + } + SearchReply::UP reply = world.performSearch(request, threads); + { + vespalib::nbostream buf(&reply->groupResult[0], + reply->groupResult.size()); + vespalib::NBOSerializer is(buf); + uint32_t n; + is >> n; + EXPECT_EQUAL(1u, n); + Grouping gresult; + gresult.deserialize(is); + Grouping gexpect = Grouping() + .setRoot(Group() + .addResult(SumAggregationResult() + .setExpression(AttributeNode("a1")) + .setResult(Int64ResultNode(4500)))); + EXPECT_EQUAL(gexpect.root().asString(), gresult.root().asString()); + } + EXPECT_GREATER(world.matchingStats.groupingTimeAvg(), 0.0000001); + } +} + +TEST("require that summary features are filled") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); + FeatureSet::SP fs = world.getSummaryFeatures(req); + const feature_t * f = NULL; + EXPECT_EQUAL(2u, fs->numFeatures()); + EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); + EXPECT_EQUAL("value(100)", fs->getNames()[1]); + EXPECT_EQUAL(2u, fs->numDocs()); + f = fs->getFeaturesByDocId(10); + EXPECT_TRUE(f != NULL); + EXPECT_EQUAL(10, f[0]); + EXPECT_EQUAL(100, f[1]); + f = fs->getFeaturesByDocId(15); + EXPECT_TRUE(f == NULL); + f = 
fs->getFeaturesByDocId(30); + EXPECT_TRUE(f != NULL); + EXPECT_EQUAL(30, f[0]); + EXPECT_EQUAL(100, f[1]); +} + +TEST("require that rank features are filled") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); + FeatureSet::SP fs = world.getRankFeatures(req); + const feature_t * f = NULL; + EXPECT_EQUAL(1u, fs->numFeatures()); + EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]); + EXPECT_EQUAL(2u, fs->numDocs()); + f = fs->getFeaturesByDocId(10); + EXPECT_TRUE(f != NULL); + EXPECT_EQUAL(20, f[0]); + f = fs->getFeaturesByDocId(15); + EXPECT_TRUE(f == NULL); + f = fs->getFeaturesByDocId(30); + EXPECT_TRUE(f != NULL); + EXPECT_EQUAL(60, f[0]); +} + +TEST("require that search session can be cached") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "foo"); + request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); + request->sessionId.push_back('a'); + EXPECT_EQUAL(0u, world.sessionManager->getSearchStats().numInsert); + SearchReply::UP reply = world.performSearch(request, 1); + EXPECT_EQUAL(1u, world.sessionManager->getSearchStats().numInsert); + SearchSession::SP session = world.sessionManager->pickSearch("a"); + ASSERT_TRUE(session.get()); + EXPECT_EQUAL(request->getTimeOfDoom(), session->getTimeOfDoom()); + EXPECT_EQUAL("a", session->getSessionId()); +} + +TEST("require that getSummaryFeatures can use cached query setup") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "foo"); + request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); + request->sessionId.push_back('a'); + world.performSearch(request, 1); + + DocsumRequest::SP docsum_request(new DocsumRequest); // no stack dump + docsum_request->sessionId = request->sessionId; + docsum_request-> + 
propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); + docsum_request->hits.push_back(DocsumRequest::Hit()); + docsum_request->hits.back().docid = 30; + + FeatureSet::SP fs = world.getSummaryFeatures(docsum_request); + ASSERT_EQUAL(2u, fs->numFeatures()); + EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); + EXPECT_EQUAL("value(100)", fs->getNames()[1]); + ASSERT_EQUAL(1u, fs->numDocs()); + const feature_t *f = fs->getFeaturesByDocId(30); + ASSERT_TRUE(f); + EXPECT_EQUAL(30, f[0]); + EXPECT_EQUAL(100, f[1]); + + // getSummaryFeatures can be called multiple times. + fs = world.getSummaryFeatures(docsum_request); + ASSERT_EQUAL(2u, fs->numFeatures()); + EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); + EXPECT_EQUAL("value(100)", fs->getNames()[1]); + ASSERT_EQUAL(1u, fs->numDocs()); + f = fs->getFeaturesByDocId(30); + ASSERT_TRUE(f); + EXPECT_EQUAL(30, f[0]); + EXPECT_EQUAL(100, f[1]); +} + +TEST("require that getSummaryFeatures prefers cached query setup") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + request->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); + request->sessionId.push_back('a'); + world.performSearch(request, 1); + + DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); + req->sessionId = request->sessionId; + req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); + FeatureSet::SP fs = world.getSummaryFeatures(req); + EXPECT_EQUAL(2u, fs->numFeatures()); + ASSERT_EQUAL(0u, fs->numDocs()); // "spread" has no hits + + // Empty cache + auto pruneTime = fastos::ClockSystem::now() + + fastos::TimeStamp::MINUTE * 10; + world.sessionManager->pruneTimedOutSessions(pruneTime); + + fs = world.getSummaryFeatures(req); + EXPECT_EQUAL(2u, fs->numFeatures()); + ASSERT_EQUAL(2u, fs->numDocs()); // "foo" has two hits +} + +TEST("require that match params are set up straight with 
ranking on") { + MatchParams p(1, 2, 4, 0.7, 0, 1, true, true); + ASSERT_EQUAL(1u, p.numDocs); + ASSERT_EQUAL(2u, p.heapSize); + ASSERT_EQUAL(4u, p.arraySize); + ASSERT_EQUAL(0.7, p.rankDropLimit); + ASSERT_EQUAL(0u, p.offset); + ASSERT_EQUAL(1u, p.hits); +} + +TEST("require that match params are set up straight with ranking on arraySize is atleast the size of heapSize") { + MatchParams p(1, 6, 4, 0.7, 1, 1, true, true); + ASSERT_EQUAL(1u, p.numDocs); + ASSERT_EQUAL(6u, p.heapSize); + ASSERT_EQUAL(6u, p.arraySize); + ASSERT_EQUAL(0.7, p.rankDropLimit); + ASSERT_EQUAL(1u, p.offset); + ASSERT_EQUAL(1u, p.hits); +} + +TEST("require that match params are set up straight with ranking on arraySize is atleast the size of hits+offset") { + MatchParams p(1, 6, 4, 0.7, 4, 4, true, true); + ASSERT_EQUAL(1u, p.numDocs); + ASSERT_EQUAL(6u, p.heapSize); + ASSERT_EQUAL(8u, p.arraySize); + ASSERT_EQUAL(0.7, p.rankDropLimit); + ASSERT_EQUAL(4u, p.offset); + ASSERT_EQUAL(4u, p.hits); +} + +TEST("require that match params are set up straight with ranking off array and heap size is 0") { + MatchParams p(1, 6, 4, 0.7, 4, 4, true, false); + ASSERT_EQUAL(1u, p.numDocs); + ASSERT_EQUAL(0u, p.heapSize); + ASSERT_EQUAL(0u, p.arraySize); + ASSERT_EQUAL(0.7, p.rankDropLimit); + ASSERT_EQUAL(4u, p.offset); + ASSERT_EQUAL(4u, p.hits); +} + +TEST("require that match phase limiting works") { + for (int s = 0; s <= 1; ++s) { + for (int i = 0; i <= 6; ++i) { + bool enable = (i != 0); + bool index_time = (i == 1) || (i == 2) || (i == 5) || (i == 6); + bool query_time = (i == 3) || (i == 4) || (i == 5) || (i == 6); + bool descending = (i == 2) || (i == 4) || (i == 6); + bool use_sorting = (s == 1); + size_t want_threads = 75; + MyWorld world; + world.basicSetup(); + world.verbose_a1_result("all"); + if (enable) { + if (index_time) { + if (query_time) { + // inject bogus setup to be overridden by query + world.setup_match_phase_limiting("limiter", 10, true); + } else { + 
world.setup_match_phase_limiting("limiter", 150, descending); + } + } + world.add_match_phase_limiting_result("limiter", 152, descending, {948, 951, 963, 987, 991, 994, 997}); + } + SearchRequest::SP request = world.createSimpleRequest("a1", "all"); + if (query_time) { + inject_match_phase_limiting(request->propertiesMap.lookupCreate(search::MapNames::RANK), "limiter", 150, descending); + } + if (use_sorting) { + request->sortSpec = "-a1"; + } + SearchReply::UP reply = world.performSearch(request, want_threads); + ASSERT_EQUAL(10u, reply->hits.size()); + if (enable) { + EXPECT_EQUAL(79u, reply->totalHitCount); + if (!use_sorting) { + EXPECT_EQUAL(997.0, reply->hits[0].metric); + EXPECT_EQUAL(994.0, reply->hits[1].metric); + EXPECT_EQUAL(991.0, reply->hits[2].metric); + EXPECT_EQUAL(987.0, reply->hits[3].metric); + EXPECT_EQUAL(974.0, reply->hits[4].metric); + EXPECT_EQUAL(963.0, reply->hits[5].metric); + EXPECT_EQUAL(961.0, reply->hits[6].metric); + EXPECT_EQUAL(951.0, reply->hits[7].metric); + EXPECT_EQUAL(948.0, reply->hits[8].metric); + EXPECT_EQUAL(935.0, reply->hits[9].metric); + } + } else { + EXPECT_EQUAL(985u, reply->totalHitCount); + if (!use_sorting) { + EXPECT_EQUAL(999.0, reply->hits[0].metric); + EXPECT_EQUAL(998.0, reply->hits[1].metric); + EXPECT_EQUAL(997.0, reply->hits[2].metric); + EXPECT_EQUAL(996.0, reply->hits[3].metric); + } + } + } + } +} + +TEST("require that arithmetic used for rank drop limit works") { + double small = -HUGE_VAL; + double limit = -std::numeric_limits<feature_t>::quiet_NaN(); + EXPECT_TRUE(!(small <= limit)); +} + +TEST("require that termwise limit is set correctly for first phase ranking program") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + EXPECT_EQUAL(1.0, world.get_first_phase_termwise_limit()); + world.set_property(indexproperties::matching::TermwiseLimit::NAME, "0.02"); + EXPECT_EQUAL(0.02, world.get_first_phase_termwise_limit()); +} + +TEST("require that fields are tagged with data type") { + 
MyWorld world; + world.basicSetup(); + auto int32_field = world.get_field_info("a1"); + auto string_field = world.get_field_info("f1"); + auto tensor_field = world.get_field_info("tensor_field"); + auto predicate_field = world.get_field_info("predicate_field"); + ASSERT_TRUE(bool(int32_field)); + ASSERT_TRUE(bool(string_field)); + ASSERT_TRUE(bool(tensor_field)); + ASSERT_TRUE(bool(predicate_field)); + EXPECT_EQUAL(int32_field->get_data_type(), FieldInfo::DataType::INT32); + EXPECT_EQUAL(string_field->get_data_type(), FieldInfo::DataType::STRING); + EXPECT_EQUAL(tensor_field->get_data_type(), FieldInfo::DataType::TENSOR); + EXPECT_EQUAL(predicate_field->get_data_type(), FieldInfo::DataType::BOOLEANTREE); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/partial_result/.gitignore b/searchcore/src/tests/proton/matching/partial_result/.gitignore new file mode 100644 index 00000000000..0284be2ead8 --- /dev/null +++ b/searchcore/src/tests/proton/matching/partial_result/.gitignore @@ -0,0 +1 @@ +searchcore_partial_result_test_app diff --git a/searchcore/src/tests/proton/matching/partial_result/CMakeLists.txt b/searchcore/src/tests/proton/matching/partial_result/CMakeLists.txt new file mode 100644 index 00000000000..39c1679fc27 --- /dev/null +++ b/searchcore/src/tests/proton/matching/partial_result/CMakeLists.txt @@ -0,0 +1,8 @@ +# Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+vespa_add_executable(searchcore_partial_result_test_app + SOURCES + partial_result_test.cpp + DEPENDS + searchcore_matching +) +vespa_add_test(NAME searchcore_partial_result_test_app COMMAND searchcore_partial_result_test_app) diff --git a/searchcore/src/tests/proton/matching/partial_result/FILES b/searchcore/src/tests/proton/matching/partial_result/FILES new file mode 100644 index 00000000000..cb7cdbd3bb6 --- /dev/null +++ b/searchcore/src/tests/proton/matching/partial_result/FILES @@ -0,0 +1 @@ +partial_result_test.cpp diff --git a/searchcore/src/tests/proton/matching/partial_result/partial_result_test.cpp b/searchcore/src/tests/proton/matching/partial_result/partial_result_test.cpp new file mode 100644 index 00000000000..48b92c5ae46 --- /dev/null +++ b/searchcore/src/tests/proton/matching/partial_result/partial_result_test.cpp @@ -0,0 +1,159 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#include <vespa/fastos/fastos.h> +#include <vespa/vespalib/testkit/test_kit.h> +#include <vespa/searchcore/proton/matching/partial_result.h> +#include <vespa/vespalib/util/box.h> + +using proton::matching::PartialResult; +using namespace vespalib; + +void checkMerge(const std::vector<double> &a, const std::vector<double> &b, + size_t maxHits, const std::vector<double> &expect) +{ + PartialResult res_a(maxHits, false); + PartialResult res_b(maxHits, false); + for (size_t i = 0; i < a.size(); ++i) { + res_a.add(search::RankedHit(i, a[i])); + } + res_a.totalHits(a.size()); + for (size_t i = 0; i < b.size(); ++i) { + res_b.add(search::RankedHit(i, b[i])); + } + res_b.totalHits(b.size()); + res_a.merge(res_b); + EXPECT_EQUAL(a.size() + b.size(), res_a.totalHits()); + ASSERT_EQUAL(expect.size(), res_a.size()); + for (size_t i = 0; i < expect.size(); ++i) { + EXPECT_EQUAL(expect[i], res_a.hit(i)._rankValue); + } +} + +void checkMerge(const std::vector<std::string> &a, const std::vector<std::string> &b, + size_t 
maxHits, const std::vector<std::string> &expect) +{ + size_t len = 0; + PartialResult res_a(maxHits, true); + PartialResult res_b(maxHits, true); + len = 0; + for (size_t i = 0; i < a.size(); ++i) { + len += a[i].size(); + res_a.add(search::RankedHit(i, 0.0), PartialResult::SortRef(a[i].data(), a[i].size())); + } + res_a.totalHits(a.size()); + EXPECT_EQUAL(len, res_a.sortDataSize()); + len = 0; + for (size_t i = 0; i < b.size(); ++i) { + len += b[i].size(); + res_b.add(search::RankedHit(i, 0.0), PartialResult::SortRef(b[i].data(), b[i].size())); + } + res_b.totalHits(b.size()); + EXPECT_EQUAL(len, res_b.sortDataSize()); + res_a.merge(res_b); + EXPECT_EQUAL(a.size() + b.size(), res_a.totalHits()); + ASSERT_EQUAL(expect.size(), res_a.size()); + len = 0; + for (size_t i = 0; i < expect.size(); ++i) { + len += expect[i].size(); + EXPECT_EQUAL(expect[i], std::string(res_a.sortData(i).first, res_a.sortData(i).second)); + } + EXPECT_EQUAL(len, res_a.sortDataSize()); +} + +TEST("require that partial results can be created without sort data") { + PartialResult res(100, false); + EXPECT_EQUAL(0u, res.size()); + EXPECT_EQUAL(100u, res.maxSize()); + EXPECT_EQUAL(0u, res.totalHits()); + EXPECT_FALSE(res.hasSortData()); + EXPECT_EQUAL(0u, res.sortDataSize()); + res.add(search::RankedHit(1, 10.0)); + res.add(search::RankedHit(2, 5.0)); + res.totalHits(1000); + EXPECT_EQUAL(1000u, res.totalHits()); + ASSERT_EQUAL(2u, res.size()); + EXPECT_EQUAL(1u, res.hit(0)._docId); + EXPECT_EQUAL(10.0, res.hit(0)._rankValue); + EXPECT_EQUAL(2u, res.hit(1)._docId); + EXPECT_EQUAL(5.0, res.hit(1)._rankValue); +} + +TEST("require that partial results can be created with sort data") { + std::string str1("aaa"); + std::string str2("bbb"); + PartialResult res(100, true); + EXPECT_EQUAL(0u, res.size()); + EXPECT_EQUAL(100u, res.maxSize()); + EXPECT_EQUAL(0u, res.totalHits()); + EXPECT_TRUE(res.hasSortData()); + EXPECT_EQUAL(0u, res.sortDataSize()); + res.add(search::RankedHit(1, 10.0), 
PartialResult::SortRef(str1.data(), str1.size())); + res.add(search::RankedHit(2, 5.0), PartialResult::SortRef(str2.data(), str2.size())); + res.totalHits(1000); + EXPECT_EQUAL(1000u, res.totalHits()); + ASSERT_EQUAL(2u, res.size()); + EXPECT_EQUAL(1u, res.hit(0)._docId); + EXPECT_EQUAL(10.0, res.hit(0)._rankValue); + EXPECT_EQUAL(str1.data(), res.sortData(0).first); + EXPECT_EQUAL(str1.size(), res.sortData(0).second); + EXPECT_EQUAL(2u, res.hit(1)._docId); + EXPECT_EQUAL(5.0, res.hit(1)._rankValue); + EXPECT_EQUAL(str2.data(), res.sortData(1).first); + EXPECT_EQUAL(str2.size(), res.sortData(1).second); +} + +TEST("require that partial results without sort data are merged correctly") { + TEST_DO(checkMerge(make_box(5.0, 4.0, 3.0), make_box(4.5, 3.5), 3, make_box(5.0, 4.5, 4.0))); + TEST_DO(checkMerge(make_box(4.5, 3.5), make_box(5.0, 4.0, 3.0), 3, make_box(5.0, 4.5, 4.0))); + TEST_DO(checkMerge(make_box(1.0), make_box(2.0), 10, make_box(2.0, 1.0))); + TEST_DO(checkMerge(make_box(2.0), make_box(1.0), 10, make_box(2.0, 1.0))); + TEST_DO(checkMerge(std::vector<double>(), make_box(1.0), 10, make_box(1.0))); + TEST_DO(checkMerge(make_box(1.0), std::vector<double>(), 10, make_box(1.0))); + TEST_DO(checkMerge(std::vector<double>(), make_box(1.0), 0, std::vector<double>())); + TEST_DO(checkMerge(make_box(1.0), std::vector<double>(), 0, std::vector<double>())); + TEST_DO(checkMerge(std::vector<double>(), std::vector<double>(), 10, std::vector<double>())); +} + +TEST("require that partial results with sort data are merged correctly") { + TEST_DO(checkMerge(make_box<std::string>("a", "c", "e"), make_box<std::string>("b", "d"), 3, make_box<std::string>("a", "b", "c"))); + TEST_DO(checkMerge(make_box<std::string>("b", "d"), make_box<std::string>("a", "c", "e"), 3, make_box<std::string>("a", "b", "c"))); + TEST_DO(checkMerge(make_box<std::string>("a"), make_box<std::string>("aa"), 10, make_box<std::string>("a", "aa"))); + TEST_DO(checkMerge(make_box<std::string>("aa"), 
make_box<std::string>("a"), 10, make_box<std::string>("a", "aa"))); + TEST_DO(checkMerge(std::vector<std::string>(), make_box<std::string>("a"), 10, make_box<std::string>("a"))); + TEST_DO(checkMerge(make_box<std::string>("a"), std::vector<std::string>(), 10, make_box<std::string>("a"))); + TEST_DO(checkMerge(std::vector<std::string>(), make_box<std::string>("a"), 0, std::vector<std::string>())); + TEST_DO(checkMerge(make_box<std::string>("a"), std::vector<std::string>(), 0, std::vector<std::string>())); + TEST_DO(checkMerge(std::vector<std::string>(), std::vector<std::string>(), 10, std::vector<std::string>())); +} + +TEST("require that lower docid is preferred when sorting on rank") { + PartialResult res_a(1, false); + PartialResult res_b(1, false); + PartialResult res_c(1, false); + res_a.add(search::RankedHit(2, 1.0)); + res_b.add(search::RankedHit(3, 1.0)); + res_c.add(search::RankedHit(1, 1.0)); + res_a.merge(res_b); + ASSERT_EQUAL(1u, res_a.size()); + EXPECT_EQUAL(2u, res_a.hit(0)._docId); + res_a.merge(res_c); + ASSERT_EQUAL(1u, res_a.size()); + EXPECT_EQUAL(1u, res_a.hit(0)._docId); +} + +TEST("require that lower docid is preferred when using sortspec") { + std::string foo("foo"); + PartialResult res_a(1, true); + PartialResult res_b(1, true); + PartialResult res_c(1, true); + res_a.add(search::RankedHit(2, 1.0), PartialResult::SortRef(foo.data(), foo.size())); + res_b.add(search::RankedHit(3, 1.0), PartialResult::SortRef(foo.data(), foo.size())); + res_c.add(search::RankedHit(1, 1.0), PartialResult::SortRef(foo.data(), foo.size())); + res_a.merge(res_b); + ASSERT_EQUAL(1u, res_a.size()); + EXPECT_EQUAL(2u, res_a.hit(0)._docId); + res_a.merge(res_c); + ASSERT_EQUAL(1u, res_a.size()); + EXPECT_EQUAL(1u, res_a.hit(0)._docId); +} + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/query_test.cpp b/searchcore/src/tests/proton/matching/query_test.cpp new file mode 100644 index 00000000000..caf52a5fca4 --- /dev/null +++ 
b/searchcore/src/tests/proton/matching/query_test.cpp @@ -0,0 +1,900 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for query. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("query_test"); + +#include <vespa/document/datatype/positiondatatype.h> +#include <vespa/searchcore/proton/matching/fakesearchcontext.h> +#include <vespa/searchcore/proton/matching/matchdatareservevisitor.h> +#include <vespa/searchcore/proton/matching/blueprintbuilder.h> +#include <vespa/searchcore/proton/matching/query.h> +#include <vespa/searchcore/proton/matching/querynodes.h> +#include <vespa/searchcore/proton/matching/resolveviewvisitor.h> +#include <vespa/searchcore/proton/matching/termdataextractor.h> +#include <vespa/searchcore/proton/matching/viewresolver.h> +#include <vespa/searchlib/features/utils.h> +#include <vespa/searchlib/fef/itermfielddata.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/query/tree/customtypetermvisitor.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/tree/stackdumpcreator.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/searchlib/queryeval/intermediate_blueprints.h> +#include <vespa/searchlib/queryeval/wand/parallel_weak_and_blueprint.h> +#include <vespa/searchlib/queryeval/leaf_blueprints.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/simpleresult.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/searchlib/queryeval/termasstring.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vector> + +using document::PositionDataType; +using search::fef::CollectionType; +using search::fef::FieldInfo; +using search::fef::FieldType; +using search::fef::ITermData; +using 
search::fef::ITermFieldData; +using search::fef::IllegalHandle; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldHandle; +using search::query::CustomTypeTermVisitor; +using search::query::Node; +using search::query::QueryBuilder; +using search::query::Range; +using search::query::StackDumpCreator; +using search::query::Weight; +using search::queryeval::termAsString; +using search::queryeval::Blueprint; +using search::queryeval::FakeResult; +using search::queryeval::FakeSearchable; +using search::queryeval::FakeRequestContext; +using search::queryeval::FieldSpec; +using search::queryeval::FieldSpecList; +using search::queryeval::Searchable; +using search::queryeval::SearchIterator; +using search::queryeval::SimpleBlueprint; +using search::queryeval::SimpleResult; +using search::queryeval::ParallelWeakAndBlueprint; +using std::string; +using std::vector; +namespace fef_test = search::fef::test; + +namespace proton { +namespace matching { +namespace { + +class Test : public vespalib::TestApp { + MatchData::UP _match_data; + Blueprint::UP _blueprint; + FakeRequestContext _requestContext; + + void setUp(); + void tearDown(); + + void requireThatMatchDataIsReserved(); + void requireThatMatchDataIsReservedForEachFieldInAView(); + void requireThatTermsAreLookedUp(); + void requireThatTermsAreLookedUpInMultipleFieldsFromAView(); + void requireThatAttributeTermsAreLookedUpInAttributeSource(); + void requireThatAttributeTermDataHandlesAreAllocated(); + void requireThatTermDataIsFilledIn(); + + SearchIterator::UP getIterator(Node &node, ISearchContext &context); + + void requireThatSingleIndexCanUseBlendingAsBlacklisting(); + void requireThatIteratorsAreBuiltWithBlending(); + void requireThatIteratorsAreBuiltForAllTermNodes(); + void requireThatNearIteratorsCanBeBuilt(); + void requireThatONearIteratorsCanBeBuilt(); + void requireThatPhraseIteratorsCanBeBuilt(); + + void 
requireThatUnknownFieldActsEmpty(); + void requireThatIllegalFieldsAreIgnored(); + void requireThatQueryGluesEverythingTogether(); + void requireThatQueryAddsLocation(); + void requireThatQueryAddsLocationCutoff(); + void requireThatFakeFieldSearchDumpsDiffer(); + void requireThatNoDocsGiveZeroDocFrequency(); + void requireThatWeakAndBlueprintsAreCreatedCorrectly(); + void requireThatParallelWandBlueprintsAreCreatedCorrectly(); + void requireThatBlackListBlueprintCanBeUsed(); + +public: + int Main(); +}; + +#define TEST_CALL(func) \ + TEST_DO(setUp()); \ + TEST_DO(func()); \ + TEST_DO(tearDown()) + +void Test::setUp() { + _match_data.reset(0); + _blueprint.reset(0); +} + +void Test::tearDown() { + _match_data.reset(0); + _blueprint.reset(0); +} + +const string field = "field"; +const string resolved_field1 = "resolved1"; +const string resolved_field2 = "resolved2"; +const string unknown_field = "unknown_field"; +const string float_term = "3.14"; +const string int_term = "42"; +const string prefix_term = "foo"; +const string string_term = "bar"; +const uint32_t string_id = 4; +const Weight string_weight(4); +const string substring_term = "baz"; +const string suffix_term = "qux"; +const string phrase_term = "quux"; +const Range range_term = Range(32, 47); +const int doc_count = 100; +const int field_id = 154; +const uint32_t term_index = 23; +const uint32_t term_count = 8; + +fef_test::IndexEnvironment plain_index_env; +fef_test::IndexEnvironment resolved_index_env; +fef_test::IndexEnvironment attribute_index_env; + +void setupIndexEnvironments() +{ + FieldInfo field_info(FieldType::INDEX, CollectionType::SINGLE, field, field_id); + plain_index_env.getFields().push_back(field_info); + + FieldInfo field_info1(FieldType::INDEX, CollectionType::SINGLE, resolved_field1, field_id); + resolved_index_env.getFields().push_back(field_info1); + FieldInfo field_info2(FieldType::INDEX, CollectionType::SINGLE, resolved_field2, field_id + 1); + 
resolved_index_env.getFields().push_back(field_info2); + + FieldInfo attr_info(FieldType::ATTRIBUTE, CollectionType::SINGLE, field, 0); + attribute_index_env.getFields().push_back(attr_info); +} + +Node::UP buildQueryTree(const ViewResolver &resolver, + const search::fef::IIndexEnvironment &idxEnv) +{ + QueryBuilder<ProtonNodeTypes> query_builder; + query_builder.addOr(term_count); + query_builder.addNumberTerm(float_term, field, 0, Weight(0)); + query_builder.addNumberTerm(int_term, field, 1, Weight(0)); + query_builder.addPrefixTerm(prefix_term, field, 2, Weight(0)); + query_builder.addRangeTerm(range_term, field, 3, Weight(0)); + query_builder.addStringTerm(string_term, field, string_id, string_weight) + .setTermIndex(term_index); + query_builder.addSubstringTerm(substring_term, field, 5, Weight(0)); + query_builder.addSuffixTerm(suffix_term, field, 6, Weight(0)); + query_builder.addPhrase(2, field, 7, Weight(0)); + query_builder.addStringTerm(phrase_term, field, 8, Weight(0)); + query_builder.addStringTerm(phrase_term, field, 9, Weight(0)); + Node::UP node = query_builder.build(); + + ResolveViewVisitor visitor(resolver, idxEnv); + node->accept(visitor); + return node; +} + +void Test::requireThatMatchDataIsReserved() { + Node::UP node = buildQueryTree(ViewResolver(), plain_index_env); + + MatchDataLayout mdl; + MatchDataReserveVisitor visitor(mdl); + node->accept(visitor); + MatchData::UP match_data = mdl.createMatchData(); + + EXPECT_EQUAL(term_count, match_data->getNumTermFields()); +} + +ViewResolver getViewResolver() { + ViewResolver resolver; + resolver.add(field, resolved_field1); + resolver.add(field, resolved_field2); + return resolver; +} + +void Test::requireThatMatchDataIsReservedForEachFieldInAView() { + Node::UP node = buildQueryTree(getViewResolver(), resolved_index_env); + + MatchDataLayout mdl; + MatchDataReserveVisitor visitor(mdl); + node->accept(visitor); + MatchData::UP match_data = mdl.createMatchData(); + + EXPECT_EQUAL(term_count * 2, 
match_data->getNumTermFields()); +} + +class LookupTestCheckerVisitor : public CustomTypeTermVisitor<ProtonNodeTypes> +{ + int Main() { return 0; } + +public: + template <class TermType> + void checkNode(const TermType &n, int estimatedHitCount, bool empty) { + EXPECT_EQUAL(empty, (estimatedHitCount == 0)); + EXPECT_EQUAL((double)estimatedHitCount / doc_count, n.field(0).getDocFreq()); + } + + virtual void visit(ProtonNumberTerm &n) { checkNode(n, 1, false); } + virtual void visit(ProtonLocationTerm &n) { checkNode(n, 0, true); } + virtual void visit(ProtonPrefixTerm &n) { checkNode(n, 1, false); } + virtual void visit(ProtonRangeTerm &n) { checkNode(n, 2, false); } + virtual void visit(ProtonStringTerm &n) { checkNode(n, 2, false); } + virtual void visit(ProtonSubstringTerm &n) { checkNode(n, 0, true); } + virtual void visit(ProtonSuffixTerm &n) { checkNode(n, 2, false); } + virtual void visit(ProtonPhrase &n) { checkNode(n, 0, true); } + virtual void visit(ProtonWeightedSetTerm &) {} + virtual void visit(ProtonDotProduct &) {} + virtual void visit(ProtonWandTerm &) {} + virtual void visit(ProtonPredicateQuery &) {} + virtual void visit(ProtonRegExpTerm &) {} +}; + +void Test::requireThatTermsAreLookedUp() { + FakeRequestContext requestContext; + Node::UP node = buildQueryTree(ViewResolver(), plain_index_env); + + FakeSearchContext context; + context.addIdx(1).addIdx(2); + context.idx(0).getFake() + .addResult(field, prefix_term, FakeResult().doc(1).pos(2)) + .addResult(field, string_term, + FakeResult().doc(2).pos(3).doc(3).pos(4)) + .addResult(field, termAsString(int_term), + FakeResult().doc(4).pos(5)); + context.idx(1).getFake() + .addResult(field, string_term, FakeResult().doc(6).pos(7)) + .addResult(field, suffix_term, + FakeResult().doc(7).pos(8).doc(8).pos(9)) + .addResult(field, termAsString(float_term), + FakeResult().doc(9).pos(10)) + .addResult(field, termAsString(int_term), + FakeResult().doc(10).pos(11)) + .addResult(field, termAsString(range_term), 
+ FakeResult().doc(12).pos(13).doc(13).pos(14)); + context.setLimit(doc_count + 1); + + MatchDataLayout mdl; + MatchDataReserveVisitor visitor(mdl); + node->accept(visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, *node, context); + + LookupTestCheckerVisitor checker; + TEST_DO(node->accept(checker)); +} + +void Test::requireThatTermsAreLookedUpInMultipleFieldsFromAView() { + Node::UP node = buildQueryTree(getViewResolver(), resolved_index_env); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.addIdx(1).addIdx(2); + context.idx(0).getFake() + .addResult(resolved_field1, prefix_term, + FakeResult().doc(1).pos(2)) + .addResult(resolved_field2, string_term, + FakeResult().doc(2).pos(3).doc(3).pos(4)) + .addResult(resolved_field1, termAsString(int_term), + FakeResult().doc(4).pos(5)); + context.idx(1).getFake() + .addResult(resolved_field1, string_term, + FakeResult().doc(6).pos(7)) + .addResult(resolved_field2, suffix_term, + FakeResult().doc(7).pos(8).doc(8).pos(9)) + .addResult(resolved_field1, termAsString(float_term), + FakeResult().doc(9).pos(10)) + .addResult(resolved_field2, termAsString(int_term), + FakeResult().doc(10).pos(11)) + .addResult(resolved_field1, termAsString(range_term), + FakeResult().doc(12).pos(13).doc(13).pos(14)); + context.setLimit(doc_count + 1); + + MatchDataLayout mdl; + MatchDataReserveVisitor visitor(mdl); + node->accept(visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, *node, context); + + LookupTestCheckerVisitor checker; + TEST_DO(node->accept(checker)); +} + +void Test::requireThatAttributeTermsAreLookedUpInAttributeSource() { + const string term = "bar"; + ProtonStringTerm node(term, field, 1, Weight(2)); + node.resolve(ViewResolver(), attribute_index_env); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.addIdx(1); + context.attr().addResult(field, term, FakeResult().doc(1).pos(2)); + + MatchDataLayout mdl; + 
MatchDataReserveVisitor visitor(mdl); + node.accept(visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, node, context); + + EXPECT_TRUE(!blueprint->getState().estimate().empty); + EXPECT_EQUAL(1u, blueprint->getState().estimate().estHits); +} + +void Test::requireThatAttributeTermDataHandlesAreAllocated() { + const string term = "bar"; + ProtonStringTerm node(term, field, 1, Weight(2)); + node.resolve(ViewResolver(), attribute_index_env); + + FakeSearchContext context; + FakeRequestContext requestContext; + + MatchDataLayout mdl; + MatchDataReserveVisitor reserve_visitor(mdl); + node.accept(reserve_visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, node, context); + + MatchData::UP match_data = mdl.createMatchData(); + + EXPECT_EQUAL(1u, match_data->getNumTermFields()); + EXPECT_TRUE(node.field(0).attribute_field); +} + + +class SetUpTermDataTestCheckerVisitor + : public CustomTypeTermVisitor<ProtonNodeTypes> +{ + int Main() { return 0; } + +public: + virtual void visit(ProtonNumberTerm &) {} + virtual void visit(ProtonLocationTerm &) {} + virtual void visit(ProtonPrefixTerm &) {} + virtual void visit(ProtonRangeTerm &) {} + + virtual void visit(ProtonStringTerm &n) { + const ITermData &term_data = n; + EXPECT_EQUAL(string_weight.percent(), + term_data.getWeight().percent()); + EXPECT_EQUAL(1u, term_data.getPhraseLength()); + EXPECT_EQUAL(-1u, term_data.getTermIndex()); + EXPECT_EQUAL(string_id, term_data.getUniqueId()); + EXPECT_EQUAL(term_data.numFields(), n.numFields()); + for (size_t i = 0; i < term_data.numFields(); ++i) { + const ITermFieldData &term_field_data = term_data.field(i); + EXPECT_APPROX(2.0 / doc_count, term_field_data.getDocFreq(), 1.0e-6); + EXPECT_TRUE(!n.field(i).attribute_field); + EXPECT_EQUAL(field_id + i, term_field_data.getFieldId()); + } + } + + virtual void visit(ProtonSubstringTerm &) {} + virtual void visit(ProtonSuffixTerm &) {} + virtual void visit(ProtonPhrase &n) { + const 
ITermData &term_data = n; + EXPECT_EQUAL(2u, term_data.getPhraseLength()); + } + virtual void visit(ProtonWeightedSetTerm &) {} + virtual void visit(ProtonDotProduct &) {} + virtual void visit(ProtonWandTerm &) {} + virtual void visit(ProtonPredicateQuery &) {} + virtual void visit(ProtonRegExpTerm &) {} +}; + +void Test::requireThatTermDataIsFilledIn() { + Node::UP node = buildQueryTree(getViewResolver(), resolved_index_env); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.addIdx(1); + context.idx(0).getFake().addResult(resolved_field1, string_term, + FakeResult().doc(1).pos(2).doc(5).pos(3)); + context.setLimit(doc_count + 1); + + MatchDataLayout mdl; + MatchDataReserveVisitor reserve_visitor(mdl); + node->accept(reserve_visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, *node, context); + + TEST_DO( + SetUpTermDataTestCheckerVisitor checker; + node->accept(checker); + ); +} + +SearchIterator::UP Test::getIterator(Node &node, ISearchContext &context) { + MatchDataLayout mdl; + MatchDataReserveVisitor mdr_visitor(mdl); + node.accept(mdr_visitor); + _match_data = mdl.createMatchData(); + + _blueprint = BlueprintBuilder::build(_requestContext, node, context); + + _blueprint->fetchPostings(true); + SearchIterator::UP search(_blueprint->createSearch(*_match_data, true)); + search->initFullRange(); + return search; +} + +FakeIndexSearchable getFakeSearchable(const string &term, int doc1, int doc2) { + FakeIndexSearchable source; + source.getFake().addResult(field, term, + FakeResult().doc(doc1).pos(2).doc(doc2).pos(3)); + return source; +} + +void Test::requireThatSingleIndexCanUseBlendingAsBlacklisting() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addStringTerm(string_term, field, 1, Weight(2)) + .resolve(ViewResolver(), plain_index_env); + Node::UP node = builder.build(); + ASSERT_TRUE(node.get()); + + FakeSearchContext context; + context.addIdx(1).idx(0) = getFakeSearchable(string_term, 2, 5); + 
context.selector().setSource(5, 1); + + SearchIterator::UP iterator = getIterator(*node, context); + ASSERT_TRUE(iterator.get()); + EXPECT_TRUE(!iterator->seek(1)); + EXPECT_TRUE(!iterator->seek(2)); + EXPECT_TRUE(iterator->seek(5)); + iterator->unpack(5); +} + +void Test::requireThatIteratorsAreBuiltWithBlending() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addStringTerm(string_term, field, 1, Weight(2)) + .resolve(ViewResolver(), plain_index_env); + Node::UP node = builder.build(); + ASSERT_TRUE(node.get()); + + FakeSearchContext context; + context.addIdx(1).idx(0) = getFakeSearchable(string_term, 3, 7); + context.addIdx(0).idx(1) = getFakeSearchable(string_term, 2, 6); + context.selector().setSource(3, 1); + context.selector().setSource(7, 1); + + SearchIterator::UP iterator = getIterator(*node, context); + ASSERT_TRUE(iterator.get()); + + EXPECT_TRUE(!iterator->seek(1)); + EXPECT_TRUE(iterator->seek(2)); + EXPECT_TRUE(iterator->seek(3)); + EXPECT_TRUE(iterator->seek(6)); + EXPECT_TRUE(iterator->seek(7)); +} + +void Test::requireThatIteratorsAreBuiltForAllTermNodes() { + Node::UP node = buildQueryTree(ViewResolver(), plain_index_env); + ASSERT_TRUE(node.get()); + + FakeSearchContext context(42); + context.addIdx(0).idx(0).getFake() + .addResult(field, termAsString(float_term), + FakeResult().doc(2).pos(2)) + .addResult(field, termAsString(int_term), + FakeResult().doc(4).pos(2)) + .addResult(field, prefix_term, FakeResult().doc(8).pos(2)) + .addResult(field, termAsString(range_term), + FakeResult().doc(15).pos(2)) + .addResult(field, string_term, FakeResult().doc(16).pos(2)) + .addResult(field, substring_term, FakeResult().doc(23).pos(2)) + .addResult(field, suffix_term, FakeResult().doc(42).pos(2)); + + SearchIterator::UP iterator = getIterator(*node, context); + ASSERT_TRUE(iterator.get()); + + EXPECT_TRUE(!iterator->seek(1)); + EXPECT_TRUE(iterator->seek(2)); + EXPECT_TRUE(iterator->seek(4)); + EXPECT_TRUE(iterator->seek(8)); + 
EXPECT_TRUE(iterator->seek(15)); + EXPECT_TRUE(iterator->seek(16)); + EXPECT_TRUE(iterator->seek(23)); + EXPECT_TRUE(iterator->seek(42)); +} + +void Test::requireThatNearIteratorsCanBeBuilt() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addNear(2, 4); + builder.addStringTerm(string_term, field, 1, Weight(2)); + builder.addStringTerm(prefix_term, field, 1, Weight(2)); + Node::UP node = builder.build(); + ResolveViewVisitor resolver(ViewResolver(), plain_index_env); + node->accept(resolver); + ASSERT_TRUE(node.get()); + + FakeSearchContext context(8); + context.addIdx(0).idx(0).getFake() + .addResult(field, prefix_term, FakeResult() + .doc(4).pos(2).len(50).doc(8).pos(2).len(50)) + .addResult(field, string_term, FakeResult() + .doc(4).pos(40).len(50).doc(8).pos(5).len(50)); + + SearchIterator::UP iterator = getIterator(*node, context); + ASSERT_TRUE(iterator.get()); + EXPECT_TRUE(!iterator->seek(4)); + EXPECT_TRUE(iterator->seek(8)); +} + +void Test::requireThatONearIteratorsCanBeBuilt() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addONear(2, 4); + builder.addStringTerm(string_term, field, 1, Weight(2)); + builder.addStringTerm(prefix_term, field, 1, Weight(2)); + Node::UP node = builder.build(); + ResolveViewVisitor resolver(ViewResolver(), plain_index_env); + node->accept(resolver); + ASSERT_TRUE(node.get()); + + FakeSearchContext context(8); + context.addIdx(0).idx(0).getFake() + .addResult(field, string_term, FakeResult() + .doc(4).pos(5).len(50).doc(8).pos(2).len(50)) + .addResult(field, prefix_term, FakeResult() + .doc(4).pos(2).len(50).doc(8).pos(5).len(50)); + + SearchIterator::UP iterator = getIterator(*node, context); + ASSERT_TRUE(iterator.get()); + EXPECT_TRUE(!iterator->seek(4)); + EXPECT_TRUE(iterator->seek(8)); +} + +void Test::requireThatPhraseIteratorsCanBeBuilt() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addPhrase(3, field, 0, Weight(42)); + builder.addStringTerm(string_term, field, 1, Weight(2)); + 
builder.addStringTerm(prefix_term, field, 1, Weight(2)); + builder.addStringTerm(suffix_term, field, 1, Weight(2)); + Node::UP node = builder.build(); + ResolveViewVisitor resolver(ViewResolver(), plain_index_env); + node->accept(resolver); + ASSERT_TRUE(node.get()); + + FakeSearchContext context(9); + context.addIdx(0).idx(0).getFake() + .addResult(field, string_term, FakeResult() + .doc(4).pos(3).len(50) + .doc(5).pos(2).len(50) + .doc(8).pos(2).len(50) + .doc(9).pos(2).len(50)) + .addResult(field, prefix_term, FakeResult() + .doc(4).pos(2).len(50) + .doc(5).pos(4).len(50) + .doc(8).pos(3).len(50)) + .addResult(field, suffix_term, FakeResult() + .doc(4).pos(1).len(50) + .doc(5).pos(5).len(50) + .doc(8).pos(4).len(50)); + + SearchIterator::UP iterator = getIterator(*node, context); + ASSERT_TRUE(iterator.get()); + EXPECT_TRUE(!iterator->seek(4)); + EXPECT_TRUE(!iterator->seek(5)); + EXPECT_TRUE(iterator->seek(8)); + EXPECT_TRUE(!iterator->seek(9)); + EXPECT_TRUE(iterator->isAtEnd()); +} + +void +Test::requireThatUnknownFieldActsEmpty() +{ + FakeSearchContext context; + context.addIdx(0).idx(0).getFake() + .addResult(unknown_field, string_term, FakeResult() + .doc(4).pos(3).len(50) + .doc(5).pos(2).len(50)); + + ProtonNodeTypes::StringTerm + node(string_term, unknown_field, string_id, string_weight); + node.resolve(ViewResolver(), plain_index_env); + + std::vector<const ITermData *> terms; + TermDataExtractor::extractTerms(node, terms); + + SearchIterator::UP iterator = getIterator(node, context); + + ASSERT_TRUE(EXPECT_EQUAL(1u, terms.size())); + EXPECT_EQUAL(0u, terms[0]->numFields()); + + ASSERT_TRUE(iterator.get()); + EXPECT_TRUE(!iterator->seek(1)); + EXPECT_TRUE(iterator->isAtEnd()); +} + +void +Test::requireThatIllegalFieldsAreIgnored() +{ + ProtonNodeTypes::StringTerm + node(string_term, unknown_field, string_id, string_weight); + node.resolve(ViewResolver(), plain_index_env); + + FakeRequestContext requestContext; + FakeSearchContext context; + + 
MatchDataLayout mdl; + MatchDataReserveVisitor reserve_visitor(mdl); + node.accept(reserve_visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, node, context); + + EXPECT_EQUAL(0u, node.numFields()); + + MatchData::UP match_data = mdl.createMatchData(); + EXPECT_EQUAL(0u, match_data->getNumTermFields()); +} + +void Test::requireThatQueryGluesEverythingTogether() { + QueryBuilder<ProtonNodeTypes> builder; + builder.addStringTerm(string_term, field, 1, Weight(2)); + string stack_dump = StackDumpCreator::create(*builder.build()); + + Query query; + query.buildTree(stack_dump, "", ViewResolver(), plain_index_env); + vector<const ITermData *> term_data; + query.extractTerms(term_data); + EXPECT_EQUAL(1u, term_data.size()); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.setLimit(42); + MatchDataLayout mdl; + query.reserveHandles(requestContext, context, mdl); + MatchData::UP md = mdl.createMatchData(); + EXPECT_EQUAL(1u, md->getNumTermFields()); + + query.optimize(); + query.fetchPostings(); + SearchIterator::UP search = query.createSearch(*md); + ASSERT_TRUE(search.get()); +} + +void checkQueryAddsLocation(Test &test, const string &loc_string) { + const string loc_field = "location"; + + fef_test::IndexEnvironment index_environment; + FieldInfo field_info(FieldType::INDEX, CollectionType::SINGLE, field, 0); + index_environment.getFields().push_back(field_info); + field_info = FieldInfo(FieldType::ATTRIBUTE, CollectionType::SINGLE, + PositionDataType::getZCurveFieldName(loc_field), 1); + index_environment.getFields().push_back(field_info); + + QueryBuilder<ProtonNodeTypes> builder; + builder.addStringTerm(string_term, field, 1, Weight(2)); + string stack_dump = StackDumpCreator::create(*builder.build()); + + Query query; + query.buildTree(stack_dump, + loc_field + ":" + loc_string, + ViewResolver(), index_environment); + vector<const ITermData *> term_data; + query.extractTerms(term_data); + test.EXPECT_EQUAL(1u, 
term_data.size()); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.addIdx(0).setLimit(42); + MatchDataLayout mdl; + query.reserveHandles(requestContext, context, mdl); + MatchData::UP md = mdl.createMatchData(); + test.EXPECT_EQUAL(2u, md->getNumTermFields()); + + query.fetchPostings(); + SearchIterator::UP search = query.createSearch(*md); + test.ASSERT_TRUE(search.get()); + if (!test.EXPECT_NOT_EQUAL(string::npos, search->asString().find(loc_string))) { + fprintf(stderr, "search (missing loc_string): %s", search->asString().c_str()); + } +} + +void Test::requireThatQueryAddsLocation() { + checkQueryAddsLocation(*this, "(2,10,10,3,0,1,0,0)"); +} + +void Test::requireThatQueryAddsLocationCutoff() { + checkQueryAddsLocation(*this, "[2,10,10,20,20]"); +} + +void +Test::requireThatFakeFieldSearchDumpsDiffer() +{ + FakeRequestContext requestContext; + uint32_t fieldId = 0; + MatchDataLayout mdl; + TermFieldHandle handle = mdl.allocTermField(fieldId); + MatchData::UP match_data = mdl.createMatchData(); + + FakeSearchable a; + FakeSearchable b; + a.tag("a"); + b.tag("b"); + ProtonStringTerm n1("term1", "field1", string_id, string_weight); + ProtonStringTerm n2("term2", "field1", string_id, string_weight); + ProtonStringTerm n3("term1", "field2", string_id, string_weight); + + FieldSpecList fields1; + FieldSpecList fields2; + fields1.add(FieldSpec("field1", fieldId, handle)); + fields2.add(FieldSpec("field2", fieldId, handle)); + + Blueprint::UP l1(a.createBlueprint(requestContext, fields1, n1)); // reference + Blueprint::UP l2(a.createBlueprint(requestContext, fields1, n2)); // term + Blueprint::UP l3(a.createBlueprint(requestContext, fields2, n3)); // field + Blueprint::UP l4(b.createBlueprint(requestContext, fields1, n1)); // tag + + l1->fetchPostings(true); + l2->fetchPostings(true); + l3->fetchPostings(true); + l4->fetchPostings(true); + + SearchIterator::UP s1(l1->createSearch(*match_data, true)); + SearchIterator::UP 
s2(l2->createSearch(*match_data, true)); + SearchIterator::UP s3(l3->createSearch(*match_data, true)); + SearchIterator::UP s4(l4->createSearch(*match_data, true)); + + EXPECT_NOT_EQUAL(s1->asString(), s2->asString()); + EXPECT_NOT_EQUAL(s1->asString(), s3->asString()); + EXPECT_NOT_EQUAL(s1->asString(), s4->asString()); +} + +void Test::requireThatNoDocsGiveZeroDocFrequency() { + ProtonStringTerm node(string_term, field, string_id, string_weight); + node.resolve(ViewResolver(), plain_index_env); + FakeSearchContext context; + FakeRequestContext requestContext; + context.setLimit(0); + + MatchDataLayout mdl; + MatchDataReserveVisitor reserve_visitor(mdl); + node.accept(reserve_visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, node, context); + + EXPECT_EQUAL(1u, node.numFields()); + EXPECT_EQUAL(0.0, node.field(0).getDocFreq()); +} + +void Test::requireThatWeakAndBlueprintsAreCreatedCorrectly() { + using search::queryeval::WeakAndBlueprint; + + ProtonWeakAnd wand(123, "view"); + wand.append(Node::UP(new ProtonStringTerm("foo", field, 0, Weight(3)))); + wand.append(Node::UP(new ProtonStringTerm("bar", field, 0, Weight(7)))); + + ResolveViewVisitor resolve_visitor(ViewResolver(), plain_index_env); + wand.accept(resolve_visitor); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.addIdx(0).idx(0).getFake() + .addResult(field, "foo", FakeResult().doc(1).doc(3)) + .addResult(field, "bar", FakeResult().doc(2).doc(3).doc(4)); + + MatchDataLayout mdl; + MatchDataReserveVisitor reserve_visitor(mdl); + wand.accept(reserve_visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, wand, context); + WeakAndBlueprint *wbp = dynamic_cast<WeakAndBlueprint*>(blueprint.get()); + ASSERT_TRUE(wbp != 0); + ASSERT_EQUAL(2u, wbp->getWeights().size()); + ASSERT_EQUAL(2u, wbp->childCnt()); + EXPECT_EQUAL(123u, wbp->getN()); + EXPECT_EQUAL(3u, wbp->getWeights()[0]); + EXPECT_EQUAL(7u, wbp->getWeights()[1]); + 
EXPECT_EQUAL(2u, wbp->getChild(0).getState().estimate().estHits); + EXPECT_EQUAL(3u, wbp->getChild(1).getState().estimate().estHits); +} + +void Test::requireThatParallelWandBlueprintsAreCreatedCorrectly() { + using search::queryeval::WeakAndBlueprint; + + ProtonWandTerm wand(field, 42, Weight(100), 123, 9000, 1.25); + wand.append(Node::UP(new ProtonStringTerm("foo", field, 0, Weight(3)))); + wand.append(Node::UP(new ProtonStringTerm("bar", field, 0, Weight(7)))); + + ResolveViewVisitor resolve_visitor(ViewResolver(), attribute_index_env); + wand.accept(resolve_visitor); + + FakeRequestContext requestContext; + FakeSearchContext context; + context.setLimit(1000); + context.addIdx(0).idx(0).getFake() + .addResult(field, "foo", FakeResult().doc(1).doc(3)) + .addResult(field, "bar", FakeResult().doc(2).doc(3).doc(4)); + + MatchDataLayout mdl; + MatchDataReserveVisitor reserve_visitor(mdl); + wand.accept(reserve_visitor); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, wand, context); + ParallelWeakAndBlueprint *wbp = dynamic_cast<ParallelWeakAndBlueprint*>(blueprint.get()); + ASSERT_TRUE(wbp != nullptr); + EXPECT_EQUAL(9000, wbp->getScoreThreshold()); + EXPECT_EQUAL(1.25, wbp->getThresholdBoostFactor()); + EXPECT_EQUAL(1000u, wbp->get_docid_limit()); +} + +void +Test::requireThatBlackListBlueprintCanBeUsed() +{ + QueryBuilder<ProtonNodeTypes> builder; + builder.addStringTerm("foo", field, field_id, string_weight); + std::string stackDump = StackDumpCreator::create(*builder.build()); + + Query query; + query.buildTree(stackDump, "", ViewResolver(), plain_index_env); + + FakeSearchContext context(42); + context.addIdx(0).idx(0).getFake() + .addResult(field, "foo", FakeResult().doc(1).doc(3).doc(5).doc(7).doc(9).doc(11)); + context.setLimit(42); + + query.setBlackListBlueprint(SimpleBlueprint::UP(new SimpleBlueprint(SimpleResult().addHit(3).addHit(9)))); + + FakeRequestContext requestContext; + MatchDataLayout mdl; + 
query.reserveHandles(requestContext, context, mdl); + MatchData::UP md = mdl.createMatchData(); + + query.optimize(); + query.fetchPostings(); + SearchIterator::UP search = query.createSearch(*md); + SimpleResult exp = SimpleResult().addHit(1).addHit(5).addHit(7).addHit(11); + SimpleResult act; + act.search(*search); + EXPECT_EQUAL(exp, act); +} + +int +Test::Main() +{ + setupIndexEnvironments(); + + TEST_INIT("query_test"); + + TEST_CALL(requireThatMatchDataIsReserved); + TEST_CALL(requireThatMatchDataIsReservedForEachFieldInAView); + TEST_CALL(requireThatTermsAreLookedUp); + TEST_CALL(requireThatTermsAreLookedUpInMultipleFieldsFromAView); + TEST_CALL(requireThatAttributeTermsAreLookedUpInAttributeSource); + TEST_CALL(requireThatAttributeTermDataHandlesAreAllocated); + TEST_CALL(requireThatTermDataIsFilledIn); + TEST_CALL(requireThatSingleIndexCanUseBlendingAsBlacklisting); + TEST_CALL(requireThatIteratorsAreBuiltWithBlending); + TEST_CALL(requireThatIteratorsAreBuiltForAllTermNodes); + TEST_CALL(requireThatNearIteratorsCanBeBuilt); + TEST_CALL(requireThatONearIteratorsCanBeBuilt); + TEST_CALL(requireThatPhraseIteratorsCanBeBuilt); + TEST_CALL(requireThatUnknownFieldActsEmpty); + TEST_CALL(requireThatIllegalFieldsAreIgnored); + TEST_CALL(requireThatQueryGluesEverythingTogether); + TEST_CALL(requireThatQueryAddsLocation); + TEST_CALL(requireThatQueryAddsLocationCutoff); + TEST_CALL(requireThatFakeFieldSearchDumpsDiffer); + TEST_CALL(requireThatNoDocsGiveZeroDocFrequency); + TEST_CALL(requireThatWeakAndBlueprintsAreCreatedCorrectly); + TEST_CALL(requireThatParallelWandBlueprintsAreCreatedCorrectly); + TEST_CALL(requireThatBlackListBlueprintCanBeUsed); + + TEST_DONE(); +} + + +} // namespace +} // namespace matching +} // namespace proton + +TEST_APPHOOK(proton::matching::Test); diff --git a/searchcore/src/tests/proton/matching/querynodes_test.cpp b/searchcore/src/tests/proton/matching/querynodes_test.cpp new file mode 100644 index 00000000000..054b70f9b98 --- 
/dev/null +++ b/searchcore/src/tests/proton/matching/querynodes_test.cpp @@ -0,0 +1,486 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for querynodes. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("querynodes_test"); + +#include <vespa/searchcore/proton/matching/querynodes.h> + +#include <vespa/searchcore/proton/matching/fakesearchcontext.h> +#include <vespa/searchcore/proton/matching/blueprintbuilder.h> +#include <vespa/searchcore/proton/matching/matchdatareservevisitor.h> +#include <vespa/searchcore/proton/matching/resolveviewvisitor.h> +#include <vespa/searchcore/proton/matching/viewresolver.h> +#include <vespa/searchlib/fef/fieldinfo.h> +#include <vespa/searchlib/fef/fieldtype.h> +#include <vespa/searchlib/fef/matchdata.h> +#include <vespa/searchlib/fef/matchdatalayout.h> +#include <vespa/searchlib/fef/termfieldmatchdata.h> +#include <vespa/searchlib/fef/termfieldmatchdataarray.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/query/tree/node.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/queryeval/blueprint.h> +#include <vespa/searchlib/queryeval/isourceselector.h> +#include <vespa/searchlib/queryeval/nearsearch.h> +#include <vespa/searchlib/queryeval/orsearch.h> +#include <vespa/searchlib/queryeval/andsearch.h> +#include <vespa/searchlib/queryeval/andnotsearch.h> +#include <vespa/searchlib/queryeval/ranksearch.h> +#include <vespa/searchlib/queryeval/searchiterator.h> +#include <vespa/searchlib/queryeval/simple_phrase_search.h> +#include <vespa/searchlib/queryeval/sourceblendersearch.h> +#include <vespa/searchlib/queryeval/fake_search.h> +#include <vespa/searchlib/queryeval/fake_requestcontext.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <cstdarg> +#include <string> +#include <vector> +#include <vespa/searchlib/attribute/singlenumericattribute.hpp> + +using 
search::fef::CollectionType; +using search::fef::FieldInfo; +using search::fef::FieldType; +using search::fef::MatchData; +using search::fef::MatchDataLayout; +using search::fef::TermFieldMatchData; +using search::fef::TermFieldHandle; +using search::fef::TermFieldMatchDataArray; +using search::fef::test::IndexEnvironment; +using search::query::Node; +using search::query::QueryBuilder; +using search::queryeval::ISourceSelector; +using search::queryeval::NearSearch; +using search::queryeval::ONearSearch; +using search::queryeval::OrSearch; +using search::queryeval::AndSearch; +using search::queryeval::AndNotSearch; +using search::queryeval::RankSearch; +using search::queryeval::Blueprint; +using search::queryeval::SearchIterator; +using search::queryeval::SourceBlenderSearch; +using search::queryeval::FieldSpec; +using search::queryeval::Searchable; +using search::queryeval::FakeSearch; +using search::queryeval::FakeResult; +using search::queryeval::FakeRequestContext; +using search::queryeval::SimplePhraseSearch; +using std::string; +using std::vector; +using namespace proton::matching; +namespace fef_test = search::fef::test; + +namespace { + +template <typename T> void checkTwoFieldsTwoAttributesTwoIndexes(); +template <typename T> void checkTwoFieldsTwoAttributesOneIndex(); +template <typename T> void checkOneFieldOneAttributeTwoIndexes(); +template <typename T> void checkOneFieldNoAttributesTwoIndexes(); +template <typename T> void checkTwoFieldsNoAttributesTwoIndexes(); +template <typename T> void checkOneFieldNoAttributesOneIndex(); + +template <typename T> void checkProperBlending(); +template <typename T> void checkProperBlendingWithParent(); + +const string term = "term"; +const string phrase_term1 = "hello"; +const string phrase_term2 = "world"; +const string view = "view"; +const uint32_t id = 3; +const search::query::Weight weight(7); +const string field[] = { "field1", "field2" }; +const string attribute[] = { "attribute1", "attribute2" }; +const 
string source_tag[] = { "Source 1", "Source 2" }; +const string attribute_tag = "Attribute source"; +const uint32_t distance = 13; + +template <class SearchType> +class Create { + bool _strict; + typename SearchType::Children _children; + +public: + explicit Create(bool strict = true) : _strict(strict) {} + + Create &add(SearchIterator *s) { + _children.push_back(s); + return *this; + } + + operator SearchIterator *() const { + return SearchType::create(_children, _strict); + } +}; +typedef Create<OrSearch> MyOr; + +class ISourceSelectorDummy : public ISourceSelector +{ +public: + static SourceStore _sourceStoreDummy; + + static Iterator::UP + makeDummyIterator() + { + return Iterator::UP(new Iterator(_sourceStoreDummy)); + } +}; + +ISourceSelector::SourceStore ISourceSelectorDummy::_sourceStoreDummy("foo"); + + +typedef uint32_t SourceId; +class Blender { + bool _strict; + SourceBlenderSearch::Children _children; + +public: + explicit Blender(bool strict = true) : _strict(strict) {} + + Blender &add(SourceId source_id, SearchIterator *search) { + _children.push_back(SourceBlenderSearch::Child(search, source_id)); + return *this; + } + + operator SearchIterator *() const { + return SourceBlenderSearch::create( + ISourceSelectorDummy::makeDummyIterator(), _children, _strict); + } +}; + +SearchIterator *getTerm(const string &trm, const string &fld, const string &tag) { + static TermFieldMatchData tmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tmd); + return new FakeSearch(tag, fld, trm, FakeResult(), tfmda); +} + +class IteratorStructureTest { + int _field_count; + int _attribute_count; + int _index_count; + +public: + void setFieldCount(int count) { _field_count = count; } + void setAttributeCount(int count) { _attribute_count = count; } + void setIndexCount(int count) { _index_count = count; } + + string getIteratorAsString(Node &node) { + ViewResolver resolver; + for (int i = 0; i < _field_count; ++i) { + resolver.add(view, field[i]); + } + for (int i = 0; i < 
_attribute_count; ++i) { + resolver.add(view, attribute[i]); + } + + fef_test::IndexEnvironment index_environment; + uint32_t fieldId = 0; + for (int i = 0; i < _field_count; ++i) { + FieldInfo field_info(FieldType::INDEX, CollectionType::SINGLE, field[i], fieldId++); + index_environment.getFields().push_back(field_info); + } + for (int i = 0; i < _attribute_count; ++i) { + FieldInfo field_info(FieldType::ATTRIBUTE, CollectionType::SINGLE, attribute[i], fieldId++); + index_environment.getFields().push_back(field_info); + } + + ResolveViewVisitor resolve_visitor(resolver, index_environment); + node.accept(resolve_visitor); + + FakeSearchContext context; + context.attr().tag(attribute_tag); + + for (int i = 0; i < _index_count; ++i) { + context.addIdx(i).idx(i).getFake().tag(source_tag[i]); + } + + MatchDataLayout mdl; + FakeRequestContext requestContext; + MatchDataReserveVisitor reserve_visitor(mdl); + node.accept(reserve_visitor); + MatchData::UP match_data = mdl.createMatchData(); + + Blueprint::UP blueprint = BlueprintBuilder::build(requestContext, node, context); + blueprint->fetchPostings(true); + return blueprint->createSearch(*match_data, true)->asString(); + } + + template <typename Tag> string getIteratorAsString(); +}; + +typedef QueryBuilder<ProtonNodeTypes> QB; +struct Phrase { + void addToBuilder(QB& b) { b.addPhrase(2, view, id, weight); } +}; +struct Near { void addToBuilder(QB& b) { b.addNear(2, distance); } }; +struct ONear { void addToBuilder(QB& b) { b.addONear(2, distance); } }; +struct Or { void addToBuilder(QB& b) { b.addOr(2); } }; +struct And { void addToBuilder(QB& b) { b.addAnd(2); } }; +struct AndNot { void addToBuilder(QB& b) { b.addAndNot(2); } }; +struct Rank { void addToBuilder(QB& b) { b.addRank(2); } }; +struct Term {}; + +template <typename Tag> +string IteratorStructureTest::getIteratorAsString() { + QueryBuilder<ProtonNodeTypes> query_builder; + Tag().addToBuilder(query_builder); + query_builder.addStringTerm(phrase_term1, view, 
id, weight); + query_builder.addStringTerm(phrase_term2, view, id, weight); + Node::UP node = query_builder.build(); + return getIteratorAsString(*node); +} + +template <> +string IteratorStructureTest::getIteratorAsString<Term>() { + ProtonStringTerm node(term, view, id, weight); + return getIteratorAsString(node); +} + +template <typename T> +SearchIterator *getLeaf(const string &fld, const string &tag) { + return getTerm(term, fld, tag); +} + +template <> +SearchIterator *getLeaf<Phrase>(const string &fld, const string &tag) { + SimplePhraseSearch::Children children; + children.push_back(getTerm(phrase_term1, fld, tag)); + children.push_back(getTerm(phrase_term2, fld, tag)); + static TermFieldMatchData tmd; + TermFieldMatchDataArray tfmda; + tfmda.add(&tmd).add(&tmd); + vector<uint32_t> eval_order(2); + return new SimplePhraseSearch(children, MatchData::UP(), tfmda, eval_order, tmd, true); +} + +template <typename NearType> +SearchIterator *getNearParent(SearchIterator *a, SearchIterator *b) { + typename NearType::Children children; + children.push_back(a); + children.push_back(b); + TermFieldMatchDataArray data; + static TermFieldMatchData tmd; + // we only check how many term/field combinations + // are below the NearType parent: + // two terms searching in (two index fields + two attribute fields) + data.add(&tmd).add(&tmd).add(&tmd).add(&tmd) + .add(&tmd).add(&tmd).add(&tmd).add(&tmd); + return new NearType(children, data, distance, true); +} + +template <typename SearchType> +SearchIterator *getSimpleParent(SearchIterator *a, SearchIterator *b) { + typename SearchType::Children children; + children.push_back(a); + children.push_back(b); + return SearchType::create(children, true); +} + +template <typename T> +SearchIterator *getParent(SearchIterator *a, SearchIterator *b); + +template <> +SearchIterator *getParent<Near>(SearchIterator *a, SearchIterator *b) { + return getNearParent<NearSearch>(a, b); +} + +template <> +SearchIterator 
*getParent<ONear>(SearchIterator *a, SearchIterator *b) { + return getNearParent<ONearSearch>(a, b); +} + +template <> +SearchIterator *getParent<Or>(SearchIterator *a, SearchIterator *b) { + return getSimpleParent<OrSearch>(a, b); +} + +template <> +SearchIterator *getParent<And>(SearchIterator *a, SearchIterator *b) { + return getSimpleParent<AndSearch>(a, b); +} + +template <> +SearchIterator *getParent<AndNot>(SearchIterator *a, SearchIterator *b) { + return getSimpleParent<AndNotSearch>(a, b); +} + +template <> +SearchIterator *getParent<Rank>(SearchIterator *a, SearchIterator *b) { + return getSimpleParent<RankSearch>(a, b); +} + +template <typename T> bool bothStrict() { return false; } + +template <> bool bothStrict<Or>() { return true; } + +template <typename T> +void checkTwoFieldsTwoAttributesTwoIndexes() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(2); + structure_test.setAttributeCount(2); + structure_test.setIndexCount(2); + + SearchIterator::UP expected( + MyOr() + .add(getLeaf<T>(attribute[0], attribute_tag)) + .add(getLeaf<T>(attribute[1], attribute_tag)) + .add(Blender() + .add(SourceId(0), MyOr() + .add(getLeaf<T>(field[0], source_tag[0])) + .add(getLeaf<T>(field[1], source_tag[0]))) + .add(SourceId(1), MyOr() + .add(getLeaf<T>(field[0], source_tag[1])) + .add(getLeaf<T>(field[1], source_tag[1]))))); + EXPECT_EQUAL(expected->asString(), structure_test.getIteratorAsString<T>()); +} + +template <typename T> +void checkTwoFieldsTwoAttributesOneIndex() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(2); + structure_test.setAttributeCount(2); + structure_test.setIndexCount(1); + + SearchIterator::UP expected( + MyOr() + .add(getLeaf<T>(attribute[0], attribute_tag)) + .add(getLeaf<T>(attribute[1], attribute_tag)) + .add(Blender() + .add(SourceId(0), MyOr() + .add(getLeaf<T>(field[0], source_tag[0])) + .add(getLeaf<T>(field[1], source_tag[0]))))); + EXPECT_EQUAL(expected->asString(), 
structure_test.getIteratorAsString<T>()); +} + +template <typename T> +void checkOneFieldOneAttributeTwoIndexes() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(1); + structure_test.setAttributeCount(1); + structure_test.setIndexCount(2); + + SearchIterator::UP expected( + MyOr() + .add(getLeaf<T>(attribute[0], attribute_tag)) + .add(Blender() + .add(SourceId(0), + getLeaf<T>(field[0], source_tag[0])) + .add(SourceId(1), + getLeaf<T>(field[0], source_tag[1])))); + EXPECT_EQUAL(expected->asString(), structure_test.getIteratorAsString<T>()); +} + +template <typename T> +void checkOneFieldNoAttributesTwoIndexes() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(1); + structure_test.setAttributeCount(0); + structure_test.setIndexCount(2); + + SearchIterator::UP expected( + Blender() + .add(SourceId(0), getLeaf<T>(field[0], source_tag[0])) + .add(SourceId(1), getLeaf<T>(field[0], source_tag[1]))); + EXPECT_EQUAL(expected->asString(), structure_test.getIteratorAsString<T>()); +} + +template <typename T> +void checkTwoFieldsNoAttributesTwoIndexes() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(2); + structure_test.setAttributeCount(0); + structure_test.setIndexCount(2); + + SearchIterator::UP expected( + Blender() + .add(SourceId(0), MyOr() + .add(getLeaf<T>(field[0], source_tag[0])) + .add(getLeaf<T>(field[1], source_tag[0]))) + .add(SourceId(1), MyOr() + .add(getLeaf<T>(field[0], source_tag[1])) + .add(getLeaf<T>(field[1], source_tag[1])))); + EXPECT_EQUAL(expected->asString(), structure_test.getIteratorAsString<T>()); +} + +template <typename T> +void checkOneFieldNoAttributesOneIndex() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(1); + structure_test.setAttributeCount(0); + structure_test.setIndexCount(1); + + SearchIterator::UP expected( + Blender() + .add(SourceId(0), getLeaf<T>(field[0], source_tag[0]))); + EXPECT_EQUAL(expected->asString(), 
structure_test.getIteratorAsString<T>()); +} + +template <typename T> +void checkProperBlending() { + TEST_DO(checkTwoFieldsTwoAttributesTwoIndexes<T>()); + TEST_DO(checkTwoFieldsTwoAttributesOneIndex<T>()); + TEST_DO(checkOneFieldOneAttributeTwoIndexes<T>()); + TEST_DO(checkOneFieldNoAttributesTwoIndexes<T>()); + TEST_DO(checkTwoFieldsNoAttributesTwoIndexes<T>()); + TEST_DO(checkOneFieldNoAttributesOneIndex<T>()); +} + +template <typename T> +void checkProperBlendingWithParent() { + IteratorStructureTest structure_test; + structure_test.setFieldCount(2); + structure_test.setAttributeCount(2); + structure_test.setIndexCount(2); + + SearchIterator::UP expected( + getParent<T>( + MyOr() + .add(getTerm(phrase_term1, attribute[0], attribute_tag)) + .add(getTerm(phrase_term1, attribute[1], attribute_tag)) + .add(Blender() + .add(SourceId(0), MyOr() + .add(getTerm(phrase_term1, field[0], source_tag[0])) + .add(getTerm(phrase_term1, field[1], source_tag[0]))) + .add(SourceId(1), MyOr() + .add(getTerm(phrase_term1, field[0], source_tag[1])) + .add(getTerm(phrase_term1, field[1], source_tag[1])))), + MyOr(bothStrict<T>()) + .add(getTerm(phrase_term2, attribute[0], attribute_tag)) + .add(getTerm(phrase_term2, attribute[1], attribute_tag)) + .add(Blender(bothStrict<T>()) + .add(SourceId(0), MyOr(bothStrict<T>()) + .add(getTerm(phrase_term2, field[0], source_tag[0])) + .add(getTerm(phrase_term2, field[1], source_tag[0]))) + .add(SourceId(1), MyOr(bothStrict<T>()) + .add(getTerm(phrase_term2, field[0], source_tag[1])) + .add(getTerm(phrase_term2, field[1], source_tag[1])))))); + EXPECT_EQUAL(expected->asString(), structure_test.getIteratorAsString<T>()); +} + +TEST("requireThatTermNodeSearchIteratorsGetProperBlending") { + TEST_DO(checkProperBlending<Term>()); +} + +TEST("requireThatPhrasesGetProperBlending") { + TEST_DO(checkProperBlending<Phrase>()); +} + +TEST("requireThatNearGetProperBlending") { + TEST_DO(checkProperBlendingWithParent<Near>()); +} + 
+TEST("requireThatONearGetProperBlending") { + TEST_DO(checkProperBlendingWithParent<ONear>()); +} + +TEST("requireThatSimpleIntermediatesGetProperBlending") { + TEST_DO(checkProperBlendingWithParent<And>()); + TEST_DO(checkProperBlendingWithParent<AndNot>()); + TEST_DO(checkProperBlendingWithParent<Or>()); + TEST_DO(checkProperBlendingWithParent<Rank>()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp b/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp new file mode 100644 index 00000000000..212762389f0 --- /dev/null +++ b/searchcore/src/tests/proton/matching/resolveviewvisitor_test.cpp @@ -0,0 +1,142 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for resolveviewvisitor. + +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("resolveviewvisitor_test"); + +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchcore/proton/matching/querynodes.h> +#include <vespa/searchcore/proton/matching/resolveviewvisitor.h> +#include <vespa/searchcore/proton/matching/viewresolver.h> +#include <vespa/searchlib/query/tree/node.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <string> + +namespace fef_test = search::fef::test; +using search::fef::CollectionType; +using search::fef::FieldInfo; +using search::fef::FieldType; +using search::fef::test::IndexEnvironment; +using search::query::Node; +using search::query::QueryBuilder; +using std::string; +using namespace proton::matching; + +namespace { + +const string term = "term"; +const string view = "view"; +const string field1 = "field1"; +const string field2 = "field2"; +const uint32_t id = 1; +const search::query::Weight weight(2); + +ViewResolver getResolver(const string &test_view) { + ViewResolver resolver; + resolver.add(test_view, 
field1); + resolver.add(test_view, field2); + return resolver; +} + +struct Fixture { + IndexEnvironment index_environment; + + Fixture() { + index_environment.getFields().push_back(FieldInfo( + FieldType::INDEX, CollectionType::SINGLE, field1, 0)); + index_environment.getFields().push_back(FieldInfo( + FieldType::INDEX, CollectionType::SINGLE, field2, 1)); + } +}; + +TEST_F("requireThatFieldsResolveToThemselves", Fixture) { + ViewResolver resolver = getResolver(view); + + QueryBuilder<ProtonNodeTypes> builder; + ProtonTermData &base = builder.addStringTerm(term, field1, id, weight); + Node::UP node = builder.build(); + + ResolveViewVisitor visitor(resolver, f.index_environment); + node->accept(visitor); + + EXPECT_EQUAL(1u, base.numFields()); + EXPECT_EQUAL(field1, base.field(0).field_name); +} + +void checkResolveAlias(const string &view_name, const string &alias, + const Fixture &f) { + ViewResolver resolver = getResolver(view_name); + + QueryBuilder<ProtonNodeTypes> builder; + ProtonTermData &base = builder.addStringTerm(term, alias, id, weight); + Node::UP node = builder.build(); + + ResolveViewVisitor visitor(resolver, f.index_environment); + node->accept(visitor); + + ASSERT_EQUAL(2u, base.numFields()); + EXPECT_EQUAL(field1, base.field(0).field_name); + EXPECT_EQUAL(field2, base.field(1).field_name); +} + +TEST_F("requireThatViewsCanResolveToMultipleFields", Fixture) { + checkResolveAlias(view, view, f); +} + +TEST_F("requireThatEmptyViewResolvesAsDefault", Fixture) { + const string default_view = "default"; + const string empty_view = ""; + checkResolveAlias(default_view, empty_view, f); +} + +TEST_F("requireThatWeCanForceFilterField", Fixture) { + ViewResolver resolver = getResolver(view); + f.index_environment.getFields().back().setFilter(true); + ResolveViewVisitor visitor(resolver, f.index_environment); + + { // use filter field settings from index environment + QueryBuilder<ProtonNodeTypes> builder; + ProtonStringTerm &sterm = + 
builder.addStringTerm(term, view, id, weight); + Node::UP node = builder.build(); + node->accept(visitor); + ASSERT_EQUAL(2u, sterm.numFields()); + EXPECT_TRUE(!sterm.field(0).filter_field); + EXPECT_TRUE(sterm.field(1).filter_field); + } + { // force filter on all fields + QueryBuilder<ProtonNodeTypes> builder; + ProtonStringTerm &sterm = + builder.addStringTerm(term, view, id, weight); + sterm.setPositionData(false); // force filter + Node::UP node = builder.build(); + node->accept(visitor); + ASSERT_EQUAL(2u, sterm.numFields()); + EXPECT_TRUE(sterm.field(0).filter_field); + EXPECT_TRUE(sterm.field(1).filter_field); + } +} + +TEST_F("require that equiv nodes resolve view from children", Fixture) { + ViewResolver resolver; + resolver.add(view, field1); + + QueryBuilder<ProtonNodeTypes> builder; + ProtonTermData &base = builder.addEquiv(2, id, weight); + builder.addStringTerm(term, view, 42, weight); + builder.addStringTerm(term, field2, 43, weight); + Node::UP node = builder.build(); + + ResolveViewVisitor visitor(resolver, f.index_environment); + node->accept(visitor); + + ASSERT_EQUAL(2u, base.numFields()); + EXPECT_EQUAL(field1, base.field(0).field_name); + EXPECT_EQUAL(field2, base.field(1).field_name); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/sessionmanager_test.cpp b/searchcore/src/tests/proton/matching/sessionmanager_test.cpp new file mode 100644 index 00000000000..078a6985fc4 --- /dev/null +++ b/searchcore/src/tests/proton/matching/sessionmanager_test.cpp @@ -0,0 +1,87 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for sessionmanager. 
+ +#include <vespa/log/log.h> +LOG_SETUP("sessionmanager_test"); +#include <vespa/fastos/fastos.h> + +#include <vespa/searchcore/proton/matching/sessionmanager.h> +#include <vespa/searchcore/proton/matching/session_manager_explorer.h> +#include <vespa/searchcore/proton/matching/search_session.h> +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/test/insertion_operators.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <vespa/vespalib/data/slime/slime.h> + +using vespalib::string; +using namespace proton; +using namespace proton::matching; +using vespalib::StateExplorer; + +namespace { + +void checkStats(SessionManager::Stats stats, uint32_t numInsert, + uint32_t numPick, uint32_t numDropped, uint32_t numCached, + uint32_t numTimedout) { + EXPECT_EQUAL(numInsert, stats.numInsert); + EXPECT_EQUAL(numPick, stats.numPick); + EXPECT_EQUAL(numDropped, stats.numDropped); + EXPECT_EQUAL(numCached, stats.numCached); + EXPECT_EQUAL(numTimedout, stats.numTimedout); +} + + +TEST("require that SessionManager handles SearchSessions.") { + string session_id("foo"); + fastos::TimeStamp doom(1000); + MatchToolsFactory::UP mtf; + SearchSession::OwnershipBundle owned_objects; + SearchSession::SP session( + new SearchSession(session_id, doom, std::move(mtf), + std::move(owned_objects))); + + SessionManager session_manager(10); + TEST_DO(checkStats(session_manager.getSearchStats(), 0, 0, 0, 0, 0)); + session_manager.insert(std::move(session)); + TEST_DO(checkStats(session_manager.getSearchStats(), 1, 0, 0, 1, 0)); + session = session_manager.pickSearch(session_id); + EXPECT_TRUE(session.get()); + TEST_DO(checkStats(session_manager.getSearchStats(), 0, 1, 0, 1, 0)); + session_manager.insert(std::move(session)); + TEST_DO(checkStats(session_manager.getSearchStats(), 1, 0, 0, 1, 0)); + session_manager.pruneTimedOutSessions(500); + TEST_DO(checkStats(session_manager.getSearchStats(), 0, 0, 0, 1, 0)); + session_manager.pruneTimedOutSessions(2000); + 
TEST_DO(checkStats(session_manager.getSearchStats(), 0, 0, 0, 0, 1)); + + session = session_manager.pickSearch(session_id); + EXPECT_FALSE(session.get()); +} + +TEST("require that SessionManager can be explored") { + fastos::TimeStamp doom(1000); + SessionManager session_manager(10); + session_manager.insert(SearchSession::SP(new SearchSession("foo", doom, + MatchToolsFactory::UP(), SearchSession::OwnershipBundle()))); + session_manager.insert(SearchSession::SP(new SearchSession("bar", doom, + MatchToolsFactory::UP(), SearchSession::OwnershipBundle()))); + session_manager.insert(SearchSession::SP(new SearchSession("baz", doom, + MatchToolsFactory::UP(), SearchSession::OwnershipBundle()))); + SessionManagerExplorer explorer(session_manager); + EXPECT_EQUAL(std::vector<vespalib::string>({"search"}), + explorer.get_children_names()); + std::unique_ptr<StateExplorer> search = explorer.get_child("search"); + ASSERT_TRUE(search.get() != nullptr); + vespalib::Slime state; + vespalib::Slime full_state; + search->get_state(vespalib::slime::SlimeInserter(state), false); + search->get_state(vespalib::slime::SlimeInserter(full_state), true); + EXPECT_EQUAL(3, state.get()["numSessions"].asLong()); + EXPECT_EQUAL(3, full_state.get()["numSessions"].asLong()); + EXPECT_EQUAL(0u, state.get()["sessions"].entries()); + EXPECT_EQUAL(3u, full_state.get()["sessions"].entries()); +} + +} // namespace + +TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchcore/src/tests/proton/matching/termdataextractor_test.cpp b/searchcore/src/tests/proton/matching/termdataextractor_test.cpp new file mode 100644 index 00000000000..d61267b7d31 --- /dev/null +++ b/searchcore/src/tests/proton/matching/termdataextractor_test.cpp @@ -0,0 +1,167 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +// Unit tests for TermDataExtractor. 
+ +#include <vespa/fastos/fastos.h> +#include <vespa/log/log.h> +LOG_SETUP("termdataextractor_test"); + +#include <vespa/searchcore/proton/matching/querynodes.h> +#include <vespa/searchcore/proton/matching/resolveviewvisitor.h> +#include <vespa/searchcore/proton/matching/termdataextractor.h> +#include <vespa/searchcore/proton/matching/viewresolver.h> +#include <vespa/searchlib/fef/tablemanager.h> +#include <vespa/searchlib/fef/itermdata.h> +#include <vespa/searchlib/fef/test/indexenvironment.h> +#include <vespa/searchlib/query/tree/location.h> +#include <vespa/searchlib/query/tree/point.h> +#include <vespa/searchlib/query/tree/querybuilder.h> +#include <vespa/searchlib/query/weight.h> +#include <vespa/vespalib/testkit/testapp.h> +#include <string> +#include <vector> + +namespace fef_test = search::fef::test; +using search::fef::CollectionType; +using search::fef::FieldInfo; +using search::fef::FieldType; +using search::fef::ITermData; +using search::fef::IIndexEnvironment; +using search::query::Location; +using search::query::Node; +using search::query::Point; +using search::query::QueryBuilder; +using search::query::Range; +using search::query::Weight; +using std::string; +using std::vector; +using namespace proton::matching; + +namespace search { class AttributeManager; } + +namespace { + +class Test : public vespalib::TestApp { + void requireThatTermsAreAdded(); + void requireThatAViewWithTwoFieldsGivesOneTermDataPerTerm(); + void requireThatUnrankedTermsAreSkipped(); + void requireThatNegativeTermsAreSkipped(); + +public: + int Main(); +}; + +int +Test::Main() +{ + TEST_INIT("termdataextractor_test"); + + TEST_DO(requireThatTermsAreAdded()); + TEST_DO(requireThatAViewWithTwoFieldsGivesOneTermDataPerTerm()); + TEST_DO(requireThatUnrankedTermsAreSkipped()); + TEST_DO(requireThatNegativeTermsAreSkipped()); + + TEST_DONE(); +} + +const string field = "field"; +const uint32_t id[] = { 10, 11, 12, 13, 14, 15, 16, 17, 18 }; + +Node::UP getQuery(const ViewResolver 
&resolver) +{ + QueryBuilder<ProtonNodeTypes> query_builder; + query_builder.addAnd(8); + query_builder.addNumberTerm("0.0", field, id[0], Weight(0)); + query_builder.addPrefixTerm("foo", field, id[1], Weight(0)); + query_builder.addStringTerm("bar", field, id[2], Weight(0)); + query_builder.addSubstringTerm("baz", field, id[3], Weight(0)); + query_builder.addSuffixTerm("qux", field, id[4], Weight(0)); + query_builder.addRangeTerm(Range(), field, id[5], Weight(0)); + query_builder.addWeightedSetTerm(1, field, id[6], Weight(0)); + { + // weighted token + query_builder.addStringTerm("bar", field, id[3], Weight(0)); + } + + query_builder.addLocationTerm(Location(Point(10, 10), 3, 0), + field, id[7], Weight(0)); + Node::UP node = query_builder.build(); + + fef_test::IndexEnvironment index_environment; + index_environment.getFields().push_back(FieldInfo(FieldType::INDEX, CollectionType::SINGLE, field, 0)); + index_environment.getFields().push_back(FieldInfo(FieldType::INDEX, CollectionType::SINGLE, "foo", 1)); + index_environment.getFields().push_back(FieldInfo(FieldType::INDEX, CollectionType::SINGLE, "bar", 2)); + + ResolveViewVisitor visitor(resolver, index_environment); + node->accept(visitor); + + return node; +} + +void Test::requireThatTermsAreAdded() { + Node::UP node = getQuery(ViewResolver()); + + vector<const ITermData *> term_data; + TermDataExtractor::extractTerms(*node, term_data); + EXPECT_EQUAL(7u, term_data.size()); + for (int i = 0; i < 7; ++i) { + EXPECT_EQUAL(id[i], term_data[i]->getUniqueId()); + EXPECT_EQUAL(1u, term_data[i]->numFields()); + } +} + +void Test::requireThatAViewWithTwoFieldsGivesOneTermDataPerTerm() { + ViewResolver resolver; + resolver.add(field, "foo"); + resolver.add(field, "bar"); + Node::UP node = getQuery(resolver); + + vector<const ITermData *> term_data; + TermDataExtractor::extractTerms(*node, term_data); + EXPECT_EQUAL(7u, term_data.size()); + for (int i = 0; i < 7; ++i) { + EXPECT_EQUAL(id[i], term_data[i]->getUniqueId()); 
+ EXPECT_EQUAL(2u, term_data[i]->numFields()); + } +} + +void +Test::requireThatUnrankedTermsAreSkipped() +{ + QueryBuilder<ProtonNodeTypes> query_builder; + query_builder.addAnd(2); + query_builder.addStringTerm("term1", field, id[0], Weight(0)); + query_builder.addStringTerm("term2", field, id[1], Weight(0)) + .setRanked(false); + Node::UP node = query_builder.build(); + + vector<const ITermData *> term_data; + TermDataExtractor::extractTerms(*node, term_data); + EXPECT_EQUAL(1u, term_data.size()); + ASSERT_TRUE(term_data.size() >= 1); + EXPECT_EQUAL(id[0], term_data[0]->getUniqueId()); +} + +void +Test::requireThatNegativeTermsAreSkipped() +{ + QueryBuilder<ProtonNodeTypes> query_builder; + query_builder.addAnd(2); + query_builder.addStringTerm("term1", field, id[0], Weight(0)); + query_builder.addAndNot(2); + query_builder.addStringTerm("term2", field, id[1], Weight(0)); + query_builder.addAndNot(2); + query_builder.addStringTerm("term3", field, id[2], Weight(0)); + query_builder.addStringTerm("term4", field, id[3], Weight(0)); + Node::UP node = query_builder.build(); + + vector<const ITermData *> term_data; + TermDataExtractor::extractTerms(*node, term_data); + EXPECT_EQUAL(2u, term_data.size()); + ASSERT_TRUE(term_data.size() >= 2); + EXPECT_EQUAL(id[0], term_data[0]->getUniqueId()); + EXPECT_EQUAL(id[1], term_data[1]->getUniqueId()); +} + +} // namespace + +TEST_APPHOOK(Test); |