diff options
author | Håvard Pettersen <havardpe@oath.com> | 2019-09-19 12:26:33 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2019-09-19 12:31:57 +0000 |
commit | 2a8c7760f9035b81302c94a88a6b754c792eb87c (patch) | |
tree | fdbc27ef8f4965e0174433fa646cfa3384144558 /searchlib | |
parent | d082531b8c6244de5bc99ed887f706be3a1084df (diff) |
added function identifying which elements matched
only a skeleton for now; outlines the interface between the summary
generator and the matcher.
Diffstat (limited to 'searchlib')
10 files changed, 238 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt index 9e997443343..449580e577b 100644 --- a/searchlib/CMakeLists.txt +++ b/searchlib/CMakeLists.txt @@ -100,9 +100,11 @@ vespa_define_module( src/tests/common/bitvector src/tests/common/foregroundtaskexecutor src/tests/common/location + src/tests/common/matching_elements src/tests/common/packets src/tests/common/resultset src/tests/common/sequencedtaskexecutor + src/tests/common/struct_field_mapper src/tests/common/summaryfeatures src/tests/diskindex/bitvector src/tests/diskindex/diskindex diff --git a/searchlib/src/tests/common/matching_elements/CMakeLists.txt b/searchlib/src/tests/common/matching_elements/CMakeLists.txt new file mode 100644 index 00000000000..cd1d3560c15 --- /dev/null +++ b/searchlib/src/tests/common/matching_elements/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchlib_common_matching_elements_test_app TEST + SOURCES + matching_elements_test.cpp + DEPENDS + searchlib + gtest +) +vespa_add_test(NAME searchlib_common_matching_elements_test_app COMMAND searchlib_common_matching_elements_test_app) diff --git a/searchlib/src/tests/common/matching_elements/matching_elements_test.cpp b/searchlib/src/tests/common/matching_elements/matching_elements_test.cpp new file mode 100644 index 00000000000..e23460b83af --- /dev/null +++ b/searchlib/src/tests/common/matching_elements/matching_elements_test.cpp @@ -0,0 +1,45 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/searchlib/common/matching_elements.h>
+
+using namespace search;
+
+namespace {
+
+// Turns a braced list of element ids into a vector so expectations can be written inline.
+std::vector<uint32_t> vec(const std::initializer_list<uint32_t> list) {
+    std::vector<uint32_t> ids(list.begin(), list.end());
+    return ids;
+}
+
+}
+
+/**
+ * Fixture holding a MatchingElements instance that is populated in
+ * the constructor: docid 1 receives one batch per field, docid 2
+ * receives two overlapping batches per field.
+ **/
+struct MatchingElementsTest : ::testing::Test {
+    MatchingElements matches;
+    MatchingElementsTest() : matches() {
+        // docid 1: a single batch for each field
+        matches.add_matching_elements(1, "foo", vec({1, 3, 5}));
+        matches.add_matching_elements(1, "bar", vec({2, 4, 6}));
+        // docid 2: two overlapping batches for each field
+        matches.add_matching_elements(2, "foo", vec({1, 2, 3}));
+        matches.add_matching_elements(2, "foo", vec({2, 3, 5}));
+        matches.add_matching_elements(2, "bar", vec({4, 5, 6}));
+        matches.add_matching_elements(2, "bar", vec({2, 4, 5}));
+    }
+    ~MatchingElementsTest() override = default;
+};
+
+
+TEST_F(MatchingElementsTest, require_that_added_matches_can_be_looked_up) {
+    const auto &foo_elements = matches.get_matching_elements(1, "foo");
+    const auto &bar_elements = matches.get_matching_elements(1, "bar");
+    EXPECT_EQ(foo_elements, vec({1, 3, 5}));
+    EXPECT_EQ(bar_elements, vec({2, 4, 6}));
+}
+
+TEST_F(MatchingElementsTest, require_that_added_matches_are_merged) {
+    const auto &foo_elements = matches.get_matching_elements(2, "foo");
+    const auto &bar_elements = matches.get_matching_elements(2, "bar");
+    EXPECT_EQ(foo_elements, vec({1, 2, 3, 5}));
+    EXPECT_EQ(bar_elements, vec({2, 4, 5, 6}));
+}
+
+TEST_F(MatchingElementsTest, require_that_nonexisting_lookup_gives_empty_result) {
+    // unknown field for a known document, then a known field for an unknown document
+    const auto &unknown_field = matches.get_matching_elements(1, "bogus");
+    const auto &unknown_doc = matches.get_matching_elements(7, "foo");
+    EXPECT_EQ(unknown_field, vec({}));
+    EXPECT_EQ(unknown_doc, vec({}));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt b/searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt
new file mode 100644
index 00000000000..f5712d22989
--- /dev/null
+++ b/searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_common_struct_field_mapper_test_app TEST
+    SOURCES
+    struct_field_mapper_test.cpp
+    DEPENDS
+    searchlib
+    gtest
+)
+vespa_add_test(NAME searchlib_common_struct_field_mapper_test_app COMMAND searchlib_common_struct_field_mapper_test_app)
diff --git a/searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp b/searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp
new file mode 100644
index 00000000000..c5368111859
--- /dev/null
+++ b/searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/searchlib/common/struct_field_mapper.h>
+
+using namespace search;
+
+namespace {
+
+// Copies the bytes of a vespalib::string into a std::string so that
+// expectations compare std::string values.
+std::string str(const vespalib::string &s) {
+    return std::string(s.data(), s.size());
+}
+
+}
+
+/**
+ * Fixture holding a mapper with two struct fields ("foo" and "bar"),
+ * each registered with two subfields.
+ **/
+struct StructFieldMapperTest : ::testing::Test {
+    StructFieldMapper mapper;
+    StructFieldMapperTest() : mapper() {
+        mapper.add_mapping("foo", "foo.a");
+        mapper.add_mapping("foo", "foo.b");
+        mapper.add_mapping("bar", "bar.x");
+        mapper.add_mapping("bar", "bar.y");
+    }
+    ~StructFieldMapperTest() override = default;
+};
+
+TEST_F(StructFieldMapperTest, require_that_struct_field_can_be_identified) {
+    EXPECT_TRUE(mapper.is_struct_field("foo"));
+    EXPECT_TRUE(mapper.is_struct_field("bar"));
+    EXPECT_FALSE(mapper.is_struct_field("foo.a"));
+    EXPECT_FALSE(mapper.is_struct_field("bar.x"));
+    EXPECT_FALSE(mapper.is_struct_field("bogus"));
+}
+
+TEST_F(StructFieldMapperTest, require_that_struct_subfield_can_be_identified) {
+    EXPECT_FALSE(mapper.is_struct_subfield("foo"));
+    EXPECT_FALSE(mapper.is_struct_subfield("bar"));
+    EXPECT_TRUE(mapper.is_struct_subfield("foo.a"));
+    EXPECT_TRUE(mapper.is_struct_subfield("bar.x"));
+    EXPECT_FALSE(mapper.is_struct_subfield("bogus"));
+}
+
+TEST_F(StructFieldMapperTest, require_that_struct_subfield_maps_to_enclosing_struct_field_name) {
+    EXPECT_EQ(str(mapper.get_struct_field("foo.a")), str("foo"));
+    EXPECT_EQ(str(mapper.get_struct_field("foo.b")), str("foo"));
+    EXPECT_EQ(str(mapper.get_struct_field("bar.x")), str("bar"));
+    EXPECT_EQ(str(mapper.get_struct_field("bar.y")), str("bar"));
+}
+
+TEST_F(StructFieldMapperTest, require_that_nonexisting_struct_subfield_maps_to_empty_string) {
+    EXPECT_EQ(str(mapper.get_struct_field("bogus")), str(""));
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
index 9abb6c42c8d..4f0b241e98f 100644
--- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
@@ -18,6 +18,7 @@ vespa_add_library(searchlib_common OBJECT
     location.cpp
     locationiterators.cpp
     mapnames.cpp
+    matching_elements.cpp
     packets.cpp
     partialbitvector.cpp
     resultset.cpp
@@ -28,6 +29,7 @@ vespa_add_library(searchlib_common OBJECT
     sortdata.cpp
     sortresults.cpp
     sortspec.cpp
+    struct_field_mapper.cpp
     threaded_compactable_lid_space.cpp
     tunefileinfo.cpp
     DEPENDS
diff --git a/searchlib/src/vespa/searchlib/common/matching_elements.cpp b/searchlib/src/vespa/searchlib/common/matching_elements.cpp
new file mode 100644
index 00000000000..1a4653e267b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/matching_elements.cpp
@@ -0,0 +1,31 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "matching_elements.h"
+#include <algorithm>
+#include <functional>
+#include <iterator>
+
+namespace search {
+
+MatchingElements::MatchingElements() = default;
+MatchingElements::~MatchingElements() = default;
+
+/**
+ * Merges 'elements' into the list stored for (docid, struct_field_name).
+ * The stored list is kept sorted and free of duplicates. An entry is
+ * created for the key even when 'elements' is empty.
+ *
+ * @param docid document the elements belong to
+ * @param struct_field_name name of the struct field the elements belong to
+ * @param elements element ids to add; any order, duplicates allowed
+ **/
+void
+MatchingElements::add_matching_elements(uint32_t docid, const vespalib::string &struct_field_name, const std::vector<uint32_t> &elements)
+{
+    // std::set_union requires both inputs to be sorted. Only copy and
+    // normalize the incoming list when it is not already strictly
+    // increasing, so well-formed input is merged without an extra copy.
+    std::vector<uint32_t> normalized;
+    const std::vector<uint32_t> *incoming = &elements;
+    if (std::adjacent_find(elements.begin(), elements.end(), std::greater_equal<uint32_t>()) != elements.end()) {
+        normalized = elements;
+        std::sort(normalized.begin(), normalized.end());
+        normalized.erase(std::unique(normalized.begin(), normalized.end()), normalized.end());
+        incoming = &normalized;
+    }
+    auto &list = _map[key_t(docid, struct_field_name)];
+    std::vector<uint32_t> new_list;
+    std::set_union(list.begin(), list.end(), incoming->begin(), incoming->end(), std::back_inserter(new_list));
+    list = std::move(new_list);
+}
+
+/**
+ * Looks up the list stored for (docid, struct_field_name).
+ *
+ * @return the stored list, or a shared empty list when the key is
+ *         not present; the lookup never inserts into the map
+ **/
+const std::vector<uint32_t> &
+MatchingElements::get_matching_elements(uint32_t docid, const vespalib::string &struct_field_name) const
+{
+    static const std::vector<uint32_t> empty;
+    auto res = _map.find(key_t(docid, struct_field_name));
+    if (res == _map.end()) {
+        return empty;
+    }
+    return res->second;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/matching_elements.h b/searchlib/src/vespa/searchlib/common/matching_elements.h
new file mode 100644
index 00000000000..9299191e83a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/matching_elements.h
@@ -0,0 +1,31 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+#include <cstdint>
+#include <utility>
+#include <map>
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+/**
+ * Keeps track of which elements matched the query for a set of struct
+ * fields across multiple documents.
+ **/
+class MatchingElements
+{
+public:
+    MatchingElements();
+    ~MatchingElements();
+
+    // Adds 'elements' to what is stored for the given document and struct field.
+    void add_matching_elements(uint32_t docid,
+                               const vespalib::string &struct_field_name,
+                               const std::vector<uint32_t> &elements);
+
+    // Returns what is stored for the given document and struct field.
+    const std::vector<uint32_t> &get_matching_elements(uint32_t docid,
+                                                       const vespalib::string &struct_field_name) const;
+
+private:
+    // (docid, struct field name) -> element ids
+    using key_t = std::pair<uint32_t, vespalib::string>;
+    using value_t = std::vector<uint32_t>;
+
+    std::map<key_t, value_t> _map;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp b/searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp
new file mode 100644
index 00000000000..849cfd06ade
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp
@@ -0,0 +1,10 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "struct_field_mapper.h"
+
+namespace search {
+
+// Constructor and destructor are declared in the header and defaulted here.
+StructFieldMapper::StructFieldMapper() = default;
+StructFieldMapper::~StructFieldMapper() = default;
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/struct_field_mapper.h b/searchlib/src/vespa/searchlib/common/struct_field_mapper.h
new file mode 100644
index 00000000000..07951db99f7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/struct_field_mapper.h
@@ -0,0 +1,47 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <set>
+#include <map>
+
+namespace search {
+
+/**
+ * Keeps track of a set of struct field names and enables mapping the
+ * full name of struct subfields into the name of the enclosing struct
+ * field.
+ **/
+class StructFieldMapper
+{
+public:
+    StructFieldMapper();
+    ~StructFieldMapper();
+
+    // Registers 'struct_field_name' as a struct field and records it as
+    // the enclosing field of 'struct_subfield_name'. Registering the
+    // same subfield again overwrites its enclosing field.
+    void add_mapping(const vespalib::string &struct_field_name,
+                     const vespalib::string &struct_subfield_name)
+    {
+        _struct_fields.insert(struct_field_name);
+        _struct_subfields[struct_subfield_name] = struct_field_name;
+    }
+
+    // True if 'field_name' has been registered as a struct field.
+    bool is_struct_field(const vespalib::string &field_name) const {
+        return _struct_fields.find(field_name) != _struct_fields.end();
+    }
+
+    // True if 'field_name' has been registered as a struct subfield.
+    bool is_struct_subfield(const vespalib::string &field_name) const {
+        return _struct_subfields.count(field_name) != 0;
+    }
+
+    // Returns the enclosing struct field of 'struct_subfield_name', or a
+    // shared empty string when the subfield has not been registered.
+    const vespalib::string &get_struct_field(const vespalib::string &struct_subfield_name) const {
+        static const vespalib::string empty;
+        const auto pos = _struct_subfields.find(struct_subfield_name);
+        return (pos != _struct_subfields.end()) ? pos->second : empty;
+    }
+
+private:
+    std::set<vespalib::string> _struct_fields;
+    std::map<vespalib::string,vespalib::string> _struct_subfields;
+};
+
+} // namespace search |