aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2019-09-19 12:26:33 +0000
committer Håvard Pettersen <havardpe@oath.com> 2019-09-19 12:31:57 +0000
commit 2a8c7760f9035b81302c94a88a6b754c792eb87c (patch)
tree fdbc27ef8f4965e0174433fa646cfa3384144558 /searchlib
parent d082531b8c6244de5bc99ed887f706be3a1084df (diff)
added function identifying which elements matched
only a skeleton for now; outlines the interface between the summary generator and the matcher.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/CMakeLists.txt 2
-rw-r--r-- searchlib/src/tests/common/matching_elements/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/common/matching_elements/matching_elements_test.cpp 45
-rw-r--r-- searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt 9
-rw-r--r-- searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp 52
-rw-r--r-- searchlib/src/vespa/searchlib/common/CMakeLists.txt 2
-rw-r--r-- searchlib/src/vespa/searchlib/common/matching_elements.cpp 31
-rw-r--r-- searchlib/src/vespa/searchlib/common/matching_elements.h 31
-rw-r--r-- searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp 10
-rw-r--r-- searchlib/src/vespa/searchlib/common/struct_field_mapper.h 47
10 files changed, 238 insertions, 0 deletions
diff --git a/searchlib/CMakeLists.txt b/searchlib/CMakeLists.txt
index 9e997443343..449580e577b 100644
--- a/searchlib/CMakeLists.txt
+++ b/searchlib/CMakeLists.txt
@@ -100,9 +100,11 @@ vespa_define_module(
src/tests/common/bitvector
src/tests/common/foregroundtaskexecutor
src/tests/common/location
+ src/tests/common/matching_elements
src/tests/common/packets
src/tests/common/resultset
src/tests/common/sequencedtaskexecutor
+ src/tests/common/struct_field_mapper
src/tests/common/summaryfeatures
src/tests/diskindex/bitvector
src/tests/diskindex/diskindex
diff --git a/searchlib/src/tests/common/matching_elements/CMakeLists.txt b/searchlib/src/tests/common/matching_elements/CMakeLists.txt
new file mode 100644
index 00000000000..cd1d3560c15
--- /dev/null
+++ b/searchlib/src/tests/common/matching_elements/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_common_matching_elements_test_app TEST  # test executable for MatchingElements
+ SOURCES
+ matching_elements_test.cpp
+ DEPENDS  # the test needs the library under test plus gtest
+ searchlib
+ gtest
+)
+vespa_add_test(NAME searchlib_common_matching_elements_test_app COMMAND searchlib_common_matching_elements_test_app)  # the test command is the executable itself
diff --git a/searchlib/src/tests/common/matching_elements/matching_elements_test.cpp b/searchlib/src/tests/common/matching_elements/matching_elements_test.cpp
new file mode 100644
index 00000000000..e23460b83af
--- /dev/null
+++ b/searchlib/src/tests/common/matching_elements/matching_elements_test.cpp
@@ -0,0 +1,45 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/searchlib/common/matching_elements.h>
+
+using namespace search;
+
+namespace {
+
+// Builds a std::vector<uint32_t> from a braced list so that expected
+// values can be written inline in the assertions.
+std::vector<uint32_t> vec(const std::initializer_list<uint32_t> list) {
+    std::vector<uint32_t> result(list.begin(), list.end());
+    return result;
+}
+
+}
+
+// Fixture holding a pre-populated MatchingElements instance: docid 1
+// gets one list per field, docid 2 gets two overlapping lists per field
+// so that merging can be observed.
+struct MatchingElementsTest : ::testing::Test {
+    MatchingElements matches;
+    MatchingElementsTest() {
+        // docid 1: a single list for each field
+        matches.add_matching_elements(1, "foo", {1, 3, 5});
+        matches.add_matching_elements(1, "bar", {2, 4, 6});
+        // docid 2: first round of lists ...
+        matches.add_matching_elements(2, "foo", {1, 2, 3});
+        matches.add_matching_elements(2, "bar", {4, 5, 6});
+        // ... followed by a second, overlapping round
+        matches.add_matching_elements(2, "foo", {2, 3, 5});
+        matches.add_matching_elements(2, "bar", {2, 4, 5});
+    }
+    ~MatchingElementsTest() = default;
+};
+
+
+// Lists that were added exactly once (docid 1) come back unchanged.
+TEST_F(MatchingElementsTest, require_that_added_matches_can_be_looked_up) {
+    const std::vector<uint32_t> &foo = matches.get_matching_elements(1, "foo");
+    const std::vector<uint32_t> &bar = matches.get_matching_elements(1, "bar");
+    EXPECT_EQ(foo, vec({1, 3, 5}));
+    EXPECT_EQ(bar, vec({2, 4, 6}));
+}
+
+// Two lists added for the same (docid, field) pair are combined into
+// their sorted union.
+TEST_F(MatchingElementsTest, require_that_added_matches_are_merged) {
+    const std::vector<uint32_t> &foo = matches.get_matching_elements(2, "foo");
+    const std::vector<uint32_t> &bar = matches.get_matching_elements(2, "bar");
+    EXPECT_EQ(foo, vec({1, 2, 3, 5}));
+    EXPECT_EQ(bar, vec({2, 4, 5, 6}));
+}
+
+// Looking up an unknown field name or an unknown docid yields an empty list.
+TEST_F(MatchingElementsTest, require_that_nonexisting_lookup_gives_empty_result) {
+    const std::vector<uint32_t> nothing;
+    EXPECT_EQ(matches.get_matching_elements(1, "bogus"), nothing);
+    EXPECT_EQ(matches.get_matching_elements(7, "foo"), nothing);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt b/searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt
new file mode 100644
index 00000000000..f5712d22989
--- /dev/null
+++ b/searchlib/src/tests/common/struct_field_mapper/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchlib_common_struct_field_mapper_test_app TEST  # test executable for StructFieldMapper
+ SOURCES
+ struct_field_mapper_test.cpp
+ DEPENDS  # the test needs the library under test plus gtest
+ searchlib
+ gtest
+)
+vespa_add_test(NAME searchlib_common_struct_field_mapper_test_app COMMAND searchlib_common_struct_field_mapper_test_app)  # the test command is the executable itself
diff --git a/searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp b/searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp
new file mode 100644
index 00000000000..c5368111859
--- /dev/null
+++ b/searchlib/src/tests/common/struct_field_mapper/struct_field_mapper_test.cpp
@@ -0,0 +1,52 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/searchlib/common/struct_field_mapper.h>
+
+using namespace search;
+
+namespace {
+
+// Copies a vespalib::string into a std::string so that values can be
+// compared with EXPECT_EQ.
+std::string str(const vespalib::string &s) {
+    std::string copy;
+    copy.assign(s.data(), s.size());
+    return copy;
+}
+
+}
+
+// Fixture holding a StructFieldMapper with two struct fields ("foo" and
+// "bar"), each registered with two subfields.
+struct StructFieldMapperTest : ::testing::Test {
+    StructFieldMapper mapper;
+    StructFieldMapperTest() {
+        // {struct field, struct subfield} pairs, registered in this order
+        const std::pair<const char *, const char *> mappings[] = {
+            {"foo", "foo.a"},
+            {"foo", "foo.b"},
+            {"bar", "bar.x"},
+            {"bar", "bar.y"},
+        };
+        for (const auto &mapping : mappings) {
+            mapper.add_mapping(mapping.first, mapping.second);
+        }
+    }
+    ~StructFieldMapperTest() = default;
+};
+
+// Only the names registered as enclosing struct fields are reported as
+// struct fields; subfield names and unknown names are not.
+TEST_F(StructFieldMapperTest, require_that_struct_field_can_be_identified) {
+    EXPECT_TRUE(mapper.is_struct_field("foo"));
+    EXPECT_TRUE(mapper.is_struct_field("bar"));
+    // EXPECT_FALSE reports the failing expression without the negation
+    EXPECT_FALSE(mapper.is_struct_field("foo.a"));
+    EXPECT_FALSE(mapper.is_struct_field("bar.x"));
+    EXPECT_FALSE(mapper.is_struct_field("bogus"));
+}
+
+// Only the names registered as subfields are reported as struct
+// subfields; enclosing struct field names and unknown names are not.
+TEST_F(StructFieldMapperTest, require_that_struct_subfield_can_be_identified) {
+    // EXPECT_FALSE reports the failing expression without the negation
+    EXPECT_FALSE(mapper.is_struct_subfield("foo"));
+    EXPECT_FALSE(mapper.is_struct_subfield("bar"));
+    EXPECT_TRUE(mapper.is_struct_subfield("foo.a"));
+    EXPECT_TRUE(mapper.is_struct_subfield("bar.x"));
+    EXPECT_FALSE(mapper.is_struct_subfield("bogus"));
+}
+
+// Every registered subfield resolves to the struct field it was added under.
+TEST_F(StructFieldMapperTest, require_that_struct_subfield_maps_to_enclosing_struct_field_name) {
+    const std::string foo("foo");
+    const std::string bar("bar");
+    EXPECT_EQ(str(mapper.get_struct_field("foo.a")), foo);
+    EXPECT_EQ(str(mapper.get_struct_field("foo.b")), foo);
+    EXPECT_EQ(str(mapper.get_struct_field("bar.x")), bar);
+    EXPECT_EQ(str(mapper.get_struct_field("bar.y")), bar);
+}
+
+// A name that was never registered as a subfield resolves to the empty string.
+TEST_F(StructFieldMapperTest, require_that_nonexisting_struct_subfield_maps_to_empty_string) {
+    const std::string nothing;
+    EXPECT_EQ(str(mapper.get_struct_field("bogus")), nothing);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchlib/src/vespa/searchlib/common/CMakeLists.txt b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
index 9abb6c42c8d..4f0b241e98f 100644
--- a/searchlib/src/vespa/searchlib/common/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/common/CMakeLists.txt
@@ -18,6 +18,7 @@ vespa_add_library(searchlib_common OBJECT
location.cpp
locationiterators.cpp
mapnames.cpp
+ matching_elements.cpp
packets.cpp
partialbitvector.cpp
resultset.cpp
@@ -28,6 +29,7 @@ vespa_add_library(searchlib_common OBJECT
sortdata.cpp
sortresults.cpp
sortspec.cpp
+ struct_field_mapper.cpp
threaded_compactable_lid_space.cpp
tunefileinfo.cpp
DEPENDS
diff --git a/searchlib/src/vespa/searchlib/common/matching_elements.cpp b/searchlib/src/vespa/searchlib/common/matching_elements.cpp
new file mode 100644
index 00000000000..1a4653e267b
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/matching_elements.cpp
@@ -0,0 +1,31 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "matching_elements.h"
+#include <algorithm>
+
+namespace search {
+
+MatchingElements::MatchingElements() = default;  // constructor defaulted here, out of line from the class declaration
+MatchingElements::~MatchingElements() = default; // destructor defaulted here, out of line from the class declaration
+
+/**
+ * Merges 'elements' into the list kept for (docid, struct_field_name).
+ *
+ * The stored list is the sorted union of everything added for the pair
+ * so far. An entry is created for the pair if none exists yet, even
+ * when 'elements' is empty.
+ *
+ * @param docid             document the elements belong to
+ * @param struct_field_name name of the struct field the elements belong to
+ * @param elements          element ids to add; need not be sorted
+ **/
+void
+MatchingElements::add_matching_elements(uint32_t docid, const vespalib::string &struct_field_name, const std::vector<uint32_t> &elements)
+{
+    // std::set_union requires both inputs to be sorted. The stored list
+    // always is (it starts empty and is only ever replaced by a union),
+    // so only the incoming elements need checking. Already sorted input
+    // is used as-is without copying.
+    std::vector<uint32_t> sorted_copy;
+    const std::vector<uint32_t> *incoming = &elements;
+    if (!std::is_sorted(elements.begin(), elements.end())) {
+        sorted_copy = elements;
+        std::sort(sorted_copy.begin(), sorted_copy.end());
+        incoming = &sorted_copy;
+    }
+    auto &list = _map[key_t(docid, struct_field_name)];
+    // The union can never be larger than both inputs together; size the
+    // output up front and trim the unused tail afterwards.
+    std::vector<uint32_t> merged(list.size() + incoming->size());
+    auto merged_end = std::set_union(list.begin(), list.end(),
+                                     incoming->begin(), incoming->end(),
+                                     merged.begin());
+    merged.erase(merged_end, merged.end());
+    list = std::move(merged);
+}
+
+/**
+ * Returns the list kept for (docid, struct_field_name). When nothing
+ * has been added for the pair, a reference to a shared empty vector is
+ * returned instead.
+ **/
+const std::vector<uint32_t> &
+MatchingElements::get_matching_elements(uint32_t docid, const vespalib::string &struct_field_name) const
+{
+    static const std::vector<uint32_t> no_elements;
+    const auto pos = _map.find(key_t(docid, struct_field_name));
+    const bool found = (pos != _map.end());
+    return found ? pos->second : no_elements;
+}
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/matching_elements.h b/searchlib/src/vespa/searchlib/common/matching_elements.h
new file mode 100644
index 00000000000..9299191e83a
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/matching_elements.h
@@ -0,0 +1,31 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+#include <utility>
+#include <map>
+#include <vector>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+
+/**
+ * Records which elements matched the query, keyed by document id and
+ * struct field name. Element lists added for the same key are merged.
+ **/
+class MatchingElements
+{
+public:
+    MatchingElements();
+    ~MatchingElements();
+
+    /** Merges 'elements' into the list kept for (docid, struct_field_name). **/
+    void add_matching_elements(uint32_t docid, const vespalib::string &struct_field_name, const std::vector<uint32_t> &elements);
+
+    /** Returns the list kept for (docid, struct_field_name); empty if nothing was added. **/
+    const std::vector<uint32_t> &get_matching_elements(uint32_t docid, const vespalib::string &struct_field_name) const;
+
+private:
+    // (docid, struct field name) -> matching element ids
+    using key_t = std::pair<uint32_t, vespalib::string>;
+    using value_t = std::vector<uint32_t>;
+
+    std::map<key_t, value_t> _map;
+};
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp b/searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp
new file mode 100644
index 00000000000..849cfd06ade
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/struct_field_mapper.cpp
@@ -0,0 +1,10 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "struct_field_mapper.h"
+
+namespace search {
+
+StructFieldMapper::StructFieldMapper() = default;  // constructor defaulted here, out of line from the class declaration
+StructFieldMapper::~StructFieldMapper() = default; // destructor defaulted here, out of line from the class declaration
+
+} // namespace search
diff --git a/searchlib/src/vespa/searchlib/common/struct_field_mapper.h b/searchlib/src/vespa/searchlib/common/struct_field_mapper.h
new file mode 100644
index 00000000000..07951db99f7
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/common/struct_field_mapper.h
@@ -0,0 +1,47 @@
+// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/vespalib/stllike/string.h>
+#include <set>
+#include <map>
+
+namespace search {
+
+/**
+ * Registry of struct field names and their subfields. Given the full
+ * name of a struct subfield it yields the name of the struct field
+ * that encloses it.
+ **/
+class StructFieldMapper
+{
+private:
+    // names registered as enclosing struct fields
+    std::set<vespalib::string> _struct_fields;
+    // full subfield name -> name of the enclosing struct field
+    std::map<vespalib::string,vespalib::string> _struct_subfields;
+
+public:
+    StructFieldMapper();
+    ~StructFieldMapper();
+
+    // Registers 'struct_subfield_name' as a subfield of 'struct_field_name'.
+    // Registering the same subfield name again replaces its struct field.
+    void add_mapping(const vespalib::string &struct_field_name,
+                     const vespalib::string &struct_subfield_name)
+    {
+        _struct_fields.insert(struct_field_name);
+        _struct_subfields[struct_subfield_name] = struct_field_name;
+    }
+
+    // True if 'field_name' was registered as an enclosing struct field.
+    bool is_struct_field(const vespalib::string &field_name) const {
+        return (_struct_fields.find(field_name) != _struct_fields.end());
+    }
+
+    // True if 'field_name' was registered as a struct subfield.
+    bool is_struct_subfield(const vespalib::string &field_name) const {
+        return (_struct_subfields.count(field_name) != 0);
+    }
+
+    // Returns the struct field enclosing 'struct_subfield_name', or a
+    // reference to a shared empty string when the name is not registered.
+    const vespalib::string &get_struct_field(const vespalib::string &struct_subfield_name) const {
+        static const vespalib::string no_struct_field;
+        const auto pos = _struct_subfields.find(struct_subfield_name);
+        return (pos != _struct_subfields.end()) ? pos->second : no_struct_field;
+    }
+};
+
+} // namespace search