about | summary | refs | log | tree | commit | diff | stats
path: root/searchcore
diff options
context:
space:
mode:
author: Geir Storli <geirst@yahooinc.com> 2023-01-04 16:42:55 +0000
committer: Geir Storli <geirst@yahooinc.com> 2023-01-04 16:42:55 +0000
commit: f9575493a0653235b9fd3022b9fd25b9fd271829 (patch)
tree: 1a82600254db8100fd6cde62d00c4c6bdf6084a1 /searchcore
parent: 91255325ec7029f4a7adfb9491500a2b1c6ec03a (diff)
Extract virtual fields in the index environment.
Fields that are represented by a set of attributes in the backend are considered virtual fields. Currently, these are map fields and array-of-struct fields (from the SD file) that have struct-field attributes.
Diffstat (limited to 'searchcore')
-rw-r--r-- searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp | 35
-rw-r--r-- searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp | 36
2 files changed, 71 insertions, 0 deletions
diff --git a/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp b/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp
index bd7c3a4e8fd..404dc5d42e9 100644
--- a/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp
+++ b/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp
@@ -15,6 +15,7 @@ using search::index::Schema;
using search::index::schema::CollectionType;
using search::index::schema::DataType;
using vespalib::eval::ConstantValue;
+using SAF = Schema::AttributeField;
using SIAF = Schema::ImportedAttributeField;
const vespalib::string my_expr_ref(
@@ -118,6 +119,11 @@ struct Fixture {
EXPECT_TRUE(field->type() == FieldType::ATTRIBUTE);
EXPECT_FALSE(field->isFilter());
}
+ void assert_virtual_field(size_t idx, // Test helper: expects the field at index 'idx' to be a virtual field called 'name'.
+ const vespalib::string& name) const {
+ const auto* field = assertField(idx, name, DataType::COMBINED, CollectionType::ARRAY); // assertField() is given the expected name, COMBINED data type and ARRAY collection type
+ EXPECT_TRUE(field->type() == FieldType::VIRTUAL); // the field type must be VIRTUAL
+ }
};
TEST_F("require that document meta store is always extracted in index environment", Fixture(buildEmptySchema()))
@@ -139,6 +145,35 @@ TEST_F("require that imported attribute fields are extracted in index environmen
EXPECT_EQUAL("[documentmetastore]", f.env.getField(2)->name());
}
+Schema::UP schema_with_virtual_fields() { // Builds a schema that contains only the struct-field attributes of two map fields.
+ // These attributes represent parts of the following fields:
+ // * field person_map type map<int, person>, where the person struct has the fields name and year.
+ // * field int_map type map<int, int>
+ //
+ // In this example 'person_map', 'person_map.value', and 'int_map' are virtual fields as seen from the ranking framework.
+ auto result = std::make_unique<Schema>();
+ result->addAttributeField(SAF("person_map.key", DataType::INT32, CollectionType::ARRAY)); // keys of person_map
+ result->addAttributeField(SAF("person_map.value.name", DataType::STRING, CollectionType::ARRAY)); // 'name' member of the person values
+ result->addAttributeField(SAF("person_map.value.year", DataType::INT32, CollectionType::ARRAY)); // 'year' member of the person values
+ result->addAttributeField(SAF("int_map.key", DataType::INT32, CollectionType::ARRAY)); // keys of int_map
+ result->addAttributeField(SAF("int_map.value", DataType::INT32, CollectionType::ARRAY)); // values of int_map
+ return result;
+}
+
+TEST_F("virtual fields are extracted in index environment", Fixture(schema_with_virtual_fields()))
+{
+ ASSERT_EQUAL(9u, f.env.getNumFields()); // 5 attribute fields + [documentmetastore] + 3 virtual fields
+ TEST_DO(f.assertAttributeField(0, "person_map.key", DataType::INT32, CollectionType::ARRAY)); // attribute fields are expected first, in the order they were added to the schema
+ TEST_DO(f.assertAttributeField(1, "person_map.value.name", DataType::STRING, CollectionType::ARRAY));
+ TEST_DO(f.assertAttributeField(2, "person_map.value.year", DataType::INT32, CollectionType::ARRAY));
+ TEST_DO(f.assertAttributeField(3, "int_map.key", DataType::INT32, CollectionType::ARRAY));
+ TEST_DO(f.assertAttributeField(4, "int_map.value", DataType::INT32, CollectionType::ARRAY));
+ EXPECT_EQUAL("[documentmetastore]", f.env.getField(5)->name()); // the document meta store field is expected right after the schema attributes
+ TEST_DO(f.assert_virtual_field(6, "int_map")); // virtual fields are expected last, in sorted name order
+ TEST_DO(f.assert_virtual_field(7, "person_map"));
+ TEST_DO(f.assert_virtual_field(8, "person_map.value")); // intermediate struct levels are expected as virtual fields too
+}
+
TEST_F("require that onnx model config can be obtained", Fixture(buildEmptySchema())) {
{
auto model = f1.env.getOnnxModel("model1");
diff --git a/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp b/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp
index a90dfea8f40..488430b7fe2 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp
@@ -5,9 +5,40 @@
#include <vespa/searchlib/fef/functiontablefactory.h>
#include <vespa/searchlib/fef/indexproperties.h>
#include <vespa/searchcore/proton/documentmetastore/documentmetastore.h>
+#include <set>
using namespace search::fef;
+namespace {
+
+using StringSet = std::set<vespalib::string>;
+
+void
+consider_field_for_extraction(const vespalib::string& field_name, StringSet& virtual_fields) // Inserts every dotted prefix of field_name into virtual_fields, e.g. "a.b.c" yields "a.b" and "a".
+{
+ size_t pos = field_name.find_last_of('.'); // the last '.' separates the parent field name from the final name component
+ if (pos != vespalib::string::npos) { // a name without '.' has no parent, so nothing is inserted and the recursion stops
+ vespalib::string virtual_field = field_name.substr(0, pos); // everything before the last '.'
+ virtual_fields.insert(virtual_field); // set semantics: a prefix shared by several attributes is stored once
+ consider_field_for_extraction(virtual_field, virtual_fields); // recurse so that shorter prefixes are inserted as well
+ }
+}
+
+StringSet
+extract_virtual_fields(const search::index::Schema& schema) // Returns the set of virtual field names derived from the names of the schema's attribute fields.
+{
+ // Fields that are represented by a set of attributes in the backend are considered virtual fields.
+ // Currently, this is map or array of struct fields (from the SD file) with struct-field attributes.
+ StringSet result;
+ for (uint32_t i = 0; i < schema.getNumAttributeFields(); ++i) { // visit every attribute field in the schema
+ const auto& field = schema.getAttributeField(i);
+ consider_field_for_extraction(field.getName(), result); // inserts each dotted prefix of the attribute name; names without '.' contribute nothing
+ }
+ return result;
+}
+
+}
+
namespace proton::matching {
void
@@ -51,6 +82,11 @@ IndexEnvironment::extractFields(const search::index::Schema &schema)
fieldInfo.setFilter(true);
insertField(fieldInfo);
}
+ for (const auto& field : extract_virtual_fields(schema)) {
+ FieldInfo info(FieldType::VIRTUAL, FieldInfo::CollectionType::ARRAY, field, _fields.size());
+ info.set_data_type(FieldInfo::DataType::COMBINED);
+ insertField(info);
+ }
}
void