From f9575493a0653235b9fd3022b9fd25b9fd271829 Mon Sep 17 00:00:00 2001
From: Geir Storli
Date: Wed, 4 Jan 2023 16:42:55 +0000
Subject: Extract virtual fields in the index environment.

Fields that are represented by a set of attributes in the backend are considered virtual fields.
Currently, this is map or array of struct fields (from the SD file) with struct-field attributes.
---
Reviewer notes (not part of the original commit; text between the separator
above and the first diff is ignored when the patch is applied):
 * This patch was recovered from a whitespace-collapsed export. Line breaks
   are reconstructed so that they agree with the hunk headers and the
   diffstat; the 4-space indentation is an assumption.
 * Text inside angle brackets had been stripped. It is restored where the
   surrounding code determines it: "#include <set>",
   "std::set<vespalib::string>", "std::make_unique<Schema>()" and the
   "map<int, person>" / "map<int, int>" types in the test comment.
   The author e-mail and the targets of the three context "#include" lines
   in indexenvironment.cpp could not be recovered and are left as found;
   restore them from the repository before applying.
 * consider_field_for_extraction() registers every dotted prefix of an
   attribute name: "person_map.value.name" yields "person_map.value" and
   "person_map". A name starting with '.' would register an empty virtual
   field name; presumably attribute names never look like that - confirm.
 * extractFields() appends the virtual fields after the fields it already
   inserted, iterating the std::set; the test's indexes 6..8 follow that
   (sorted) iteration order.

 .../index_environment/index_environment_test.cpp   | 35 +++++++++++++++++++++
 .../proton/matching/indexenvironment.cpp           | 36 ++++++++++++++++++++++
 2 files changed, 71 insertions(+)

(limited to 'searchcore')

diff --git a/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp b/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp
index bd7c3a4e8fd..404dc5d42e9 100644
--- a/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp
+++ b/searchcore/src/tests/proton/matching/index_environment/index_environment_test.cpp
@@ -15,6 +15,7 @@ using search::index::Schema;
 using search::index::schema::CollectionType;
 using search::index::schema::DataType;
 using vespalib::eval::ConstantValue;
+using SAF = Schema::AttributeField;
 using SIAF = Schema::ImportedAttributeField;
 
 const vespalib::string my_expr_ref(
@@ -118,6 +119,11 @@ struct Fixture {
         EXPECT_TRUE(field->type() == FieldType::ATTRIBUTE);
         EXPECT_FALSE(field->isFilter());
     }
+    void assert_virtual_field(size_t idx,
+                              const vespalib::string& name) const {
+        const auto* field = assertField(idx, name, DataType::COMBINED, CollectionType::ARRAY);
+        EXPECT_TRUE(field->type() == FieldType::VIRTUAL);
+    }
 };
 
 TEST_F("require that document meta store is always extracted in index environment", Fixture(buildEmptySchema()))
@@ -139,6 +145,35 @@ TEST_F("require that imported attribute fields are extracted in index environmen
     EXPECT_EQUAL("[documentmetastore]", f.env.getField(2)->name());
 }
 
+Schema::UP schema_with_virtual_fields() {
+    // These attributes represent parts of the following fields:
+    // * field person_map type map<int, person>, where the person struct has the fields name and year.
+    // * field int_map type map<int, int>
+    //
+    // In this example 'person_map', 'person_map.value', and 'int_map' are virtual fields as seen from the ranking framework.
+    auto result = std::make_unique<Schema>();
+    result->addAttributeField(SAF("person_map.key", DataType::INT32, CollectionType::ARRAY));
+    result->addAttributeField(SAF("person_map.value.name", DataType::STRING, CollectionType::ARRAY));
+    result->addAttributeField(SAF("person_map.value.year", DataType::INT32, CollectionType::ARRAY));
+    result->addAttributeField(SAF("int_map.key", DataType::INT32, CollectionType::ARRAY));
+    result->addAttributeField(SAF("int_map.value", DataType::INT32, CollectionType::ARRAY));
+    return result;
+}
+
+TEST_F("virtual fields are extracted in index environment", Fixture(schema_with_virtual_fields()))
+{
+    ASSERT_EQUAL(9u, f.env.getNumFields());
+    TEST_DO(f.assertAttributeField(0, "person_map.key", DataType::INT32, CollectionType::ARRAY));
+    TEST_DO(f.assertAttributeField(1, "person_map.value.name", DataType::STRING, CollectionType::ARRAY));
+    TEST_DO(f.assertAttributeField(2, "person_map.value.year", DataType::INT32, CollectionType::ARRAY));
+    TEST_DO(f.assertAttributeField(3, "int_map.key", DataType::INT32, CollectionType::ARRAY));
+    TEST_DO(f.assertAttributeField(4, "int_map.value", DataType::INT32, CollectionType::ARRAY));
+    EXPECT_EQUAL("[documentmetastore]", f.env.getField(5)->name());
+    TEST_DO(f.assert_virtual_field(6, "int_map"));
+    TEST_DO(f.assert_virtual_field(7, "person_map"));
+    TEST_DO(f.assert_virtual_field(8, "person_map.value"));
+}
+
 TEST_F("require that onnx model config can be obtained", Fixture(buildEmptySchema())) {
     {
         auto model = f1.env.getOnnxModel("model1");
diff --git a/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp b/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp
index a90dfea8f40..488430b7fe2 100644
--- a/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp
+++ b/searchcore/src/vespa/searchcore/proton/matching/indexenvironment.cpp
@@ -5,9 +5,40 @@
 #include
 #include
 #include
+#include <set>
 
 using namespace search::fef;
 
+namespace {
+
+using StringSet = std::set<vespalib::string>;
+
+void
+consider_field_for_extraction(const vespalib::string& field_name, StringSet& virtual_fields)
+{
+    size_t pos = field_name.find_last_of('.');
+    if (pos != vespalib::string::npos) {
+        vespalib::string virtual_field = field_name.substr(0, pos);
+        virtual_fields.insert(virtual_field);
+        consider_field_for_extraction(virtual_field, virtual_fields);
+    }
+}
+
+StringSet
+extract_virtual_fields(const search::index::Schema& schema)
+{
+    // Fields that are represented by a set of attributes in the backend are considered virtual fields.
+    // Currently, this is map or array of struct fields (from the SD file) with struct-field attributes.
+    StringSet result;
+    for (uint32_t i = 0; i < schema.getNumAttributeFields(); ++i) {
+        const auto& field = schema.getAttributeField(i);
+        consider_field_for_extraction(field.getName(), result);
+    }
+    return result;
+}
+
+}
+
 namespace proton::matching {
 
 void
@@ -51,6 +82,11 @@ IndexEnvironment::extractFields(const search::index::Schema &schema)
         fieldInfo.setFilter(true);
         insertField(fieldInfo);
     }
+    for (const auto& field : extract_virtual_fields(schema)) {
+        FieldInfo info(FieldType::VIRTUAL, FieldInfo::CollectionType::ARRAY, field, _fields.size());
+        info.set_data_type(FieldInfo::DataType::COMBINED);
+        insertField(info);
+    }
 }
 
 void
-- 
cgit v1.2.3