about | summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Tor Egge <Tor.Egge@yahooinc.com> 2023-07-13 13:35:45 +0200
committer: Tor Egge <Tor.Egge@yahooinc.com> 2023-07-13 13:35:45 +0200
commit: d69d069d5377670bf619abf6df4cc0997d3afd2a (patch)
tree: b6f3c30414fae0cf018cfcc92ec81aca1c5b74f3 /searchlib
parent: 96d8009fb2921d4fc3152a89b97a888bd7e6f166 (diff)
Limit fields used for match feature.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/features/prod_features.cpp | 25
-rw-r--r-- searchlib/src/vespa/searchlib/features/matchfeature.cpp | 51
2 files changed, 60 insertions, 16 deletions
diff --git a/searchlib/src/tests/features/prod_features.cpp b/searchlib/src/tests/features/prod_features.cpp
index dc64c3328e4..c22d3b3abb8 100644
--- a/searchlib/src/tests/features/prod_features.cpp
+++ b/searchlib/src/tests/features/prod_features.cpp
@@ -67,6 +67,7 @@ using search::attribute::WeightedEnumContent;
using search::attribute::test::AttributeBuilder;
using search::common::GeoLocation;
using search::common::GeoLocationSpec;
+using vespalib::eval::ValueType;
using AttributePtr = AttributeVector::SP;
using AVC = search::attribute::Config;
@@ -391,6 +392,14 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env)
avs.push_back(AttributeFactory::createAttribute("sbool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 14
avs.push_back(AttributeFactory::createAttribute("sebool", AVC(AVBT::BOOL, AVCT::SINGLE))); // 15
avs.push_back(AttributeFactory::createAttribute("sdouble", AVC(AVBT::DOUBLE, AVCT::SINGLE))); // 16
+ {
+ AVC cfg(AVBT::TENSOR, AVCT::SINGLE);
+ cfg.setTensorType(ValueType::from_spec("tensor(x[2])"));
+ avs.push_back(AttributeFactory::createAttribute("tensor", cfg));
+ }
+ avs.push_back(AttributeFactory::createAttribute("predicate", AVC(AVBT::PREDICATE, AVCT::SINGLE))); // 18
+ avs.push_back(AttributeFactory::createAttribute("reference", AVC(AVBT::REFERENCE, AVCT::SINGLE))); // 19
+ avs.push_back(AttributeFactory::createAttribute("raw", AVC(AVBT::RAW, AVCT::SINGLE))); // 20
// simulate a unique only attribute as specified in sd
AVC cfg(AVBT::INT32, AVCT::SINGLE);
@@ -417,7 +426,11 @@ Test::setupForAttributeTest(FtFeatureTest &ft, bool setup_env)
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sdouble")
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sbyte")
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sbool")
- .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool");
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOL,"sebool")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw");
}
for (const auto & attr : avs) {
@@ -1499,6 +1512,10 @@ Test::testMatch()
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint");
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint");
ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "tensor");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "predicate");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "reference");
+ ft.getIndexEnv().getBuilder().addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "raw");
FtIndexEnvironment idx_env;
idx_env.getBuilder()
@@ -1507,7 +1524,11 @@ Test::testMatch()
.addField(FieldType::INDEX, CollectionType::WEIGHTEDSET, "baz")
.addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, "sint")
.addField(FieldType::ATTRIBUTE, CollectionType::ARRAY, "aint")
- .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint");
+ .addField(FieldType::ATTRIBUTE, CollectionType::WEIGHTEDSET, "wsint")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::TENSOR, "tensor")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::BOOLEANTREE, "predicate")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::REFERENCE, "reference")
+ .addField(FieldType::ATTRIBUTE, CollectionType::SINGLE, DataType::RAW, "raw");
StringList params, in, out;
FT_SETUP_OK(pt, params, in, out.add("score").add("totalWeight"));
diff --git a/searchlib/src/vespa/searchlib/features/matchfeature.cpp b/searchlib/src/vespa/searchlib/features/matchfeature.cpp
index 53a7ce6e108..7a2148510d4 100644
--- a/searchlib/src/vespa/searchlib/features/matchfeature.cpp
+++ b/searchlib/src/vespa/searchlib/features/matchfeature.cpp
@@ -10,9 +10,34 @@
using namespace search::fef;
using CollectionType = FieldInfo::CollectionType;
+using DataType = FieldInfo::DataType;
namespace search::features {
+namespace {
+
+auto attribute_match_data_types = ParameterDataTypeSet::normalTypeSet();
+
+bool matchable_field(const FieldInfo& info)
+{
+ auto field_type = info.type();
+ if (field_type != FieldType::INDEX && field_type != FieldType::ATTRIBUTE) {
+ return false;
+ }
+ auto data_type = info.get_data_type();
+ if (data_type == DataType::TENSOR || data_type == DataType::RAW) {
+ // not matchable
+ return false;
+ }
+ if (field_type == FieldType::ATTRIBUTE && !attribute_match_data_types.allowedType(data_type)) {
+ // bad data type for attributeMatch feature
+ return false;
+ }
+ return true;
+}
+
+}
+
MatchExecutor::MatchExecutor(const MatchParams & params) :
FeatureExecutor(),
_params(params)
@@ -67,30 +92,28 @@ MatchBlueprint::setup(const IIndexEnvironment & env,
{
for (uint32_t i = 0; i < env.getNumFields(); ++i) {
const FieldInfo * info = env.getField(i);
- if (info->get_data_type() == FieldInfo::DataType::TENSOR) {
- // not matchable
+ if (!matchable_field(*info)) {
continue;
}
- if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) {
- _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name()));
- if (info->type() == FieldType::INDEX) {
- if (info->collection() == CollectionType::SINGLE) {
- defineInput("fieldMatch(" + info->name() + ")");
- } else {
- defineInput("elementCompleteness(" + info->name() + ")");
- }
- } else if (info->type() == FieldType::ATTRIBUTE) {
- defineInput("attributeMatch(" + info->name() + ")");
+ _params.weights.push_back(indexproperties::FieldWeight::lookup(env.getProperties(), info->name()));
+ if (info->type() == FieldType::INDEX) {
+ if (info->collection() == CollectionType::SINGLE) {
+ defineInput("fieldMatch(" + info->name() + ")");
+ } else {
+ defineInput("elementCompleteness(" + info->name() + ")");
}
+ } else if (info->type() == FieldType::ATTRIBUTE) {
+ defineInput("attributeMatch(" + info->name() + ")");
}
}
describeOutput("score", "Normalized sum over all matched fields");
describeOutput("totalWeight", "Sum of rank weights for all matched fields");
for (uint32_t i = 0; i < env.getNumFields(); ++i) {
const FieldInfo * info = env.getField(i);
- if ((info->type() == FieldType::INDEX) || (info->type() == FieldType::ATTRIBUTE)) {
- describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'");
+ if (!matchable_field(*info)) {
+ continue;
}
+ describeOutput("weight." + info->name(), "The rank weight value for field '" + info->name() + "'");
}
return true;
}