about | summary | refs | log | tree | commit | diff | stats
path: root/searchcore/src/tests/proton/matching
diff options
context:
space:
mode:
author: Håvard Pettersen <havardpe@oath.com> 2021-11-03 16:00:03 +0000
committer: Håvard Pettersen <havardpe@oath.com> 2021-11-04 15:11:32 +0000
commit: 2b2a16ac12b6fd40008bac37d59ec6fc89f66539 (patch)
tree: 3559cd7f934940a4fb2dc7cb1299133b0acf6462 /searchcore/src/tests/proton/matching
parent: 6ebe77e2ceebd37aa26aa762f4c608fee22c1b40 (diff)
calculate match features
+ factor out feature extraction + improve summary feature testing + extract returned docids with ordering
Diffstat (limited to 'searchcore/src/tests/proton/matching')
-rw-r--r-- searchcore/src/tests/proton/matching/matching_test.cpp 159
1 file changed, 122 insertions, 37 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp
index d690fb29795..5d084a2448f 100644
--- a/searchcore/src/tests/proton/matching/matching_test.cpp
+++ b/searchcore/src/tests/proton/matching/matching_test.cpp
@@ -143,10 +143,11 @@ struct MyWorld {
config.add(indexproperties::rank::FirstPhase::NAME, "attribute(a1)");
config.add(indexproperties::hitcollector::HeapSize::NAME, (vespalib::asciistream() << heapSize).str());
config.add(indexproperties::hitcollector::ArraySize::NAME, (vespalib::asciistream() << arraySize).str());
- config.add(indexproperties::summary::Feature::NAME, "attribute(a1)");
+ config.add(indexproperties::summary::Feature::NAME, "matches(f1)");
config.add(indexproperties::summary::Feature::NAME, "rankingExpression(\"reduce(tensor(x[3])(x),sum)\")");
config.add(indexproperties::summary::Feature::NAME, "rankingExpression(\"tensor(x[3])(x)\")");
config.add(indexproperties::summary::Feature::NAME, "value(100)");
+ config.add(indexproperties::summary::Feature::NAME, " attribute ( a1 ) "); // will be sorted and normalized
config.add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true");
config.add(indexproperties::dump::Feature::NAME, "attribute(a2)");
@@ -211,6 +212,44 @@ struct MyWorld {
config.import(cfg);
}
+ void setup_match_features() { // adds five match-feature entries to config; verify_match_features() checks these names at indices 0..4
+ config.add(indexproperties::match::Feature::NAME, "attribute(a1)");
+ config.add(indexproperties::match::Feature::NAME, "attribute(a2)");
+ config.add(indexproperties::match::Feature::NAME, "matches(a1)");
+ config.add(indexproperties::match::Feature::NAME, "matches(f1)");
+ config.add(indexproperties::match::Feature::NAME, "rankingExpression(\"tensor(x[3])(x)\")"); // the one feature that verify_match_features() checks with is_data()
+ }
+
+ static void verify_match_features(SearchReply &reply, const vespalib::string &matched_field) { // checks reply.match_features against the names added by setup_match_features(); matched_field selects which matches(...) feature is expected to be 1
+ if (reply.hits.empty()) { // no hits: neither feature names nor feature values are expected
+ EXPECT_EQUAL(reply.match_features.names.size(), 0u);
+ EXPECT_EQUAL(reply.match_features.values.size(), 0u);
+ } else {
+ ASSERT_EQUAL(reply.match_features.names.size(), 5u); // ASSERT (not EXPECT) because names[0..4] are indexed right below
+ EXPECT_EQUAL(reply.match_features.names[0], "attribute(a1)");
+ EXPECT_EQUAL(reply.match_features.names[1], "attribute(a2)");
+ EXPECT_EQUAL(reply.match_features.names[2], "matches(a1)");
+ EXPECT_EQUAL(reply.match_features.names[3], "matches(f1)");
+ EXPECT_EQUAL(reply.match_features.names[4], "rankingExpression(\"tensor(x[3])(x)\")");
+ ASSERT_EQUAL(reply.match_features.values.size(), 5 * reply.hits.size()); // values is a flat array holding 5 entries per hit
+ for (size_t i = 0; i < reply.hits.size(); ++i) {
+ const auto *f = &reply.match_features.values[i * 5]; // f[0..4] are the five feature values belonging to hit i
+ EXPECT_GREATER(f[0].as_double(), 0.0);
+ EXPECT_GREATER(f[1].as_double(), 0.0);
+ EXPECT_EQUAL(f[0].as_double() * 2, f[1].as_double()); // attribute(a2) must be exactly twice attribute(a1); NOTE(review): presumably follows from how the test documents are populated elsewhere in this file - confirm
+ EXPECT_EQUAL(f[2].as_double(), double(matched_field == "a1")); // matches(a1) is expected to be 1.0 only when matched_field is "a1", otherwise 0.0
+ EXPECT_EQUAL(f[3].as_double(), double(matched_field == "f1")); // same rule for matches(f1)
+ EXPECT_TRUE(f[4].is_data()); // the tensor feature must arrive as data so it can be decoded below
+ {
+ nbostream buf(f[4].as_data().data, f[4].as_data().size); // wrap the feature's bytes in a stream for decoding
+ auto actual = spec_from_value(*SimpleValue::from_stream(buf));
+ auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); // cell value equals its x index: 0, 1, 2
+ EXPECT_EQUAL(actual, expect);
+ }
+ }
+ }
+ }
+
void setup_match_phase_limiting(const vespalib::string &attribute, size_t max_hits, bool descending)
{
inject_match_phase_limiting(config, attribute, max_hits, descending);
@@ -442,6 +481,30 @@ TEST("require that matching is performed (multi-threaded)") {
}
}
+TEST("require that match features are calculated (multi-threaded)") {
+ for (size_t threads = 1; threads <= 16; ++threads) { // repeat the whole scenario for every thread count from 1 to 16
+ MyWorld world; // a fresh world is built for each thread count
+ world.basicSetup();
+ world.basicResults();
+ world.setup_match_features(); // add the five match features that verify_match_features() checks
+ SearchRequest::SP request = world.createSimpleRequest("f1", "spread");
+ SearchReply::UP reply = world.performSearch(request, threads);
+ EXPECT_GREATER(reply->hits.size(), 0u); // there must be hits, so verify_match_features() takes its non-empty branch
+ world.verify_match_features(*reply, "f1"); // with "f1", matches(f1) is expected to be 1 and matches(a1) to be 0 for every hit
+ }
+}
+
+TEST("require that no hits gives no match feature names") {
+ MyWorld world;
+ world.basicSetup();
+ world.basicResults();
+ world.setup_match_features(); // match features are configured, but none should be reported without hits
+ SearchRequest::SP request = world.createSimpleRequest("f1", "not_found");
+ SearchReply::UP reply = world.performSearch(request, 1); // single-threaded search
+ EXPECT_EQUAL(reply->hits.size(), 0u); // the request is expected to produce no hits
+ world.verify_match_features(*reply, "f1"); // with an empty hit list this checks that names and values are both empty
+}
+
TEST("require that matching also returns hits when only bitvector is used (multi-threaded)") {
for (size_t threads = 1; threads <= 16; ++threads) {
MyWorld world;
@@ -645,30 +708,36 @@ TEST("require that summary features are filled") {
world.basicResults();
DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo");
FeatureSet::SP fs = world.getSummaryFeatures(req);
- const FeatureSet::Value * f = NULL;
- EXPECT_EQUAL(4u, fs->numFeatures());
+ const FeatureSet::Value * f = nullptr;
+ EXPECT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
- EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]);
- EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]);
- EXPECT_EQUAL("value(100)", fs->getNames()[3]);
- EXPECT_EQUAL(2u, fs->numDocs());
+ EXPECT_EQUAL("matches(f1)", fs->getNames()[1]);
+ EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]);
+ EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]);
+ EXPECT_EQUAL("value(100)", fs->getNames()[4]);
+ EXPECT_EQUAL(3u, fs->numDocs());
f = fs->getFeaturesByDocId(10);
- EXPECT_TRUE(f != NULL);
+ EXPECT_TRUE(f != nullptr);
EXPECT_EQUAL(10, f[0].as_double());
- EXPECT_EQUAL(100, f[3].as_double());
+ EXPECT_EQUAL(1, f[1].as_double());
+ EXPECT_EQUAL(100, f[4].as_double());
f = fs->getFeaturesByDocId(15);
- EXPECT_TRUE(f == NULL);
+ EXPECT_TRUE(f != nullptr);
+ EXPECT_EQUAL(15, f[0].as_double());
+ EXPECT_EQUAL(0, f[1].as_double());
+ EXPECT_EQUAL(100, f[4].as_double());
f = fs->getFeaturesByDocId(30);
- EXPECT_TRUE(f != NULL);
+ EXPECT_TRUE(f != nullptr);
EXPECT_EQUAL(30, f[0].as_double());
- EXPECT_EQUAL(100, f[3].as_double());
- EXPECT_TRUE(f[1].is_double());
- EXPECT_TRUE(!f[1].is_data());
- EXPECT_EQUAL(f[1].as_double(), 3.0); // 0 + 1 + 2
- EXPECT_TRUE(!f[2].is_double());
- EXPECT_TRUE(f[2].is_data());
+ EXPECT_EQUAL(1, f[1].as_double());
+ EXPECT_TRUE(f[2].is_double());
+ EXPECT_TRUE(!f[2].is_data());
+ EXPECT_EQUAL(f[2].as_double(), 3.0); // 0 + 1 + 2
+ EXPECT_TRUE(!f[3].is_double());
+ EXPECT_TRUE(f[3].is_data());
+ EXPECT_EQUAL(100, f[4].as_double());
{
- nbostream buf(f[2].as_data().data, f[2].as_data().size);
+ nbostream buf(f[3].as_data().data, f[3].as_data().size);
auto actual = spec_from_value(*SimpleValue::from_stream(buf));
auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2);
EXPECT_EQUAL(actual, expect);
@@ -681,17 +750,18 @@ TEST("require that rank features are filled") {
world.basicResults();
DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo");
FeatureSet::SP fs = world.getRankFeatures(req);
- const FeatureSet::Value * f = NULL;
+ const FeatureSet::Value * f = nullptr;
EXPECT_EQUAL(1u, fs->numFeatures());
EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]);
- EXPECT_EQUAL(2u, fs->numDocs());
+ EXPECT_EQUAL(3u, fs->numDocs());
f = fs->getFeaturesByDocId(10);
- EXPECT_TRUE(f != NULL);
+ EXPECT_TRUE(f != nullptr);
EXPECT_EQUAL(20, f[0].as_double());
f = fs->getFeaturesByDocId(15);
- EXPECT_TRUE(f == NULL);
+ EXPECT_TRUE(f != nullptr);
+ EXPECT_EQUAL(30, f[0].as_double());
f = fs->getFeaturesByDocId(30);
- EXPECT_TRUE(f != NULL);
+ EXPECT_TRUE(f != nullptr);
EXPECT_EQUAL(60, f[0].as_double());
}
@@ -727,29 +797,42 @@ TEST("require that getSummaryFeatures can use cached query setup") {
docsum_request->hits.back().docid = 30;
FeatureSet::SP fs = world.getSummaryFeatures(docsum_request);
- ASSERT_EQUAL(4u, fs->numFeatures());
+ ASSERT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
- EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]);
- EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]);
- EXPECT_EQUAL("value(100)", fs->getNames()[3]);
+ EXPECT_EQUAL("matches(f1)", fs->getNames()[1]);
+ EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]);
+ EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]);
+ EXPECT_EQUAL("value(100)", fs->getNames()[4]);
ASSERT_EQUAL(1u, fs->numDocs());
const auto *f = fs->getFeaturesByDocId(30);
ASSERT_TRUE(f);
EXPECT_EQUAL(30, f[0].as_double());
- EXPECT_EQUAL(100, f[3].as_double());
+ EXPECT_EQUAL(100, f[4].as_double());
// getSummaryFeatures can be called multiple times.
fs = world.getSummaryFeatures(docsum_request);
- ASSERT_EQUAL(4u, fs->numFeatures());
+ ASSERT_EQUAL(5u, fs->numFeatures());
EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]);
- EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]);
- EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]);
- EXPECT_EQUAL("value(100)", fs->getNames()[3]);
+ EXPECT_EQUAL("matches(f1)", fs->getNames()[1]);
+ EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]);
+ EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]);
+ EXPECT_EQUAL("value(100)", fs->getNames()[4]);
ASSERT_EQUAL(1u, fs->numDocs());
f = fs->getFeaturesByDocId(30);
ASSERT_TRUE(f);
EXPECT_EQUAL(30, f[0].as_double());
- EXPECT_EQUAL(100, f[3].as_double());
+ EXPECT_EQUAL(100, f[4].as_double());
+}
+
+double count_f1_matches(FeatureSet &fs) { // returns the sum of feature 1 ("matches(f1)") over all docs in fs
+ ASSERT_TRUE(fs.getNames().size() > 1); // there must be at least two names, so getNames()[1] below is valid
+ ASSERT_EQUAL(fs.getNames()[1], "matches(f1)"); // make sure index 1 really is the feature being summed
+ double sum = 0.0;
+ for (size_t i = 0; i < fs.numDocs(); ++i) {
+ auto *f = fs.getFeaturesByIndex(i); // feature values for the i-th doc in the set
+ sum += f[1].as_double();
+ }
+ return sum;
}
TEST("require that getSummaryFeatures prefers cached query setup") {
@@ -765,16 +848,18 @@ TEST("require that getSummaryFeatures prefers cached query setup") {
req->sessionId = request->sessionId;
req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true");
FeatureSet::SP fs = world.getSummaryFeatures(req);
- EXPECT_EQUAL(4u, fs->numFeatures());
- ASSERT_EQUAL(0u, fs->numDocs()); // "spread" has no hits
+ EXPECT_EQUAL(5u, fs->numFeatures());
+ EXPECT_EQUAL(3u, fs->numDocs());
+ EXPECT_EQUAL(0.0, count_f1_matches(*fs)); // "spread" has no hits
// Empty cache
auto pruneTime = vespalib::steady_clock::now() + 600s;
world.sessionManager->pruneTimedOutSessions(pruneTime);
fs = world.getSummaryFeatures(req);
- EXPECT_EQUAL(4u, fs->numFeatures());
- ASSERT_EQUAL(2u, fs->numDocs()); // "foo" has two hits
+ EXPECT_EQUAL(5u, fs->numFeatures());
+ EXPECT_EQUAL(3u, fs->numDocs());
+ EXPECT_EQUAL(2.0, count_f1_matches(*fs)); // "foo" has two hits
}
TEST("require that match params are set up straight with ranking on") {