diff options
author | Håvard Pettersen <havardpe@oath.com> | 2021-11-03 16:00:03 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2021-11-04 15:11:32 +0000 |
commit | 2b2a16ac12b6fd40008bac37d59ec6fc89f66539 (patch) | |
tree | 3559cd7f934940a4fb2dc7cb1299133b0acf6462 /searchcore/src/tests/proton/matching | |
parent | 6ebe77e2ceebd37aa26aa762f4c608fee22c1b40 (diff) |
calculate match features
+ factor out feature extraction
+ improve summary feature testing
+ extract returned docids with ordering
Diffstat (limited to 'searchcore/src/tests/proton/matching')
-rw-r--r-- | searchcore/src/tests/proton/matching/matching_test.cpp | 159 |
1 files changed, 122 insertions, 37 deletions
diff --git a/searchcore/src/tests/proton/matching/matching_test.cpp b/searchcore/src/tests/proton/matching/matching_test.cpp index d690fb29795..5d084a2448f 100644 --- a/searchcore/src/tests/proton/matching/matching_test.cpp +++ b/searchcore/src/tests/proton/matching/matching_test.cpp @@ -143,10 +143,11 @@ struct MyWorld { config.add(indexproperties::rank::FirstPhase::NAME, "attribute(a1)"); config.add(indexproperties::hitcollector::HeapSize::NAME, (vespalib::asciistream() << heapSize).str()); config.add(indexproperties::hitcollector::ArraySize::NAME, (vespalib::asciistream() << arraySize).str()); - config.add(indexproperties::summary::Feature::NAME, "attribute(a1)"); + config.add(indexproperties::summary::Feature::NAME, "matches(f1)"); config.add(indexproperties::summary::Feature::NAME, "rankingExpression(\"reduce(tensor(x[3])(x),sum)\")"); config.add(indexproperties::summary::Feature::NAME, "rankingExpression(\"tensor(x[3])(x)\")"); config.add(indexproperties::summary::Feature::NAME, "value(100)"); + config.add(indexproperties::summary::Feature::NAME, " attribute ( a1 ) "); // will be sorted and normalized config.add(indexproperties::dump::IgnoreDefaultFeatures::NAME, "true"); config.add(indexproperties::dump::Feature::NAME, "attribute(a2)"); @@ -211,6 +212,44 @@ struct MyWorld { config.import(cfg); } + void setup_match_features() { + config.add(indexproperties::match::Feature::NAME, "attribute(a1)"); + config.add(indexproperties::match::Feature::NAME, "attribute(a2)"); + config.add(indexproperties::match::Feature::NAME, "matches(a1)"); + config.add(indexproperties::match::Feature::NAME, "matches(f1)"); + config.add(indexproperties::match::Feature::NAME, "rankingExpression(\"tensor(x[3])(x)\")"); + } + + static void verify_match_features(SearchReply &reply, const vespalib::string &matched_field) { + if (reply.hits.empty()) { + EXPECT_EQUAL(reply.match_features.names.size(), 0u); + EXPECT_EQUAL(reply.match_features.values.size(), 0u); + } else { + ASSERT_EQUAL(reply.match_features.names.size(), 5u); + EXPECT_EQUAL(reply.match_features.names[0], "attribute(a1)"); + EXPECT_EQUAL(reply.match_features.names[1], "attribute(a2)"); + EXPECT_EQUAL(reply.match_features.names[2], "matches(a1)"); + EXPECT_EQUAL(reply.match_features.names[3], "matches(f1)"); + EXPECT_EQUAL(reply.match_features.names[4], "rankingExpression(\"tensor(x[3])(x)\")"); + ASSERT_EQUAL(reply.match_features.values.size(), 5 * reply.hits.size()); + for (size_t i = 0; i < reply.hits.size(); ++i) { + const auto *f = &reply.match_features.values[i * 5]; + EXPECT_GREATER(f[0].as_double(), 0.0); + EXPECT_GREATER(f[1].as_double(), 0.0); + EXPECT_EQUAL(f[0].as_double() * 2, f[1].as_double()); + EXPECT_EQUAL(f[2].as_double(), double(matched_field == "a1")); + EXPECT_EQUAL(f[3].as_double(), double(matched_field == "f1")); + EXPECT_TRUE(f[4].is_data()); + { + nbostream buf(f[4].as_data().data, f[4].as_data().size); + auto actual = spec_from_value(*SimpleValue::from_stream(buf)); + auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); + EXPECT_EQUAL(actual, expect); + } + } + } + } + void setup_match_phase_limiting(const vespalib::string &attribute, size_t max_hits, bool descending) { inject_match_phase_limiting(config, attribute, max_hits, descending); @@ -442,6 +481,30 @@ TEST("require that matching is performed (multi-threaded)") { } } +TEST("require that match features are calculated (multi-threaded)") { + for (size_t threads = 1; threads <= 16; ++threads) { + MyWorld world; + world.basicSetup(); + world.basicResults(); + world.setup_match_features(); + SearchRequest::SP request = world.createSimpleRequest("f1", "spread"); + SearchReply::UP reply = world.performSearch(request, threads); + EXPECT_GREATER(reply->hits.size(), 0u); + world.verify_match_features(*reply, "f1"); + } +} + +TEST("require that no hits gives no match feature names") { + MyWorld world; + world.basicSetup(); + world.basicResults(); + world.setup_match_features(); + SearchRequest::SP request = world.createSimpleRequest("f1", "not_found"); + SearchReply::UP reply = world.performSearch(request, 1); + EXPECT_EQUAL(reply->hits.size(), 0u); + world.verify_match_features(*reply, "f1"); +} + TEST("require that matching also returns hits when only bitvector is used (multi-threaded)") { for (size_t threads = 1; threads <= 16; ++threads) { MyWorld world; @@ -645,30 +708,36 @@ TEST("require that summary features are filled") { world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getSummaryFeatures(req); - const FeatureSet::Value * f = NULL; - EXPECT_EQUAL(4u, fs->numFeatures()); + const FeatureSet::Value * f = nullptr; + EXPECT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]); - EXPECT_EQUAL("value(100)", fs->getNames()[3]); - EXPECT_EQUAL(2u, fs->numDocs()); + EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); + EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQUAL("value(100)", fs->getNames()[4]); + EXPECT_EQUAL(3u, fs->numDocs()); f = fs->getFeaturesByDocId(10); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(10, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); + EXPECT_EQUAL(1, f[1].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); f = fs->getFeaturesByDocId(15); - EXPECT_TRUE(f == NULL); + EXPECT_TRUE(f != nullptr); + EXPECT_EQUAL(15, f[0].as_double()); + EXPECT_EQUAL(0, f[1].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); f = fs->getFeaturesByDocId(30); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); - EXPECT_TRUE(f[1].is_double()); - EXPECT_TRUE(!f[1].is_data()); - EXPECT_EQUAL(f[1].as_double(), 3.0); // 0 + 1 + 2 - EXPECT_TRUE(!f[2].is_double()); - EXPECT_TRUE(f[2].is_data()); + EXPECT_EQUAL(1, f[1].as_double()); + EXPECT_TRUE(f[2].is_double()); + EXPECT_TRUE(!f[2].is_data()); + EXPECT_EQUAL(f[2].as_double(), 3.0); // 0 + 1 + 2 + EXPECT_TRUE(!f[3].is_double()); + EXPECT_TRUE(f[3].is_data()); + EXPECT_EQUAL(100, f[4].as_double()); { - nbostream buf(f[2].as_data().data, f[2].as_data().size); + nbostream buf(f[3].as_data().data, f[3].as_data().size); auto actual = spec_from_value(*SimpleValue::from_stream(buf)); auto expect = TensorSpec("tensor(x[3])").add({{"x", 0}}, 0).add({{"x", 1}}, 1).add({{"x", 2}}, 2); EXPECT_EQUAL(actual, expect); @@ -681,17 +750,18 @@ TEST("require that rank features are filled") { world.basicResults(); DocsumRequest::SP req = world.createSimpleDocsumRequest("f1", "foo"); FeatureSet::SP fs = world.getRankFeatures(req); - const FeatureSet::Value * f = NULL; + const FeatureSet::Value * f = nullptr; EXPECT_EQUAL(1u, fs->numFeatures()); EXPECT_EQUAL("attribute(a2)", fs->getNames()[0]); - EXPECT_EQUAL(2u, fs->numDocs()); + EXPECT_EQUAL(3u, fs->numDocs()); f = fs->getFeaturesByDocId(10); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(20, f[0].as_double()); f = fs->getFeaturesByDocId(15); - EXPECT_TRUE(f == NULL); + EXPECT_TRUE(f != nullptr); + EXPECT_EQUAL(30, f[0].as_double()); f = fs->getFeaturesByDocId(30); - EXPECT_TRUE(f != NULL); + EXPECT_TRUE(f != nullptr); EXPECT_EQUAL(60, f[0].as_double()); } @@ -727,29 +797,42 @@ TEST("require that getSummaryFeatures can use cached query setup") { docsum_request->hits.back().docid = 30; FeatureSet::SP fs = world.getSummaryFeatures(docsum_request); - ASSERT_EQUAL(4u, fs->numFeatures()); + ASSERT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]); - EXPECT_EQUAL("value(100)", fs->getNames()[3]); + EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); + EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQUAL("value(100)", fs->getNames()[4]); ASSERT_EQUAL(1u, fs->numDocs()); const auto *f = fs->getFeaturesByDocId(30); ASSERT_TRUE(f); EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); // getSummaryFeatures can be called multiple times. fs = world.getSummaryFeatures(docsum_request); - ASSERT_EQUAL(4u, fs->numFeatures()); + ASSERT_EQUAL(5u, fs->numFeatures()); EXPECT_EQUAL("attribute(a1)", fs->getNames()[0]); - EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[1]); - EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[2]); - EXPECT_EQUAL("value(100)", fs->getNames()[3]); + EXPECT_EQUAL("matches(f1)", fs->getNames()[1]); + EXPECT_EQUAL("rankingExpression(\"reduce(tensor(x[3])(x),sum)\")", fs->getNames()[2]); + EXPECT_EQUAL("rankingExpression(\"tensor(x[3])(x)\")", fs->getNames()[3]); + EXPECT_EQUAL("value(100)", fs->getNames()[4]); ASSERT_EQUAL(1u, fs->numDocs()); f = fs->getFeaturesByDocId(30); ASSERT_TRUE(f); EXPECT_EQUAL(30, f[0].as_double()); - EXPECT_EQUAL(100, f[3].as_double()); + EXPECT_EQUAL(100, f[4].as_double()); +} + +double count_f1_matches(FeatureSet &fs) { + ASSERT_TRUE(fs.getNames().size() > 1); + ASSERT_EQUAL(fs.getNames()[1], "matches(f1)"); + double sum = 0.0; + for (size_t i = 0; i < fs.numDocs(); ++i) { + auto *f = fs.getFeaturesByIndex(i); + sum += f[1].as_double(); + } + return sum; } TEST("require that getSummaryFeatures prefers cached query setup") { @@ -765,16 +848,18 @@ TEST("require that getSummaryFeatures prefers cached query setup") { req->sessionId = request->sessionId; req->propertiesMap.lookupCreate(search::MapNames::CACHES).add("query", "true"); FeatureSet::SP fs = world.getSummaryFeatures(req); - EXPECT_EQUAL(4u, fs->numFeatures()); - ASSERT_EQUAL(0u, fs->numDocs()); // "spread" has no hits + EXPECT_EQUAL(5u, fs->numFeatures()); + EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQUAL(0.0, count_f1_matches(*fs)); // "spread" has no hits // Empty cache auto pruneTime = vespalib::steady_clock::now() + 600s; world.sessionManager->pruneTimedOutSessions(pruneTime); fs = world.getSummaryFeatures(req); - EXPECT_EQUAL(4u, fs->numFeatures()); - ASSERT_EQUAL(2u, fs->numDocs()); // "foo" has two hits + EXPECT_EQUAL(5u, fs->numFeatures()); + EXPECT_EQUAL(3u, fs->numDocs()); + EXPECT_EQUAL(2.0, count_f1_matches(*fs)); // "foo" has two hits } TEST("require that match params are set up straight with ranking on") { |