diff options
author | Håvard Pettersen <havardpe@oath.com> | 2019-06-13 16:19:28 +0000 |
---|---|---|
committer | Håvard Pettersen <havardpe@oath.com> | 2019-06-13 16:19:28 +0000 |
commit | bec261866af1a690e9f0ed43bc4f4bd42dc08d3a (patch) | |
tree | 81efc9957ee2990f59a2f7dd05bab9b35f60d479 /streamingvisitors/src | |
parent | 534cf911ca025f17961979f1d52259b823749d62 (diff) |
populate feature sets with serialized tensors
Diffstat (limited to 'streamingvisitors/src')
-rw-r--r-- | streamingvisitors/src/tests/hitcollector/hitcollector.cpp | 68 | ||||
-rw-r--r-- | streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp | 21 |
2 files changed, 70 insertions, 19 deletions
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp index 3ff01cada85..9650834d0f1 100644 --- a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp +++ b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp @@ -6,12 +6,22 @@ #include <vespa/searchlib/fef/matchdata.h> #include <vespa/searchlib/fef/feature_resolver.h> #include <vespa/searchvisitor/hitcollector.h> +#include <vespa/eval/eval/value.h> +#include <vespa/eval/eval/tensor_spec.h> +#include <vespa/eval/tensor/default_tensor_engine.h> +#include <vespa/vespalib/objects/nbostream.h> using namespace document; using namespace search::fef; using namespace vespalib; using namespace vdslib; using namespace vsm; +using vespalib::nbostream; +using vespalib::eval::Value; +using vespalib::eval::DoubleValue; +using vespalib::eval::TensorSpec; +using vespalib::tensor::DefaultTensorEngine; + namespace storage { @@ -226,26 +236,38 @@ HitCollectorTest::testEmpty() class MyRankProgram : public HitCollector::IRankProgram { private: + Value::UP _boxed_double; + Value::UP _tensor; NumberOrObject _fooValue; NumberOrObject _barValue; + NumberOrObject _bazValue; public: MyRankProgram() - : _fooValue(), - _barValue() + : _boxed_double(), + _tensor(), + _fooValue(), + _barValue(), + _bazValue() {} - virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override { + ~MyRankProgram(); + virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override { + _boxed_double = std::make_unique<DoubleValue>(docid + 30); + _tensor = DefaultTensorEngine::ref().from_spec(TensorSpec("tensor(x{})").add({{"x", "a"}}, docid + 20)); _fooValue.as_number = docid + 10; - _barValue.as_number = docid + 30; + _barValue.as_object = *_boxed_double; + _bazValue.as_object = *_tensor; } FeatureResolver get_resolver() { FeatureResolver resolver(2); resolver.add("foo", LazyValue(&_fooValue), false); - resolver.add("bar", LazyValue(&_barValue), false); + resolver.add("bar", LazyValue(&_barValue), true); + resolver.add("baz", LazyValue(&_bazValue), true); return resolver; } }; +MyRankProgram::~MyRankProgram() = default; void HitCollectorTest::testFeatureSet() @@ -262,28 +284,42 @@ HitCollectorTest::testFeatureSet() FeatureResolver resolver(rankProgram.get_resolver()); search::FeatureSet::SP sf = hc.getFeatureSet(rankProgram, resolver); - EXPECT_EQUAL(sf->getNames().size(), 2u); + EXPECT_EQUAL(sf->getNames().size(), 3u); EXPECT_EQUAL(sf->getNames()[0], "foo"); EXPECT_EQUAL(sf->getNames()[1], "bar"); - EXPECT_EQUAL(sf->numFeatures(), 2u); + EXPECT_EQUAL(sf->getNames()[2], "baz"); + EXPECT_EQUAL(sf->numFeatures(), 3u); EXPECT_EQUAL(sf->numDocs(), 3u); { - const search::feature_t * f = sf->getFeaturesByDocId(1); + const auto * f = sf->getFeaturesByDocId(1); ASSERT_TRUE(f != NULL); - EXPECT_EQUAL(f[0], 11); // 10 + docId - EXPECT_EQUAL(f[1], 31); // 30 + docId + EXPECT_EQUAL(f[0].as_double(), 11); // 10 + docId + EXPECT_EQUAL(f[1].as_double(), 31); // 30 + docId } { - const search::feature_t * f = sf->getFeaturesByDocId(3); + const auto * f = sf->getFeaturesByDocId(3); ASSERT_TRUE(f != NULL); - EXPECT_EQUAL(f[0], 13); - EXPECT_EQUAL(f[1], 33); + EXPECT_TRUE(f[0].is_double()); + EXPECT_TRUE(!f[0].is_data()); + EXPECT_EQUAL(f[0].as_double(), 13); + EXPECT_TRUE(f[1].is_double()); + EXPECT_TRUE(!f[1].is_data()); + EXPECT_EQUAL(f[1].as_double(), 33); + EXPECT_TRUE(!f[2].is_double()); + EXPECT_TRUE(f[2].is_data()); + { + auto &engine = DefaultTensorEngine::ref(); + nbostream buf(f[2].as_data().data, f[2].as_data().size); + auto actual = engine.to_spec(*engine.decode(buf)); + auto expect = TensorSpec("tensor(x{})").add({{"x", "a"}}, 23); + EXPECT_EQUAL(actual, expect); + } } { - const search::feature_t * f = sf->getFeaturesByDocId(4); + const auto * f = sf->getFeaturesByDocId(4); ASSERT_TRUE(f != NULL); - EXPECT_EQUAL(f[0], 14); - EXPECT_EQUAL(f[1], 34); + EXPECT_EQUAL(f[0].as_double(), 14); + EXPECT_EQUAL(f[1].as_double(), 34); } ASSERT_TRUE(sf->getFeaturesByDocId(0) == NULL); ASSERT_TRUE(sf->getFeaturesByDocId(2) == NULL); diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index a9b09cd7089..ce0cd967e06 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -4,6 +4,9 @@ #include <vespa/searchlib/fef/feature_resolver.h> #include <vespa/vespalib/util/stringfmt.h> #include <algorithm> +#include <vespa/eval/eval/tensor.h> +#include <vespa/eval/eval/tensor_engine.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/log/log.h> LOG_SETUP(".searchvisitor.hitcollector"); @@ -156,10 +159,22 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram, for (const Hit & hit : _hits) { rankProgram.run(hit.getDocId(), hit.getMatchData()); uint32_t docId = hit.getDocId(); - search::feature_t * f = retval->getFeaturesByIndex(retval->addDocId(docId)); + auto * f = retval->getFeaturesByIndex(retval->addDocId(docId)); for (uint32_t j = 0; j < names.size(); ++j) { - f[j] = resolver.resolve(j).as_number(docId); - LOG(debug, "getFeatureSet: lDocId(%u), '%s': %f", docId, names[j].c_str(), f[j]); + if (resolver.is_object(j)) { + auto obj = resolver.resolve(j).as_object(docId); + if (const auto *tensor = obj.get().as_tensor()) { + vespalib::nbostream buf; + tensor->engine().encode(*tensor, buf); + f[j].set_data(vespalib::Memory(buf.peek(), buf.size())); + } else { + f[j].set_double(obj.get().as_double()); + } + } else { + f[j].set_double(resolver.resolve(j).as_number(docId)); + } + LOG(debug, "getFeatureSet: lDocId(%u), '%s': %f %s", docId, names[j].c_str(), f[j].as_double(), + f[j].is_data() ? "[tensor]" : ""); } } return retval; |