summary refs log tree commit diff stats
path: root/streamingvisitors
diff options
context:
space:
mode:
author Håvard Pettersen <havardpe@oath.com> 2019-06-13 16:19:28 +0000
committer Håvard Pettersen <havardpe@oath.com> 2019-06-13 16:19:28 +0000
commit bec261866af1a690e9f0ed43bc4f4bd42dc08d3a (patch)
tree 81efc9957ee2990f59a2f7dd05bab9b35f60d479 /streamingvisitors
parent 534cf911ca025f17961979f1d52259b823749d62 (diff)
populate feature sets with serialized tensors
Diffstat (limited to 'streamingvisitors')
-rw-r--r-- streamingvisitors/src/tests/hitcollector/hitcollector.cpp 68
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp 21
2 files changed, 70 insertions, 19 deletions
diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp
index 3ff01cada85..9650834d0f1 100644
--- a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp
+++ b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp
@@ -6,12 +6,22 @@
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/searchlib/fef/feature_resolver.h>
#include <vespa/searchvisitor/hitcollector.h>
+#include <vespa/eval/eval/value.h>
+#include <vespa/eval/eval/tensor_spec.h>
+#include <vespa/eval/tensor/default_tensor_engine.h>
+#include <vespa/vespalib/objects/nbostream.h>
using namespace document;
using namespace search::fef;
using namespace vespalib;
using namespace vdslib;
using namespace vsm;
+using vespalib::nbostream;
+using vespalib::eval::Value;
+using vespalib::eval::DoubleValue;
+using vespalib::eval::TensorSpec;
+using vespalib::tensor::DefaultTensorEngine;
+
namespace storage {
@@ -226,26 +236,38 @@ HitCollectorTest::testEmpty()
class MyRankProgram : public HitCollector::IRankProgram
{
private:
+ Value::UP _boxed_double;
+ Value::UP _tensor;
NumberOrObject _fooValue;
NumberOrObject _barValue;
+ NumberOrObject _bazValue;
public:
MyRankProgram()
- : _fooValue(),
- _barValue()
+ : _boxed_double(),
+ _tensor(),
+ _fooValue(),
+ _barValue(),
+ _bazValue()
{}
- virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override {
+ ~MyRankProgram();
+ virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override {
+ _boxed_double = std::make_unique<DoubleValue>(docid + 30);
+ _tensor = DefaultTensorEngine::ref().from_spec(TensorSpec("tensor(x{})").add({{"x", "a"}}, docid + 20));
_fooValue.as_number = docid + 10;
- _barValue.as_number = docid + 30;
+ _barValue.as_object = *_boxed_double;
+ _bazValue.as_object = *_tensor;
}
FeatureResolver get_resolver() {
FeatureResolver resolver(2);
resolver.add("foo", LazyValue(&_fooValue), false);
- resolver.add("bar", LazyValue(&_barValue), false);
+ resolver.add("bar", LazyValue(&_barValue), true);
+ resolver.add("baz", LazyValue(&_bazValue), true);
return resolver;
}
};
+MyRankProgram::~MyRankProgram() = default;
void
HitCollectorTest::testFeatureSet()
@@ -262,28 +284,42 @@ HitCollectorTest::testFeatureSet()
FeatureResolver resolver(rankProgram.get_resolver());
search::FeatureSet::SP sf = hc.getFeatureSet(rankProgram, resolver);
- EXPECT_EQUAL(sf->getNames().size(), 2u);
+ EXPECT_EQUAL(sf->getNames().size(), 3u);
EXPECT_EQUAL(sf->getNames()[0], "foo");
EXPECT_EQUAL(sf->getNames()[1], "bar");
- EXPECT_EQUAL(sf->numFeatures(), 2u);
+ EXPECT_EQUAL(sf->getNames()[2], "baz");
+ EXPECT_EQUAL(sf->numFeatures(), 3u);
EXPECT_EQUAL(sf->numDocs(), 3u);
{
- const search::feature_t * f = sf->getFeaturesByDocId(1);
+ const auto * f = sf->getFeaturesByDocId(1);
ASSERT_TRUE(f != NULL);
- EXPECT_EQUAL(f[0], 11); // 10 + docId
- EXPECT_EQUAL(f[1], 31); // 30 + docId
+ EXPECT_EQUAL(f[0].as_double(), 11); // 10 + docId
+ EXPECT_EQUAL(f[1].as_double(), 31); // 30 + docId
}
{
- const search::feature_t * f = sf->getFeaturesByDocId(3);
+ const auto * f = sf->getFeaturesByDocId(3);
ASSERT_TRUE(f != NULL);
- EXPECT_EQUAL(f[0], 13);
- EXPECT_EQUAL(f[1], 33);
+ EXPECT_TRUE(f[0].is_double());
+ EXPECT_TRUE(!f[0].is_data());
+ EXPECT_EQUAL(f[0].as_double(), 13);
+ EXPECT_TRUE(f[1].is_double());
+ EXPECT_TRUE(!f[1].is_data());
+ EXPECT_EQUAL(f[1].as_double(), 33);
+ EXPECT_TRUE(!f[2].is_double());
+ EXPECT_TRUE(f[2].is_data());
+ {
+ auto &engine = DefaultTensorEngine::ref();
+ nbostream buf(f[2].as_data().data, f[2].as_data().size);
+ auto actual = engine.to_spec(*engine.decode(buf));
+ auto expect = TensorSpec("tensor(x{})").add({{"x", "a"}}, 23);
+ EXPECT_EQUAL(actual, expect);
+ }
}
{
- const search::feature_t * f = sf->getFeaturesByDocId(4);
+ const auto * f = sf->getFeaturesByDocId(4);
ASSERT_TRUE(f != NULL);
- EXPECT_EQUAL(f[0], 14);
- EXPECT_EQUAL(f[1], 34);
+ EXPECT_EQUAL(f[0].as_double(), 14);
+ EXPECT_EQUAL(f[1].as_double(), 34);
}
ASSERT_TRUE(sf->getFeaturesByDocId(0) == NULL);
ASSERT_TRUE(sf->getFeaturesByDocId(2) == NULL);
diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
index a9b09cd7089..ce0cd967e06 100644
--- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp
@@ -4,6 +4,9 @@
#include <vespa/searchlib/fef/feature_resolver.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <algorithm>
+#include <vespa/eval/eval/tensor.h>
+#include <vespa/eval/eval/tensor_engine.h>
+#include <vespa/vespalib/objects/nbostream.h>
#include <vespa/log/log.h>
LOG_SETUP(".searchvisitor.hitcollector");
@@ -156,10 +159,22 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram,
for (const Hit & hit : _hits) {
rankProgram.run(hit.getDocId(), hit.getMatchData());
uint32_t docId = hit.getDocId();
- search::feature_t * f = retval->getFeaturesByIndex(retval->addDocId(docId));
+ auto * f = retval->getFeaturesByIndex(retval->addDocId(docId));
for (uint32_t j = 0; j < names.size(); ++j) {
- f[j] = resolver.resolve(j).as_number(docId);
- LOG(debug, "getFeatureSet: lDocId(%u), '%s': %f", docId, names[j].c_str(), f[j]);
+ if (resolver.is_object(j)) {
+ auto obj = resolver.resolve(j).as_object(docId);
+ if (const auto *tensor = obj.get().as_tensor()) {
+ vespalib::nbostream buf;
+ tensor->engine().encode(*tensor, buf);
+ f[j].set_data(vespalib::Memory(buf.peek(), buf.size()));
+ } else {
+ f[j].set_double(obj.get().as_double());
+ }
+ } else {
+ f[j].set_double(resolver.resolve(j).as_number(docId));
+ }
+ LOG(debug, "getFeatureSet: lDocId(%u), '%s': %f %s", docId, names[j].c_str(), f[j].as_double(),
+ f[j].is_data() ? "[tensor]" : "");
}
}
return retval;