From 927ba66e28c4db79201c849d88518863dda8a040 Mon Sep 17 00:00:00 2001 From: Håvard Pettersen Date: Thu, 13 Jun 2019 13:18:21 +0000 Subject: support binary values in feature sets this will be used to store serialized tensors --- .../common/summaryfeatures/summaryfeatures.cpp | 93 ++++++++++++---------- .../src/vespa/searchlib/common/featureset.cpp | 4 +- searchlib/src/vespa/searchlib/common/featureset.h | 31 +++++++- 3 files changed, 78 insertions(+), 50 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp index 0dc43898441..646b2b818b3 100644 --- a/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp +++ b/searchlib/src/tests/common/summaryfeatures/summaryfeatures.cpp @@ -5,6 +5,7 @@ LOG_SETUP("summaryfeatures_test"); #include using namespace search; +using vespalib::Memory; TEST_SETUP(Test); @@ -43,34 +44,34 @@ Test::Main() EXPECT_EQUAL(sf.addDocId(40), 3u); EXPECT_EQUAL(sf.addDocId(50), 4u); EXPECT_EQUAL(sf.numDocs(), 5u); - feature_t *f; - const feature_t *cf; + FeatureSet::Value *f; + const FeatureSet::Value *cf; f = sf.getFeaturesByIndex(0); ASSERT_TRUE(f != 0); - f[0] = 11.0; - f[1] = 12.0; - f[2] = 13.0; + f[0].set_double(11.0); + f[1].set_double(12.0); + f[2].set_double(13.0); f = sf.getFeaturesByIndex(1); ASSERT_TRUE(f != 0); - f[0] = 21.0; - f[1] = 22.0; - f[2] = 23.0; + f[0].set_double(21.0); + f[1].set_double(22.0); + f[2].set_double(23.0); f = sf.getFeaturesByIndex(2); ASSERT_TRUE(f != 0); - f[0] = 31.0; - f[1] = 32.0; - f[2] = 33.0; + f[0].set_double(31.0); + f[1].set_double(32.0); + f[2].set_double(33.0); f = sf.getFeaturesByIndex(3); ASSERT_TRUE(f != 0); - f[0] = 41.0; - f[1] = 42.0; - f[2] = 43.0; + f[0].set_double(41.0); + f[1].set_data(Memory("test", 4)); + f[2].set_double(43.0); f = sf.getFeaturesByIndex(4); ASSERT_TRUE(f != 0); - f[0] = 51.0; - f[1] = 52.0; - f[2] = 53.0; - 
EXPECT_TRUE(sf.getFeaturesByIndex(5) == 0); + f[0].set_double(51.0); + f[1].set_double(52.0); + f[2].set_double(53.0); + EXPECT_TRUE(sf.getFeaturesByIndex(5) == nullptr); { std::vector docs; EXPECT_TRUE(sf.contains(docs)); @@ -107,45 +108,49 @@ Test::Main() } { cf = sf.getFeaturesByDocId(10); - ASSERT_TRUE(cf != 0); - EXPECT_APPROX(cf[0], 11.0, 10e-6); - EXPECT_APPROX(cf[1], 12.0, 10e-6); - EXPECT_APPROX(cf[2], 13.0, 10e-6); + ASSERT_TRUE(cf != nullptr); + EXPECT_APPROX(cf[0].as_double(), 11.0, 10e-6); + EXPECT_APPROX(cf[1].as_double(), 12.0, 10e-6); + EXPECT_APPROX(cf[2].as_double(), 13.0, 10e-6); } { cf = sf.getFeaturesByDocId(20); - ASSERT_TRUE(cf != 0); - EXPECT_APPROX(cf[0], 21.0, 10e-6); - EXPECT_APPROX(cf[1], 22.0, 10e-6); - EXPECT_APPROX(cf[2], 23.0, 10e-6); + ASSERT_TRUE(cf != nullptr); + EXPECT_APPROX(cf[0].as_double(), 21.0, 10e-6); + EXPECT_APPROX(cf[1].as_double(), 22.0, 10e-6); + EXPECT_APPROX(cf[2].as_double(), 23.0, 10e-6); } { cf = sf.getFeaturesByDocId(30); - ASSERT_TRUE(cf != 0); - EXPECT_APPROX(cf[0], 31.0, 10e-6); - EXPECT_APPROX(cf[1], 32.0, 10e-6); - EXPECT_APPROX(cf[2], 33.0, 10e-6); + ASSERT_TRUE(cf != nullptr); + EXPECT_APPROX(cf[0].as_double(), 31.0, 10e-6); + EXPECT_APPROX(cf[1].as_double(), 32.0, 10e-6); + EXPECT_APPROX(cf[2].as_double(), 33.0, 10e-6); } { cf = sf.getFeaturesByDocId(40); - ASSERT_TRUE(cf != 0); - EXPECT_APPROX(cf[0], 41.0, 10e-6); - EXPECT_APPROX(cf[1], 42.0, 10e-6); - EXPECT_APPROX(cf[2], 43.0, 10e-6); + ASSERT_TRUE(cf != nullptr); + EXPECT_TRUE(cf[0].is_double()); + EXPECT_TRUE(!cf[0].is_data()); + EXPECT_EQUAL(cf[0].as_double(), 41.0); + EXPECT_TRUE(!cf[1].is_double()); + EXPECT_TRUE(cf[1].is_data()); + EXPECT_EQUAL(cf[1].as_data(), Memory("test", 4)); + EXPECT_EQUAL(cf[2].as_double(), 43.0); } { cf = sf.getFeaturesByDocId(50); - ASSERT_TRUE(cf != 0); - EXPECT_APPROX(cf[0], 51.0, 10e-6); - EXPECT_APPROX(cf[1], 52.0, 10e-6); - EXPECT_APPROX(cf[2], 53.0, 10e-6); + ASSERT_TRUE(cf != nullptr); + 
EXPECT_APPROX(cf[0].as_double(), 51.0, 10e-6); + EXPECT_APPROX(cf[1].as_double(), 52.0, 10e-6); + EXPECT_APPROX(cf[2].as_double(), 53.0, 10e-6); } - EXPECT_TRUE(sf.getFeaturesByDocId(5) == 0); - EXPECT_TRUE(sf.getFeaturesByDocId(15) == 0); - EXPECT_TRUE(sf.getFeaturesByDocId(25) == 0); - EXPECT_TRUE(sf.getFeaturesByDocId(35) == 0); - EXPECT_TRUE(sf.getFeaturesByDocId(45) == 0); - EXPECT_TRUE(sf.getFeaturesByDocId(55) == 0); + EXPECT_TRUE(sf.getFeaturesByDocId(5) == nullptr); + EXPECT_TRUE(sf.getFeaturesByDocId(15) == nullptr); + EXPECT_TRUE(sf.getFeaturesByDocId(25) == nullptr); + EXPECT_TRUE(sf.getFeaturesByDocId(35) == nullptr); + EXPECT_TRUE(sf.getFeaturesByDocId(45) == nullptr); + EXPECT_TRUE(sf.getFeaturesByDocId(55) == nullptr); } TEST_DONE(); } diff --git a/searchlib/src/vespa/searchlib/common/featureset.cpp b/searchlib/src/vespa/searchlib/common/featureset.cpp index 07b04f2f675..adf24196200 100644 --- a/searchlib/src/vespa/searchlib/common/featureset.cpp +++ b/searchlib/src/vespa/searchlib/common/featureset.cpp @@ -59,7 +59,7 @@ FeatureSet::contains(const std::vector &docIds) const return true; } -feature_t * +FeatureSet::Value * FeatureSet::getFeaturesByIndex(uint32_t idx) { if (idx >= _docIds.size()) { @@ -68,7 +68,7 @@ FeatureSet::getFeaturesByIndex(uint32_t idx) return &(_values[idx * _names.size()]); } -const feature_t * +const FeatureSet::Value * FeatureSet::getFeaturesByDocId(uint32_t docId) const { uint32_t low = 0; diff --git a/searchlib/src/vespa/searchlib/common/featureset.h b/searchlib/src/vespa/searchlib/common/featureset.h index f57cf918ba5..1ec662685a7 100644 --- a/searchlib/src/vespa/searchlib/common/featureset.h +++ b/searchlib/src/vespa/searchlib/common/featureset.h @@ -4,6 +4,7 @@ #include "feature.h" #include +#include #include #include @@ -16,12 +17,34 @@ namespace search { class FeatureSet { public: + class Value { + private: + std::vector _data; + double _value; + public: + bool operator==(const Value &rhs) const { + return ((_data == 
rhs._data) && (_value == rhs._value)); + } + bool is_double() const { return _data.empty(); } + bool is_data() const { return !_data.empty(); } + double as_double() const { return _value; } + vespalib::Memory as_data() const { return vespalib::Memory(&_data[0], _data.size()); } + void set_double(double value) { + _data.clear(); + _value = value; + } + void set_data(vespalib::Memory data) { + _data.assign(data.data, data.data + data.size); + _value = 0.0; + } + }; + typedef vespalib::string string; typedef std::vector StringVector; private: StringVector _names; - std::vector _docIds; - std::vector _values; + std::vector _docIds; + std::vector _values; FeatureSet(const FeatureSet &); FeatureSet & operator=(const FeatureSet &); @@ -112,7 +135,7 @@ public: * @return pointer to features * @param idx index into docid array **/ - feature_t *getFeaturesByIndex(uint32_t idx); + Value *getFeaturesByIndex(uint32_t idx); /** * Obtain the feature values belonging to a document based on the @@ -122,7 +145,7 @@ public: * @return pointer to features * @param docId docid value **/ - const feature_t *getFeaturesByDocId(uint32_t docId) const; + const Value *getFeaturesByDocId(uint32_t docId) const; }; } // namespace search -- cgit v1.2.3