diff options
author | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-01-18 14:53:31 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-01-18 14:59:59 +0000 |
commit | 2775b05f9e94fc1c4a3f217ad0a75bcc6cf67479 (patch) | |
tree | e0b84297a39e91b483ae9de49f38be7f5a501765 /searchcore | |
parent | ac728c6a77543ea618bee127221f950670e84eb8 (diff) |
Use serialized tensors in docsum blobs instead of slime objects.
Pass tensors as serialized tensors in slime version of docsum blob.
Diffstat (limited to 'searchcore')
5 files changed, 91 insertions, 41 deletions
diff --git a/searchcore/src/tests/proton/docsummary/docsummary.cpp b/searchcore/src/tests/proton/docsummary/docsummary.cpp index a0b947e11f8..d0705e7b538 100644 --- a/searchcore/src/tests/proton/docsummary/docsummary.cpp +++ b/searchcore/src/tests/proton/docsummary/docsummary.cpp @@ -26,6 +26,8 @@ #include <vespa/searchcore/proton/common/hw_info.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/config/helper/configgetter.hpp> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/log/log.h> LOG_SETUP("docsummary_test"); @@ -300,10 +302,18 @@ private: uint32_t id, uint32_t resultClassID); + void + assertTensor(const Tensor::UP &exp, + const std::string &fieldName, + const DocsumReply &reply, + uint32_t id, + uint32_t resultClassID); + bool assertSlime(const std::string &exp, const DocsumReply &reply, - uint32_t id); + uint32_t id, + bool relaxed = false); void requireThatAdapterHandlesAllFieldTypes(); @@ -400,8 +410,26 @@ Test::assertString(const std::string & exp, const std::string & fieldName, } +void +Test::assertTensor(const Tensor::UP & exp, const std::string & fieldName, + const DocsumReply & reply, + uint32_t id, uint32_t resultClassID) +{ + GeneralResultPtr res = getResult(reply, id, resultClassID); + const void *data = res->GetEntry(fieldName.c_str())->_stringval; + size_t len = res->GetEntry(fieldName.c_str())->_stringlen; + EXPECT_EQUAL(exp.get() == nullptr, len == 0u); + if (exp) { + vespalib::nbostream serialized(data, len); + Tensor::UP tensor = vespalib::tensor::TypedBinaryFormat::deserialize(serialized); + EXPECT_TRUE(tensor.get() != nullptr); + EXPECT_EQUAL(*exp, *tensor); + } +} + + bool -Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id) +Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id, bool relaxed) { const DocsumReply::Docsum & docsum = reply.docsums[id]; uint32_t classId; @@ -414,6 +442,14 @@ Test::assertSlime(const std::string &exp, const DocsumReply &reply, uint32_t id) size_t decodeRes = vespalib::slime::BinaryFormat::decode(serialized, slime); ASSERT_EQUAL(decodeRes, serialized.size); + if (relaxed) { + vespalib::slime::SimpleBuffer buf; + vespalib::slime::JsonFormat::encode(slime, buf, false); + vespalib::Slime tmpSlime; + size_t used = vespalib::slime::JsonFormat::decode(buf.get(), tmpSlime); + EXPECT_EQUAL(buf.get().size, used); + slime = std::move(tmpSlime); + } vespalib::Slime expSlime; size_t used = vespalib::slime::JsonFormat::decode(exp, expSlime); EXPECT_EQUAL(exp.size(), used); @@ -726,7 +762,7 @@ Test::requireThatAttributesAreUsed() endElement(). endField(). startAttributeField("bj"). - addTensor(createTensor({ {{}, 3} }, { "x", "y"})). + addTensor(createTensor({ {{{"x","f"},{"y","g"}}, 3} }, { "x", "y"})). endField(). endDocument(), 2); @@ -755,9 +791,8 @@ Test::requireThatAttributesAreUsed() *rep, 0, rclass)); EXPECT_TRUE(assertString("[[\"quux\",7],[\"qux\",6]]", "bi", *rep, 0, rclass)); - EXPECT_TRUE(assertString("{\"dimensions\":[\"x\",\"y\"]," - "\"cells\":[{\"address\":{},\"value\":3}]}", - "bj", *rep, 0, rclass)); + TEST_DO(assertTensor(createTensor({ {{{"x","f"},{"y","g"}}, 3} }, { "x", "y"}), + "bj", *rep, 0, rclass)); // empty doc EXPECT_TRUE(search::attribute::isUndefined<int32_t> @@ -771,7 +806,7 @@ Test::requireThatAttributesAreUsed() EXPECT_TRUE(assertString("[]", "bg", *rep, 1, rclass)); EXPECT_TRUE(assertString("[]", "bh", *rep, 1, rclass)); EXPECT_TRUE(assertString("[]", "bi", *rep, 1, rclass)); - EXPECT_TRUE(assertString("", "bj", *rep, 1, rclass)); + TEST_DO(assertTensor(Tensor::UP(), "bj", *rep, 1, rclass)); proton::IAttributeManager::SP attributeManager = dc._ddb->getReadySubDB()->getAttributeManager(); @@ -785,14 +820,13 @@ Test::requireThatAttributesAreUsed() attributeFieldWriter. execute("bj", [&]() { bjTensorAttr->setTensor(3, - *createTensor({ {{}, 4} }, { "x"})); + *createTensor({ {{{"x", "a"},{"y", "b"}}, 4} }, { "x"})); bjTensorAttr->commit(); }); attributeFieldWriter.sync(); DocsumReply::UP rep2 = dc._ddb->getDocsums(req); - EXPECT_TRUE(assertString("{\"dimensions\":[\"x\",\"y\"]," - "\"cells\":[{\"address\":{},\"value\":4}]}", - "bj", *rep2, 1, rclass)); + TEST_DO(assertTensor(createTensor({ {{{"x","a"},{"y","b"}}, 4} }, { "x", "y"}), + "bj", *rep2, 1, rclass)); DocsumRequest req3; req3.resultClassName = "class3"; @@ -802,9 +836,8 @@ Test::requireThatAttributesAreUsed() EXPECT_TRUE(assertSlime("{bd:[],be:[],bf:[],bg:[]," "bh:[],bi:[]," - "bj:{dimensions:['x','y']," - "cells:[{address:{},value:4.0}]}}", - *rep3, 0)); + "bj:'0x01020178017901016101624010000000000000'}", + *rep3, 0, true)); } diff --git a/searchcore/src/tests/proton/docsummary/summary.cfg b/searchcore/src/tests/proton/docsummary/summary.cfg index 52f300ae3e0..33fd90f4c82 100644 --- a/searchcore/src/tests/proton/docsummary/summary.cfg +++ b/searchcore/src/tests/proton/docsummary/summary.cfg @@ -85,7 +85,7 @@ classes[3].fields[7].type "jsonstring" classes[3].fields[8].name "bi" classes[3].fields[8].type "jsonstring" classes[3].fields[9].name "bj" -classes[3].fields[9].type "jsonstring" +classes[3].fields[9].type "tensor" classes[4].name "class4" classes[4].id 4 classes[4].fields[1] diff --git a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp index e9496093cfe..27a50c9c57f 100644 --- a/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp +++ b/searchcore/src/tests/proton/docsummary/summaryfieldconverter_test.cpp @@ -150,6 +150,7 @@ class Test : public vespalib::TestApp { void checkString(const string &str, const FieldValue *value); void checkData(const search::RawBuf &data, const FieldValue *value); + void checkTensor(const Tensor::UP &tensor, const FieldValue *value); template <unsigned int N> void checkArray(const char *(&str)[N], const FieldValue *value); void setSummaryField(const string &name); @@ -170,7 +171,7 @@ class Test : public vespalib::TestApp { void requireThatSearchDataTypeUsesDefaultDataTypes(); void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); void requireThatPredicateIsPrinted(); - void requireThatTensorIsPrinted(); + void requireThatTensorIsNotConverted(); const DocumentType &getDocType() const { return *_documentType; } Document makeDocument(); StringFieldValue annotateTerm(const string &term); @@ -245,7 +246,7 @@ Test::Main() TEST_CALL(requireThatSearchDataTypeUsesDefaultDataTypes()); TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); TEST_CALL(requireThatPredicateIsPrinted()); - TEST_CALL(requireThatTensorIsPrinted()); + TEST_CALL(requireThatTensorIsNotConverted()); TEST_DONE(); } @@ -430,6 +431,17 @@ void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); } +void Test::checkTensor(const Tensor::UP &tensor, const FieldValue *value) { + ASSERT_TRUE(value); + const TensorFieldValue *s = dynamic_cast<const TensorFieldValue *>(value); + ASSERT_TRUE(s); + const Tensor::UP &tvalue = s->getAsTensorPtr(); + EXPECT_EQUAL(tensor.get() != nullptr, tvalue.get() != nullptr); + if (tensor) { + EXPECT_EQUAL(*tensor, *tvalue); + } +} + template <unsigned int N> void Test::checkArray(const char *(&str)[N], const FieldValue *value) { ASSERT_TRUE(value); @@ -649,7 +661,7 @@ createTensor(const TensorCells &cells, const TensorDimensions &dimensions) { } void -Test::requireThatTensorIsPrinted() +Test::requireThatTensorIsNotConverted() { TensorFieldValue tensorFieldValue; tensorFieldValue = createTensor({ {{{"x", "4"}, {"y", "5"}}, 7} }, @@ -658,30 +670,17 @@ Test::requireThatTensorIsPrinted() doc.setRepo(*_documentRepo); doc.setValue("tensor", tensorFieldValue); - FieldBlock expect1("{ dimensions: [ 'x', 'y' ], cells: [" - "{ address: { x:'4', y:'5' }, value: 7.0 }" - "] }"); - - TEST_CALL(checkString(expect1.json, + TEST_CALL(checkTensor(createTensor({ {{{"x", "4"}, {"y", "5"}}, 7} }, + {"x", "y"}), SFC::convertSummaryField(false, *doc.getValue("tensor"), - false).get())); - TEST_CALL(checkData(expect1.binary, - SFC::convertSummaryField(false, - *doc.getValue("tensor"), - true).get())); + true).get())); doc.setValue("tensor", TensorFieldValue()); - FieldBlock expect2("{ }"); - - TEST_CALL(checkString(expect2.json, + TEST_CALL(checkTensor(Tensor::UP(), SFC::convertSummaryField(false, *doc.getValue("tensor"), - false).get())); - TEST_CALL(checkData(expect2.binary, - SFC::convertSummaryField(false, - *doc.getValue("tensor"), - true).get())); + true).get())); } } // namespace diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp index 855126bd064..4f61873b938 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/documentstoreadapter.cpp @@ -3,11 +3,16 @@ #include "documentstoreadapter.h" #include "summaryfieldconverter.h" #include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/vespalib/tensor/tensor.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/objects/nbostream.h> +#include <vespa/document/fieldvalue/tensorfieldvalue.h> #include <vespa/log/log.h> LOG_SETUP(".proton.docsummary.documentstoreadapter"); using namespace document; using namespace search::docsummary; +using vespalib::tensor::Tensor; namespace proton { @@ -76,6 +81,18 @@ DocumentStoreAdapter::writeField(const FieldValue &value, ResType type) std::pair<const char *, size_t> buf = value.getAsRaw(); return _resultPacker.AddLongData(buf.first, buf.second); } + case RES_TENSOR: + { + vespalib::nbostream serialized; + if (value.getClass().inherits(TensorFieldValue::classId)) { + const TensorFieldValue &tvalue = static_cast<const TensorFieldValue &>(value); + const std::unique_ptr<Tensor> &tensor = tvalue.getAsTensorPtr(); + if (tensor) { + vespalib::tensor::TypedBinaryFormat::serialize(serialized, *tensor); + } + } + return _resultPacker.AddSerializedTensor(serialized.peek(), serialized.size()); + } default: LOG(warning, "Unknown docsum field type: %s. Add empty field", diff --git a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp index 0c4d739be79..c74442bd72b 100644 --- a/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp +++ b/searchcore/src/vespa/searchcore/proton/docsummary/summaryfieldconverter.cpp @@ -42,6 +42,8 @@ #include <vespa/vespalib/data/slime/binary_format.h> #include <vespa/vespalib/data/slime/json_format.h> #include <vespa/vespalib/tensor/serialization/slime_binary_format.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> #include <vespa/vespalib/util/exceptions.h> @@ -472,7 +474,7 @@ class SummaryFieldValueConverter : protected ConstFieldValueVisitor } virtual void visit(const TensorFieldValue &value) override { - _field_value = _structuredFieldConverter.convert(value); + visitPrimitive(value); } public: @@ -632,12 +634,11 @@ class SlimeFiller : public ConstFieldValueVisitor { virtual void visit(const TensorFieldValue &value) override { const auto &tensor = value.getAsTensorPtr(); + vespalib::nbostream s; if (tensor) { - vespalib::tensor::SlimeBinaryFormat::serialize(_inserter, *tensor); - } else { - // No tensor value => empty object - _inserter.insertObject(); + vespalib::tensor::TypedBinaryFormat::serialize(s, *tensor); } + _inserter.insertData(vespalib::slime::Memory(s.peek(), s.size())); } public: |