diff options
| | | |
|---|---|---|
| author | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-01-18 14:53:31 +0000 |
| committer | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-01-18 14:59:59 +0000 |
| commit | 2775b05f9e94fc1c4a3f217ad0a75bcc6cf67479 (patch) | |
| tree | e0b84297a39e91b483ae9de49f38be7f5a501765 /searchsummary/src | |
| parent | ac728c6a77543ea618bee127221f950670e84eb8 (diff) | |
Use serialized tensors in docsum blobs instead of slime objects.
Pass tensors as serialized tensors in slime version of docsum blob.
Diffstat (limited to 'searchsummary/src')
10 files changed, 58 insertions, 26 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp index b0704b2d148..cb8a3dc7680 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp @@ -8,7 +8,8 @@ #include "attributedfw.h" #include "docsumstate.h" #include <vespa/vespalib/tensor/tensor.h> -#include <vespa/vespalib/tensor/serialization/slime_binary_format.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/log/log.h> @@ -44,7 +45,13 @@ ResType inferType(const IAttributeVector & vec) { } else if (vec.isFloatingPointType()) { retval = (fw == sizeof(float)) ? RES_FLOAT : RES_DOUBLE; } else { - retval = RES_STRING; + BasicType::Type t = vec.getBasicType(); + switch (t) { + case BasicType::TENSOR: + retval = RES_TENSOR; + default: + retval = RES_STRING; + } } } } @@ -133,34 +140,27 @@ SingleAttrDFW::WriteField(uint32_t docid, target->append(s, slen); return (sizeof(slen) + slen); break; } - case RES_JSONSTRING: { + case RES_TENSOR: { + vespalib::nbostream str; BasicType::Type t = v.getBasicType(); switch (t) { case BasicType::TENSOR: { const tensor::TensorAttribute &tv = static_cast<const tensor::TensorAttribute &>(v); const auto tensor = tv.getTensor(docid); - vespalib::string str; if (tensor) { - auto slime = - vespalib::tensor::SlimeBinaryFormat::serialize(*tensor); - vespalib::slime::SimpleBuffer buf; - vespalib::slime::JsonFormat::encode(*slime, buf, true); - str = buf.get().make_string(); - } else { - // No tensor value => empty object - str = ""; + vespalib::tensor::TypedBinaryFormat::serialize(str, *tensor); } - uint32_t slen = str.size(); - target->append(&slen, sizeof(slen)); - target->append(str.c_str(), slen); - return (sizeof(slen) + slen); } default: break; - }; + } + uint32_t slen = 
str.size(); + target->append(&slen, sizeof(slen)); + target->append(str.peek(), slen); + return (sizeof(slen) + slen); } - /* FALLTHROUGH */ + case RES_JSONSTRING: case RES_XMLSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: @@ -222,7 +222,7 @@ SingleAttrDFW::insertField(uint32_t docid, target.insertLong(val); break; } - case RES_JSONSTRING: { + case RES_TENSOR: { BasicType::Type t = v.getBasicType(); switch (t) { case BasicType::TENSOR: { @@ -230,17 +230,17 @@ SingleAttrDFW::insertField(uint32_t docid, static_cast<const tensor::TensorAttribute &>(v); const auto tensor = tv.getTensor(docid); if (tensor) { - vespalib::tensor::SlimeBinaryFormat::serialize(target, *tensor); - } else { - // No tensor value => no object + vespalib::nbostream str; + vespalib::tensor::TypedBinaryFormat::serialize(str, *tensor); + target.insertData(vespalib::slime::Memory(str.peek(), str.size())); } - return; } default: - break; - }; + ; + } } - /* FALLTHROUGH */ + break; + case RES_JSONSTRING: case RES_XMLSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp index be188e9a871..446ca347bff 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp @@ -164,6 +164,7 @@ CopyDFW::insertField(uint32_t /*docid*/, target.insertString(value); break; } + case RES_TENSOR: case RES_LONG_DATA: case RES_DATA: { uint32_t len; @@ -259,6 +260,7 @@ CopyDFW::WriteField(uint32_t docid, break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t flen = entry->_len; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp index a837fca3bdb..27b20f8b1bd 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp +++ 
b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp @@ -97,6 +97,7 @@ DocsumFormat::addEmpty(ResType type, search::RawBuf &target) case RES_LONG_DATA: case RES_XMLSTRING: case RES_JSONSTRING: + case RES_TENSOR: case RES_FEATUREDATA: return addLongData(target, "", 0); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index f790d38e70e..4273c89d7f5 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -151,6 +151,7 @@ DynamicDocsumWriter::RepackDocsum(GeneralResult *gres, written += slen; break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t flen = entry->_len; uint32_t dlen = entry->_get_length(); @@ -304,6 +305,7 @@ static void convertEntry(GetDocsumsState *state, inserter.insertString(Memory(ptr, len)); break; case RES_DATA: + case RES_TENSOR: case RES_LONG_DATA: entry->_resolve_field(&ptr, &len, &state->_docSumFieldSpace); inserter.insertData(Memory(ptr, len)); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index e35408a796c..58a245a364a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -31,6 +31,7 @@ enum ResType { RES_LONG_DATA, RES_XMLSTRING, RES_JSONSTRING, + RES_TENSOR, RES_FEATUREDATA }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp index a08b0d11d5e..aa3029b9535 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp @@ -55,6 +55,7 @@ ResultConfig::GetResTypeName(ResType type) case RES_LONG_DATA: return "longdata"; case RES_XMLSTRING: return "xmlstring"; case 
RES_JSONSTRING: return "jsonstring"; + case RES_TENSOR: return "tensor"; case RES_FEATUREDATA: return "featuredata"; } return "unknown-type"; @@ -172,6 +173,8 @@ ResultConfig::ReadConfig(const vespa::config::search::SummaryConfig &cfg, const rc = resClass->AddConfigEntry(fieldname, RES_XMLSTRING); } else if (strcmp(fieldtype, "jsonstring") == 0) { rc = resClass->AddConfigEntry(fieldname, RES_JSONSTRING); + } else if (strcmp(fieldtype, "tensor") == 0) { + rc = resClass->AddConfigEntry(fieldname, RES_TENSOR); } else if (strcmp(fieldtype, "featuredata") == 0) { rc = resClass->AddConfigEntry(fieldname, RES_FEATUREDATA); } else { // FAIL: unknown field type diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h index 7082a5636cc..4ae1b14e486 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h @@ -164,6 +164,8 @@ public: case RES_DATA: case RES_LONG_DATA: return (b == RES_DATA || b == RES_LONG_DATA); + case RES_TENSOR: + return (b == RES_TENSOR); case RES_FEATUREDATA: return (b == RES_FEATUREDATA); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp index 83d504f2429..bda2f5c1b74 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp @@ -121,6 +121,7 @@ ResultPacker::AddEmpty() case RES_JSONSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: return AddLongString(NULL, 0); + case RES_TENSOR: return AddSerializedTensor(NULL, 0); case RES_LONG_DATA: return AddLongData(NULL, 0); } } @@ -251,6 +252,17 @@ ResultPacker::AddLongData(const char *buf, uint32_t buflen) bool +ResultPacker::AddSerializedTensor(const char *buf, uint32_t buflen) +{ + if (CheckEntry(RES_TENSOR)) { + _buf.append(&buflen, 
sizeof(buflen)); + _buf.append(buf, buflen); + } + return !_error; +} + + +bool ResultPacker::GetDocsumBlob(const char **buf, uint32_t *buflen) { if (!_error && diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h index 8280ebe0980..634084fea6a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h @@ -230,6 +230,14 @@ public: **/ bool AddLongData(const char *buf, uint32_t buflen); + /* + * Add a 'tensor' field to the docsum blob we are currently creating. + * + * @return true(ok)/false(error). + * @param buf pointer to serialized tensor to add. + * @param buflen length of serialized tensor to add. + **/ + bool AddSerializedTensor(const char *buf, uint32_t buflen); /** * Obtain a pointer to, and the length of, the created docsum diff --git a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp index 92ebe07d457..47cc0cf3a33 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp @@ -419,6 +419,7 @@ GeneralResult::unpack(const char *buf, const size_t buflen) break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t ldlen; |