diff options
author | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-01-18 14:53:31 +0000 |
---|---|---|
committer | Tor Egge <Tor.Egge@yahoo-inc.com> | 2017-01-18 14:59:59 +0000 |
commit | 2775b05f9e94fc1c4a3f217ad0a75bcc6cf67479 (patch) | |
tree | e0b84297a39e91b483ae9de49f38be7f5a501765 /searchsummary | |
parent | ac728c6a77543ea618bee127221f950670e84eb8 (diff) |
Use serialized tensors in docsum blobs instead of slime objects.
Pass tensors as serialized tensors in slime version of docsum blob.
Diffstat (limited to 'searchsummary')
10 files changed, 58 insertions, 26 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp index b0704b2d148..cb8a3dc7680 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp @@ -8,7 +8,8 @@ #include "attributedfw.h" #include "docsumstate.h" #include <vespa/vespalib/tensor/tensor.h> -#include <vespa/vespalib/tensor/serialization/slime_binary_format.h> +#include <vespa/vespalib/tensor/serialization/typed_binary_format.h> +#include <vespa/vespalib/objects/nbostream.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/log/log.h> @@ -44,7 +45,13 @@ ResType inferType(const IAttributeVector & vec) { } else if (vec.isFloatingPointType()) { retval = (fw == sizeof(float)) ? RES_FLOAT : RES_DOUBLE; } else { - retval = RES_STRING; + BasicType::Type t = vec.getBasicType(); + switch (t) { + case BasicType::TENSOR: + retval = RES_TENSOR; + default: + retval = RES_STRING; + } } } } @@ -133,34 +140,27 @@ SingleAttrDFW::WriteField(uint32_t docid, target->append(s, slen); return (sizeof(slen) + slen); break; } - case RES_JSONSTRING: { + case RES_TENSOR: { + vespalib::nbostream str; BasicType::Type t = v.getBasicType(); switch (t) { case BasicType::TENSOR: { const tensor::TensorAttribute &tv = static_cast<const tensor::TensorAttribute &>(v); const auto tensor = tv.getTensor(docid); - vespalib::string str; if (tensor) { - auto slime = - vespalib::tensor::SlimeBinaryFormat::serialize(*tensor); - vespalib::slime::SimpleBuffer buf; - vespalib::slime::JsonFormat::encode(*slime, buf, true); - str = buf.get().make_string(); - } else { - // No tensor value => empty object - str = ""; + vespalib::tensor::TypedBinaryFormat::serialize(str, *tensor); } - uint32_t slen = str.size(); - target->append(&slen, sizeof(slen)); - target->append(str.c_str(), slen); - return (sizeof(slen) + slen); } default: break; - }; + } + uint32_t slen = str.size(); + target->append(&slen, sizeof(slen)); + target->append(str.peek(), slen); + return (sizeof(slen) + slen); } - /* FALLTHROUGH */ + case RES_JSONSTRING: case RES_XMLSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: @@ -222,7 +222,7 @@ SingleAttrDFW::insertField(uint32_t docid, target.insertLong(val); break; } - case RES_JSONSTRING: { + case RES_TENSOR: { BasicType::Type t = v.getBasicType(); switch (t) { case BasicType::TENSOR: { @@ -230,17 +230,17 @@ SingleAttrDFW::insertField(uint32_t docid, static_cast<const tensor::TensorAttribute &>(v); const auto tensor = tv.getTensor(docid); if (tensor) { - vespalib::tensor::SlimeBinaryFormat::serialize(target, *tensor); - } else { - // No tensor value => no object + vespalib::nbostream str; + vespalib::tensor::TypedBinaryFormat::serialize(str, *tensor); + target.insertData(vespalib::slime::Memory(str.peek(), str.size())); } - return; } default: - break; - }; + ; + } } - /* FALLTHROUGH */ + break; + case RES_JSONSTRING: case RES_XMLSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp index be188e9a871..446ca347bff 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumfieldwriter.cpp @@ -164,6 +164,7 @@ CopyDFW::insertField(uint32_t /*docid*/, target.insertString(value); break; } + case RES_TENSOR: case RES_LONG_DATA: case RES_DATA: { uint32_t len; @@ -259,6 +260,7 @@ CopyDFW::WriteField(uint32_t docid, break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t flen = entry->_len; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp index a837fca3bdb..27b20f8b1bd 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumformat.cpp @@ -97,6 +97,7 @@ DocsumFormat::addEmpty(ResType type, search::RawBuf &target) case RES_LONG_DATA: case RES_XMLSTRING: case RES_JSONSTRING: + case RES_TENSOR: case RES_FEATUREDATA: return addLongData(target, "", 0); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index f790d38e70e..4273c89d7f5 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -151,6 +151,7 @@ DynamicDocsumWriter::RepackDocsum(GeneralResult *gres, written += slen; break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t flen = entry->_len; uint32_t dlen = entry->_get_length(); @@ -304,6 +305,7 @@ static void convertEntry(GetDocsumsState *state, inserter.insertString(Memory(ptr, len)); break; case RES_DATA: + case RES_TENSOR: case RES_LONG_DATA: entry->_resolve_field(&ptr, &len, &state->_docSumFieldSpace); inserter.insertData(Memory(ptr, len)); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h index e35408a796c..58a245a364a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.h @@ -31,6 +31,7 @@ enum ResType { RES_LONG_DATA, RES_XMLSTRING, RES_JSONSTRING, + RES_TENSOR, RES_FEATUREDATA }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp index a08b0d11d5e..aa3029b9535 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp @@ -55,6 +55,7 @@ ResultConfig::GetResTypeName(ResType type) case RES_LONG_DATA: return "longdata"; case RES_XMLSTRING: return "xmlstring"; case RES_JSONSTRING: return "jsonstring"; + case RES_TENSOR: return "tensor"; case RES_FEATUREDATA: return "featuredata"; } return "unknown-type"; @@ -172,6 +173,8 @@ ResultConfig::ReadConfig(const vespa::config::search::SummaryConfig &cfg, const rc = resClass->AddConfigEntry(fieldname, RES_XMLSTRING); } else if (strcmp(fieldtype, "jsonstring") == 0) { rc = resClass->AddConfigEntry(fieldname, RES_JSONSTRING); + } else if (strcmp(fieldtype, "tensor") == 0) { + rc = resClass->AddConfigEntry(fieldname, RES_TENSOR); } else if (strcmp(fieldtype, "featuredata") == 0) { rc = resClass->AddConfigEntry(fieldname, RES_FEATUREDATA); } else { // FAIL: unknown field type diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h index 7082a5636cc..4ae1b14e486 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.h @@ -164,6 +164,8 @@ public: case RES_DATA: case RES_LONG_DATA: return (b == RES_DATA || b == RES_LONG_DATA); + case RES_TENSOR: + return (b == RES_TENSOR); case RES_FEATUREDATA: return (b == RES_FEATUREDATA); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp index 83d504f2429..bda2f5c1b74 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.cpp @@ -121,6 +121,7 @@ ResultPacker::AddEmpty() case RES_JSONSTRING: case RES_FEATUREDATA: case RES_LONG_STRING: return AddLongString(NULL, 0); + case RES_TENSOR: return AddSerializedTensor(NULL, 0); case RES_LONG_DATA: return AddLongData(NULL, 0); } } @@ -251,6 +252,17 @@ ResultPacker::AddLongData(const char *buf, uint32_t buflen) bool +ResultPacker::AddSerializedTensor(const char *buf, uint32_t buflen) +{ + if (CheckEntry(RES_TENSOR)) { + _buf.append(&buflen, sizeof(buflen)); + _buf.append(buf, buflen); + } + return !_error; +} + + +bool ResultPacker::GetDocsumBlob(const char **buf, uint32_t *buflen) { if (!_error && diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h index 8280ebe0980..634084fea6a 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultpacker.h @@ -230,6 +230,14 @@ public: **/ bool AddLongData(const char *buf, uint32_t buflen); + /* + * Add a 'tensor' field to the docsum blob we are currently creating. + * + * @return true(ok)/false(error). + * @param buf pointer to serialized tensor to add. + * @param buflen length of serialized tensor to add. + **/ + bool AddSerializedTensor(const char *buf, uint32_t buflen); /** * Obtain a pointer to, and the length of, the created docsum diff --git a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp index 92ebe07d457..47cc0cf3a33 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/urlresult.cpp @@ -419,6 +419,7 @@ GeneralResult::unpack(const char *buf, const size_t buflen) break; } + case RES_TENSOR: case RES_LONG_DATA: { uint32_t ldlen; |