diff options
author | Arne H Juul <arnej@yahoo-inc.com> | 2017-02-20 10:54:42 +0100 |
---|---|---|
committer | Arne H Juul <arnej@yahoo-inc.com> | 2017-02-20 10:58:19 +0100 |
commit | 888225e44d66ae2247b1c845e894d39edd02136f (patch) | |
tree | 9445debec8b20086315ea58f725cce535f4f84d9 /vsm | |
parent | 401dd08db643cefb25ef1cc891bca1c3e2013f1d (diff) |
add new SlimeFieldWriter class
* We cannot use the shared SummaryFieldConverter::convertSummaryField
code because VSM has its own config for returning only a subset
of a document field, so copy the JSON-writing code and adapt
it to write Slime instead.
Diffstat (limited to 'vsm')
-rw-r--r-- | vsm/src/vespa/vsm/vsm/CMakeLists.txt | 1 | ||||
-rw-r--r-- | vsm/src/vespa/vsm/vsm/docsumfilter.cpp | 30 | ||||
-rw-r--r-- | vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp | 177 | ||||
-rw-r--r-- | vsm/src/vespa/vsm/vsm/slimefieldwriter.h | 49 |
4 files changed, 242 insertions, 15 deletions
diff --git a/vsm/src/vespa/vsm/vsm/CMakeLists.txt b/vsm/src/vespa/vsm/vsm/CMakeLists.txt index db5af316f1e..237b0771c95 100644 --- a/vsm/src/vespa/vsm/vsm/CMakeLists.txt +++ b/vsm/src/vespa/vsm/vsm/CMakeLists.txt @@ -6,6 +6,7 @@ vespa_add_library(vsm_vsmbase OBJECT fieldsearchspec.cpp flattendocsumwriter.cpp jsondocsumwriter.cpp + slimefieldwriter.cpp snippetmodifier.cpp vsm-adapter.cpp docsumconfig.cpp diff --git a/vsm/src/vespa/vsm/vsm/docsumfilter.cpp b/vsm/src/vespa/vsm/vsm/docsumfilter.cpp index 2c3e2e6e3c7..eb4e0bf532f 100644 --- a/vsm/src/vespa/vsm/vsm/docsumfilter.cpp +++ b/vsm/src/vespa/vsm/vsm/docsumfilter.cpp @@ -2,6 +2,7 @@ #include "docsumfilter.h" #include "jsondocsumwriter.h" +#include "slimefieldwriter.h" #include <vespa/searchsummary/docsummary/resultclass.h> #include <vespa/searchsummary/docsummary/summaryfieldconverter.h> #include <vespa/vespalib/util/jsonwriter.h> @@ -154,12 +155,20 @@ DocsumFilter::prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldS FieldIdT field = fieldMap.fieldNo(name); if (field != FieldMap::npos) { if (field < fieldPathMap.size()) { + LOG(debug, "field %u < map size %zu", field, fieldPathMap.size()); if (!fieldPathMap[field].empty()) { + FieldPath relPath(fieldPathMap[field].begin() + 1, + fieldPathMap[field].end()); + LOG(debug, "map[%u] -> %zu elements", field, fieldPathMap[field].end() - fieldPathMap[field].begin()); + for (document::FieldPathEntry entry : fieldPathMap[field]) { + LOG(debug, "entry: %s", entry.getName().c_str()); + } // skip the element that correspond to the start field value spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier (field, FieldPath(fieldPathMap[field].begin() + 1, fieldPathMap[field].end()))); } else { + LOG(debug, "map[%u] empty", field); spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier(field, FieldPath())); } } else { @@ -346,22 +355,13 @@ DocsumFilter::writeSlimeField(const DocsumFieldSpec & fieldSpec, if (fv != NULL) { LOG(debug, 
"writeSlimeField: About to write field '%d' as Slime: field value = '%s'", fieldId.getId(), fv->toString().c_str()); - const document::FieldValue::UP converted = - SummaryFieldConverter::convertSummaryField(false, *fv, true); - if (converted.get() != NULL) { - if (converted->getClass().inherits(document::LiteralFieldValueB::classId)) { - const document::LiteralFieldValueB *lfv = - static_cast<const document::LiteralFieldValueB *>(converted.get()); - vespalib::stringref s = lfv->getValueRef(); - packer.AddLongString(s.c_str(), s.size()); - } else { - vespalib::string s = converted->getAsString(); - packer.AddLongString(s.c_str(), s.size()); - } - } else { - LOG(debug, "writeSlimeField: Could not convert value for field '%d'", fieldId.getId()); - packer.AddEmpty(); + SlimeFieldWriter writer; + if (! fieldSpec.hasIdentityMapping()) { + writer.setInputFields(fieldSpec.getInputFields()); } + writer.convert(*fv); + const vespalib::stringref out = writer.out(); + packer.AddLongString(out.data(), out.size()); } else { LOG(debug, "writeSlimeField: Field value not set for field '%d'", fieldId.getId()); packer.AddEmpty(); diff --git a/vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp b/vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp new file mode 100644 index 00000000000..2e44e8307f4 --- /dev/null +++ b/vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp @@ -0,0 +1,177 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+ +#include "slimefieldwriter.h" +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/data/slime/convenience.h> +#include <vespa/vespalib/data/slime/binary_format.h> +#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> +#include <vespa/vespalib/stllike/asciistream.h> + +#include <vespa/log/log.h> +LOG_SETUP(".vsm.slimefieldwriter"); + +namespace { + +vespalib::string +toString(const vsm::FieldPath & fp) +{ + vespalib::asciistream oss; + for (size_t i = 0; i < fp.size(); ++i) { + if (i > 0) { + oss << "."; + } + oss << fp[i].getName(); + } + return oss.str(); +} + +} // namespace <unnamed> + +using namespace vespalib::slime::convenience; + + +namespace vsm { + +void +SlimeFieldWriter::traverseRecursive(const document::FieldValue & fv, + Inserter &inserter) +{ + LOG(debug, "traverseRecursive: class(%s), fieldValue(%s), currentPath(%s)", + fv.getClass().name(), fv.toString().c_str(), toString(_currPath).c_str()); + + if (fv.getClass().inherits(document::CollectionFieldValue::classId)) { + const document::CollectionFieldValue & cfv = static_cast<const document::CollectionFieldValue &>(fv); + if (cfv.inherits(document::ArrayFieldValue::classId)) { + const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(cfv); + Cursor &a = inserter.insertArray(); + for (size_t i = 0; i < afv.size(); ++i) { + const document::FieldValue & nfv = afv[i]; + ArrayInserter ai(a); + traverseRecursive(nfv, ai); + } + } else if (cfv.inherits(document::WeightedSetFieldValue::classId)) { + const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(cfv); + Cursor &a = inserter.insertArray(); + Memory imem("item"); + Memory wmem("weight"); + for (document::WeightedSetFieldValue::const_iterator itr = wsfv.begin(); itr != wsfv.end(); ++itr) { + Cursor &o = a.addObject(); + const document::FieldValue & nfv = *itr->first; + ObjectInserter oi(o, imem); + traverseRecursive(nfv, oi); + int weight = 
static_cast<const document::IntFieldValue &>(*itr->second).getValue(); + o.setLong(wmem, weight); + } + } else { + LOG(warning, "traverseRecursive: Cannot handle collection field value of type '%s'", + fv.getClass().name()); + } + + } else if (fv.getClass().inherits(document::MapFieldValue::classId)) { + const document::MapFieldValue & mfv = static_cast<const document::MapFieldValue &>(fv); + Cursor &a = inserter.insertArray(); + Memory keymem("key"); + Memory valmem("value"); + for (document::MapFieldValue::const_iterator itr = mfv.begin(); itr != mfv.end(); ++itr) { + Cursor &o = a.addObject(); + ObjectInserter ki(o, keymem); + traverseRecursive(*itr->first, ki); + const document::MapDataType& mapType = static_cast<const document::MapDataType &>(*mfv.getDataType()); + document::FieldPathEntry valueEntry( + mapType, mapType.getKeyType(), mapType.getValueType(), + false, true); + _currPath.push_back(valueEntry); + ObjectInserter vi(o, valmem); + traverseRecursive(*itr->second, vi); + _currPath.pop_back(); + } + } else if (fv.getClass().inherits(document::StructuredFieldValue::classId)) { + const document::StructuredFieldValue & sfv = static_cast<const document::StructuredFieldValue &>(fv); + Cursor &o = inserter.insertObject(); + for (document::StructuredFieldValue::const_iterator itr = sfv.begin(); itr != sfv.end(); ++itr) { + // TODO: Why do we have to iterate like this? 
+ document::FieldPathEntry fi(sfv.getField(itr.field().getName())); + _currPath.push_back(fi); + if (explorePath()) { + Memory keymem(itr.field().getName()); + ObjectInserter oi(o, keymem); + document::FieldValue::UP fval(sfv.getValue(itr.field())); + traverseRecursive(*fval, oi); + } + _currPath.pop_back(); + } + } else { + if (fv.getClass().inherits(document::LiteralFieldValueB::classId)) { + const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv); + inserter.insertString(lfv.getValueRef()); + } else if (fv.getClass().inherits(document::NumericFieldValueBase::classId)) { + switch (fv.getDataType()->getId()) { + case document::DataType::T_BYTE: + case document::DataType::T_SHORT: + case document::DataType::T_INT: + case document::DataType::T_LONG: + inserter.insertLong(fv.getAsLong()); + break; + case document::DataType::T_FLOAT: + case document::DataType::T_DOUBLE: + inserter.insertDouble(fv.getAsFloat()); + break; + default: + inserter.insertString(fv.getAsString()); + } + } else { + inserter.insertString(fv.toString()); + } + } +} + +bool +SlimeFieldWriter::explorePath() +{ + if (_inputFields == NULL) { + return true; + } + // find out if we should explore the current path + for (size_t i = 0; i < _inputFields->size(); ++i) { + const FieldPath & fp = (*_inputFields)[i].getPath(); + if (_currPath.size() <= fp.size()) { + bool equal = true; + for (size_t j = 0; j < _currPath.size() && equal; ++j) { + equal = (fp[j].getName() == _currPath[j].getName()); + } + if (equal) { + // the current path matches one of the input field paths + return true; + } + } + } + return false; +} + +SlimeFieldWriter::SlimeFieldWriter() : + _rbuf(4096), + _slime(), + _inputFields(NULL), + _currPath() +{ +} + +void +SlimeFieldWriter::convert(const document::FieldValue & fv) +{ + if (LOG_WOULD_LOG(debug)) { + if (_inputFields != NULL) { + for (size_t i = 0; i < _inputFields->size(); ++i) { + LOG(debug, "write: input field path [%zd] '%s'", i, 
toString((*_inputFields)[i].getPath()).c_str()); + } + } else { + LOG(debug, "write: no input fields"); + } + } + SlimeInserter inserter(_slime); + traverseRecursive(fv, inserter); + search::SlimeOutputRawBufAdapter adapter(_rbuf); + vespalib::slime::BinaryFormat::encode(_slime, adapter); +} + +} diff --git a/vsm/src/vespa/vsm/vsm/slimefieldwriter.h b/vsm/src/vespa/vsm/vsm/slimefieldwriter.h new file mode 100644 index 00000000000..6e0708286a0 --- /dev/null +++ b/vsm/src/vespa/vsm/vsm/slimefieldwriter.h @@ -0,0 +1,49 @@ +// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include <vespa/document/fieldvalue/fieldvalues.h> +#include <vespa/vsm/common/storagedocument.h> +#include <vespa/vsm/vsm/docsumfieldspec.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/searchlib/util/rawbuf.h> + +namespace vsm { + +/** + * This class is used to write a field value as slime binary data. + * If only a subset of the field value should be written this subset + * is specified using the setInputFields() function. + **/ +class SlimeFieldWriter +{ +private: + search::RawBuf _rbuf; + vespalib::Slime _slime; + const DocsumFieldSpec::FieldIdentifierVector * _inputFields; + FieldPath _currPath; + + void traverseRecursive(const document::FieldValue & fv, vespalib::slime::Inserter & inserter); + bool explorePath(); + +public: + SlimeFieldWriter(); + + /** + * Specifies the subset of the field value that should be written. + **/ + void setInputFields(const DocsumFieldSpec::FieldIdentifierVector & inputFields) { _inputFields = &inputFields; } + + /** + * Convert the given field value + **/ + void convert(const document::FieldValue & fv); + + /** + * Return a reference to the output binary data + **/ + vespalib::stringref out() const { + return vespalib::stringref(_rbuf.GetDrainPos(), _rbuf.GetUsedLen()); + } +}; + +} |