aboutsummaryrefslogtreecommitdiffstats
path: root/vsm
diff options
context:
space:
mode:
author Arne H Juul <arnej@yahoo-inc.com> 2017-02-20 10:54:42 +0100
committer Arne H Juul <arnej@yahoo-inc.com> 2017-02-20 10:58:19 +0100
commit 888225e44d66ae2247b1c845e894d39edd02136f (patch)
tree 9445debec8b20086315ea58f725cce535f4f84d9 /vsm
parent 401dd08db643cefb25ef1cc891bca1c3e2013f1d (diff)
add new SlimeFieldWriter class
* we cannot use shared SummaryFieldConverter::convertSummaryField code because VSM has its own config for only returning a subset of a document field, so copy the json writing code and mutate it for writing slime instead.
Diffstat (limited to 'vsm')
-rw-r--r-- vsm/src/vespa/vsm/vsm/CMakeLists.txt | 1
-rw-r--r-- vsm/src/vespa/vsm/vsm/docsumfilter.cpp | 30
-rw-r--r-- vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp | 177
-rw-r--r-- vsm/src/vespa/vsm/vsm/slimefieldwriter.h | 49
4 files changed, 242 insertions, 15 deletions
diff --git a/vsm/src/vespa/vsm/vsm/CMakeLists.txt b/vsm/src/vespa/vsm/vsm/CMakeLists.txt
index db5af316f1e..237b0771c95 100644
--- a/vsm/src/vespa/vsm/vsm/CMakeLists.txt
+++ b/vsm/src/vespa/vsm/vsm/CMakeLists.txt
@@ -6,6 +6,7 @@ vespa_add_library(vsm_vsmbase OBJECT
fieldsearchspec.cpp
flattendocsumwriter.cpp
jsondocsumwriter.cpp
+ slimefieldwriter.cpp
snippetmodifier.cpp
vsm-adapter.cpp
docsumconfig.cpp
diff --git a/vsm/src/vespa/vsm/vsm/docsumfilter.cpp b/vsm/src/vespa/vsm/vsm/docsumfilter.cpp
index 2c3e2e6e3c7..eb4e0bf532f 100644
--- a/vsm/src/vespa/vsm/vsm/docsumfilter.cpp
+++ b/vsm/src/vespa/vsm/vsm/docsumfilter.cpp
@@ -2,6 +2,7 @@
#include "docsumfilter.h"
#include "jsondocsumwriter.h"
+#include "slimefieldwriter.h"
#include <vespa/searchsummary/docsummary/resultclass.h>
#include <vespa/searchsummary/docsummary/summaryfieldconverter.h>
#include <vespa/vespalib/util/jsonwriter.h>
@@ -154,12 +155,20 @@ DocsumFilter::prepareFieldSpec(DocsumFieldSpec & spec, const DocsumTools::FieldS
FieldIdT field = fieldMap.fieldNo(name);
if (field != FieldMap::npos) {
if (field < fieldPathMap.size()) {
+ LOG(debug, "field %u < map size %zu", field, fieldPathMap.size());
if (!fieldPathMap[field].empty()) {
+ FieldPath relPath(fieldPathMap[field].begin() + 1,
+ fieldPathMap[field].end());
+ LOG(debug, "map[%u] -> %zu elements", field, fieldPathMap[field].end() - fieldPathMap[field].begin());
+ for (document::FieldPathEntry entry : fieldPathMap[field]) {
+ LOG(debug, "entry: %s", entry.getName().c_str());
+ }
// skip the element that correspond to the start field value
spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier
(field, FieldPath(fieldPathMap[field].begin() + 1,
fieldPathMap[field].end())));
} else {
+ LOG(debug, "map[%u] empty", field);
spec.getInputFields().push_back(DocsumFieldSpec::FieldIdentifier(field, FieldPath()));
}
} else {
@@ -346,22 +355,13 @@ DocsumFilter::writeSlimeField(const DocsumFieldSpec & fieldSpec,
if (fv != NULL) {
LOG(debug, "writeSlimeField: About to write field '%d' as Slime: field value = '%s'",
fieldId.getId(), fv->toString().c_str());
- const document::FieldValue::UP converted =
- SummaryFieldConverter::convertSummaryField(false, *fv, true);
- if (converted.get() != NULL) {
- if (converted->getClass().inherits(document::LiteralFieldValueB::classId)) {
- const document::LiteralFieldValueB *lfv =
- static_cast<const document::LiteralFieldValueB *>(converted.get());
- vespalib::stringref s = lfv->getValueRef();
- packer.AddLongString(s.c_str(), s.size());
- } else {
- vespalib::string s = converted->getAsString();
- packer.AddLongString(s.c_str(), s.size());
- }
- } else {
- LOG(debug, "writeSlimeField: Could not convert value for field '%d'", fieldId.getId());
- packer.AddEmpty();
+ SlimeFieldWriter writer;
+ if (! fieldSpec.hasIdentityMapping()) {
+ writer.setInputFields(fieldSpec.getInputFields());
}
+ writer.convert(*fv);
+ const vespalib::stringref out = writer.out();
+ packer.AddLongString(out.data(), out.size());
} else {
LOG(debug, "writeSlimeField: Field value not set for field '%d'", fieldId.getId());
packer.AddEmpty();
diff --git a/vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp b/vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp
new file mode 100644
index 00000000000..2e44e8307f4
--- /dev/null
+++ b/vsm/src/vespa/vsm/vsm/slimefieldwriter.cpp
@@ -0,0 +1,177 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "slimefieldwriter.h"
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/data/slime/convenience.h>
+#include <vespa/vespalib/data/slime/binary_format.h>
+#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP(".vsm.slimefieldwriter");
+
+namespace {
+
+/**
+ * Render a field path as the names of its entries joined by '.'.
+ * Used for debug logging only.
+ **/
+vespalib::string
+toString(const vsm::FieldPath & fp)
+{
+    vespalib::asciistream joined;
+    const size_t numEntries = fp.size();
+    for (size_t idx = 0; idx < numEntries; ++idx) {
+        // separator goes in front of every entry except the first
+        if (idx != 0) {
+            joined << ".";
+        }
+        joined << fp[idx].getName();
+    }
+    return joined.str();
+}
+
+} // namespace <unnamed>
+
+using namespace vespalib::slime::convenience;
+
+
+namespace vsm {
+
+/**
+ * Recursively write the given field value into slime through the given
+ * inserter, keeping _currPath in sync with the position in the value.
+ *
+ * - arrays are written as slime arrays
+ * - weighted sets are written as arrays of {"item", "weight"} objects
+ * - maps are written as arrays of {"key", "value"} objects
+ * - structs are written as objects; a struct field is only written when
+ *   explorePath() accepts the path leading to it
+ * - literal values are written as strings, byte/short/int/long as longs,
+ *   float/double as doubles, and everything else as strings
+ *
+ * @param fv       the field value to write
+ * @param inserter decides where in the slime structure the value ends up
+ **/
+void
+SlimeFieldWriter::traverseRecursive(const document::FieldValue & fv,
+                                    Inserter &inserter)
+{
+    LOG(debug, "traverseRecursive: class(%s), fieldValue(%s), currentPath(%s)",
+        fv.getClass().name(), fv.toString().c_str(), toString(_currPath).c_str());
+
+    if (fv.getClass().inherits(document::CollectionFieldValue::classId)) {
+        const document::CollectionFieldValue & cfv = static_cast<const document::CollectionFieldValue &>(fv);
+        if (cfv.inherits(document::ArrayFieldValue::classId)) {
+            const document::ArrayFieldValue & afv = static_cast<const document::ArrayFieldValue &>(cfv);
+            Cursor &a = inserter.insertArray();
+            for (size_t i = 0; i < afv.size(); ++i) {
+                const document::FieldValue & nfv = afv[i];
+                ArrayInserter ai(a);
+                traverseRecursive(nfv, ai);
+            }
+        } else if (cfv.inherits(document::WeightedSetFieldValue::classId)) {
+            const document::WeightedSetFieldValue & wsfv = static_cast<const document::WeightedSetFieldValue &>(cfv);
+            Cursor &a = inserter.insertArray();
+            Memory imem("item");
+            Memory wmem("weight");
+            for (document::WeightedSetFieldValue::const_iterator itr = wsfv.begin(); itr != wsfv.end(); ++itr) {
+                Cursor &o = a.addObject();
+                const document::FieldValue & nfv = *itr->first;
+                ObjectInserter oi(o, imem);
+                traverseRecursive(nfv, oi);
+                int weight = static_cast<const document::IntFieldValue &>(*itr->second).getValue();
+                o.setLong(wmem, weight);
+            }
+        } else {
+            LOG(warning, "traverseRecursive: Cannot handle collection field value of type '%s'",
+                fv.getClass().name());
+        }
+
+    } else if (fv.getClass().inherits(document::MapFieldValue::classId)) {
+        const document::MapFieldValue & mfv = static_cast<const document::MapFieldValue &>(fv);
+        Cursor &a = inserter.insertArray();
+        Memory keymem("key");
+        Memory valmem("value");
+        // The path entry describing "the value part of this map" is the same
+        // for every map entry, so build it once instead of once per iteration.
+        const document::MapDataType & mapType = static_cast<const document::MapDataType &>(*mfv.getDataType());
+        const document::FieldPathEntry valueEntry(
+                mapType, mapType.getKeyType(), mapType.getValueType(),
+                false, true);
+        for (document::MapFieldValue::const_iterator itr = mfv.begin(); itr != mfv.end(); ++itr) {
+            Cursor &o = a.addObject();
+            // the key is written without extending the current path
+            ObjectInserter ki(o, keymem);
+            traverseRecursive(*itr->first, ki);
+            // the value is written with the map value entry on the path
+            _currPath.push_back(valueEntry);
+            ObjectInserter vi(o, valmem);
+            traverseRecursive(*itr->second, vi);
+            _currPath.pop_back();
+        }
+    } else if (fv.getClass().inherits(document::StructuredFieldValue::classId)) {
+        const document::StructuredFieldValue & sfv = static_cast<const document::StructuredFieldValue &>(fv);
+        Cursor &o = inserter.insertObject();
+        for (document::StructuredFieldValue::const_iterator itr = sfv.begin(); itr != sfv.end(); ++itr) {
+            // TODO: Why do we have to iterate like this?
+            document::FieldPathEntry fi(sfv.getField(itr.field().getName()));
+            _currPath.push_back(fi);
+            // only descend into struct fields selected by the input fields
+            if (explorePath()) {
+                Memory keymem(itr.field().getName());
+                ObjectInserter oi(o, keymem);
+                document::FieldValue::UP fval(sfv.getValue(itr.field()));
+                traverseRecursive(*fval, oi);
+            }
+            _currPath.pop_back();
+        }
+    } else {
+        if (fv.getClass().inherits(document::LiteralFieldValueB::classId)) {
+            const document::LiteralFieldValueB & lfv = static_cast<const document::LiteralFieldValueB &>(fv);
+            inserter.insertString(lfv.getValueRef());
+        } else if (fv.getClass().inherits(document::NumericFieldValueBase::classId)) {
+            switch (fv.getDataType()->getId()) {
+            case document::DataType::T_BYTE:
+            case document::DataType::T_SHORT:
+            case document::DataType::T_INT:
+            case document::DataType::T_LONG:
+                inserter.insertLong(fv.getAsLong());
+                break;
+            case document::DataType::T_FLOAT:
+            case document::DataType::T_DOUBLE:
+                // Read as double: going through getAsFloat() would truncate
+                // T_DOUBLE values to float precision before they are stored.
+                inserter.insertDouble(fv.getAsDouble());
+                break;
+            default:
+                inserter.insertString(fv.getAsString());
+            }
+        } else {
+            inserter.insertString(fv.toString());
+        }
+    }
+}
+
+/**
+ * Decide whether the current path should be explored.
+ *
+ * @return true if no input fields are set, or if the current path is a
+ *         prefix of (or equal to) the path of at least one input field,
+ *         comparing path entries by name
+ **/
+bool
+SlimeFieldWriter::explorePath()
+{
+    // without a configured subset everything is explored
+    if (_inputFields == nullptr) {
+        return true;
+    }
+    const size_t depth = _currPath.size();
+    for (size_t fieldIdx = 0; fieldIdx < _inputFields->size(); ++fieldIdx) {
+        const FieldPath & candidate = (*_inputFields)[fieldIdx].getPath();
+        if (depth > candidate.size()) {
+            // the current path is deeper than this input field path
+            continue;
+        }
+        // count how many leading entries have matching names
+        size_t matched = 0;
+        while (matched < depth &&
+               candidate[matched].getName() == _currPath[matched].getName())
+        {
+            ++matched;
+        }
+        if (matched == depth) {
+            // the current path matches one of the input field paths
+            return true;
+        }
+    }
+    return false;
+}
+
+/**
+ * Create a writer with no input field restriction and an empty current
+ * path. The output buffer is constructed with the argument 4096.
+ **/
+SlimeFieldWriter::SlimeFieldWriter()
+    : _rbuf(4096),
+      _slime(),
+      _inputFields(nullptr),   // no subset selected: write the whole value
+      _currPath()
+{
+}
+
+/**
+ * Convert the given field value to slime and encode it in binary form
+ * into the internal buffer that out() refers to.
+ *
+ * When debug logging is enabled, the configured input field paths (if
+ * any) are logged before the conversion.
+ *
+ * @param fv the field value to convert
+ **/
+void
+SlimeFieldWriter::convert(const document::FieldValue & fv)
+{
+    if (LOG_WOULD_LOG(debug)) {
+        if (_inputFields != NULL) {
+            for (size_t i = 0; i < _inputFields->size(); ++i) {
+                // i is a size_t, so it must be printed with the unsigned %zu conversion
+                LOG(debug, "write: input field path [%zu] '%s'", i, toString((*_inputFields)[i].getPath()).c_str());
+            }
+        } else {
+            LOG(debug, "write: no input fields");
+        }
+    }
+    // build the slime structure from the field value ...
+    SlimeInserter inserter(_slime);
+    traverseRecursive(fv, inserter);
+    // ... and encode it in binary form into the raw buffer
+    search::SlimeOutputRawBufAdapter adapter(_rbuf);
+    vespalib::slime::BinaryFormat::encode(_slime, adapter);
+}
+
+}
diff --git a/vsm/src/vespa/vsm/vsm/slimefieldwriter.h b/vsm/src/vespa/vsm/vsm/slimefieldwriter.h
new file mode 100644
index 00000000000..6e0708286a0
--- /dev/null
+++ b/vsm/src/vespa/vsm/vsm/slimefieldwriter.h
@@ -0,0 +1,49 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include <vespa/document/fieldvalue/fieldvalues.h>
+#include <vespa/vsm/common/storagedocument.h>
+#include <vespa/vsm/vsm/docsumfieldspec.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/searchlib/util/rawbuf.h>
+
+namespace vsm {
+
+/**
+ * This class is used to write a field value as slime binary data.
+ * If only a subset of the field value should be written this subset
+ * is specified using the setInputFields() function.
+ *
+ * Usage: optionally call setInputFields(), then convert(), then read
+ * the encoded bytes through out().
+ **/
+class SlimeFieldWriter
+{
+private:
+ search::RawBuf _rbuf; // receives the binary slime encoding produced by convert()
+ vespalib::Slime _slime; // slime structure that convert() fills from the field value
+ const DocsumFieldSpec::FieldIdentifierVector * _inputFields; // not owned; NULL means no restriction
+ FieldPath _currPath; // path to the value currently being traversed
+
+ void traverseRecursive(const document::FieldValue & fv, vespalib::slime::Inserter & inserter); // writes fv (and nested values) via inserter
+ bool explorePath(); // true if _currPath should be written given _inputFields
+
+public:
+ SlimeFieldWriter(); // starts with no input field restriction
+
+ /**
+ * Specifies the subset of the field value that should be written.
+ *
+ * Only the address of inputFields is stored, so the referenced vector
+ * must stay alive for as long as this writer uses it.
+ **/
+ void setInputFields(const DocsumFieldSpec::FieldIdentifierVector & inputFields) { _inputFields = &inputFields; }
+
+ /**
+ * Convert the given field value
+ *
+ * The value is written to slime and the slime structure is encoded in
+ * binary form into the buffer exposed by out().
+ **/
+ void convert(const document::FieldValue & fv);
+
+ /**
+ * Return a reference to the output binary data
+ *
+ * The returned stringref points into the internal buffer of this
+ * writer; it does not copy the data.
+ **/
+ vespalib::stringref out() const {
+ return vespalib::stringref(_rbuf.GetDrainPos(), _rbuf.GetUsedLen());
+ }
+};
+
+}