summaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
author Arne H Juul <arnej@yahoo-inc.com> 2017-02-10 11:54:15 +0100
committer Arne H Juul <arnej@yahoo-inc.com> 2017-02-10 13:58:41 +0100
commit 8b78415877a70def8f543f10b9343a11112c3095 (patch)
tree b8cb1979e1085ce72ce2a7e996a88895cafae724 /searchsummary
parent 0a1048e4bccd90fe0a2ba50e1e74aed76606838b (diff)
move some code
* move classes that can be used for summary field conversion in both proton and vsm.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt 3
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp 34
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h 15
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.cpp 40
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.h 15
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp 708
-rw-r--r-- searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h 21
7 files changed, 836 insertions, 0 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
index bbd7dc1e177..03635dd36be 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -21,6 +21,9 @@ vespa_add_library(searchsummary_docsummary OBJECT
geoposdfw.cpp
tokenizer.cpp
positionsdfw.cpp
+ linguisticsannotation.cpp
+ searchdatatype.cpp
+ summaryfieldconverter.cpp
AFTER
searchsummary_config
)
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp b/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp
new file mode 100644
index 00000000000..124396aac5a
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.cpp
@@ -0,0 +1,34 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".searchsummary.docsummary.linguisticsannotation");
+
+#include "linguisticsannotation.h"
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/datatype/primitivedatatype.h>
+
+using document::AnnotationType;
+using document::DataType;
+using document::PrimitiveDataType;
+using vespalib::string;
+
+namespace search {
+namespace linguistics {
+
+namespace {
+// Builds an AnnotationType with the given id and name and sets 'type' as its
+// data type. The name is only read, so it is taken by const reference to
+// avoid copying the string on every call.
+AnnotationType makeType(int id, const string &name, const DataType &type) {
+    AnnotationType annotation_type(id, name);
+    annotation_type.setDataType(type);
+    return annotation_type;
+}
+
+// String data type handed to makeType() for TERM_OBJ below. It is defined
+// first because TERM_OBJ's initializer refers to it.
+const PrimitiveDataType STRING_OBJ(DataType::T_STRING);
+// Backing object for the exported TERM pointer: annotation type with id 1 and name "term".
+AnnotationType TERM_OBJ(makeType(1, "term", STRING_OBJ));
+} // namespace
+
+// Span tree name exported through linguisticsannotation.h.
+const string SPANTREE_NAME("linguistics");
+// Exported pointer to the file-local TERM_OBJ annotation type.
+const AnnotationType *const TERM(&TERM_OBJ);
+
+} // namespace search::linguistics
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h b/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h
new file mode 100644
index 00000000000..21aa3bce49e
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/linguisticsannotation.h
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/datatype/annotationtype.h>
+#include <vespa/vespalib/stllike/string.h>
+
+namespace search {
+namespace linguistics {
+
+// Name of the linguistics span tree; defined in linguisticsannotation.cpp as "linguistics".
+extern const vespalib::string SPANTREE_NAME;
+// Annotation type for terms; defined in linguisticsannotation.cpp with id 1,
+// name "term" and a string data type.
+extern const document::AnnotationType *const TERM;
+
+} // namespace search::linguistics
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.cpp b/searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.cpp
new file mode 100644
index 00000000000..6e60628739a
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.cpp
@@ -0,0 +1,40 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/fastos/fastos.h>
+#include <vespa/log/log.h>
+LOG_SETUP(".search.docsummary.searchdatatype");
+
+#include "searchdatatype.h"
+#include <vespa/document/base/field.h>
+#include <vespa/document/datatype/primitivedatatype.h>
+#include <vespa/document/datatype/structdatatype.h>
+
+using document::DataType;
+using document::Field;
+using document::PrimitiveDataType;
+using document::StructDataType;
+
+namespace search {
+namespace docsummary {
+
+namespace {
+
+// String type used for every field of the "url" struct type.
+PrimitiveDataType STRING_OBJ(DataType::T_STRING);
+// Struct type named "url"; its fields are added by setUpUriType().
+StructDataType URI_OBJ("url");
+
+// Adds the string fields of the "url" struct to URI_OBJ, in the order listed,
+// and returns the address of URI_OBJ.
+const StructDataType *setUpUriType() {
+    const char *const fieldNames[] = {
+        "all", "scheme", "host", "port", "path", "query", "fragment"
+    };
+    for (const char *fieldName : fieldNames) {
+        URI_OBJ.addField(Field(fieldName, STRING_OBJ, true));
+    }
+    return &URI_OBJ;
+}
+} // namespace
+
+const DataType *SearchDataType::URI(setUpUriType());
+
+} // namespace search::docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.h b/searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.h
new file mode 100644
index 00000000000..115aed9cbdb
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/searchdatatype.h
@@ -0,0 +1,15 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/datatype/datatype.h>
+
+namespace search {
+namespace docsummary {
+
+// Holder for data types defined by this library.
+struct SearchDataType {
+ // The "url" struct type; initialized in searchdatatype.cpp with the string
+ // fields all, scheme, host, port, path, query and fragment.
+ static const document::DataType *URI;
+};
+
+} // namespace search::docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
new file mode 100644
index 00000000000..92378ed83ce
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
@@ -0,0 +1,708 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "summaryfieldconverter.h"
+#include "linguisticsannotation.h"
+#include "searchdatatype.h"
+#include <vespa/document/annotation/alternatespanlist.h>
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spannode.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/annotation/spantreevisitor.h>
+#include <vespa/document/datatype/arraydatatype.h>
+#include <vespa/document/datatype/datatype.h>
+#include <vespa/document/datatype/documenttype.h>
+#include <vespa/document/datatype/weightedsetdatatype.h>
+#include <vespa/document/fieldvalue/arrayfieldvalue.h>
+#include <vespa/document/fieldvalue/bytefieldvalue.h>
+#include <vespa/document/fieldvalue/document.h>
+#include <vespa/document/fieldvalue/doublefieldvalue.h>
+#include <vespa/document/fieldvalue/fieldvaluevisitor.h>
+#include <vespa/document/fieldvalue/floatfieldvalue.h>
+#include <vespa/document/fieldvalue/intfieldvalue.h>
+#include <vespa/document/fieldvalue/longfieldvalue.h>
+#include <vespa/document/fieldvalue/predicatefieldvalue.h>
+#include <vespa/document/fieldvalue/rawfieldvalue.h>
+#include <vespa/document/fieldvalue/shortfieldvalue.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
+#include <vespa/document/fieldvalue/annotationreferencefieldvalue.h>
+#include <vespa/document/fieldvalue/tensorfieldvalue.h>
+#include <vespa/document/fieldvalue/referencefieldvalue.h>
+#include <vespa/searchcommon/common/schema.h>
+#include <vespa/searchlib/util/url.h>
+#include <vespa/vespalib/encoding/base64.h>
+#include <vespa/vespalib/geo/zcurve.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+#include <vespa/vespalib/stllike/string.h>
+#include <vespa/vespalib/util/jsonwriter.h>
+#include <vespa/vespalib/util/stringfmt.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/data/slime/convenience.h>
+#include <vespa/vespalib/data/slime/binary_format.h>
+#include <vespa/vespalib/data/slime/json_format.h>
+#include <vespa/eval/tensor/serialization/slime_binary_format.h>
+#include <vespa/eval/tensor/serialization/typed_binary_format.h>
+#include <vespa/vespalib/objects/nbostream.h>
+#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
+#include <vespa/vespalib/util/exceptions.h>
+
+
+using document::AlternateSpanList;
+using document::Annotation;
+using document::AnnotationReferenceFieldValue;
+using document::ArrayDataType;
+using document::ArrayFieldValue;
+using document::ByteFieldValue;
+using document::DataType;
+using document::Document;
+using document::DocumentType;
+using document::DoubleFieldValue;
+using document::FieldValue;
+using document::FixedTypeRepo;
+using document::ConstFieldValueVisitor;
+using document::FloatFieldValue;
+using document::IntFieldValue;
+using document::LongFieldValue;
+using document::MapFieldValue;
+using document::PredicateFieldValue;
+using document::RawFieldValue;
+using document::ShortFieldValue;
+using document::Span;
+using document::SpanList;
+using document::SimpleSpanList;
+using document::SpanNode;
+using document::SpanTree;
+using document::SpanTreeVisitor;
+using document::StringFieldValue;
+using document::StructFieldValue;
+using document::WeightedSetDataType;
+using document::WeightedSetFieldValue;
+using document::TensorFieldValue;
+using document::ReferenceFieldValue;
+using search::index::Schema;
+using search::util::URL;
+using std::make_pair;
+using std::pair;
+using std::vector;
+using vespalib::JSONWriter;
+using vespalib::asciistream;
+using vespalib::geo::ZCurve;
+using vespalib::make_string;
+using vespalib::string;
+using vespalib::stringref;
+
+namespace search {
+namespace docsummary {
+
+namespace {
+// Returns a copy of the characters of 's' covered by 'span', i.e. the range
+// [span.from(), span.from() + span.length()).
+string getSpanString(const string &s, const Span &span) {
+    const char *first = &s[span.from()];
+    const char *last = &s[span.from() + span.length()];
+    return string(first, last);
+}
+
+// Span tree visitor that records the lowest start position and the highest
+// end position among all Span nodes it visits.
+struct SpanFinder : SpanTreeVisitor {
+    int32_t begin_pos;
+    int32_t end_pos;
+
+    SpanFinder() : begin_pos(0x7fffffff), end_pos(-1) {}
+
+    // The span running from the lowest start to the highest end seen so far.
+    Span span() { return Span(begin_pos, end_pos - begin_pos); }
+
+    void visit(const Span &node) override {
+        const int32_t from = node.from();
+        const int32_t to = node.from() + node.length();
+        if (from < begin_pos) {
+            begin_pos = from;
+        }
+        if (end_pos < to) {
+            end_pos = to;
+        }
+    }
+    void visit(const SpanList &node) override {
+        // Elements are dereferenced with '->' before accepting the visitor.
+        for (const auto & child : node) {
+            child->accept(*this);
+        }
+    }
+    void visit(const SimpleSpanList &node) override {
+        // Elements accept the visitor directly.
+        for (const auto & child : node) {
+            child.accept(*this);
+        }
+    }
+    void visit(const AlternateSpanList &node) override {
+        // Every alternative subtree contributes to the extent.
+        for (size_t idx = 0; idx < node.getNumSubtrees(); ++idx) {
+            visit(node.getSubtree(idx));
+        }
+    }
+};
+
+// Runs a SpanFinder over 'span_node' and returns the span it accumulated.
+Span getSpan(const SpanNode &span_node) {
+    SpanFinder extent;
+    span_node.accept(extent);
+    return extent.span();
+}
+
+// Extract the FieldValues from all TERM annotations. For each span
+// with such annotations, the Handler is invoked with a set of
+// iterators over the FieldValues for that span.
+//
+// The Handler must provide a 'text' member with a size() and a
+// handleAnnotations(span, first, last) member. Stretches of text that
+// lie between or after the annotated spans are reported to the handler
+// with an empty iterator range.
+template <typename Handler>
+void handleIndexingTerms(Handler &handler, const StringFieldValue &value) {
+ StringFieldValue::SpanTrees trees = value.getSpanTrees();
+ // Look up the tree named linguistics::SPANTREE_NAME; a null result means no such tree.
+ const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME);
+ typedef pair<Span, const FieldValue *> SpanTerm;
+ typedef vector<SpanTerm> SpanTermVector;
+ if (!tree) {
+ // Treat a string without annotations as a single span.
+ // The single entry carries a null FieldValue pointer.
+ SpanTerm str(Span(0, handler.text.size()),
+ static_cast<const FieldValue*>(0));
+ handler.handleAnnotations(str.first, &str, &str + 1);
+ return;
+ }
+ SpanTermVector terms;
+ for (const Annotation & annotation : *tree) {
+ // For now, skip any composite spans.
+ // Only valid TERM annotations whose span node is a plain Span are collected.
+ const Span *span = dynamic_cast<const Span*>(annotation.getSpanNode());
+ if ((span != nullptr) && annotation.valid() &&
+ (annotation.getType() == *linguistics::TERM)) {
+ terms.push_back(make_pair(getSpan(*span),
+ annotation.getFieldValue()));
+ }
+ }
+ // Pairs order by span first, so annotations on the same span become adjacent.
+ sort(terms.begin(), terms.end());
+ SpanTermVector::const_iterator it = terms.begin();
+ SpanTermVector::const_iterator ite = terms.end();
+ // End position of the text reported to the handler so far.
+ int32_t endPos = 0;
+ for (; it != ite; ) {
+ SpanTermVector::const_iterator it_begin = it;
+ if (it_begin->first.from() > endPos) {
+ // Report the gap before this span with an empty range.
+ Span tmpSpan(endPos, it_begin->first.from() - endPos);
+ handler.handleAnnotations(tmpSpan, it, it);
+ endPos = it_begin->first.from();
+ }
+ // Advance 'it' past every entry that shares the span of it_begin.
+ for (; it != ite && it->first == it_begin->first; ++it);
+ handler.handleAnnotations(it_begin->first, it_begin, it);
+ endPos = it_begin->first.from() + it_begin->first.length();
+ }
+ // Report any text after the last annotated span with an empty range.
+ int32_t wantEndPos = handler.text.size();
+ if (endPos < wantEndPos) {
+ Span tmpSpan(endPos, wantEndPos - endPos);
+ handler.handleAnnotations(tmpSpan, ite, ite);
+ }
+}
+
+// Kept out of line via the noinline attribute on this declaration.
+const StringFieldValue &ensureStringFieldValue(const FieldValue &value) __attribute__((noinline));
+
+// Returns 'value' as a StringFieldValue, or throws IllegalArgumentException
+// when 'value' does not inherit from StringFieldValue.
+const StringFieldValue &ensureStringFieldValue(const FieldValue &value) {
+    if (value.inherits(IDENTIFIABLE_CLASSID(StringFieldValue))) {
+        return static_cast<const StringFieldValue &>(value);
+    }
+    throw vespalib::IllegalArgumentException("Illegal field type. " + value.toString(), VESPA_STRLOC);
+}
+
+// Interface for objects that produce a new field value from an input field value.
+struct FieldValueConverter {
+    virtual ~FieldValueConverter() = default;
+
+    // Returns the converted representation of 'input'.
+    virtual FieldValue::UP convert(const FieldValue &input) = 0;
+};
+
+
+// Handler for handleIndexingTerms() that writes the spans of 'text' to 'out'.
+// Every span is followed by the byte '\037'. Spans that carry term values are
+// written as ANCHOR, span text, SEPARATOR, space-separated terms, TERMINATOR.
+struct SummaryHandler {
+    const string text;
+    asciistream &out;
+
+    SummaryHandler(const string &s, asciistream &stream)
+        : text(s), out(stream) {}
+
+    // Writes one span. The span is written as plain text when the range is
+    // empty or holds a single entry without a field value; otherwise it is
+    // written in annotated form.
+    template <typename ForwardIt>
+    void handleAnnotations(const Span &span, ForwardIt it, ForwardIt last) {
+        const int annCnt = (last - it);
+        const bool plainText = (annCnt < 1) || (annCnt == 1 && !it->second);
+        if (plainText) {
+            out << getSpanString(text, span) << '\037';
+            return;
+        }
+        annotateSpans(span, it, last);
+    }
+
+    // Writes the annotated form of a span. An entry without a field value
+    // contributes the span text itself as its term.
+    template <typename ForwardIt>
+    void annotateSpans(const Span &span, ForwardIt it, ForwardIt last) {
+        out << "\357\277\271";  // ANCHOR
+        out << (getSpanString(text, span));
+        out << "\357\277\272";  // SEPARATOR
+        for (bool firstTerm = true; it != last; ++it, firstTerm = false) {
+            if (!firstTerm) {
+                out << " ";
+            }
+            if (it->second) {
+                out << ensureStringFieldValue(*it->second).getValue();
+            } else {
+                out << getSpanString(text, span);
+            }
+        }
+        out << "\357\277\273";  // TERMINATOR
+        out << "\037";
+    }
+};
+
+
+
+/**
+ * Field value visitor that writes the visited value as JSON to a JSONWriter.
+ *
+ * Numbers are written as JSON numbers, raw values as base64 strings,
+ * arrays, maps and weighted sets as JSON arrays, and structs as JSON
+ * objects. A struct of the URI type is written as the string in its "all"
+ * field when that field is a string. When tokenize is set, string values are
+ * passed through handleIndexingTerms() with a SummaryHandler before being
+ * written.
+ *
+ * All visit methods are marked 'override' so that a signature mismatch
+ * against ConstFieldValueVisitor is a compile error.
+ */
+class JsonFiller : public ConstFieldValueVisitor {
+    JSONWriter &_json;
+    bool _tokenize;
+
+    // Writes {"error": message}; used for value types that are not converted.
+    void appendError(const char *message) {
+        _json.beginObject();
+        _json.appendKey("error");
+        _json.appendString(message);
+        _json.endObject();
+    }
+
+    void visit(const AnnotationReferenceFieldValue &) override {
+        appendError("cannot convert from annotation reference field");
+    }
+    void visit(const Document &) override {
+        appendError("cannot convert from field of type document");
+    }
+
+    // A map becomes an array of {"key": ..., "value": ...} objects.
+    void visit(const MapFieldValue &v) override {
+        _json.beginArray();
+        for (const auto & entry : v) {
+            _json.beginObject();
+
+            _json.appendKey("key");
+            const FieldValue &key = *(entry.first);
+            key.accept(*this);
+
+            const FieldValue &val = *(entry.second);
+            _json.appendKey("value");
+            val.accept(*this);
+
+            _json.endObject();
+        }
+        _json.endArray();
+    }
+
+    void visit(const ArrayFieldValue &value) override {
+        _json.beginArray();
+        if (value.size() > 0) {
+            for (const FieldValue &fv : value) {
+                fv.accept(*this);
+            }
+        }
+        _json.endArray();
+    }
+
+    void visit(const StringFieldValue &value) override {
+        if (_tokenize) {
+            asciistream tmp;
+            SummaryHandler handler(value.getValue(), tmp);
+            handleIndexingTerms(handler, value);
+            _json.appendString(tmp.str());
+        } else {
+            _json.appendString(value.getValue());
+        }
+    }
+
+    void visit(const IntFieldValue &value) override {
+        int32_t v = value.getValue();
+        _json.appendInt64(v);
+    }
+    void visit(const LongFieldValue &value) override {
+        int64_t v = value.getValue();
+        _json.appendInt64(v);
+    }
+    void visit(const ShortFieldValue &value) override {
+        int16_t v = value.getValue();
+        _json.appendInt64(v);
+    }
+    void visit(const ByteFieldValue &value) override {
+        int8_t v = value.getAsByte();
+        _json.appendInt64(v);
+    }
+    void visit(const DoubleFieldValue &value) override {
+        double v = value.getValue();
+        _json.appendDouble(v);
+    }
+    void visit(const FloatFieldValue &value) override {
+        float v = value.getValue();
+        _json.appendFloat(v);
+    }
+
+    void visit(const PredicateFieldValue &value) override {
+        _json.appendJSON(value.toString());
+    }
+
+    void visit(const RawFieldValue &value) override {
+        // Use base64 coding to represent raw values in json strings.
+        std::pair<const char *, size_t> buf = value.getAsRaw();
+        vespalib::string rawVal(buf.first, buf.first + buf.second);
+        _json.appendString(vespalib::Base64::encode(rawVal));
+    }
+
+    void visit(const StructFieldValue &value) override {
+        if (*value.getDataType() == *SearchDataType::URI) {
+            // A URI struct is represented by its "all" field when that is a string.
+            FieldValue::UP uriAllValue = value.getValue("all");
+            if (uriAllValue.get() != nullptr &&
+                uriAllValue->inherits(IDENTIFIABLE_CLASSID(StringFieldValue)))
+            {
+                uriAllValue->accept(*this);
+                return;
+            }
+        }
+        _json.beginObject();
+        for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) {
+            _json.appendKey(itr.field().getName());
+            FieldValue::UP nextValue(value.getValue(itr.field()));
+            nextValue->accept(*this);
+        }
+        _json.endObject();
+    }
+
+    // A weighted set becomes an array of {"item": ..., "weight": ...} objects.
+    void visit(const WeightedSetFieldValue &value) override {
+        _json.beginArray();
+        if (value.size() > 0) {
+            for (const auto & entry : value) {
+                _json.beginObject();
+                _json.appendKey("item");
+                entry.first->accept(*this);
+                _json.appendKey("weight");
+                int weight = static_cast<const IntFieldValue &>(*entry.second).getValue();
+                _json.appendInt64(weight);
+                _json.endObject();
+            }
+        }
+        _json.endArray();
+    }
+
+    void visit(const TensorFieldValue &value) override {
+        const auto &tensor = value.getAsTensorPtr();
+        if (tensor) {
+            auto slime =
+                vespalib::tensor::SlimeBinaryFormat::serialize(*tensor);
+            vespalib::SimpleBuffer buf;
+            vespalib::slime::JsonFormat::encode(*slime, buf, true);
+            _json.appendJSON(buf.get().make_string());
+        } else {
+            // No tensor value => empty object
+            _json.beginObject();
+            _json.endObject();
+        }
+    }
+
+    // A reference without a valid document id is written as an empty string.
+    void visit(const ReferenceFieldValue& value) override {
+        _json.appendString(value.hasValidDocumentId()
+                           ? value.getDocumentId().toString()
+                           : string());
+    }
+
+public:
+    // 'markup' turns on tokenized output of string values; 'json' receives the output.
+    JsonFiller(bool markup, JSONWriter &json)
+        : _json(json), _tokenize(markup) {}
+};
+
+// FieldValueConverter that renders the input as JSON text, using JsonFiller,
+// and returns that text wrapped in a StringFieldValue.
+class JsonConverter : public FieldValueConverter {
+    bool _tokenize;
+public:
+    // 'tokenize' is forwarded to JsonFiller. The constructor is explicit so
+    // that a bool is never silently converted into a converter.
+    explicit JsonConverter(bool tokenize)
+        : _tokenize(tokenize)
+    {}
+
+    FieldValue::UP convert(const FieldValue &input) override {
+        asciistream target;
+        JSONWriter json(target);
+        JsonFiller visitor(_tokenize, json);
+        input.accept(visitor);
+        return FieldValue::UP(new StringFieldValue(target.str()));
+    }
+
+};
+
+/**
+ * Converts a top-level field value into its summary representation.
+ *
+ * Numeric, raw and tensor values are cloned (a byte becomes a
+ * ShortFieldValue). String, predicate and reference values are collected as
+ * text and returned as a StringFieldValue. Arrays, maps, weighted sets,
+ * structs, documents and annotation references are handed to the
+ * FieldValueConverter supplied at construction. A struct of the URI type is
+ * converted as its "all" field when that field is a string.
+ *
+ * All visit methods are marked 'override' so that a signature mismatch
+ * against ConstFieldValueVisitor is a compile error.
+ */
+class SummaryFieldValueConverter : protected ConstFieldValueVisitor
+{
+    asciistream _str;
+    bool _tokenize;
+    FieldValue::UP _field_value;
+    FieldValueConverter &_structuredFieldConverter;
+
+    // Hands 'value' to the structured-field converter and keeps its result.
+    void convertStructured(const FieldValue &value) {
+        _field_value = _structuredFieldConverter.convert(value);
+    }
+
+    // Keeps a clone of 't' as the result.
+    template <typename T>
+    void visitPrimitive(const T &t) {
+        _field_value.reset(t.clone());
+    }
+
+    void visit(const ArrayFieldValue &value) override { convertStructured(value); }
+
+    void visit(const IntFieldValue &value) override { visitPrimitive(value); }
+    void visit(const LongFieldValue &value) override { visitPrimitive(value); }
+    void visit(const ShortFieldValue &value) override { visitPrimitive(value); }
+    void visit(const ByteFieldValue &value) override {
+        int8_t signedValue = value.getAsByte();
+        _field_value.reset(new ShortFieldValue(signedValue));
+    }
+    void visit(const DoubleFieldValue &value) override { visitPrimitive(value); }
+    void visit(const FloatFieldValue &value) override { visitPrimitive(value); }
+
+    void visit(const StringFieldValue &value) override {
+        if (_tokenize) {
+            SummaryHandler handler(value.getValue(), _str);
+            handleIndexingTerms(handler, value);
+        } else {
+            _str << value.getValue();
+        }
+    }
+
+    void visit(const AnnotationReferenceFieldValue &v) override { convertStructured(v); }
+    void visit(const Document &v) override { convertStructured(v); }
+
+    void visit(const PredicateFieldValue &value) override {
+        _str << value.toString();
+    }
+
+    void visit(const RawFieldValue &value) override { visitPrimitive(value); }
+
+    void visit(const MapFieldValue &v) override { convertStructured(v); }
+
+    void visit(const StructFieldValue &value) override {
+        if (*value.getDataType() == *SearchDataType::URI) {
+            // A URI struct is represented by its "all" field when that is a string.
+            FieldValue::UP uriAllValue = value.getValue("all");
+            if (uriAllValue.get() != nullptr &&
+                uriAllValue->inherits(IDENTIFIABLE_CLASSID(StringFieldValue)))
+            {
+                uriAllValue->accept(*this);
+                return;
+            }
+        }
+        convertStructured(value);
+    }
+
+    void visit(const WeightedSetFieldValue &value) override { convertStructured(value); }
+
+    void visit(const TensorFieldValue &value) override { visitPrimitive(value); }
+
+    void visit(const ReferenceFieldValue& value) override {
+        if (value.hasValidDocumentId()) {
+            _str << value.getDocumentId().toString();
+        } // else: implicit empty string
+    }
+
+public:
+    // 'tokenize' turns on tokenized output of string values; 'subConverter'
+    // handles the structured value types and is held by reference.
+    SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter)
+        : _str(), _tokenize(tokenize),
+          _structuredFieldConverter(subConverter)
+    {}
+
+    // Visits 'input' and returns the value a visit method stored, or else a
+    // StringFieldValue holding the text collected in _str.
+    FieldValue::UP convert(const FieldValue &input) {
+        input.accept(*this);
+        if (_field_value.get()) {
+            return std::move(_field_value);
+        }
+        return FieldValue::UP(new StringFieldValue(_str.str()));
+    }
+};
+
+
+
+using namespace vespalib::slime::convenience;
+
+/**
+ * Field value visitor that inserts the visited value into slime through the
+ * given Inserter.
+ *
+ * Integer types are inserted as long, float and double as double, raw
+ * values as base64 strings, and tensors as binary data. Arrays, maps and
+ * weighted sets become slime arrays, and structs become slime objects. A
+ * struct of the URI type is inserted as the string in its "all" field when
+ * that field is a string. When tokenize is set, string values are passed
+ * through handleIndexingTerms() with a SummaryHandler before being inserted.
+ *
+ * All visit methods are marked 'override' so that a signature mismatch
+ * against ConstFieldValueVisitor is a compile error.
+ */
+class SlimeFiller : public ConstFieldValueVisitor {
+    Inserter &_inserter;
+    bool _tokenize;
+
+    // Inserts an object {"error": message}; used for value types that are not converted.
+    void insertError(const char *message) {
+        Cursor &c = _inserter.insertObject();
+        Memory key("error");
+        Memory val(message);
+        c.setString(key, val);
+    }
+
+    void visit(const AnnotationReferenceFieldValue &) override {
+        insertError("cannot convert from annotation reference field");
+    }
+    void visit(const Document &) override {
+        insertError("cannot convert from field of type document");
+    }
+
+    // A map becomes an array of {"key": ..., "value": ...} objects.
+    void visit(const MapFieldValue &v) override {
+        Cursor &a = _inserter.insertArray();
+        Memory keymem("key");
+        Memory valmem("value");
+        for (const auto & entry : v) {
+            Cursor &c = a.addObject();
+            ObjectInserter ki(c, keymem);
+            ObjectInserter vi(c, valmem);
+            SlimeFiller keyConv(ki, _tokenize);
+            SlimeFiller valConv(vi, _tokenize);
+
+            const FieldValue &key = *(entry.first);
+            key.accept(keyConv);
+            const FieldValue &val = *(entry.second);
+            val.accept(valConv);
+        }
+    }
+
+    void visit(const ArrayFieldValue &value) override {
+        Cursor &a = _inserter.insertArray();
+        if (value.size() > 0) {
+            ArrayInserter ai(a);
+            SlimeFiller conv(ai, _tokenize);
+            for (const FieldValue &fv : value) {
+                fv.accept(conv);
+            }
+        }
+    }
+
+    void visit(const StringFieldValue &value) override {
+        if (_tokenize) {
+            asciistream tmp;
+            SummaryHandler handler(value.getValue(), tmp);
+            handleIndexingTerms(handler, value);
+            _inserter.insertString(Memory(tmp.str()));
+        } else {
+            _inserter.insertString(Memory(value.getValue()));
+        }
+    }
+
+    void visit(const IntFieldValue &value) override {
+        int32_t v = value.getValue();
+        _inserter.insertLong(v);
+    }
+    void visit(const LongFieldValue &value) override {
+        int64_t v = value.getValue();
+        _inserter.insertLong(v);
+    }
+    void visit(const ShortFieldValue &value) override {
+        int16_t v = value.getValue();
+        _inserter.insertLong(v);
+    }
+    void visit(const ByteFieldValue &value) override {
+        int8_t v = value.getAsByte();
+        _inserter.insertLong(v);
+    }
+    void visit(const DoubleFieldValue &value) override {
+        double v = value.getValue();
+        _inserter.insertDouble(v);
+    }
+    void visit(const FloatFieldValue &value) override {
+        float v = value.getValue();
+        _inserter.insertDouble(v);
+    }
+
+    void visit(const PredicateFieldValue &value) override {
+        vespalib::slime::inject(value.getSlime().get(), _inserter);
+    }
+
+    void visit(const RawFieldValue &value) override {
+        // Use base64 coding to represent raw values
+        std::pair<const char *, size_t> buf = value.getAsRaw();
+        vespalib::string rawVal(buf.first, buf.first + buf.second);
+        vespalib::string encVal(vespalib::Base64::encode(rawVal));
+        _inserter.insertString(Memory(encVal.c_str()));
+    }
+
+    void visit(const StructFieldValue &value) override {
+        if (*value.getDataType() == *SearchDataType::URI) {
+            // A URI struct is represented by its "all" field when that is a string.
+            FieldValue::UP uriAllValue = value.getValue("all");
+            if (uriAllValue.get() != nullptr &&
+                uriAllValue->inherits(IDENTIFIABLE_CLASSID(StringFieldValue)))
+            {
+                uriAllValue->accept(*this);
+                return;
+            }
+        }
+        Cursor &c = _inserter.insertObject();
+        for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) {
+            Memory keymem(itr.field().getName());
+            ObjectInserter vi(c, keymem);
+            SlimeFiller conv(vi, _tokenize);
+            FieldValue::UP nextValue(value.getValue(itr.field()));
+            nextValue->accept(conv);
+        }
+    }
+
+    // A weighted set becomes an array of {"item": ..., "weight": ...} objects.
+    void visit(const WeightedSetFieldValue &value) override {
+        Cursor &a = _inserter.insertArray();
+        if (value.size() > 0) {
+            Memory imem("item");
+            Memory wmem("weight");
+            for (const auto & entry : value) {
+                Cursor &o = a.addObject();
+                ObjectInserter ki(o, imem);
+                SlimeFiller conv(ki, _tokenize);
+                entry.first->accept(conv);
+                int weight = static_cast<const IntFieldValue &>(*entry.second).getValue();
+                o.setLong(wmem, weight);
+            }
+        }
+    }
+
+    // A missing tensor is inserted as data built from the still-empty stream.
+    void visit(const TensorFieldValue &value) override {
+        const auto &tensor = value.getAsTensorPtr();
+        vespalib::nbostream s;
+        if (tensor) {
+            vespalib::tensor::TypedBinaryFormat::serialize(s, *tensor);
+        }
+        _inserter.insertData(vespalib::Memory(s.peek(), s.size()));
+    }
+
+    // A reference without a valid document id is inserted as an empty string.
+    void visit(const ReferenceFieldValue& value) override {
+        _inserter.insertString(Memory(value.hasValidDocumentId()
+                                      ? value.getDocumentId().toString()
+                                      : string()));
+    }
+
+public:
+    // 'inserter' receives the converted value; 'tokenize' turns on tokenized
+    // output of string values.
+    SlimeFiller(Inserter &inserter, bool tokenize)
+        : _inserter(inserter), _tokenize(tokenize) {}
+};
+
+// FieldValueConverter that fills a Slime object from the input, using
+// SlimeFiller, and returns its binary encoding wrapped in a RawFieldValue.
+class SlimeConverter : public FieldValueConverter {
+    bool _tokenize;
+public:
+    // 'tokenize' is forwarded to SlimeFiller. The constructor is explicit so
+    // that a bool is never silently converted into a converter.
+    explicit SlimeConverter(bool tokenize)
+        : _tokenize(tokenize)
+    {}
+
+    FieldValue::UP convert(const FieldValue &input) override {
+        vespalib::Slime slime;
+        SlimeInserter inserter(slime);
+        SlimeFiller visitor(inserter, _tokenize);
+        input.accept(visitor);
+        // The encoded bytes are written to rbuf and handed to RawFieldValue.
+        search::RawBuf rbuf(4096);
+        search::SlimeOutputRawBufAdapter adapter(rbuf);
+        vespalib::slime::BinaryFormat::encode(slime, adapter);
+        return FieldValue::UP(new RawFieldValue(rbuf.GetDrainPos(), rbuf.GetUsedLen()));
+    }
+};
+
+
+} // namespace
+
+// Converts 'value' with a SummaryFieldValueConverter. Structured values are
+// encoded by a SlimeConverter when useSlimeInsideFields is set, and by a
+// JsonConverter otherwise. 'markup' is passed to both converters.
+FieldValue::UP
+SummaryFieldConverter::convertSummaryField(bool markup,
+                                           const FieldValue &value,
+                                           bool useSlimeInsideFields)
+{
+    if (!useSlimeInsideFields) {
+        JsonConverter jsonSubConverter(markup);
+        return SummaryFieldValueConverter(markup, jsonSubConverter).convert(value);
+    }
+    SlimeConverter slimeSubConverter(markup);
+    return SummaryFieldValueConverter(markup, slimeSubConverter).convert(value);
+}
+
+
+} // namespace search::docsummary
+} // namespace search
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
new file mode 100644
index 00000000000..9d6f54755cc
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
@@ -0,0 +1,21 @@
+// Copyright 2016 Yahoo Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/document/fieldvalue/fieldvalue.h>
+
+namespace search {
+namespace docsummary {
+
+/**
+ * This class converts a summary field for docsum fetching.
+ */
+class SummaryFieldConverter
+{
+public:
+ /**
+ * Converts a field value into the field value to use in the summary.
+ *
+ * @param markup passed to the converters as their tokenize flag
+ * @param value the field value to convert
+ * @param useSlimeInsideFields selects the slime-based converter for
+ * structured values when true, the JSON-based converter otherwise
+ * @return the converted field value
+ */
+ static document::FieldValue::UP
+ convertSummaryField(bool markup, const document::FieldValue &value, bool useSlimeInsideFields);
+};
+
+} // namespace search::docsummary
+} // namespace search