summaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-09-15 14:39:31 +0200
committerTor Egge <Tor.Egge@online.no>2022-09-15 14:39:31 +0200
commita4945088ccc3a5ff072e04ecdd61b0ec7f8d884f (patch)
tree0082ebb71b709dcc333953bcd9d8739253a3d2d0 /searchsummary
parente123e08a279e32ce7dec845bf79b291aefeab16c (diff)
Remove unused portions of SummaryFieldConverter.
Diffstat (limited to 'searchsummary')
-rw-r--r--searchsummary/CMakeLists.txt1
-rw-r--r--searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt8
-rw-r--r--searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp741
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp195
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h11
5 files changed, 5 insertions, 951 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index 5fcd0f7e19a..6e970bdfc00 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -22,6 +22,5 @@ vespa_define_module(
src/tests/docsummary/matched_elements_filter
src/tests/docsummary/slime_filler
src/tests/docsummary/slime_summary
- src/tests/docsummary/summary_field_converter
src/tests/juniper
)
diff --git a/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt
deleted file mode 100644
index cfda566ee6c..00000000000
--- a/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-vespa_add_executable(searchsummary_summary_field_converter_test_app
- SOURCES
- summary_field_converter_test.cpp
- DEPENDS
- searchsummary
-)
-vespa_add_test(NAME searchsummary_summary_field_converter_test_app COMMAND searchsummary_summary_field_converter_test_app)
diff --git a/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp b/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp
deleted file mode 100644
index 0eff397bc10..00000000000
--- a/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp
+++ /dev/null
@@ -1,741 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-// Unit tests for summaryfieldconverter.
-
-#include <vespa/document/annotation/annotation.h>
-#include <vespa/document/annotation/span.h>
-#include <vespa/document/annotation/spanlist.h>
-#include <vespa/document/annotation/spantree.h>
-#include <vespa/document/base/documentid.h>
-#include <vespa/document/base/exceptions.h>
-#include <vespa/document/base/field.h>
-#include <vespa/document/config/documenttypes_config_fwd.h>
-#include <vespa/document/datatype/annotationtype.h>
-#include <vespa/document/datatype/arraydatatype.h>
-#include <vespa/document/datatype/datatype.h>
-#include <vespa/document/datatype/documenttype.h>
-#include <vespa/document/datatype/structdatatype.h>
-#include <vespa/document/datatype/urldatatype.h>
-#include <vespa/document/datatype/weightedsetdatatype.h>
-#include <vespa/document/datatype/referencedatatype.h>
-#include <vespa/document/datatype/tensor_data_type.h>
-#include <vespa/document/fieldvalue/arrayfieldvalue.h>
-#include <vespa/document/fieldvalue/bytefieldvalue.h>
-#include <vespa/document/fieldvalue/document.h>
-#include <vespa/document/fieldvalue/doublefieldvalue.h>
-#include <vespa/document/fieldvalue/floatfieldvalue.h>
-#include <vespa/document/fieldvalue/intfieldvalue.h>
-#include <vespa/document/fieldvalue/longfieldvalue.h>
-#include <vespa/document/fieldvalue/predicatefieldvalue.h>
-#include <vespa/document/fieldvalue/rawfieldvalue.h>
-#include <vespa/document/fieldvalue/shortfieldvalue.h>
-#include <vespa/document/fieldvalue/stringfieldvalue.h>
-#include <vespa/document/fieldvalue/structfieldvalue.h>
-#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
-#include <vespa/document/fieldvalue/tensorfieldvalue.h>
-#include <vespa/document/fieldvalue/referencefieldvalue.h>
-#include <vespa/document/predicate/predicate.h>
-#include <vespa/document/repo/configbuilder.h>
-#include <vespa/document/repo/fixedtyperepo.h>
-#include <vespa/searchsummary/docsummary/summaryfieldconverter.h>
-#include <vespa/searchsummary/docsummary/linguisticsannotation.h>
-#include <vespa/searchsummary/docsummary/searchdatatype.h>
-#include <vespa/searchcommon/common/schema.h>
-#include <vespa/vespalib/geo/zcurve.h>
-#include <vespa/vespalib/testkit/testapp.h>
-#include <vespa/vespalib/data/slime/json_format.h>
-#include <vespa/vespalib/data/slime/binary_format.h>
-#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h>
-#include <vespa/eval/eval/simple_value.h>
-#include <vespa/eval/eval/tensor_spec.h>
-#include <vespa/eval/eval/value.h>
-#include <vespa/eval/eval/test/value_compare.h>
-#include <vespa/vespalib/data/slime/slime.h>
-
-using document::Annotation;
-using document::AnnotationType;
-using document::ArrayDataType;
-using document::ArrayFieldValue;
-using document::ByteFieldValue;
-using document::DataType;
-using document::Document;
-using document::DocumentId;
-using document::DocumentType;
-using document::DocumentTypeRepo;
-using document::DoubleFieldValue;
-using document::FeatureSet;
-using document::Field;
-using document::FieldNotFoundException;
-using document::FieldValue;
-using document::FloatFieldValue;
-using document::IntFieldValue;
-using document::LongFieldValue;
-using document::Predicate;
-using document::PredicateFieldValue;
-using document::RawFieldValue;
-using document::ReferenceDataType;
-using document::ReferenceFieldValue;
-using document::ShortFieldValue;
-using document::Span;
-using document::SpanList;
-using document::SpanTree;
-using document::StringFieldValue;
-using document::StructDataType;
-using document::StructFieldValue;
-using document::TensorDataType;
-using document::TensorFieldValue;
-using document::UrlDataType;
-using document::WeightedSetDataType;
-using document::WeightedSetFieldValue;
-using search::index::Schema;
-using search::linguistics::SPANTREE_NAME;
-using search::linguistics::TERM;
-using vespalib::Slime;
-using vespalib::eval::SimpleValue;
-using vespalib::eval::TensorSpec;
-using vespalib::eval::Value;
-using vespalib::eval::ValueType;
-using vespalib::geo::ZCurve;
-using vespalib::slime::Cursor;
-using vespalib::string;
-
-using namespace search::docsummary;
-
-typedef SummaryFieldConverter SFC;
-
-namespace {
-
-struct FieldBlock {
- vespalib::string input;
- Slime slime;
- search::RawBuf binary;
- vespalib::string json;
-
- explicit FieldBlock(const vespalib::string &jsonInput);
- ~FieldBlock();
-};
-
-FieldBlock::FieldBlock(const vespalib::string &jsonInput)
- : input(jsonInput), slime(), binary(1024), json()
-{
- size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime);
- EXPECT_TRUE(used > 0);
- {
- search::SlimeOutputRawBufAdapter adapter(binary);
- vespalib::slime::JsonFormat::encode(slime, adapter, true);
- json.assign(binary.GetDrainPos(), binary.GetUsedLen());
- binary.reset();
- }
- search::SlimeOutputRawBufAdapter adapter(binary);
- vespalib::slime::BinaryFormat::encode(slime, adapter);
-}
-
-FieldBlock::~FieldBlock() = default;
-
-class Test : public vespalib::TestApp {
- std::unique_ptr<Schema> _schema;
- std::shared_ptr<const DocumentTypeRepo> _documentRepo;
- const DocumentType *_documentType;
- document::FixedTypeRepo _fixedRepo;
-
- void setUp();
- void tearDown();
-
- const DataType &getDataType(const string &name) const;
- const ReferenceDataType& getAsRefType(const string& name) const;
-
- template <typename T>
- T getValueAs(const string &field_name, const Document &doc);
-
- template <typename T>
- T
- cvtValueAs(const FieldValue::UP &fv);
-
- template <typename T>
- T
- cvtAttributeAs(const FieldValue::UP &fv);
-
- template <typename T>
- T
- cvtSummaryAs(bool markup, const FieldValue::UP &fv);
-
- void checkString(const string &str, const FieldValue *value);
- void checkStringForAllConversions(const string& expected, const FieldValue* fv);
- void checkData(const search::RawBuf &data, const FieldValue *value);
- void checkTensor(const Value::UP &tensor, const FieldValue *value);
- template <unsigned int N>
- void checkArray(const char *(&str)[N], const FieldValue *value);
- void setSummaryField(const string &name);
- void setAttributeField(const string &name);
-
- void requireThatSummaryIsAnUnmodifiedString();
- void requireThatAttributeIsAnUnmodifiedString();
- void requireThatArrayIsFlattenedInSummaryField();
- void requireThatWeightedSetIsFlattenedInSummaryField();
- void requireThatPositionsAreTransformedInSummary();
- void requireThatArrayIsPreservedInAttributeField();
- void requireThatPositionsAreTransformedInAttributeField();
- void requireThatPositionArrayIsTransformedInAttributeField();
- void requireThatPositionWeightedSetIsTransformedInAttributeField();
- void requireThatAttributeCanBePrimitiveTypes();
- void requireThatSummaryCanBePrimitiveTypes();
- void requireThatSummaryHandlesCjk();
- void requireThatSearchDataTypeUsesDefaultDataTypes();
- void requireThatLinguisticsAnnotationUsesDefaultDataTypes();
- void requireThatPredicateIsPrinted();
- void requireThatTensorIsNotConverted();
- void requireThatNonEmptyReferenceIsConvertedToStringWithId();
- void requireThatEmptyReferenceIsConvertedToEmptyString();
- void requireThatReferenceInCompositeTypeEmitsSlimeData();
- const DocumentType &getDocType() const { return *_documentType; }
- Document makeDocument();
- StringFieldValue annotateTerm(const string &term);
- StringFieldValue makeAnnotatedChineseString();
- StringFieldValue makeAnnotatedString();
- void setSpanTree(StringFieldValue & value, SpanTree::UP tree);
-public:
- Test();
- ~Test();
- int Main() override;
-};
-
-DocumenttypesConfig getDocumenttypesConfig() {
- using namespace document::config_builder;
- DocumenttypesConfigBuilderHelper builder;
- const int ref_target_doctype_id = 1234;
- const int ref_type_id = 5678;
- builder.document(ref_target_doctype_id, "target_dummy_document",
- Struct("target_dummy_document.header"),
- Struct("target_dummy_document.body"));
- builder.document(42, "indexingdocument",
- Struct("indexingdocument.header")
- .addField("empty", DataType::T_STRING)
- .addField("string", DataType::T_STRING)
- .addField("plain_string", DataType::T_STRING)
- .addField("string_array", Array(DataType::T_STRING))
- .addField("string_wset", Wset(DataType::T_STRING))
- .addField("position1", DataType::T_INT)
- .addField("position2", DataType::T_LONG)
- .addField("position2_array", Array(DataType::T_LONG))
- .addField("position2_wset", Wset(DataType::T_LONG))
- .addField("uri", UrlDataType::getInstance().getId())
- .addField("uri_array",
- Array(UrlDataType::getInstance().getId()))
- .addField("int", DataType::T_INT)
- .addField("long", DataType::T_LONG)
- .addField("short", DataType::T_SHORT)
- .addField("byte", DataType::T_BYTE)
- .addField("double", DataType::T_DOUBLE)
- .addField("float", DataType::T_FLOAT)
- .addField("chinese", DataType::T_STRING)
- .addField("predicate", DataType::T_PREDICATE)
- .addTensorField("tensor", "tensor(x{},y{})")
- .addField("ref", ref_type_id)
- .addField("nested", Struct("indexingdocument.header.nested")
- .addField("inner_ref", ref_type_id)),
- Struct("indexingdocument.body"))
- .referenceType(ref_type_id, ref_target_doctype_id);
- return builder.config();
-}
-
-Test::Test() :
- _documentRepo(std::make_unique<DocumentTypeRepo>(getDocumenttypesConfig())),
- _documentType(_documentRepo->getDocumentType("indexingdocument")),
- _fixedRepo(*_documentRepo, *_documentType)
-{
- ASSERT_TRUE(_documentType);
-}
-
-Test::~Test() {}
-
-#define TEST_CALL(func) \
- TEST_DO(setUp()); \
- TEST_DO(func); \
- TEST_DO(tearDown())
-
-int
-Test::Main()
-{
- TEST_INIT("summaryfieldconverter_test");
-
- TEST_CALL(requireThatSummaryIsAnUnmodifiedString());
- TEST_CALL(requireThatAttributeIsAnUnmodifiedString());
- TEST_CALL(requireThatArrayIsFlattenedInSummaryField());
- TEST_CALL(requireThatWeightedSetIsFlattenedInSummaryField());
- TEST_CALL(requireThatPositionsAreTransformedInSummary());
- TEST_CALL(requireThatArrayIsPreservedInAttributeField());
- TEST_CALL(requireThatPositionsAreTransformedInAttributeField());
- TEST_CALL(requireThatPositionArrayIsTransformedInAttributeField());
- TEST_CALL(requireThatPositionWeightedSetIsTransformedInAttributeField());
- TEST_CALL(requireThatAttributeCanBePrimitiveTypes());
- TEST_CALL(requireThatSummaryCanBePrimitiveTypes());
- TEST_CALL(requireThatSummaryHandlesCjk());
- TEST_CALL(requireThatSearchDataTypeUsesDefaultDataTypes());
- TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes());
- TEST_CALL(requireThatPredicateIsPrinted());
- TEST_CALL(requireThatTensorIsNotConverted());
- TEST_CALL(requireThatNonEmptyReferenceIsConvertedToStringWithId());
- TEST_CALL(requireThatEmptyReferenceIsConvertedToEmptyString());
- TEST_CALL(requireThatReferenceInCompositeTypeEmitsSlimeData());
-
- TEST_DONE();
-}
-
-void Test::setUp() {
- _schema = std::make_unique<Schema>();
-}
-
-void Test::tearDown() {
-}
-
-const DataType &Test::getDataType(const string &name) const {
- const DataType *type = _documentRepo->getDataType(*_documentType, name);
- ASSERT_TRUE(type);
- return *type;
-}
-
-StringFieldValue Test::makeAnnotatedString() {
- auto span_list_up = std::make_unique<SpanList>();
- auto span_list = span_list_up.get();
- auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up));
- // Annotations don't have to be added sequentially.
- tree->annotate(span_list->add(std::make_unique<Span>(8, 3)),
- Annotation(*TERM, std::make_unique<StringFieldValue>("Annotation")));
- tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM);
- tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), *TERM);
- tree->annotate(span_list->add(std::make_unique<Span>(4, 3)),
- Annotation(*TERM, std::make_unique<StringFieldValue>("Multiple")));
- tree->annotate(span_list->add(std::make_unique<Span>(1, 2)),
- Annotation(*TERM, std::make_unique<StringFieldValue>("Overlap")));
- StringFieldValue value("Foo Bar Baz");
- setSpanTree(value, std::move(tree));
- return value;
-}
-
-StringFieldValue Test::annotateTerm(const string &term) {
- auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::make_unique<Span>(0, term.size()));
- tree->annotate(tree->getRoot(), *TERM);
- StringFieldValue value(term);
- setSpanTree(value, std::move(tree));
- return value;
-}
-
-void Test::setSpanTree(StringFieldValue & value, SpanTree::UP tree) {
- StringFieldValue::SpanTrees trees;
- trees.push_back(std::move(tree));
- value.setSpanTrees(trees, _fixedRepo);
-}
-
-StringFieldValue Test::makeAnnotatedChineseString() {
- auto span_list_up = std::make_unique<SpanList>();
- auto span_list = span_list_up.get();
- auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up));
- // These chinese characters each use 3 bytes in their UTF8 encoding.
- tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM);
- tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM);
- StringFieldValue value("我就是那个大灰狼");
- setSpanTree(value, std::move(tree));
- return value;
-}
-
-Document Test::makeDocument() {
- Document doc(getDocType(), DocumentId("id:ns:indexingdocument::"));
- doc.setRepo(*_documentRepo);
- doc.setValue("string", makeAnnotatedString());
-
- doc.setValue("plain_string", StringFieldValue("Plain"));
-
- ArrayFieldValue array(getDataType("Array<String>"));
- array.add(annotateTerm("\"foO\""));
- array.add(annotateTerm("ba\\R"));
- doc.setValue("string_array", array);
-
- WeightedSetFieldValue wset(getDataType("WeightedSet<String>"));
- wset.add(annotateTerm("\"foo\""), 2);
- wset.add(annotateTerm("ba\\r"), 4);
- doc.setValue("string_wset", wset);
-
- doc.setValue("position1", IntFieldValue(5));
-
- doc.setValue("position2", LongFieldValue(ZCurve::encode(4, 2)));
-
- StructFieldValue uri(getDataType("url"));
- uri.setValue("all", annotateTerm("http://www.example.com:42/foobar?q#frag"));
- uri.setValue("scheme", annotateTerm("http"));
- uri.setValue("host", annotateTerm("www.example.com"));
- uri.setValue("port", annotateTerm("42"));
- uri.setValue("path", annotateTerm("foobar"));
- uri.setValue("query", annotateTerm("q"));
- uri.setValue("fragment", annotateTerm("frag"));
- doc.setValue("uri", uri);
-
- ArrayFieldValue uri_array(getDataType("Array<url>"));
- uri.setValue("all", annotateTerm("http://www.example.com:80/foobar?q#frag"));
- uri.setValue("port", annotateTerm("80"));
- uri_array.add(uri);
- uri.setValue("all", annotateTerm("https://www.example.com:443/foo?q#frag"));
- uri.setValue("scheme", annotateTerm("https"));
- uri.setValue("path", annotateTerm("foo"));
- uri.setValue("port", annotateTerm("443"));
- uri_array.add(uri);
- doc.setValue("uri_array", uri_array);
-
- ArrayFieldValue position2_array(getDataType("Array<Long>"));
- position2_array.add(LongFieldValue(ZCurve::encode(4, 2)));
- position2_array.add(LongFieldValue(ZCurve::encode(4, 4)));
- doc.setValue("position2_array", position2_array);
-
- WeightedSetFieldValue position2_wset(getDataType("WeightedSet<Long>"));
- position2_wset.add(LongFieldValue(ZCurve::encode(4, 2)), 4);
- position2_wset.add(LongFieldValue(ZCurve::encode(4, 4)), 2);
- doc.setValue("position2_wset", position2_wset);
-
- doc.setValue("int", IntFieldValue(42));
- doc.setValue("long", LongFieldValue(84));
- doc.setValue("short", ShortFieldValue(21));
- doc.setValue("byte", ByteFieldValue(11));
- doc.setValue("double", DoubleFieldValue(0.4));
- doc.setValue("float", FloatFieldValue(0.2f));
-
- doc.setValue("chinese", makeAnnotatedChineseString());
- return doc;
-}
-
-template <typename T>
-T Test::getValueAs(const string &field_name, const Document &doc) {
- FieldValue::UP fv(doc.getValue(field_name));
- const T *value = dynamic_cast<const T *>(fv.get());
- ASSERT_TRUE(value);
- return *value;
-}
-
-template <typename T>
-T
-Test::cvtValueAs(const FieldValue::UP &fv)
-{
- ASSERT_TRUE(fv.get() != NULL);
- const T *value = dynamic_cast<const T *>(fv.get());
- ASSERT_TRUE(value);
- return *value;
-}
-
-template <typename T>
-T
-Test::cvtAttributeAs(const FieldValue::UP &fv)
-{
- ASSERT_TRUE(fv.get() != NULL);
- return cvtValueAs<T>(fv);
-}
-
-template <typename T>
-T
-Test::cvtSummaryAs(bool markup, const FieldValue::UP &fv)
-{
- ASSERT_TRUE(fv.get() != NULL);
- FieldValue::UP r = SFC::convertSummaryField(markup, *fv);
- return cvtValueAs<T>(r);
-}
-
-void Test::checkString(const string &str, const FieldValue *value) {
- ASSERT_TRUE(value);
- const StringFieldValue *s = dynamic_cast<const StringFieldValue *>(value);
- ASSERT_TRUE(s);
- // fprintf(stderr, ">>>%s<<< >>>%s<<<\n", str.c_str(), s->getValue().c_str());
- EXPECT_EQUAL(str, s->getValue());
-}
-
-void Test::checkData(const search::RawBuf &buf, const FieldValue *value) {
- ASSERT_TRUE(value);
- const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value);
- ASSERT_TRUE(s);
- auto got = s->getAsRaw();
- ASSERT_EQUAL(buf.GetUsedLen(), got.second);
- EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0);
-}
-
-void Test::checkTensor(const Value::UP &tensor, const FieldValue *value) {
- ASSERT_TRUE(value);
- const TensorFieldValue *s = dynamic_cast<const TensorFieldValue *>(value);
- ASSERT_TRUE(s);
- auto tvalue = s->getAsTensorPtr();
- EXPECT_EQUAL(tensor.get() != nullptr, tvalue != nullptr);
- if (tensor) {
- EXPECT_EQUAL(*tensor, *tvalue);
- }
-}
-
-template <unsigned int N>
-void Test::checkArray(const char *(&str)[N], const FieldValue *value) {
- ASSERT_TRUE(value);
- const ArrayFieldValue *a = dynamic_cast<const ArrayFieldValue *>(value);
- ASSERT_TRUE(a);
- EXPECT_EQUAL(N, a->size());
- for (size_t i = 0; i < a->size() && i < N; ++i) {
- checkString(str[i], &(*a)[i]);
- }
-}
-
-void Test::setSummaryField(const string &field) {
- _schema->addSummaryField(Schema::Field(field, search::index::schema::DataType::STRING));
-}
-
-void Test::setAttributeField(const string &field) {
- _schema->addAttributeField(Schema::Field(field, search::index::schema::DataType::STRING));
-}
-
-void Test::requireThatSummaryIsAnUnmodifiedString() {
- setSummaryField("string");
- Document summary = makeDocument();
- checkString("Foo Bar Baz", SFC::convertSummaryField(false,
- *summary.getValue("string")).get());
-}
-
-void Test::requireThatAttributeIsAnUnmodifiedString() {
- setAttributeField("string");
- Document attribute = makeDocument();
- checkString("Foo Bar Baz",
- attribute.getValue("string").get());
-}
-
-void Test::requireThatArrayIsFlattenedInSummaryField() {
- setSummaryField("string_array");
- Document summary = makeDocument();
- FieldBlock expect("[\"\\\"foO\\\"\",\"ba\\\\R\"]");
- checkData(expect.binary,
- SFC::convertSummaryField(false,
- *summary.getValue("string_array")).get());
-}
-
-void Test::requireThatWeightedSetIsFlattenedInSummaryField() {
- setSummaryField("string_wset");
- Document summary = makeDocument();
- FieldBlock expect("[{\"item\":\"\\\"foo\\\"\",\"weight\":2},{\"item\":\"ba\\\\r\",\"weight\":4}]");
- checkData(expect.binary,
- SFC::convertSummaryField(false,
- *summary.getValue("string_wset")).get());
-}
-
-void Test::requireThatPositionsAreTransformedInSummary() {
- setSummaryField("position1");
- setSummaryField("position2");
- Document summary = makeDocument();
- FieldValue::UP fv = summary.getValue("position1");
- EXPECT_EQUAL(5, cvtSummaryAs<IntFieldValue>(false, fv).getValue());
- FieldValue::UP fv2 = summary.getValue("position2");
- EXPECT_EQUAL(24, cvtSummaryAs<LongFieldValue>(false, fv2).getValue());
-}
-
-void Test::requireThatArrayIsPreservedInAttributeField() {
- setAttributeField("string_array");
- Document attribute = makeDocument();
- const char *array[] = { "\"foO\"", "ba\\R" };
- checkArray(array,
- attribute.getValue("string_array").get());
-}
-
-void Test::requireThatPositionsAreTransformedInAttributeField() {
- setAttributeField("position1");
- setAttributeField("position2");
- Document attr = makeDocument();
- FieldValue::UP fv = attr.getValue("position1");
- EXPECT_EQUAL(5, cvtAttributeAs<IntFieldValue>(fv).getValue());
- fv = attr.getValue("position2");
- EXPECT_EQUAL(24, cvtAttributeAs<LongFieldValue>(fv).getValue());
-}
-
-void Test::requireThatPositionArrayIsTransformedInAttributeField() {
- setAttributeField("position2_array");
- Document attr = makeDocument();
- FieldValue::UP fv = attr.getValue("position2_array");
- ArrayFieldValue a = cvtAttributeAs<ArrayFieldValue>(fv);
- EXPECT_EQUAL(2u, a.size());
- EXPECT_EQUAL(24, dynamic_cast<LongFieldValue &>(a[0]).getValue());
- EXPECT_EQUAL(48, dynamic_cast<LongFieldValue &>(a[1]).getValue());
-}
-
-void Test::requireThatPositionWeightedSetIsTransformedInAttributeField() {
- setAttributeField("position2_wset");
- Document attr = makeDocument();
- FieldValue::UP fv = attr.getValue("position2_wset");
- WeightedSetFieldValue w = cvtAttributeAs<WeightedSetFieldValue>(fv);
- EXPECT_EQUAL(2u, w.size());
- WeightedSetFieldValue::iterator it = w.begin();
- EXPECT_EQUAL(24, dynamic_cast<const LongFieldValue&>(*it->first).getValue());
- EXPECT_EQUAL(4, dynamic_cast<IntFieldValue &>(*it->second).getValue());
- ++it;
- EXPECT_EQUAL(48, dynamic_cast<const LongFieldValue&>(*it->first).getValue());
- EXPECT_EQUAL(2, dynamic_cast<IntFieldValue &>(*it->second).getValue());
-}
-
-void Test::requireThatAttributeCanBePrimitiveTypes() {
- setAttributeField("int");
- setAttributeField("long");
- setAttributeField("short");
- setAttributeField("byte");
- setAttributeField("double");
- setAttributeField("float");
- Document attribute = makeDocument();
- FieldValue::UP fv = attribute.getValue("int");
- EXPECT_EQUAL(42, cvtAttributeAs<IntFieldValue>(fv).getValue());
- fv = attribute.getValue("long");
- EXPECT_EQUAL(84, cvtAttributeAs<LongFieldValue>(fv).getValue());
- fv = attribute.getValue("short");
- EXPECT_EQUAL(21, cvtAttributeAs<ShortFieldValue>(fv).getValue());
- fv = attribute.getValue("byte");
- EXPECT_EQUAL(11, cvtAttributeAs<ByteFieldValue>(fv).getValue());
- fv = attribute.getValue("double");
- EXPECT_EQUAL(0.4, cvtAttributeAs<DoubleFieldValue>(fv).getValue());
- fv = attribute.getValue("float");
- EXPECT_EQUAL(0.2f, cvtAttributeAs<FloatFieldValue>(fv).getValue());
-}
-
-void Test::requireThatSummaryCanBePrimitiveTypes() {
- setSummaryField("int");
- setSummaryField("long");
- setSummaryField("short");
- setSummaryField("byte");
- setSummaryField("double");
- setSummaryField("float");
- Document summary = makeDocument();
- FieldValue::UP fv = summary.getValue("int");
- EXPECT_EQUAL(42, cvtSummaryAs<IntFieldValue>(false, fv).getValue());
- fv = summary.getValue("long");
- EXPECT_EQUAL(84, cvtSummaryAs<LongFieldValue>(false, fv).getValue());
- fv = summary.getValue("short");
- EXPECT_EQUAL(21, cvtSummaryAs<ShortFieldValue>(false, fv).getValue());
- fv = summary.getValue("byte");
- EXPECT_EQUAL(11, cvtSummaryAs<ShortFieldValue>(false, fv).getValue());
- fv = summary.getValue("double");
- EXPECT_EQUAL(0.4, cvtSummaryAs<DoubleFieldValue>(false, fv).getValue());
- fv = summary.getValue("float");
- EXPECT_EQUAL(0.2f, cvtSummaryAs<FloatFieldValue>(false, fv).getValue());
-}
-
-void Test::requireThatSummaryHandlesCjk() {
- Document summary = makeDocument();
- FieldValue::UP fv = summary.getValue("chinese");
- EXPECT_EQUAL("我就是那个\037大灰狼\037",
- cvtSummaryAs<StringFieldValue>(true, fv).getValue());
-}
-
-void Test::requireThatSearchDataTypeUsesDefaultDataTypes() {
- const StructDataType *uri =
- dynamic_cast<const StructDataType *>(SearchDataType::URI);
- ASSERT_TRUE(uri);
- ASSERT_TRUE(uri->hasField("all"));
- ASSERT_TRUE(uri->hasField("scheme"));
- ASSERT_TRUE(uri->hasField("host"));
- ASSERT_TRUE(uri->hasField("port"));
- ASSERT_TRUE(uri->hasField("path"));
- ASSERT_TRUE(uri->hasField("query"));
- ASSERT_TRUE(uri->hasField("fragment"));
- EXPECT_EQUAL(*DataType::STRING, uri->getField("all").getDataType());
- EXPECT_EQUAL(*DataType::STRING, uri->getField("scheme").getDataType());
- EXPECT_EQUAL(*DataType::STRING, uri->getField("host").getDataType());
- EXPECT_EQUAL(*DataType::STRING, uri->getField("port").getDataType());
- EXPECT_EQUAL(*DataType::STRING, uri->getField("path").getDataType());
- EXPECT_EQUAL(*DataType::STRING, uri->getField("query").getDataType());
- EXPECT_EQUAL(*DataType::STRING, uri->getField("fragment").getDataType());
-}
-
-void Test::requireThatLinguisticsAnnotationUsesDefaultDataTypes() {
- EXPECT_EQUAL(*AnnotationType::TERM, *search::linguistics::TERM);
- ASSERT_TRUE(AnnotationType::TERM->getDataType());
- ASSERT_TRUE(search::linguistics::TERM->getDataType());
- EXPECT_EQUAL(*AnnotationType::TERM->getDataType(),
- *search::linguistics::TERM->getDataType());
-}
-
-void
-Test::requireThatPredicateIsPrinted()
-{
- auto input = std::make_unique<Slime>();
- Cursor &obj = input->setObject();
- obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET);
- obj.setString(Predicate::KEY, "foo");
- Cursor &arr = obj.setArray(Predicate::SET);
- arr.addString("bar");
-
- Document doc(getDocType(), DocumentId("id:ns:indexingdocument::"));
- doc.setRepo(*_documentRepo);
- doc.setValue("predicate", PredicateFieldValue(std::move(input)));
-
- checkString("'foo' in ['bar']\n",
- SFC::convertSummaryField(false, *doc.getValue("predicate")).get());
-}
-
-Value::UP make_tensor(const TensorSpec &spec) {
- return SimpleValue::from_spec(spec);
-}
-
-void
-Test::requireThatTensorIsNotConverted()
-{
- TensorDataType tensorDataType(ValueType::from_spec("tensor(x{},y{})"));
- TensorFieldValue tensorFieldValue(tensorDataType);
- tensorFieldValue = make_tensor(TensorSpec("tensor(x{},y{})")
- .add({{"x", "4"}, {"y", "5"}}, 7));
- Document doc(getDocType(), DocumentId("id:ns:indexingdocument::"));
- doc.setRepo(*_documentRepo);
- doc.setValue("tensor", tensorFieldValue);
-
- TEST_CALL(checkTensor(make_tensor(TensorSpec("tensor(x{},y{})")
- .add({{"x", "4"}, {"y", "5"}}, 7)),
- SFC::convertSummaryField(false,
- *doc.getValue("tensor")).get()));
- doc.setValue("tensor", TensorFieldValue());
-
- TEST_CALL(checkTensor(Value::UP(),
- SFC::convertSummaryField(false,
- *doc.getValue("tensor")).get()));
-}
-
-void Test::checkStringForAllConversions(const string& expected, const FieldValue* fv) {
- ASSERT_TRUE(fv != nullptr);
- checkString(expected, SFC::convertSummaryField(false, *fv).get());
-}
-
-const ReferenceDataType& Test::getAsRefType(const string& name) const {
- return dynamic_cast<const ReferenceDataType&>(getDataType(name));
-}
-
-void Test::requireThatNonEmptyReferenceIsConvertedToStringWithId() {
- Document doc(getDocType(), DocumentId("id:ns:indexingdocument::"));
- doc.setRepo(*_documentRepo);
- doc.setValue("ref", ReferenceFieldValue(
- getAsRefType("Reference<target_dummy_document>"),
- DocumentId("id:ns:target_dummy_document::foo")));
-
- checkStringForAllConversions("id:ns:target_dummy_document::foo",
- doc.getValue("ref").get());
-}
-
-void Test::requireThatEmptyReferenceIsConvertedToEmptyString() {
- Document doc(getDocType(), DocumentId("id:ns:indexingdocument::"));
- doc.setRepo(*_documentRepo);
- doc.setValue("ref", ReferenceFieldValue(
- getAsRefType("Reference<target_dummy_document>")));
-
- checkStringForAllConversions("", doc.getValue("ref").get());
-
-}
-
-// Own test for this to ensure that SlimeFiller code path is executed,
-// as this only triggers for composite field types.
-void Test::requireThatReferenceInCompositeTypeEmitsSlimeData() {
- Document doc(getDocType(), DocumentId("id:ns:indexingdocument::"));
- doc.setRepo(*_documentRepo);
-
- StructFieldValue sfv(getDataType("indexingdocument.header.nested"));
- sfv.setValue("inner_ref", ReferenceFieldValue(
- getAsRefType("Reference<target_dummy_document>"),
- DocumentId("id:ns:target_dummy_document::foo")));
- doc.setValue("nested", sfv);
-
- FieldBlock expect(R"({"inner_ref":"id:ns:target_dummy_document::foo"})");
- checkData(expect.binary,
- SFC::convertSummaryField(false, *doc.getValue("nested")).get());
-}
-
-} // namespace
-
-TEST_APPHOOK(Test);
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
index 1a21c1d3eab..d55e00a1905 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
@@ -1,207 +1,14 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "summaryfieldconverter.h"
-#include "annotation_converter.h"
#include "check_undefined_value_visitor.h"
-#include "searchdatatype.h"
#include "slime_filler.h"
-#include <vespa/document/fieldvalue/arrayfieldvalue.h>
-#include <vespa/document/fieldvalue/boolfieldvalue.h>
-#include <vespa/document/fieldvalue/bytefieldvalue.h>
-#include <vespa/document/fieldvalue/document.h>
-#include <vespa/document/fieldvalue/doublefieldvalue.h>
-#include <vespa/document/fieldvalue/floatfieldvalue.h>
-#include <vespa/document/fieldvalue/intfieldvalue.h>
-#include <vespa/document/fieldvalue/longfieldvalue.h>
-#include <vespa/document/fieldvalue/predicatefieldvalue.h>
-#include <vespa/document/fieldvalue/rawfieldvalue.h>
-#include <vespa/document/fieldvalue/shortfieldvalue.h>
-#include <vespa/document/fieldvalue/stringfieldvalue.h>
-#include <vespa/document/fieldvalue/weightedsetfieldvalue.h>
-#include <vespa/document/fieldvalue/annotationreferencefieldvalue.h>
-#include <vespa/document/fieldvalue/tensorfieldvalue.h>
-#include <vespa/document/fieldvalue/referencefieldvalue.h>
-#include <vespa/vespalib/stllike/asciistream.h>
-#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/vespalib/data/slime/slime.h>
-#include <vespa/vespalib/data/smart_buffer.h>
+#include <vespa/document/fieldvalue/fieldvalue.h>
-using document::AnnotationReferenceFieldValue;
-using document::ArrayFieldValue;
-using document::BoolFieldValue;
-using document::ByteFieldValue;
-using document::Document;
-using document::DoubleFieldValue;
using document::FieldValue;
-using document::ConstFieldValueVisitor;
-using document::FloatFieldValue;
-using document::IntFieldValue;
-using document::LongFieldValue;
-using document::MapFieldValue;
-using document::PredicateFieldValue;
-using document::RawFieldValue;
-using document::ShortFieldValue;
-using document::StringFieldValue;
-using document::StructFieldValue;
-using document::WeightedSetFieldValue;
-using document::TensorFieldValue;
-using document::ReferenceFieldValue;
namespace search::docsummary {
-namespace {
-
-struct FieldValueConverter {
- virtual FieldValue::UP convert(const FieldValue &input) = 0;
- virtual ~FieldValueConverter() = default;
-};
-
-
-class SummaryFieldValueConverter : protected ConstFieldValueVisitor
-{
- vespalib::asciistream _str;
- bool _tokenize;
- FieldValue::UP _field_value;
- FieldValueConverter &_structuredFieldConverter;
-
- template <typename T>
- void visitPrimitive(const T &t) {
- _field_value.reset(t.clone());
- }
- void visit(const IntFieldValue &value) override { visitPrimitive(value); }
- void visit(const LongFieldValue &value) override { visitPrimitive(value); }
- void visit(const ShortFieldValue &value) override { visitPrimitive(value); }
- void visit(const BoolFieldValue &value) override { visitPrimitive(value); }
- void visit(const ByteFieldValue &value) override {
- int8_t signedValue = value.getAsByte();
- _field_value = std::make_unique<ShortFieldValue>(signedValue);
- }
- void visit(const DoubleFieldValue &value) override { visitPrimitive(value); }
- void visit(const FloatFieldValue &value) override { visitPrimitive(value); }
-
- void visit(const StringFieldValue &value) override {
- if (_tokenize) {
- AnnotationConverter converter(value.getValue(), _str);
- converter.handleIndexingTerms(value);
- } else {
- _str << value.getValue();
- }
- }
-
- void visit(const AnnotationReferenceFieldValue & v ) override {
- _field_value = _structuredFieldConverter.convert(v);
- }
- void visit(const Document & v) override {
- _field_value = _structuredFieldConverter.convert(v);
- }
-
- void visit(const PredicateFieldValue &value) override {
- _str << value.toString();
- }
-
- void visit(const RawFieldValue &value) override {
- visitPrimitive(value);
- }
-
- void visit(const ArrayFieldValue &value) override {
- if (value.size() > 0) {
- _field_value = _structuredFieldConverter.convert(value);
- } // else: implicit empty string
- }
-
- void visit(const MapFieldValue & value) override {
- if (value.size() > 0) {
- _field_value = _structuredFieldConverter.convert(value);
- } // else: implicit empty string
- }
-
- void visit(const StructFieldValue &value) override {
- if (*value.getDataType() == *SearchDataType::URI) {
- FieldValue::UP uriAllValue = value.getValue("all");
- if (uriAllValue && uriAllValue->isA(FieldValue::Type::STRING)) {
- uriAllValue->accept(*this);
- return;
- }
- }
- _field_value = _structuredFieldConverter.convert(value);
- }
-
- void visit(const WeightedSetFieldValue &value) override {
- if (value.size() > 0) {
- _field_value = _structuredFieldConverter.convert(value);
- } // else: implicit empty string
- }
-
- void visit(const TensorFieldValue &value) override {
- visitPrimitive(value);
- }
-
- void visit(const ReferenceFieldValue& value) override {
- if (value.hasValidDocumentId()) {
- _str << value.getDocumentId().toString();
- } // else: implicit empty string
- }
-
-public:
- SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter);
- ~SummaryFieldValueConverter() override;
-
- FieldValue::UP convert(const FieldValue &input) {
- input.accept(*this);
- if (_field_value.get()) {
- return std::move(_field_value);
- }
- return StringFieldValue::make(_str.str());
- }
-};
-
-SummaryFieldValueConverter::SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter)
- : _str(), _tokenize(tokenize),
- _structuredFieldConverter(subConverter)
-{}
-SummaryFieldValueConverter::~SummaryFieldValueConverter() = default;
-
-using namespace vespalib::slime::convenience;
-
-class SlimeConverter : public FieldValueConverter {
-private:
- bool _tokenize;
- const std::vector<uint32_t>* _matching_elems;
-
-public:
- explicit SlimeConverter(bool tokenize)
- : _tokenize(tokenize),
- _matching_elems()
- {}
-
- SlimeConverter(bool tokenize, const std::vector<uint32_t>& matching_elems)
- : _tokenize(tokenize),
- _matching_elems(&matching_elems)
- {}
-
- FieldValue::UP convert(const FieldValue &input) override {
- vespalib::Slime slime;
- SlimeInserter inserter(slime);
- SlimeFiller visitor(inserter, _tokenize, _matching_elems);
- input.accept(visitor);
- vespalib::SmartBuffer buffer(4_Ki);
- vespalib::slime::BinaryFormat::encode(slime, buffer);
- vespalib::Memory mem = buffer.obtain();
- return std::make_unique<RawFieldValue>(mem.data, mem.size);
- }
-};
-
-
-} // namespace
-
-FieldValue::UP
-SummaryFieldConverter::convertSummaryField(bool markup,
- const FieldValue &value)
-{
- SlimeConverter subConv(markup);
- return SummaryFieldValueConverter(markup, subConv).convert(value);
-}
-
void
SummaryFieldConverter::insert_summary_field(const FieldValue& value, vespalib::slime::Inserter& inserter)
{
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
index 924ec6f402e..425b357d355 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
@@ -2,7 +2,10 @@
#pragma once
-#include <vespa/document/fieldvalue/fieldvalue.h>
+#include <cstdint>
+#include <vector>
+
+namespace document { class FieldValue; }
namespace vespalib::slime { struct Inserter; }
@@ -16,12 +19,6 @@ class IJuniperConverter;
class SummaryFieldConverter
{
public:
- static document::FieldValue::UP convertSummaryField(bool markup, const document::FieldValue &value);
-
- static document::FieldValue::UP convert_field_with_filter(bool markup,
- const document::FieldValue& value,
- const std::vector<uint32_t>& matching_elems);
-
static void insert_summary_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter);
/**
* Insert the given field value, but only the elements that are contained in the matching_elems vector.