diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-09-15 14:39:31 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-09-15 14:39:31 +0200 |
commit | a4945088ccc3a5ff072e04ecdd61b0ec7f8d884f (patch) | |
tree | 0082ebb71b709dcc333953bcd9d8739253a3d2d0 /searchsummary | |
parent | e123e08a279e32ce7dec845bf79b291aefeab16c (diff) |
Remove unused portions of SummaryFieldConverter.
Diffstat (limited to 'searchsummary')
5 files changed, 5 insertions, 951 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index 5fcd0f7e19a..6e970bdfc00 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -22,6 +22,5 @@ vespa_define_module( src/tests/docsummary/matched_elements_filter src/tests/docsummary/slime_filler src/tests/docsummary/slime_summary - src/tests/docsummary/summary_field_converter src/tests/juniper ) diff --git a/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt deleted file mode 100644 index cfda566ee6c..00000000000 --- a/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchsummary_summary_field_converter_test_app - SOURCES - summary_field_converter_test.cpp - DEPENDS - searchsummary -) -vespa_add_test(NAME searchsummary_summary_field_converter_test_app COMMAND searchsummary_summary_field_converter_test_app) diff --git a/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp b/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp deleted file mode 100644 index 0eff397bc10..00000000000 --- a/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp +++ /dev/null @@ -1,741 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Unit tests for summaryfieldconverter. - -#include <vespa/document/annotation/annotation.h> -#include <vespa/document/annotation/span.h> -#include <vespa/document/annotation/spanlist.h> -#include <vespa/document/annotation/spantree.h> -#include <vespa/document/base/documentid.h> -#include <vespa/document/base/exceptions.h> -#include <vespa/document/base/field.h> -#include <vespa/document/config/documenttypes_config_fwd.h> -#include <vespa/document/datatype/annotationtype.h> -#include <vespa/document/datatype/arraydatatype.h> -#include <vespa/document/datatype/datatype.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/datatype/structdatatype.h> -#include <vespa/document/datatype/urldatatype.h> -#include <vespa/document/datatype/weightedsetdatatype.h> -#include <vespa/document/datatype/referencedatatype.h> -#include <vespa/document/datatype/tensor_data_type.h> -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/bytefieldvalue.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/document/fieldvalue/doublefieldvalue.h> -#include <vespa/document/fieldvalue/floatfieldvalue.h> -#include <vespa/document/fieldvalue/intfieldvalue.h> -#include <vespa/document/fieldvalue/longfieldvalue.h> -#include <vespa/document/fieldvalue/predicatefieldvalue.h> -#include <vespa/document/fieldvalue/rawfieldvalue.h> -#include <vespa/document/fieldvalue/shortfieldvalue.h> -#include <vespa/document/fieldvalue/stringfieldvalue.h> -#include <vespa/document/fieldvalue/structfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> -#include <vespa/document/fieldvalue/tensorfieldvalue.h> -#include <vespa/document/fieldvalue/referencefieldvalue.h> -#include <vespa/document/predicate/predicate.h> -#include <vespa/document/repo/configbuilder.h> -#include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchsummary/docsummary/summaryfieldconverter.h> -#include <vespa/searchsummary/docsummary/linguisticsannotation.h> -#include <vespa/searchsummary/docsummary/searchdatatype.h> -#include <vespa/searchcommon/common/schema.h> -#include <vespa/vespalib/geo/zcurve.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/vespalib/data/slime/json_format.h> -#include <vespa/vespalib/data/slime/binary_format.h> -#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> -#include <vespa/eval/eval/simple_value.h> -#include <vespa/eval/eval/tensor_spec.h> -#include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/test/value_compare.h> -#include <vespa/vespalib/data/slime/slime.h> - -using document::Annotation; -using document::AnnotationType; -using document::ArrayDataType; -using document::ArrayFieldValue; -using document::ByteFieldValue; -using document::DataType; -using document::Document; -using document::DocumentId; -using document::DocumentType; -using document::DocumentTypeRepo; -using document::DoubleFieldValue; -using document::FeatureSet; -using document::Field; -using document::FieldNotFoundException; -using document::FieldValue; -using document::FloatFieldValue; -using document::IntFieldValue; -using document::LongFieldValue; -using document::Predicate; -using document::PredicateFieldValue; -using document::RawFieldValue; -using document::ReferenceDataType; -using document::ReferenceFieldValue; -using document::ShortFieldValue; -using document::Span; -using document::SpanList; -using document::SpanTree; -using document::StringFieldValue; -using document::StructDataType; -using document::StructFieldValue; -using document::TensorDataType; -using document::TensorFieldValue; -using document::UrlDataType; -using document::WeightedSetDataType; -using document::WeightedSetFieldValue; -using search::index::Schema; -using search::linguistics::SPANTREE_NAME; -using search::linguistics::TERM; -using vespalib::Slime; -using vespalib::eval::SimpleValue; -using vespalib::eval::TensorSpec; -using vespalib::eval::Value; -using vespalib::eval::ValueType; -using vespalib::geo::ZCurve; -using vespalib::slime::Cursor; -using vespalib::string; - -using namespace search::docsummary; - -typedef SummaryFieldConverter SFC; - -namespace { - -struct FieldBlock { - vespalib::string input; - Slime slime; - search::RawBuf binary; - vespalib::string json; - - explicit FieldBlock(const vespalib::string &jsonInput); - ~FieldBlock(); -}; - -FieldBlock::FieldBlock(const vespalib::string &jsonInput) - : input(jsonInput), slime(), binary(1024), json() -{ - size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime); - EXPECT_TRUE(used > 0); - { - search::SlimeOutputRawBufAdapter adapter(binary); - vespalib::slime::JsonFormat::encode(slime, adapter, true); - json.assign(binary.GetDrainPos(), binary.GetUsedLen()); - binary.reset(); - } - search::SlimeOutputRawBufAdapter adapter(binary); - vespalib::slime::BinaryFormat::encode(slime, adapter); -} - -FieldBlock::~FieldBlock() = default; - -class Test : public vespalib::TestApp { - std::unique_ptr<Schema> _schema; - std::shared_ptr<const DocumentTypeRepo> _documentRepo; - const DocumentType *_documentType; - document::FixedTypeRepo _fixedRepo; - - void setUp(); - void tearDown(); - - const DataType &getDataType(const string &name) const; - const ReferenceDataType& getAsRefType(const string& name) const; - - template <typename T> - T getValueAs(const string &field_name, const Document &doc); - - template <typename T> - T - cvtValueAs(const FieldValue::UP &fv); - - template <typename T> - T - cvtAttributeAs(const FieldValue::UP &fv); - - template <typename T> - T - cvtSummaryAs(bool markup, const FieldValue::UP &fv); - - void checkString(const string &str, const FieldValue *value); - void checkStringForAllConversions(const string& expected, const FieldValue* fv); - void checkData(const search::RawBuf &data, const FieldValue *value); - void checkTensor(const Value::UP &tensor, const FieldValue *value); - template <unsigned int N> - void checkArray(const char *(&str)[N], const FieldValue *value); - void setSummaryField(const string &name); - void setAttributeField(const string &name); - - void requireThatSummaryIsAnUnmodifiedString(); - void requireThatAttributeIsAnUnmodifiedString(); - void requireThatArrayIsFlattenedInSummaryField(); - void requireThatWeightedSetIsFlattenedInSummaryField(); - void requireThatPositionsAreTransformedInSummary(); - void requireThatArrayIsPreservedInAttributeField(); - void requireThatPositionsAreTransformedInAttributeField(); - void requireThatPositionArrayIsTransformedInAttributeField(); - void requireThatPositionWeightedSetIsTransformedInAttributeField(); - void requireThatAttributeCanBePrimitiveTypes(); - void requireThatSummaryCanBePrimitiveTypes(); - void requireThatSummaryHandlesCjk(); - void requireThatSearchDataTypeUsesDefaultDataTypes(); - void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); - void requireThatPredicateIsPrinted(); - void requireThatTensorIsNotConverted(); - void requireThatNonEmptyReferenceIsConvertedToStringWithId(); - void requireThatEmptyReferenceIsConvertedToEmptyString(); - void requireThatReferenceInCompositeTypeEmitsSlimeData(); - const DocumentType &getDocType() const { return *_documentType; } - Document makeDocument(); - StringFieldValue annotateTerm(const string &term); - StringFieldValue makeAnnotatedChineseString(); - StringFieldValue makeAnnotatedString(); - void setSpanTree(StringFieldValue & value, SpanTree::UP tree); -public: - Test(); - ~Test(); - int Main() override; -}; - -DocumenttypesConfig getDocumenttypesConfig() { - using namespace document::config_builder; - DocumenttypesConfigBuilderHelper builder; - const int ref_target_doctype_id = 1234; - const int ref_type_id = 5678; - builder.document(ref_target_doctype_id, "target_dummy_document", - Struct("target_dummy_document.header"), - Struct("target_dummy_document.body")); - builder.document(42, "indexingdocument", - Struct("indexingdocument.header") - .addField("empty", DataType::T_STRING) - .addField("string", DataType::T_STRING) - .addField("plain_string", DataType::T_STRING) - .addField("string_array", Array(DataType::T_STRING)) - .addField("string_wset", Wset(DataType::T_STRING)) - .addField("position1", DataType::T_INT) - .addField("position2", DataType::T_LONG) - .addField("position2_array", Array(DataType::T_LONG)) - .addField("position2_wset", Wset(DataType::T_LONG)) - .addField("uri", UrlDataType::getInstance().getId()) - .addField("uri_array", - Array(UrlDataType::getInstance().getId())) - .addField("int", DataType::T_INT) - .addField("long", DataType::T_LONG) - .addField("short", DataType::T_SHORT) - .addField("byte", DataType::T_BYTE) - .addField("double", DataType::T_DOUBLE) - .addField("float", DataType::T_FLOAT) - .addField("chinese", DataType::T_STRING) - .addField("predicate", DataType::T_PREDICATE) - .addTensorField("tensor", "tensor(x{},y{})") - .addField("ref", ref_type_id) - .addField("nested", Struct("indexingdocument.header.nested") - .addField("inner_ref", ref_type_id)), - Struct("indexingdocument.body")) - .referenceType(ref_type_id, ref_target_doctype_id); - return builder.config(); -} - -Test::Test() : - _documentRepo(std::make_unique<DocumentTypeRepo>(getDocumenttypesConfig())), - _documentType(_documentRepo->getDocumentType("indexingdocument")), - _fixedRepo(*_documentRepo, *_documentType) -{ - ASSERT_TRUE(_documentType); -} - -Test::~Test() {} - -#define TEST_CALL(func) \ - TEST_DO(setUp()); \ - TEST_DO(func); \ - TEST_DO(tearDown()) - -int -Test::Main() -{ - TEST_INIT("summaryfieldconverter_test"); - - TEST_CALL(requireThatSummaryIsAnUnmodifiedString()); - TEST_CALL(requireThatAttributeIsAnUnmodifiedString()); - TEST_CALL(requireThatArrayIsFlattenedInSummaryField()); - TEST_CALL(requireThatWeightedSetIsFlattenedInSummaryField()); - TEST_CALL(requireThatPositionsAreTransformedInSummary()); - TEST_CALL(requireThatArrayIsPreservedInAttributeField()); - TEST_CALL(requireThatPositionsAreTransformedInAttributeField()); - TEST_CALL(requireThatPositionArrayIsTransformedInAttributeField()); - TEST_CALL(requireThatPositionWeightedSetIsTransformedInAttributeField()); - TEST_CALL(requireThatAttributeCanBePrimitiveTypes()); - TEST_CALL(requireThatSummaryCanBePrimitiveTypes()); - TEST_CALL(requireThatSummaryHandlesCjk()); - TEST_CALL(requireThatSearchDataTypeUsesDefaultDataTypes()); - TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); - TEST_CALL(requireThatPredicateIsPrinted()); - TEST_CALL(requireThatTensorIsNotConverted()); - TEST_CALL(requireThatNonEmptyReferenceIsConvertedToStringWithId()); - TEST_CALL(requireThatEmptyReferenceIsConvertedToEmptyString()); - TEST_CALL(requireThatReferenceInCompositeTypeEmitsSlimeData()); - - TEST_DONE(); -} - -void Test::setUp() { - _schema = std::make_unique<Schema>(); -} - -void Test::tearDown() { -} - -const DataType &Test::getDataType(const string &name) const { - const DataType *type = _documentRepo->getDataType(*_documentType, name); - ASSERT_TRUE(type); - return *type; -} - -StringFieldValue Test::makeAnnotatedString() { - auto span_list_up = std::make_unique<SpanList>(); - auto span_list = span_list_up.get(); - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); - // Annotations don't have to be added sequentially. - tree->annotate(span_list->add(std::make_unique<Span>(8, 3)), - Annotation(*TERM, std::make_unique<StringFieldValue>("Annotation"))); - tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), - Annotation(*TERM, std::make_unique<StringFieldValue>("Multiple"))); - tree->annotate(span_list->add(std::make_unique<Span>(1, 2)), - Annotation(*TERM, std::make_unique<StringFieldValue>("Overlap"))); - StringFieldValue value("Foo Bar Baz"); - setSpanTree(value, std::move(tree)); - return value; -} - -StringFieldValue Test::annotateTerm(const string &term) { - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::make_unique<Span>(0, term.size())); - tree->annotate(tree->getRoot(), *TERM); - StringFieldValue value(term); - setSpanTree(value, std::move(tree)); - return value; -} - -void Test::setSpanTree(StringFieldValue & value, SpanTree::UP tree) { - StringFieldValue::SpanTrees trees; - trees.push_back(std::move(tree)); - value.setSpanTrees(trees, _fixedRepo); -} - -StringFieldValue Test::makeAnnotatedChineseString() { - auto span_list_up = std::make_unique<SpanList>(); - auto span_list = span_list_up.get(); - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); - // These chinese characters each use 3 bytes in their UTF8 encoding. - tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM); - StringFieldValue value("我就是那个大灰狼"); - setSpanTree(value, std::move(tree)); - return value; -} - -Document Test::makeDocument() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("string", makeAnnotatedString()); - - doc.setValue("plain_string", StringFieldValue("Plain")); - - ArrayFieldValue array(getDataType("Array<String>")); - array.add(annotateTerm("\"foO\"")); - array.add(annotateTerm("ba\\R")); - doc.setValue("string_array", array); - - WeightedSetFieldValue wset(getDataType("WeightedSet<String>")); - wset.add(annotateTerm("\"foo\""), 2); - wset.add(annotateTerm("ba\\r"), 4); - doc.setValue("string_wset", wset); - - doc.setValue("position1", IntFieldValue(5)); - - doc.setValue("position2", LongFieldValue(ZCurve::encode(4, 2))); - - StructFieldValue uri(getDataType("url")); - uri.setValue("all", annotateTerm("http://www.example.com:42/foobar?q#frag")); - uri.setValue("scheme", annotateTerm("http")); - uri.setValue("host", annotateTerm("www.example.com")); - uri.setValue("port", annotateTerm("42")); - uri.setValue("path", annotateTerm("foobar")); - uri.setValue("query", annotateTerm("q")); - uri.setValue("fragment", annotateTerm("frag")); - doc.setValue("uri", uri); - - ArrayFieldValue uri_array(getDataType("Array<url>")); - uri.setValue("all", annotateTerm("http://www.example.com:80/foobar?q#frag")); - uri.setValue("port", annotateTerm("80")); - uri_array.add(uri); - uri.setValue("all", annotateTerm("https://www.example.com:443/foo?q#frag")); - uri.setValue("scheme", annotateTerm("https")); - uri.setValue("path", annotateTerm("foo")); - uri.setValue("port", annotateTerm("443")); - uri_array.add(uri); - doc.setValue("uri_array", uri_array); - - ArrayFieldValue position2_array(getDataType("Array<Long>")); - position2_array.add(LongFieldValue(ZCurve::encode(4, 2))); - position2_array.add(LongFieldValue(ZCurve::encode(4, 4))); - doc.setValue("position2_array", position2_array); - - WeightedSetFieldValue position2_wset(getDataType("WeightedSet<Long>")); - position2_wset.add(LongFieldValue(ZCurve::encode(4, 2)), 4); - position2_wset.add(LongFieldValue(ZCurve::encode(4, 4)), 2); - doc.setValue("position2_wset", position2_wset); - - doc.setValue("int", IntFieldValue(42)); - doc.setValue("long", LongFieldValue(84)); - doc.setValue("short", ShortFieldValue(21)); - doc.setValue("byte", ByteFieldValue(11)); - doc.setValue("double", DoubleFieldValue(0.4)); - doc.setValue("float", FloatFieldValue(0.2f)); - - doc.setValue("chinese", makeAnnotatedChineseString()); - return doc; -} - -template <typename T> -T Test::getValueAs(const string &field_name, const Document &doc) { - FieldValue::UP fv(doc.getValue(field_name)); - const T *value = dynamic_cast<const T *>(fv.get()); - ASSERT_TRUE(value); - return *value; -} - -template <typename T> -T -Test::cvtValueAs(const FieldValue::UP &fv) -{ - ASSERT_TRUE(fv.get() != NULL); - const T *value = dynamic_cast<const T *>(fv.get()); - ASSERT_TRUE(value); - return *value; -} - -template <typename T> -T -Test::cvtAttributeAs(const FieldValue::UP &fv) -{ - ASSERT_TRUE(fv.get() != NULL); - return cvtValueAs<T>(fv); -} - -template <typename T> -T -Test::cvtSummaryAs(bool markup, const FieldValue::UP &fv) -{ - ASSERT_TRUE(fv.get() != NULL); - FieldValue::UP r = SFC::convertSummaryField(markup, *fv); - return cvtValueAs<T>(r); -} - -void Test::checkString(const string &str, const FieldValue *value) { - ASSERT_TRUE(value); - const StringFieldValue *s = dynamic_cast<const StringFieldValue *>(value); - ASSERT_TRUE(s); - // fprintf(stderr, ">>>%s<<< >>>%s<<<\n", str.c_str(), s->getValue().c_str()); - EXPECT_EQUAL(str, s->getValue()); -} - -void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { - ASSERT_TRUE(value); - const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value); - ASSERT_TRUE(s); - auto got = s->getAsRaw(); - ASSERT_EQUAL(buf.GetUsedLen(), got.second); - EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); -} - -void Test::checkTensor(const Value::UP &tensor, const FieldValue *value) { - ASSERT_TRUE(value); - const TensorFieldValue *s = dynamic_cast<const TensorFieldValue *>(value); - ASSERT_TRUE(s); - auto tvalue = s->getAsTensorPtr(); - EXPECT_EQUAL(tensor.get() != nullptr, tvalue != nullptr); - if (tensor) { - EXPECT_EQUAL(*tensor, *tvalue); - } -} - -template <unsigned int N> -void Test::checkArray(const char *(&str)[N], const FieldValue *value) { - ASSERT_TRUE(value); - const ArrayFieldValue *a = dynamic_cast<const ArrayFieldValue *>(value); - ASSERT_TRUE(a); - EXPECT_EQUAL(N, a->size()); - for (size_t i = 0; i < a->size() && i < N; ++i) { - checkString(str[i], &(*a)[i]); - } -} - -void Test::setSummaryField(const string &field) { - _schema->addSummaryField(Schema::Field(field, search::index::schema::DataType::STRING)); -} - -void Test::setAttributeField(const string &field) { - _schema->addAttributeField(Schema::Field(field, search::index::schema::DataType::STRING)); -} - -void Test::requireThatSummaryIsAnUnmodifiedString() { - setSummaryField("string"); - Document summary = makeDocument(); - checkString("Foo Bar Baz", SFC::convertSummaryField(false, - *summary.getValue("string")).get()); -} - -void Test::requireThatAttributeIsAnUnmodifiedString() { - setAttributeField("string"); - Document attribute = makeDocument(); - checkString("Foo Bar Baz", - attribute.getValue("string").get()); -} - -void Test::requireThatArrayIsFlattenedInSummaryField() { - setSummaryField("string_array"); - Document summary = makeDocument(); - FieldBlock expect("[\"\\\"foO\\\"\",\"ba\\\\R\"]"); - checkData(expect.binary, - SFC::convertSummaryField(false, - *summary.getValue("string_array")).get()); -} - -void Test::requireThatWeightedSetIsFlattenedInSummaryField() { - setSummaryField("string_wset"); - Document summary = makeDocument(); - FieldBlock expect("[{\"item\":\"\\\"foo\\\"\",\"weight\":2},{\"item\":\"ba\\\\r\",\"weight\":4}]"); - checkData(expect.binary, - SFC::convertSummaryField(false, - *summary.getValue("string_wset")).get()); -} - -void Test::requireThatPositionsAreTransformedInSummary() { - setSummaryField("position1"); - setSummaryField("position2"); - Document summary = makeDocument(); - FieldValue::UP fv = summary.getValue("position1"); - EXPECT_EQUAL(5, cvtSummaryAs<IntFieldValue>(false, fv).getValue()); - FieldValue::UP fv2 = summary.getValue("position2"); - EXPECT_EQUAL(24, cvtSummaryAs<LongFieldValue>(false, fv2).getValue()); -} - -void Test::requireThatArrayIsPreservedInAttributeField() { - setAttributeField("string_array"); - Document attribute = makeDocument(); - const char *array[] = { "\"foO\"", "ba\\R" }; - checkArray(array, - attribute.getValue("string_array").get()); -} - -void Test::requireThatPositionsAreTransformedInAttributeField() { - setAttributeField("position1"); - setAttributeField("position2"); - Document attr = makeDocument(); - FieldValue::UP fv = attr.getValue("position1"); - EXPECT_EQUAL(5, cvtAttributeAs<IntFieldValue>(fv).getValue()); - fv = attr.getValue("position2"); - EXPECT_EQUAL(24, cvtAttributeAs<LongFieldValue>(fv).getValue()); -} - -void Test::requireThatPositionArrayIsTransformedInAttributeField() { - setAttributeField("position2_array"); - Document attr = makeDocument(); - FieldValue::UP fv = attr.getValue("position2_array"); - ArrayFieldValue a = cvtAttributeAs<ArrayFieldValue>(fv); - EXPECT_EQUAL(2u, a.size()); - EXPECT_EQUAL(24, dynamic_cast<LongFieldValue &>(a[0]).getValue()); - EXPECT_EQUAL(48, dynamic_cast<LongFieldValue &>(a[1]).getValue()); -} - -void Test::requireThatPositionWeightedSetIsTransformedInAttributeField() { - setAttributeField("position2_wset"); - Document attr = makeDocument(); - FieldValue::UP fv = attr.getValue("position2_wset"); - WeightedSetFieldValue w = cvtAttributeAs<WeightedSetFieldValue>(fv); - EXPECT_EQUAL(2u, w.size()); - WeightedSetFieldValue::iterator it = w.begin(); - EXPECT_EQUAL(24, dynamic_cast<const LongFieldValue&>(*it->first).getValue()); - EXPECT_EQUAL(4, dynamic_cast<IntFieldValue &>(*it->second).getValue()); - ++it; - EXPECT_EQUAL(48, dynamic_cast<const LongFieldValue&>(*it->first).getValue()); - EXPECT_EQUAL(2, dynamic_cast<IntFieldValue &>(*it->second).getValue()); -} - -void Test::requireThatAttributeCanBePrimitiveTypes() { - setAttributeField("int"); - setAttributeField("long"); - setAttributeField("short"); - setAttributeField("byte"); - setAttributeField("double"); - setAttributeField("float"); - Document attribute = makeDocument(); - FieldValue::UP fv = attribute.getValue("int"); - EXPECT_EQUAL(42, cvtAttributeAs<IntFieldValue>(fv).getValue()); - fv = attribute.getValue("long"); - EXPECT_EQUAL(84, cvtAttributeAs<LongFieldValue>(fv).getValue()); - fv = attribute.getValue("short"); - EXPECT_EQUAL(21, cvtAttributeAs<ShortFieldValue>(fv).getValue()); - fv = attribute.getValue("byte"); - EXPECT_EQUAL(11, cvtAttributeAs<ByteFieldValue>(fv).getValue()); - fv = attribute.getValue("double"); - EXPECT_EQUAL(0.4, cvtAttributeAs<DoubleFieldValue>(fv).getValue()); - fv = attribute.getValue("float"); - EXPECT_EQUAL(0.2f, cvtAttributeAs<FloatFieldValue>(fv).getValue()); -} - -void Test::requireThatSummaryCanBePrimitiveTypes() { - setSummaryField("int"); - setSummaryField("long"); - setSummaryField("short"); - setSummaryField("byte"); - setSummaryField("double"); - setSummaryField("float"); - Document summary = makeDocument(); - FieldValue::UP fv = summary.getValue("int"); - EXPECT_EQUAL(42, cvtSummaryAs<IntFieldValue>(false, fv).getValue()); - fv = summary.getValue("long"); - EXPECT_EQUAL(84, cvtSummaryAs<LongFieldValue>(false, fv).getValue()); - fv = summary.getValue("short"); - EXPECT_EQUAL(21, cvtSummaryAs<ShortFieldValue>(false, fv).getValue()); - fv = summary.getValue("byte"); - EXPECT_EQUAL(11, cvtSummaryAs<ShortFieldValue>(false, fv).getValue()); - fv = summary.getValue("double"); - EXPECT_EQUAL(0.4, cvtSummaryAs<DoubleFieldValue>(false, fv).getValue()); - fv = summary.getValue("float"); - EXPECT_EQUAL(0.2f, cvtSummaryAs<FloatFieldValue>(false, fv).getValue()); -} - -void Test::requireThatSummaryHandlesCjk() { - Document summary = makeDocument(); - FieldValue::UP fv = summary.getValue("chinese"); - EXPECT_EQUAL("我就是那个\037大灰狼\037", - cvtSummaryAs<StringFieldValue>(true, fv).getValue()); -} - -void Test::requireThatSearchDataTypeUsesDefaultDataTypes() { - const StructDataType *uri = - dynamic_cast<const StructDataType *>(SearchDataType::URI); - ASSERT_TRUE(uri); - ASSERT_TRUE(uri->hasField("all")); - ASSERT_TRUE(uri->hasField("scheme")); - ASSERT_TRUE(uri->hasField("host")); - ASSERT_TRUE(uri->hasField("port")); - ASSERT_TRUE(uri->hasField("path")); - ASSERT_TRUE(uri->hasField("query")); - ASSERT_TRUE(uri->hasField("fragment")); - EXPECT_EQUAL(*DataType::STRING, uri->getField("all").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("scheme").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("host").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("port").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("path").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("query").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("fragment").getDataType()); -} - -void Test::requireThatLinguisticsAnnotationUsesDefaultDataTypes() { - EXPECT_EQUAL(*AnnotationType::TERM, *search::linguistics::TERM); - ASSERT_TRUE(AnnotationType::TERM->getDataType()); - ASSERT_TRUE(search::linguistics::TERM->getDataType()); - EXPECT_EQUAL(*AnnotationType::TERM->getDataType(), - *search::linguistics::TERM->getDataType()); -} - -void -Test::requireThatPredicateIsPrinted() -{ - auto input = std::make_unique<Slime>(); - Cursor &obj = input->setObject(); - obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET); - obj.setString(Predicate::KEY, "foo"); - Cursor &arr = obj.setArray(Predicate::SET); - arr.addString("bar"); - - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("predicate", PredicateFieldValue(std::move(input))); - - checkString("'foo' in ['bar']\n", - SFC::convertSummaryField(false, *doc.getValue("predicate")).get()); -} - -Value::UP make_tensor(const TensorSpec &spec) { - return SimpleValue::from_spec(spec); -} - -void -Test::requireThatTensorIsNotConverted() -{ - TensorDataType tensorDataType(ValueType::from_spec("tensor(x{},y{})")); - TensorFieldValue tensorFieldValue(tensorDataType); - tensorFieldValue = make_tensor(TensorSpec("tensor(x{},y{})") - .add({{"x", "4"}, {"y", "5"}}, 7)); - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("tensor", tensorFieldValue); - - TEST_CALL(checkTensor(make_tensor(TensorSpec("tensor(x{},y{})") - .add({{"x", "4"}, {"y", "5"}}, 7)), - SFC::convertSummaryField(false, - *doc.getValue("tensor")).get())); - doc.setValue("tensor", TensorFieldValue()); - - TEST_CALL(checkTensor(Value::UP(), - SFC::convertSummaryField(false, - *doc.getValue("tensor")).get())); -} - -void Test::checkStringForAllConversions(const string& expected, const FieldValue* fv) { - ASSERT_TRUE(fv != nullptr); - checkString(expected, SFC::convertSummaryField(false, *fv).get()); -} - -const ReferenceDataType& Test::getAsRefType(const string& name) const { - return dynamic_cast<const ReferenceDataType&>(getDataType(name)); -} - -void Test::requireThatNonEmptyReferenceIsConvertedToStringWithId() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("ref", ReferenceFieldValue( - getAsRefType("Reference<target_dummy_document>"), - DocumentId("id:ns:target_dummy_document::foo"))); - - checkStringForAllConversions("id:ns:target_dummy_document::foo", - doc.getValue("ref").get()); -} - -void Test::requireThatEmptyReferenceIsConvertedToEmptyString() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("ref", ReferenceFieldValue( - getAsRefType("Reference<target_dummy_document>"))); - - checkStringForAllConversions("", doc.getValue("ref").get()); - -} - -// Own test for this to ensure that SlimeFiller code path is executed, -// as this only triggers for composite field types. -void Test::requireThatReferenceInCompositeTypeEmitsSlimeData() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - - StructFieldValue sfv(getDataType("indexingdocument.header.nested")); - sfv.setValue("inner_ref", ReferenceFieldValue( - getAsRefType("Reference<target_dummy_document>"), - DocumentId("id:ns:target_dummy_document::foo"))); - doc.setValue("nested", sfv); - - FieldBlock expect(R"({"inner_ref":"id:ns:target_dummy_document::foo"})"); - checkData(expect.binary, - SFC::convertSummaryField(false, *doc.getValue("nested")).get()); -} - -} // namespace - -TEST_APPHOOK(Test); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp index 1a21c1d3eab..d55e00a1905 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp @@ -1,207 +1,14 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "summaryfieldconverter.h" -#include "annotation_converter.h" #include "check_undefined_value_visitor.h" -#include "searchdatatype.h" #include "slime_filler.h" -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/boolfieldvalue.h> -#include <vespa/document/fieldvalue/bytefieldvalue.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/document/fieldvalue/doublefieldvalue.h> -#include <vespa/document/fieldvalue/floatfieldvalue.h> -#include <vespa/document/fieldvalue/intfieldvalue.h> -#include <vespa/document/fieldvalue/longfieldvalue.h> -#include <vespa/document/fieldvalue/predicatefieldvalue.h> -#include <vespa/document/fieldvalue/rawfieldvalue.h> -#include <vespa/document/fieldvalue/shortfieldvalue.h> -#include <vespa/document/fieldvalue/stringfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> -#include <vespa/document/fieldvalue/annotationreferencefieldvalue.h> -#include <vespa/document/fieldvalue/tensorfieldvalue.h> -#include <vespa/document/fieldvalue/referencefieldvalue.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/size_literals.h> -#include <vespa/vespalib/data/slime/slime.h> -#include <vespa/vespalib/data/smart_buffer.h> +#include <vespa/document/fieldvalue/fieldvalue.h> -using document::AnnotationReferenceFieldValue; -using document::ArrayFieldValue; -using document::BoolFieldValue; -using document::ByteFieldValue; -using document::Document; -using document::DoubleFieldValue; using document::FieldValue; -using document::ConstFieldValueVisitor; -using document::FloatFieldValue; -using document::IntFieldValue; -using document::LongFieldValue; -using document::MapFieldValue; -using document::PredicateFieldValue; -using document::RawFieldValue; -using document::ShortFieldValue; -using document::StringFieldValue; -using document::StructFieldValue; -using document::WeightedSetFieldValue; -using document::TensorFieldValue; -using document::ReferenceFieldValue; namespace search::docsummary { -namespace { - -struct FieldValueConverter { - virtual FieldValue::UP convert(const FieldValue &input) = 0; - virtual ~FieldValueConverter() = default; -}; - - -class SummaryFieldValueConverter : protected ConstFieldValueVisitor -{ - vespalib::asciistream _str; - bool _tokenize; - FieldValue::UP _field_value; - FieldValueConverter &_structuredFieldConverter; - - template <typename T> - void visitPrimitive(const T &t) { - _field_value.reset(t.clone()); - } - void visit(const IntFieldValue &value) override { visitPrimitive(value); } - void visit(const LongFieldValue &value) override { visitPrimitive(value); } - void visit(const ShortFieldValue &value) override { visitPrimitive(value); } - void visit(const BoolFieldValue &value) override { visitPrimitive(value); } - void visit(const ByteFieldValue &value) override { - int8_t signedValue = value.getAsByte(); - _field_value = std::make_unique<ShortFieldValue>(signedValue); - } - void visit(const DoubleFieldValue &value) override { visitPrimitive(value); } - void visit(const FloatFieldValue &value) override { visitPrimitive(value); } - - void visit(const StringFieldValue &value) override { - if (_tokenize) { - AnnotationConverter converter(value.getValue(), _str); - converter.handleIndexingTerms(value); - } else { - _str << value.getValue(); - } - } - - void visit(const AnnotationReferenceFieldValue & v ) override { - _field_value = _structuredFieldConverter.convert(v); - } - void visit(const Document & v) override { - _field_value = _structuredFieldConverter.convert(v); - } - - void visit(const PredicateFieldValue &value) override { - _str << value.toString(); - } - - void visit(const RawFieldValue &value) override { - visitPrimitive(value); - } - - void visit(const ArrayFieldValue &value) override { - if (value.size() > 0) { - _field_value = _structuredFieldConverter.convert(value); - } // else: implicit empty string - } - - void visit(const MapFieldValue & value) override { - if (value.size() > 0) { - _field_value = _structuredFieldConverter.convert(value); - } // else: implicit empty string - } - - void visit(const StructFieldValue &value) override { - if (*value.getDataType() == *SearchDataType::URI) { - FieldValue::UP uriAllValue = value.getValue("all"); - if (uriAllValue && uriAllValue->isA(FieldValue::Type::STRING)) { - uriAllValue->accept(*this); - return; - } - } - _field_value = _structuredFieldConverter.convert(value); - } - - void visit(const WeightedSetFieldValue &value) override { - if (value.size() > 0) { - _field_value = _structuredFieldConverter.convert(value); - } // else: implicit empty string - } - - void visit(const TensorFieldValue &value) override { - visitPrimitive(value); - } - - void visit(const ReferenceFieldValue& value) override { - if (value.hasValidDocumentId()) { - _str << value.getDocumentId().toString(); - } // else: implicit empty string - } - -public: - SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter); - ~SummaryFieldValueConverter() override; - - FieldValue::UP convert(const FieldValue &input) { - input.accept(*this); - if (_field_value.get()) { - return std::move(_field_value); - } - return StringFieldValue::make(_str.str()); - } -}; - -SummaryFieldValueConverter::SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter) - : _str(), _tokenize(tokenize), - _structuredFieldConverter(subConverter) -{} -SummaryFieldValueConverter::~SummaryFieldValueConverter() = default; - -using namespace vespalib::slime::convenience; - -class SlimeConverter : public FieldValueConverter { -private: - bool _tokenize; - const std::vector<uint32_t>* _matching_elems; - -public: - explicit SlimeConverter(bool tokenize) - : _tokenize(tokenize), - _matching_elems() - {} - - SlimeConverter(bool tokenize, const std::vector<uint32_t>& matching_elems) - : _tokenize(tokenize), - _matching_elems(&matching_elems) - {} - - FieldValue::UP convert(const FieldValue &input) override { - vespalib::Slime slime; - SlimeInserter inserter(slime); - SlimeFiller visitor(inserter, _tokenize, _matching_elems); - input.accept(visitor); - vespalib::SmartBuffer buffer(4_Ki); - vespalib::slime::BinaryFormat::encode(slime, buffer); - vespalib::Memory mem = buffer.obtain(); - return std::make_unique<RawFieldValue>(mem.data, mem.size); - } -}; - - -} // namespace - -FieldValue::UP -SummaryFieldConverter::convertSummaryField(bool markup, - const FieldValue &value) -{ - SlimeConverter subConv(markup); - return SummaryFieldValueConverter(markup, subConv).convert(value); -} - void SummaryFieldConverter::insert_summary_field(const FieldValue& value, vespalib::slime::Inserter& inserter) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h index 924ec6f402e..425b357d355 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h @@ -2,7 +2,10 @@ #pragma once -#include <vespa/document/fieldvalue/fieldvalue.h> +#include <cstdint> +#include <vector> + +namespace document { class FieldValue; } namespace vespalib::slime { struct Inserter; } @@ -16,12 +19,6 @@ class IJuniperConverter; class SummaryFieldConverter { public: - static document::FieldValue::UP convertSummaryField(bool markup, const document::FieldValue &value); - - static document::FieldValue::UP convert_field_with_filter(bool markup, - const document::FieldValue& value, - const std::vector<uint32_t>& matching_elems); - static void insert_summary_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter); /** * Insert the given field value, but only the elements that are contained in the matching_elems vector. |