diff options
Diffstat (limited to 'searchsummary')
64 files changed, 826 insertions, 1465 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt index 5fcd0f7e19a..a5dc62da5c0 100644 --- a/searchsummary/CMakeLists.txt +++ b/searchsummary/CMakeLists.txt @@ -16,12 +16,12 @@ vespa_define_module( TESTS src/tests/docsummary + src/tests/docsummary/annotation_converter src/tests/docsummary/attribute_combiner src/tests/docsummary/attributedfw src/tests/docsummary/document_id_dfw src/tests/docsummary/matched_elements_filter src/tests/docsummary/slime_filler src/tests/docsummary/slime_summary - src/tests/docsummary/summary_field_converter src/tests/juniper ) diff --git a/searchsummary/src/tests/docsummary/CMakeLists.txt b/searchsummary/src/tests/docsummary/CMakeLists.txt index 26a2963809a..4cd12eb4db6 100644 --- a/searchsummary/src/tests/docsummary/CMakeLists.txt +++ b/searchsummary/src/tests/docsummary/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchsummary_positionsdfw_test_app TEST positionsdfw_test.cpp DEPENDS searchsummary + GTest::GTest ) vespa_add_test(NAME searchsummary_positionsdfw_test_app COMMAND searchsummary_positionsdfw_test_app) diff --git a/searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt new file mode 100644 index 00000000000..22e0d3e6477 --- /dev/null +++ b/searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt @@ -0,0 +1,9 @@ +# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +vespa_add_executable(searchsummary_annotation_converter_test_app TEST + SOURCES + annotation_converter_test.cpp + DEPENDS + searchsummary + GTest::GTest +) +vespa_add_test(NAME searchsummary_annotation_converter_test_app COMMAND searchsummary_annotation_converter_test_app) diff --git a/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp new file mode 100644 index 00000000000..753ae8d9044 --- /dev/null +++ b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp @@ -0,0 +1,176 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include <vespa/document/annotation/annotation.h> +#include <vespa/document/annotation/span.h> +#include <vespa/document/annotation/spanlist.h> +#include <vespa/document/annotation/spantree.h> +#include <vespa/document/fieldvalue/stringfieldvalue.h> +#include <vespa/document/repo/configbuilder.h> +#include <vespa/document/repo/fixedtyperepo.h> +#include <vespa/juniper/juniper_separators.h> +#include <vespa/searchsummary/docsummary/annotation_converter.h> +#include <vespa/searchsummary/docsummary/i_juniper_converter.h> +#include <vespa/searchsummary/docsummary/linguisticsannotation.h> +#include <vespa/vespalib/data/slime/slime.h> +#include <vespa/vespalib/gtest/gtest.h> +#include <vespa/vespalib/stllike/asciistream.h> + +using document::Annotation; +using document::DocumentType; +using document::DocumentTypeRepo; +using document::Span; +using document::SpanList; +using document::SpanTree; +using document::StringFieldValue; +using search::docsummary::AnnotationConverter; +using search::docsummary::IJuniperConverter; +using search::linguistics::SPANTREE_NAME; +using search::linguistics::TERM; +using vespalib::Slime; +using vespalib::slime::SlimeInserter; + +namespace { + +DocumenttypesConfig +get_document_types_config() +{ + using namespace document::config_builder; + DocumenttypesConfigBuilderHelper builder; + builder.document(42, "indexingdocument", + Struct("indexingdocument.header"), + Struct("indexingdocument.body")); + return builder.config(); +} + +class MockJuniperConverter : public IJuniperConverter +{ + vespalib::string _result; +public: + void convert(vespalib::stringref input, vespalib::slime::Inserter&) override { + _result = input; + } + const vespalib::string& get_result() const noexcept { return _result; } +}; + +} + +class AnnotationConverterTest : public testing::Test +{ +protected: + std::shared_ptr<const DocumentTypeRepo> _repo; + const DocumentType* _document_type; + document::FixedTypeRepo _fixed_repo; + + AnnotationConverterTest(); + ~AnnotationConverterTest() override; + void set_span_tree(StringFieldValue& value, std::unique_ptr<SpanTree> tree); + StringFieldValue make_annotated_string(); + StringFieldValue make_annotated_chinese_string(); + vespalib::string make_exp_il_annotated_string(); + vespalib::string make_exp_il_annotated_chinese_string(); + void expect_annotated(const vespalib::string& exp, const StringFieldValue& fv); +}; + +AnnotationConverterTest::AnnotationConverterTest() + : testing::Test(), + _repo(std::make_unique<DocumentTypeRepo>(get_document_types_config())), + _document_type(_repo->getDocumentType("indexingdocument")), + _fixed_repo(*_repo, *_document_type) +{ +} + +AnnotationConverterTest::~AnnotationConverterTest() = default; + +void +AnnotationConverterTest::set_span_tree(StringFieldValue & value, std::unique_ptr<SpanTree> tree) +{ + StringFieldValue::SpanTrees trees; + trees.push_back(std::move(tree)); + value.setSpanTrees(trees, _fixed_repo); +} + +StringFieldValue +AnnotationConverterTest::make_annotated_string() +{ + auto span_list_up = std::make_unique<SpanList>(); + auto span_list = span_list_up.get(); + auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); + tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM); + tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), + Annotation(*TERM, std::make_unique<StringFieldValue>("baz"))); + StringFieldValue value("foo bar"); + set_span_tree(value, std::move(tree)); + return value; +} + +StringFieldValue +AnnotationConverterTest::make_annotated_chinese_string() +{ + auto span_list_up = std::make_unique<SpanList>(); + auto span_list = span_list_up.get(); + auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); + // These chinese characters each use 3 bytes in their UTF8 encoding. + tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM); + tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM); + StringFieldValue value("我就是那个大灰狼"); + set_span_tree(value, std::move(tree)); + return value; +} + +vespalib::string +AnnotationConverterTest::make_exp_il_annotated_string() +{ + using namespace juniper::separators; + vespalib::asciistream exp; + exp << "foo" << unit_separator_string << + " " << unit_separator_string << interlinear_annotation_anchor_string << + "bar" << interlinear_annotation_separator_string << + "baz" << interlinear_annotation_terminator_string << unit_separator_string; + return exp.str(); +} + +vespalib::string +AnnotationConverterTest::make_exp_il_annotated_chinese_string() +{ + using namespace juniper::separators; + vespalib::asciistream exp; + exp << "我就是那个" << unit_separator_string << + "大灰狼" << unit_separator_string; + return exp.str(); +} + +void +AnnotationConverterTest::expect_annotated(const vespalib::string& exp, const StringFieldValue& fv) +{ + MockJuniperConverter juniper_converter; + AnnotationConverter annotation_converter(juniper_converter); + Slime slime; + SlimeInserter inserter(slime); + annotation_converter.convert(fv, inserter); + EXPECT_EQ(exp, juniper_converter.get_result()); +} + + +TEST_F(AnnotationConverterTest, convert_plain_string) +{ + using namespace juniper::separators; + vespalib::string exp("Foo Bar Baz"); + StringFieldValue plain_string("Foo Bar Baz"); + expect_annotated(exp + unit_separator_string, plain_string); +} + +TEST_F(AnnotationConverterTest, convert_annotated_string) +{ + auto exp = make_exp_il_annotated_string(); + auto annotated_string = make_annotated_string(); + expect_annotated(exp, annotated_string); +} + +TEST_F(AnnotationConverterTest, convert_annotated_chinese_string) +{ + auto exp = make_exp_il_annotated_chinese_string(); + auto annotated_chinese_string = make_annotated_chinese_string(); + expect_annotated(exp, annotated_chinese_string); +} + +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/attribute_combiner/attribute_combiner_test.cpp b/searchsummary/src/tests/docsummary/attribute_combiner/attribute_combiner_test.cpp index 8bf3db4d112..005fed41838 100644 --- a/searchsummary/src/tests/docsummary/attribute_combiner/attribute_combiner_test.cpp +++ b/searchsummary/src/tests/docsummary/attribute_combiner/attribute_combiner_test.cpp @@ -91,7 +91,7 @@ AttributeCombinerTest::assertWritten(const vespalib::string &exp_slime_as_json, { vespalib::Slime act; vespalib::slime::SlimeInserter inserter(act); - writer->insertField(docId, nullptr, &state, search::docsummary::RES_JSONSTRING, inserter); + writer->insertField(docId, nullptr, state, inserter); SlimeValue exp(exp_slime_as_json); EXPECT_EQ(exp.slime, act); diff --git a/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp b/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp index e9d00629d6f..bba3a5ab506 100644 --- a/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp +++ b/searchsummary/src/tests/docsummary/attributedfw/attributedfw_test.cpp @@ -69,7 +69,7 @@ public: void expect_field(const vespalib::string& exp_slime_as_json, uint32_t docid) { vespalib::Slime act; vespalib::slime::SlimeInserter inserter(act); - _writer->insertField(docid, nullptr, &_state, search::docsummary::RES_JSONSTRING, inserter); + _writer->insertField(docid, nullptr, _state, inserter); SlimeValue exp(exp_slime_as_json); EXPECT_EQ(exp.slime, act); diff --git a/searchsummary/src/tests/docsummary/document_id_dfw/document_id_dfw_test.cpp b/searchsummary/src/tests/docsummary/document_id_dfw/document_id_dfw_test.cpp index 1c4e201e745..4819c34272c 100644 --- a/searchsummary/src/tests/docsummary/document_id_dfw/document_id_dfw_test.cpp +++ b/searchsummary/src/tests/docsummary/document_id_dfw/document_id_dfw_test.cpp @@ -5,6 +5,8 @@ #include <vespa/document/fieldvalue/document.h> #include <vespa/document/repo/configbuilder.h> #include <vespa/document/repo/documenttyperepo.h> +#include <vespa/searchlib/common/matching_elements.h> +#include <vespa/searchsummary/docsummary/docsumstate.h> #include <vespa/searchsummary/docsummary/docsum_store_document.h> #include <vespa/searchsummary/docsummary/document_id_dfw.h> #include <vespa/searchsummary/docsummary/resultclass.h> @@ -20,8 +22,12 @@ using document::DocumentType; using document::DocumentTypeRepo; using document::config_builder::DocumenttypesConfigBuilderHelper; using document::config_builder::Struct; +using search::MatchingElements; +using search::MatchingElementsFields; using search::docsummary::DocsumStoreDocument; using search::docsummary::DocumentIdDFW; +using search::docsummary::GetDocsumsState; +using search::docsummary::GetDocsumsStateCallback; using search::docsummary::IDocsumStoreDocument; using search::docsummary::ResultClass; using search::docsummary::ResultConfig; @@ -47,6 +53,12 @@ make_doc_type_repo() return std::make_unique<const DocumentTypeRepo>(builder.config()); } +struct MyGetDocsumsStateCallback : GetDocsumsStateCallback { + virtual void FillSummaryFeatures(GetDocsumsState&) override {} + virtual void FillRankFeatures(GetDocsumsState&) override {} + std::unique_ptr<MatchingElements> fill_matching_elements(const MatchingElementsFields &) override { abort(); } +}; + class DocumentIdDFWTest : public ::testing::Test { vespalib::string _field_name; @@ -96,7 +108,9 @@ DocumentIdDFWTest::write(const IDocsumStoreDocument* doc) Cursor & docsum = top_inserter.insertObject(); ObjectInserter field_inserter(docsum, _field_name_view); DocumentIdDFW writer; - writer.insertField(0, doc, nullptr, search::docsummary::RES_LONG_STRING, field_inserter); + MyGetDocsumsStateCallback callback; + GetDocsumsState state(callback); + writer.insertField(0, doc, state, field_inserter); return slime; } diff --git a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp index 519961dedb6..8ac37ae76eb 100644 --- a/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp +++ b/searchsummary/src/tests/docsummary/matched_elements_filter/matched_elements_filter_test.cpp @@ -218,7 +218,7 @@ private: Slime slime; SlimeInserter inserter(slime); - writer->insertField(doc_id, doc.get(), &state, ResType::RES_JSONSTRING, inserter); + writer->insertField(doc_id, doc.get(), state, inserter); return slime; } diff --git a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp index f2e949cbddf..f23bd2f0437 100644 --- a/searchsummary/src/tests/docsummary/positionsdfw_test.cpp +++ b/searchsummary/src/tests/docsummary/positionsdfw_test.cpp @@ -1,17 +1,16 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Unit tests for positionsdfw. +#include <vespa/juniper/rpinterface.h> #include <vespa/searchlib/attribute/extendableattributes.h> #include <vespa/searchlib/attribute/iattributemanager.h> #include <vespa/searchlib/common/matching_elements.h> #include <vespa/searchsummary/docsummary/docsum_field_writer.h> -#include <vespa/searchsummary/docsummary/positionsdfw.h> -#include <vespa/searchsummary/docsummary/idocsumenvironment.h> #include <vespa/searchsummary/docsummary/docsumstate.h> +#include <vespa/searchsummary/docsummary/idocsumenvironment.h> +#include <vespa/searchsummary/docsummary/positionsdfw.h> #include <vespa/searchsummary/test/slime_value.h> -#include <vespa/vespalib/testkit/testapp.h> #include <vespa/vespalib/data/slime/slime.h> -#include <vespa/juniper/rpinterface.h> +#include <vespa/vespalib/gtest/gtest.h> #include <vespa/log/log.h> LOG_SETUP("positionsdfw_test"); @@ -29,33 +28,6 @@ namespace search::docsummary { namespace { -class Test : public vespalib::TestApp { - void requireThat2DPositionFieldIsWritten(); - -public: - int Main() override; -}; - -int -Test::Main() -{ - TEST_INIT("positionsdfw_test"); - - TEST_DO(requireThat2DPositionFieldIsWritten()); - - TEST_DONE(); -} - -struct MyEnvironment : IDocsumEnvironment { - IAttributeManager *attribute_man; - - MyEnvironment() : attribute_man(0) {} - - const IAttributeManager *getAttributeManager() const override { return attribute_man; } - string lookupIndex(const string &s) const override { return s; } - const juniper::Juniper *getJuniper() const override { return nullptr; } -}; - class MyAttributeContext : public IAttributeContext { const IAttributeVector &_attr; public: @@ -132,20 +104,22 @@ void checkWritePositionField(AttrType &attr, MyAttributeManager attribute_man(attr); PositionsDFW::UP writer = PositionsDFW::create(attr.getName().c_str(), &attribute_man, false); ASSERT_TRUE(writer.get()); - ResType res_type = RES_JSONSTRING; MyGetDocsumsStateCallback callback; GetDocsumsState state(callback); state._attributes.push_back(&attr); vespalib::Slime target; vespalib::slime::SlimeInserter inserter(target); - writer->insertField(doc_id, &state, res_type, inserter); + writer->insertField(doc_id, state, inserter); test::SlimeValue expected(expect_json); - EXPECT_EQUAL(expected.slime, target); + EXPECT_EQ(expected.slime, target); } -void Test::requireThat2DPositionFieldIsWritten() { +} // namespace + +TEST(PositionsDFWTest, require_that_2D_position_field_is_written) +{ SingleInt64ExtAttribute attr("foo"); checkWritePositionField(attr, 0x3e, "{x:6,y:7,latlong:'N0.000007;E0.000006'}"); checkWritePositionField(attr, 007, "{x:-1,y:-1,latlong:'S0.000001;W0.000001'}"); @@ -154,7 +128,6 @@ void Test::requireThat2DPositionFieldIsWritten() { checkWritePositionField(attr, 42, "null"); } -} // namespace } -TEST_APPHOOK(search::docsummary::Test); +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp index cf4006c5e67..505386f5b91 100644 --- a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp @@ -1,12 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include <vespa/document/annotation/annotation.h> -#include <vespa/document/annotation/span.h> -#include <vespa/document/annotation/spanlist.h> -#include <vespa/document/annotation/spantree.h> #include <vespa/document/base/documentid.h> #include <vespa/document/datatype/documenttype.h> -#include <vespa/document/datatype/urldatatype.h> #include <vespa/document/datatype/referencedatatype.h> #include <vespa/document/datatype/tensor_data_type.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> @@ -31,23 +26,18 @@ #include <vespa/eval/eval/tensor_spec.h> #include <vespa/eval/eval/value.h> #include <vespa/eval/eval/value_codec.h> -#include <vespa/juniper/juniper_separators.h> -#include <vespa/searchsummary/docsummary/docsum_field_writer.h> -#include <vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h> -#include <vespa/searchsummary/docsummary/i_juniper_converter.h> +#include <vespa/searchsummary/docsummary/i_string_field_converter.h> #include <vespa/searchsummary/docsummary/linguisticsannotation.h> #include <vespa/searchsummary/docsummary/resultconfig.h> #include <vespa/searchsummary/docsummary/slime_filler.h> +#include <vespa/searchsummary/docsummary/slime_filler_filter.h> #include <vespa/vespalib/data/slime/binary_format.h> #include <vespa/vespalib/data/slime/json_format.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/data/simple_buffer.h> #include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/size_literals.h> -#include <vespa/config-summary.h> -using document::Annotation; using document::ArrayFieldValue; using document::BoolFieldValue; using document::ByteFieldValue; @@ -69,21 +59,16 @@ using document::RawFieldValue; using document::ReferenceDataType; using document::ReferenceFieldValue; using document::ShortFieldValue; -using document::Span; -using document::SpanList; -using document::SpanTree; using document::StringFieldValue; using document::StructDataType; using document::StructFieldValue; using document::TensorDataType; using document::TensorFieldValue; -using document::UrlDataType; using document::WeightedSetFieldValue; -using search::docsummary::IDocsumFieldWriterFactory; -using search::docsummary::IJuniperConverter; -using search::docsummary::DocsumFieldWriter; +using search::docsummary::IStringFieldConverter; using search::docsummary::ResultConfig; using search::docsummary::SlimeFiller; +using search::docsummary::SlimeFillerFilter; using search::linguistics::SPANTREE_NAME; using search::linguistics::TERM; using vespalib::SimpleBuffer; @@ -95,7 +80,6 @@ using vespalib::eval::ValueType; using vespalib::slime::Cursor; using vespalib::slime::JsonFormat; using vespalib::slime::SlimeInserter; -using vespa::config::search::SummaryConfigBuilder; namespace { @@ -114,15 +98,6 @@ slime_to_string(const Slime& slime) } vespalib::string -make_slime_string(vespalib::stringref value) -{ - Slime slime; - SlimeInserter inserter(slime); - inserter.insertString({value}); - return slime_to_string(slime); -} - -vespalib::string make_slime_data_string(vespalib::stringref data) { Slime slime; @@ -139,15 +114,6 @@ make_slime_tensor_string(const Value& value) return make_slime_data_string({s.peek(), s.size()}); } -class MockDocsumFieldWriterFactory : public IDocsumFieldWriterFactory -{ -public: - std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string&, const vespalib::string&, const vespalib::string&, bool&) override { - return {}; - } - -}; - DocumenttypesConfig get_document_types_config() { @@ -172,20 +138,25 @@ get_document_types_config() .addField("d", nested_type_id) .addField("e", nested_type_id) .addField("f", nested_type_id)) + .addField("nested_array", Array(nested_type_id)) + .addField("nested_map", Map(DataType::T_STRING, nested_type_id)) .addField("ref", ref_type_id), Struct("indexingdocument.body")) .referenceType(ref_type_id, ref_target_doctype_id); return builder.config(); } -class MockJuniperConverter : public IJuniperConverter +class MockStringFieldConverter : public IStringFieldConverter { vespalib::string _result; public: - void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter&) override { - _result = input; + MockStringFieldConverter() + : IStringFieldConverter(), + _result() + { } - void insert_juniper_field(const document::StringFieldValue& input, vespalib::slime::Inserter&) override { + ~MockStringFieldConverter() override = default; + void convert(const document::StringFieldValue& input, vespalib::slime::Inserter&) override { _result = input.getValueRef(); } const vespalib::string& get_result() const noexcept { return _result; } @@ -198,32 +169,26 @@ class SlimeFillerTest : public testing::Test protected: std::shared_ptr<const DocumentTypeRepo> _repo; const DocumentType* _document_type; - document::FixedTypeRepo _fixed_repo; SlimeFillerTest(); ~SlimeFillerTest() override; const DataType& get_data_type(const vespalib::string& name) const; const ReferenceDataType& get_as_ref_type(const vespalib::string& name) const; - void set_span_tree(StringFieldValue& value, std::unique_ptr<SpanTree> tree); - StringFieldValue make_annotated_string(); - StringFieldValue make_annotated_chinese_string(); - vespalib::string make_exp_il_annotated_string(); - vespalib::string make_exp_il_annotated_chinese_string(); ArrayFieldValue make_array(); WeightedSetFieldValue make_weighted_set(); MapFieldValue make_map(); - void expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize, const std::vector<uint32_t>* matching_elems); - void expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize); + StructFieldValue make_nested_value(int i); + void expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>* matching_elems); void expect_insert(const vespalib::string& exp, const FieldValue& fv); void expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>& matching_elems); - void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize); + void expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter); + void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv); }; SlimeFillerTest::SlimeFillerTest() : testing::Test(), _repo(std::make_unique<DocumentTypeRepo>(get_document_types_config())), - _document_type(_repo->getDocumentType("indexingdocument")), - _fixed_repo(*_repo, *_document_type) + _document_type(_repo->getDocumentType("indexingdocument")) { } @@ -242,64 +207,6 @@ SlimeFillerTest::get_as_ref_type(const vespalib::string& name) const { return dynamic_cast<const ReferenceDataType&>(get_data_type(name)); } -void -SlimeFillerTest::set_span_tree(StringFieldValue & value, std::unique_ptr<SpanTree> tree) -{ - StringFieldValue::SpanTrees trees; - trees.push_back(std::move(tree)); - value.setSpanTrees(trees, _fixed_repo); -} - -StringFieldValue -SlimeFillerTest::make_annotated_string() -{ - auto span_list_up = std::make_unique<SpanList>(); - auto span_list = span_list_up.get(); - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); - tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), - Annotation(*TERM, std::make_unique<StringFieldValue>("baz"))); - StringFieldValue value("foo bar"); - set_span_tree(value, std::move(tree)); - return value; -} - -StringFieldValue -SlimeFillerTest::make_annotated_chinese_string() -{ - auto span_list_up = std::make_unique<SpanList>(); - auto span_list = span_list_up.get(); - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); - // These chinese characters each use 3 bytes in their UTF8 encoding. - tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM); - StringFieldValue value("我就是那个大灰狼"); - set_span_tree(value, std::move(tree)); - return value; -} - -vespalib::string -SlimeFillerTest::make_exp_il_annotated_string() -{ - using namespace juniper::separators; - vespalib::asciistream exp; - exp << "foo" << unit_separator_string << - " " << unit_separator_string << interlinear_annotation_anchor_string << - "bar" << interlinear_annotation_separator_string << - "baz" << interlinear_annotation_terminator_string << unit_separator_string; - return exp.str(); -} - -vespalib::string -SlimeFillerTest::make_exp_il_annotated_chinese_string() -{ - using namespace juniper::separators; - vespalib::asciistream exp; - exp << "我就是那个" << unit_separator_string << - "大灰狼" << unit_separator_string; - return exp.str(); -} - ArrayFieldValue SlimeFillerTest::make_array() { @@ -330,15 +237,28 @@ SlimeFillerTest::make_map() return map; } +StructFieldValue +SlimeFillerTest::make_nested_value(int i) +{ + StructFieldValue nested(get_data_type("nested")); + StructFieldValue nested2(get_data_type("nested")); + nested.setValue("a", IntFieldValue(42 + 100 * i)); + nested.setValue("b", IntFieldValue(44 + 100 * i)); + nested.setValue("c", IntFieldValue(46 + 100 * i)); + nested2.setValue("a", IntFieldValue(62 + 100 * i)); + nested2.setValue("c", IntFieldValue(66 + 100 * i)); + nested.setValue("d", nested2); + nested.setValue("f", nested2); + return nested; +} + void -SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize, const std::vector<uint32_t>* matching_elems) +SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>* matching_elems) { Slime slime; SlimeInserter inserter(slime); - SlimeFiller filler(inserter, tokenize, matching_elems); + SlimeFiller filler(inserter, matching_elems); fv.accept(filler); - SimpleBuffer buf; - JsonFormat::encode(slime, buf, true); auto act = slime_to_string(slime); EXPECT_EQ(exp, act); } @@ -346,28 +266,33 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv void SlimeFillerTest::expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>& matching_elems) { - expect_insert(exp, fv, false, &matching_elems); + expect_insert(exp, fv, &matching_elems); } void -SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize) +SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv) { - expect_insert(exp, fv, tokenize, nullptr); + expect_insert(exp, fv, nullptr); } void -SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv) +SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter) { - expect_insert(exp, fv, false); + Slime slime; + SlimeInserter inserter(slime); + SlimeFiller filler(inserter, nullptr, &filter); + fv.accept(filler); + auto act = slime_to_string(slime); + EXPECT_EQ(exp, act); } void -SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize) +SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const FieldValue& fv) { Slime slime; SlimeInserter inserter(slime); - MockJuniperConverter converter; - SlimeFiller filler(inserter, tokenize, &converter); + MockStringFieldConverter converter; + SlimeFiller filler(inserter, &converter, nullptr); fv.accept(filler); auto act_null = slime_to_string(slime); EXPECT_EQ("null", act_null); @@ -415,14 +340,8 @@ TEST_F(SlimeFillerTest, insert_string) expect_insert(R"("Foo Bar Baz")", StringFieldValue("Foo Bar Baz")); } { - SCOPED_TRACE("annotated string"); - auto exp = make_exp_il_annotated_string(); - expect_insert(make_slime_string(exp), make_annotated_string(), true); - } - { - SCOPED_TRACE("annotated chinese string"); - auto exp = make_exp_il_annotated_chinese_string(); - expect_insert(make_slime_string(exp), make_annotated_chinese_string(), true); + SCOPED_TRACE("empty string"); + expect_insert(R"("")", StringFieldValue()); } } @@ -569,43 +488,45 @@ TEST_F(SlimeFillerTest, insert_map_filtered) TEST_F(SlimeFillerTest, insert_struct) { - StructFieldValue nested(get_data_type("nested")); - StructFieldValue nested2(get_data_type("nested")); - nested.setValue("a", IntFieldValue(42)); - nested.setValue("b", IntFieldValue(44)); - nested.setValue("c", IntFieldValue(46)); - nested2.setValue("a", IntFieldValue(62)); - nested2.setValue("c", IntFieldValue(66)); - nested.setValue("d", nested2); - nested.setValue("f", nested2); - // Field order depends on assigned field ids, cf. document::Field::calculateIdV7() + auto nested = make_nested_value(0); + // Field order depends on assigned field ids, cf. document::Field::calculateIdV7(), and symbol insertion order in slime expect_insert(R"({"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}})", nested); + SlimeFillerFilter filter; + filter.add("a").add("c").add("f.a").add("d"); + expect_insert(R"({"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}})", nested, filter); } -TEST_F(SlimeFillerTest, insert_string_with_callback) +TEST_F(SlimeFillerTest, insert_struct_array) { - { - SCOPED_TRACE("plain string"); - using namespace juniper::separators; - vespalib::string exp("Foo Bar Baz"); - StringFieldValue plain_string("Foo Bar Baz"); - expect_insert_callback(exp + unit_separator_string, plain_string, true); - expect_insert_callback(exp, plain_string, false); - } - { - SCOPED_TRACE("annotated string"); - auto exp = make_exp_il_annotated_string(); - auto annotated_string = make_annotated_string(); - expect_insert_callback(exp, annotated_string, true); - expect_insert_callback("foo bar", annotated_string, false); + ArrayFieldValue array(get_data_type("Array<nested>")); + for (int i = 0; i < 3; ++i) { + array.add(make_nested_value(i)); } - { - SCOPED_TRACE("annotated chinese string"); - auto exp = make_exp_il_annotated_chinese_string(); - auto annotated_chinese_string = make_annotated_chinese_string(); - expect_insert_callback(exp, annotated_chinese_string, true); - expect_insert_callback(annotated_chinese_string.getValueRef(), annotated_chinese_string, false); + expect_insert(R"([{"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}},{"f":{"c":166,"a":162},"c":146,"a":142,"b":144,"d":{"c":166,"a":162}},{"f":{"c":266,"a":262},"c":246,"a":242,"b":244,"d":{"c":266,"a":262}}])", array); + SlimeFillerFilter filter; + filter.add("a").add("c").add("f.a").add("d"); + expect_insert(R"([{"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}},{"f":{"a":162},"a":142,"c":146,"d":{"a":162,"c":166}},{"f":{"a":262},"a":242,"c":246,"d":{"a":262,"c":266}}])", array, filter); +} + +TEST_F(SlimeFillerTest, insert_struct_map) +{ + MapFieldValue map(get_data_type("Map<String,nested>")); + for (int i = 0; i < 3; ++i) { + vespalib::asciistream key; + key << "key" << (i + 1); + map.put(StringFieldValue(key.str()), make_nested_value(i)); } + expect_insert(R"([{"key":"key1","value":{"f":{"c":66,"a":62},"c":46,"a":42,"b":44,"d":{"c":66,"a":62}}},{"key":"key2","value":{"f":{"c":166,"a":162},"c":146,"a":142,"b":144,"d":{"c":166,"a":162}}},{"key":"key3","value":{"f":{"c":266,"a":262},"c":246,"a":242,"b":244,"d":{"c":266,"a":262}}}])", map); + SlimeFillerFilter filter; + filter.add("value.a").add("value.c").add("value.f.a").add("value.d"); + expect_insert(R"([{"key":"key1","value":{"f":{"a":62},"a":42,"c":46,"d":{"a":62,"c":66}}},{"key":"key2","value":{"f":{"a":162},"a":142,"c":146,"d":{"a":162,"c":166}}},{"key":"key3","value":{"f":{"a":262},"a":242,"c":246,"d":{"a":262,"c":266}}}])", map, filter); +} + +TEST_F(SlimeFillerTest, insert_string_with_callback) +{ + vespalib::string exp("Foo Bar Baz"); + StringFieldValue plain_string(exp); + expect_insert_callback(exp, plain_string); } GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt b/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt index 344a33952d6..26456dae395 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt +++ b/searchsummary/src/tests/docsummary/slime_summary/CMakeLists.txt @@ -4,5 +4,6 @@ vespa_add_executable(searchsummary_slime_summary_test_app TEST slime_summary_test.cpp DEPENDS searchsummary + GTest::GTest ) vespa_add_test(NAME searchsummary_slime_summary_test_app COMMAND searchsummary_slime_summary_test_app) diff --git a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp index cbde3d77b4a..fa53cf202ff 100644 --- a/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_summary/slime_summary_test.cpp @@ -1,8 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + #include <vespa/document/base/documentid.h> #include <vespa/document/datatype/documenttype.h> -#include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/bytefieldvalue.h> +#include <vespa/document/fieldvalue/document.h> #include <vespa/document/fieldvalue/doublefieldvalue.h> #include <vespa/document/fieldvalue/floatfieldvalue.h> #include <vespa/document/fieldvalue/intfieldvalue.h> @@ -10,14 +11,14 @@ #include <vespa/document/fieldvalue/rawfieldvalue.h> #include <vespa/document/fieldvalue/shortfieldvalue.h> #include <vespa/document/fieldvalue/stringfieldvalue.h> -#include <vespa/vespalib/testkit/test_kit.h> #include <vespa/searchlib/common/matching_elements.h> -#include <vespa/searchsummary/docsummary/docsumwriter.h> +#include <vespa/searchsummary/docsummary/docsum_store_document.h> #include <vespa/searchsummary/docsummary/docsumstate.h> +#include <vespa/searchsummary/docsummary/docsumwriter.h> #include <vespa/searchsummary/docsummary/keywordextractor.h> -#include <vespa/searchsummary/docsummary/docsum_store_document.h> #include <vespa/vespalib/data/slime/slime.h> #include <vespa/vespalib/data/smart_buffer.h> +#include <vespa/vespalib/gtest/gtest.h> #include <vespa/vespalib/util/size_literals.h> using namespace vespalib::slime::convenience; @@ -42,27 +43,27 @@ using document::StructFieldValue; namespace { -struct DocsumFixture : IDocsumStore, GetDocsumsStateCallback { +struct SlimeSummaryTest : testing::Test, IDocsumStore, GetDocsumsStateCallback { std::unique_ptr<DynamicDocsumWriter> writer; StructDataType int_pair_type; DocumentType doc_type; GetDocsumsState state; bool fail_get_mapped_docsum; bool empty_get_mapped_docsum; - DocsumFixture(); - ~DocsumFixture() override; + SlimeSummaryTest(); + ~SlimeSummaryTest() override; void getDocsum(Slime &slime) { Slime slimeOut; SlimeInserter inserter(slimeOut); auto rci = writer->resolveClassInfo(state._args.getResultClassName()); - writer->insertDocsum(rci, 1u, &state, this, inserter); + writer->insertDocsum(rci, 1u, state, this, inserter); vespalib::SmartBuffer buf(4_Ki); BinaryFormat::encode(slimeOut, buf); - EXPECT_GREATER(BinaryFormat::decode(buf.obtain(), slime), 0u); + EXPECT_GT(BinaryFormat::decode(buf.obtain(), slime), 0u); } uint32_t getNumDocs() const override { return 2; } std::unique_ptr<const IDocsumStoreDocument> getMappedDocsum(uint32_t docid) override { - EXPECT_EQUAL(1u, docid); + EXPECT_EQ(1u, docid); if (fail_get_mapped_docsum) { return {}; } @@ -94,7 +95,7 @@ struct DocsumFixture : IDocsumStore, GetDocsumsStateCallback { }; -DocsumFixture::DocsumFixture() +SlimeSummaryTest::SlimeSummaryTest() : writer(), int_pair_type("int_pair"), doc_type("test"), @@ -132,49 +133,54 @@ DocsumFixture::DocsumFixture() doc_type.addField(Field("longdata_field", *DataType::RAW)); doc_type.addField(Field("int_pair_field", int_pair_type)); } -DocsumFixture::~DocsumFixture() = default; +SlimeSummaryTest::~SlimeSummaryTest() = default; } // namespace <unnamed> -TEST_FF("require that docsum can be written as slime", DocsumFixture(), Slime()) { - f1.getDocsum(f2); - EXPECT_EQUAL(f2.get()["int_field"].asLong(), 4u); - EXPECT_EQUAL(f2.get()["short_field"].asLong(), 2u); - EXPECT_EQUAL(f2.get()["byte_field"].asLong(), 1u); - EXPECT_EQUAL(f2.get()["float_field"].asDouble(), 4.5); - EXPECT_EQUAL(f2.get()["double_field"].asDouble(), 8.75); - EXPECT_EQUAL(f2.get()["int64_field"].asLong(), 8u); - EXPECT_EQUAL(f2.get()["string_field"].asString().make_string(), std::string("string")); - EXPECT_EQUAL(f2.get()["data_field"].asData().make_string(), std::string("data")); - EXPECT_EQUAL(f2.get()["longstring_field"].asString().make_string(), std::string("long_string")); - EXPECT_EQUAL(f2.get()["longdata_field"].asData().make_string(), std::string("long_data")); - EXPECT_EQUAL(f2.get()["int_pair_field"]["foo"].asLong(), 1u); - EXPECT_EQUAL(f2.get()["int_pair_field"]["bar"].asLong(), 2u); +TEST_F(SlimeSummaryTest, docsum_can_be_written_as_slime) +{ + Slime s; + getDocsum(s); + EXPECT_EQ(s.get()["int_field"].asLong(), 4u); + EXPECT_EQ(s.get()["short_field"].asLong(), 2u); + EXPECT_EQ(s.get()["byte_field"].asLong(), 1u); + EXPECT_EQ(s.get()["float_field"].asDouble(), 4.5); + EXPECT_EQ(s.get()["double_field"].asDouble(), 8.75); + EXPECT_EQ(s.get()["int64_field"].asLong(), 8u); + EXPECT_EQ(s.get()["string_field"].asString().make_string(), std::string("string")); + EXPECT_EQ(s.get()["data_field"].asData().make_string(), std::string("data")); + EXPECT_EQ(s.get()["longstring_field"].asString().make_string(), std::string("long_string")); + EXPECT_EQ(s.get()["longdata_field"].asData().make_string(), std::string("long_data")); + EXPECT_EQ(s.get()["int_pair_field"]["foo"].asLong(), 1u); + EXPECT_EQ(s.get()["int_pair_field"]["bar"].asLong(), 2u); } -TEST_FF("require that unknown summary class gives empty slime", DocsumFixture(), Slime()) +TEST_F(SlimeSummaryTest, unknown_summary_class_gives_empty_slime) { - f1.state._args.setResultClassName("unknown"); - f1.getDocsum(f2); - EXPECT_TRUE(f2.get().valid()); - EXPECT_EQUAL(vespalib::slime::NIX::ID, f2.get().type().getId()); + state._args.setResultClassName("unknown"); + Slime s; + getDocsum(s); + EXPECT_TRUE(s.get().valid()); + EXPECT_EQ(vespalib::slime::NIX::ID, s.get().type().getId()); } -TEST_FF("require that failure to retrieve docsum store document gives empty slime", DocsumFixture(), Slime()) +TEST_F(SlimeSummaryTest, failure_to_retrieve_docsum_store_document_gives_empty_slime) { - f1.fail_get_mapped_docsum = true; - f1.getDocsum(f2); - EXPECT_TRUE(f2.get().valid()); - EXPECT_EQUAL(vespalib::slime::NIX::ID, f2.get().type().getId()); + fail_get_mapped_docsum = true; + Slime s; + getDocsum(s); + EXPECT_TRUE(s.get().valid()); + EXPECT_EQ(vespalib::slime::NIX::ID, s.get().type().getId()); } -TEST_FF("require that empty docsum store document gives empty object", DocsumFixture(), Slime()) +TEST_F(SlimeSummaryTest, empty_docsum_store_document_gives_empty_object) { - f1.empty_get_mapped_docsum = true; - f1.getDocsum(f2); - EXPECT_TRUE(f2.get().valid()); - EXPECT_EQUAL(vespalib::slime::OBJECT::ID, f2.get().type().getId()); - EXPECT_EQUAL(0u, f2.get().fields()); + empty_get_mapped_docsum = true; + Slime s; + getDocsum(s); + EXPECT_TRUE(s.get().valid()); + EXPECT_EQ(vespalib::slime::OBJECT::ID, s.get().type().getId()); + EXPECT_EQ(0u, s.get().fields()); } -TEST_MAIN() { TEST_RUN_ALL(); } +GTEST_MAIN_RUN_ALL_TESTS() diff --git a/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt deleted file mode 100644 index cfda566ee6c..00000000000 --- a/searchsummary/src/tests/docsummary/summary_field_converter/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -vespa_add_executable(searchsummary_summary_field_converter_test_app - SOURCES - summary_field_converter_test.cpp - DEPENDS - searchsummary -) -vespa_add_test(NAME searchsummary_summary_field_converter_test_app COMMAND searchsummary_summary_field_converter_test_app) diff --git a/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp b/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp deleted file mode 100644 index 0eff397bc10..00000000000 --- a/searchsummary/src/tests/docsummary/summary_field_converter/summary_field_converter_test.cpp +++ /dev/null @@ -1,741 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -// Unit tests for summaryfieldconverter. - -#include <vespa/document/annotation/annotation.h> -#include <vespa/document/annotation/span.h> -#include <vespa/document/annotation/spanlist.h> -#include <vespa/document/annotation/spantree.h> -#include <vespa/document/base/documentid.h> -#include <vespa/document/base/exceptions.h> -#include <vespa/document/base/field.h> -#include <vespa/document/config/documenttypes_config_fwd.h> -#include <vespa/document/datatype/annotationtype.h> -#include <vespa/document/datatype/arraydatatype.h> -#include <vespa/document/datatype/datatype.h> -#include <vespa/document/datatype/documenttype.h> -#include <vespa/document/datatype/structdatatype.h> -#include <vespa/document/datatype/urldatatype.h> -#include <vespa/document/datatype/weightedsetdatatype.h> -#include <vespa/document/datatype/referencedatatype.h> -#include <vespa/document/datatype/tensor_data_type.h> -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/bytefieldvalue.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/document/fieldvalue/doublefieldvalue.h> -#include <vespa/document/fieldvalue/floatfieldvalue.h> -#include <vespa/document/fieldvalue/intfieldvalue.h> -#include <vespa/document/fieldvalue/longfieldvalue.h> -#include <vespa/document/fieldvalue/predicatefieldvalue.h> -#include <vespa/document/fieldvalue/rawfieldvalue.h> -#include <vespa/document/fieldvalue/shortfieldvalue.h> -#include <vespa/document/fieldvalue/stringfieldvalue.h> -#include <vespa/document/fieldvalue/structfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> -#include <vespa/document/fieldvalue/tensorfieldvalue.h> -#include <vespa/document/fieldvalue/referencefieldvalue.h> -#include <vespa/document/predicate/predicate.h> -#include <vespa/document/repo/configbuilder.h> -#include <vespa/document/repo/fixedtyperepo.h> -#include <vespa/searchsummary/docsummary/summaryfieldconverter.h> -#include <vespa/searchsummary/docsummary/linguisticsannotation.h> -#include <vespa/searchsummary/docsummary/searchdatatype.h> -#include <vespa/searchcommon/common/schema.h> -#include <vespa/vespalib/geo/zcurve.h> -#include <vespa/vespalib/testkit/testapp.h> -#include <vespa/vespalib/data/slime/json_format.h> -#include <vespa/vespalib/data/slime/binary_format.h> -#include <vespa/searchlib/util/slime_output_raw_buf_adapter.h> -#include <vespa/eval/eval/simple_value.h> -#include <vespa/eval/eval/tensor_spec.h> -#include <vespa/eval/eval/value.h> -#include <vespa/eval/eval/test/value_compare.h> -#include <vespa/vespalib/data/slime/slime.h> - -using document::Annotation; -using document::AnnotationType; -using document::ArrayDataType; -using document::ArrayFieldValue; -using document::ByteFieldValue; -using document::DataType; -using document::Document; -using document::DocumentId; -using document::DocumentType; -using document::DocumentTypeRepo; -using document::DoubleFieldValue; -using document::FeatureSet; -using document::Field; -using document::FieldNotFoundException; -using document::FieldValue; -using document::FloatFieldValue; -using document::IntFieldValue; -using document::LongFieldValue; -using document::Predicate; -using document::PredicateFieldValue; -using document::RawFieldValue; -using document::ReferenceDataType; -using document::ReferenceFieldValue; -using document::ShortFieldValue; -using document::Span; -using document::SpanList; -using document::SpanTree; -using document::StringFieldValue; -using document::StructDataType; -using document::StructFieldValue; -using document::TensorDataType; -using document::TensorFieldValue; -using document::UrlDataType; -using document::WeightedSetDataType; -using document::WeightedSetFieldValue; -using search::index::Schema; -using search::linguistics::SPANTREE_NAME; -using search::linguistics::TERM; -using vespalib::Slime; -using vespalib::eval::SimpleValue; -using vespalib::eval::TensorSpec; -using vespalib::eval::Value; -using vespalib::eval::ValueType; -using vespalib::geo::ZCurve; -using vespalib::slime::Cursor; -using vespalib::string; - -using namespace search::docsummary; - -typedef SummaryFieldConverter SFC; - -namespace { - -struct FieldBlock { - vespalib::string input; - Slime slime; - search::RawBuf binary; - vespalib::string json; - - explicit FieldBlock(const vespalib::string &jsonInput); - ~FieldBlock(); -}; - -FieldBlock::FieldBlock(const vespalib::string &jsonInput) - : input(jsonInput), slime(), binary(1024), json() -{ - size_t used = vespalib::slime::JsonFormat::decode(jsonInput, slime); - EXPECT_TRUE(used > 0); - { - search::SlimeOutputRawBufAdapter adapter(binary); - vespalib::slime::JsonFormat::encode(slime, adapter, true); - json.assign(binary.GetDrainPos(), binary.GetUsedLen()); - binary.reset(); - } - search::SlimeOutputRawBufAdapter adapter(binary); - vespalib::slime::BinaryFormat::encode(slime, adapter); -} - -FieldBlock::~FieldBlock() = default; - -class Test : public vespalib::TestApp { - std::unique_ptr<Schema> _schema; - std::shared_ptr<const DocumentTypeRepo> _documentRepo; - const DocumentType *_documentType; - document::FixedTypeRepo _fixedRepo; - - void setUp(); - void tearDown(); - - const DataType &getDataType(const string &name) const; - const ReferenceDataType& getAsRefType(const string& name) const; - - template <typename T> - T getValueAs(const string &field_name, const Document &doc); - - template <typename T> - T - cvtValueAs(const FieldValue::UP &fv); - - template <typename T> - T - cvtAttributeAs(const FieldValue::UP &fv); - - template <typename T> - T - cvtSummaryAs(bool markup, const FieldValue::UP &fv); - - void checkString(const string &str, const FieldValue *value); - void checkStringForAllConversions(const string& expected, const FieldValue* fv); - void checkData(const search::RawBuf &data, const FieldValue *value); - void checkTensor(const Value::UP &tensor, const FieldValue *value); - template <unsigned int N> - void checkArray(const char *(&str)[N], const FieldValue *value); - void setSummaryField(const string &name); - void setAttributeField(const string &name); - - void requireThatSummaryIsAnUnmodifiedString(); - void requireThatAttributeIsAnUnmodifiedString(); - void requireThatArrayIsFlattenedInSummaryField(); - void requireThatWeightedSetIsFlattenedInSummaryField(); - void requireThatPositionsAreTransformedInSummary(); - void requireThatArrayIsPreservedInAttributeField(); - void requireThatPositionsAreTransformedInAttributeField(); - void requireThatPositionArrayIsTransformedInAttributeField(); - void requireThatPositionWeightedSetIsTransformedInAttributeField(); - void requireThatAttributeCanBePrimitiveTypes(); - void requireThatSummaryCanBePrimitiveTypes(); - void requireThatSummaryHandlesCjk(); - void requireThatSearchDataTypeUsesDefaultDataTypes(); - void requireThatLinguisticsAnnotationUsesDefaultDataTypes(); - void requireThatPredicateIsPrinted(); - void requireThatTensorIsNotConverted(); - void requireThatNonEmptyReferenceIsConvertedToStringWithId(); - void requireThatEmptyReferenceIsConvertedToEmptyString(); - void requireThatReferenceInCompositeTypeEmitsSlimeData(); - const DocumentType &getDocType() const { return *_documentType; } - Document makeDocument(); - StringFieldValue annotateTerm(const string &term); - StringFieldValue makeAnnotatedChineseString(); - StringFieldValue makeAnnotatedString(); - void setSpanTree(StringFieldValue & value, SpanTree::UP tree); -public: - Test(); - ~Test(); - int Main() override; -}; - -DocumenttypesConfig getDocumenttypesConfig() { - using namespace document::config_builder; - DocumenttypesConfigBuilderHelper builder; - const int ref_target_doctype_id = 1234; - const int ref_type_id = 5678; - builder.document(ref_target_doctype_id, "target_dummy_document", - Struct("target_dummy_document.header"), - Struct("target_dummy_document.body")); - builder.document(42, "indexingdocument", - Struct("indexingdocument.header") - .addField("empty", DataType::T_STRING) - .addField("string", DataType::T_STRING) - .addField("plain_string", DataType::T_STRING) - .addField("string_array", Array(DataType::T_STRING)) - .addField("string_wset", Wset(DataType::T_STRING)) - .addField("position1", DataType::T_INT) - .addField("position2", DataType::T_LONG) - .addField("position2_array", Array(DataType::T_LONG)) - .addField("position2_wset", Wset(DataType::T_LONG)) - .addField("uri", UrlDataType::getInstance().getId()) - .addField("uri_array", - Array(UrlDataType::getInstance().getId())) - .addField("int", DataType::T_INT) - .addField("long", DataType::T_LONG) - .addField("short", DataType::T_SHORT) - .addField("byte", DataType::T_BYTE) - .addField("double", DataType::T_DOUBLE) - .addField("float", DataType::T_FLOAT) - .addField("chinese", DataType::T_STRING) - .addField("predicate", DataType::T_PREDICATE) - .addTensorField("tensor", "tensor(x{},y{})") - .addField("ref", ref_type_id) - .addField("nested", Struct("indexingdocument.header.nested") - .addField("inner_ref", ref_type_id)), - Struct("indexingdocument.body")) - .referenceType(ref_type_id, ref_target_doctype_id); - return builder.config(); -} - -Test::Test() : - _documentRepo(std::make_unique<DocumentTypeRepo>(getDocumenttypesConfig())), - _documentType(_documentRepo->getDocumentType("indexingdocument")), - _fixedRepo(*_documentRepo, *_documentType) -{ - ASSERT_TRUE(_documentType); -} - -Test::~Test() {} - -#define TEST_CALL(func) \ - TEST_DO(setUp()); \ - TEST_DO(func); \ - TEST_DO(tearDown()) - -int -Test::Main() -{ - TEST_INIT("summaryfieldconverter_test"); - - TEST_CALL(requireThatSummaryIsAnUnmodifiedString()); - TEST_CALL(requireThatAttributeIsAnUnmodifiedString()); - TEST_CALL(requireThatArrayIsFlattenedInSummaryField()); - TEST_CALL(requireThatWeightedSetIsFlattenedInSummaryField()); - TEST_CALL(requireThatPositionsAreTransformedInSummary()); - TEST_CALL(requireThatArrayIsPreservedInAttributeField()); - TEST_CALL(requireThatPositionsAreTransformedInAttributeField()); - TEST_CALL(requireThatPositionArrayIsTransformedInAttributeField()); - TEST_CALL(requireThatPositionWeightedSetIsTransformedInAttributeField()); - TEST_CALL(requireThatAttributeCanBePrimitiveTypes()); - TEST_CALL(requireThatSummaryCanBePrimitiveTypes()); - TEST_CALL(requireThatSummaryHandlesCjk()); - TEST_CALL(requireThatSearchDataTypeUsesDefaultDataTypes()); - TEST_CALL(requireThatLinguisticsAnnotationUsesDefaultDataTypes()); - TEST_CALL(requireThatPredicateIsPrinted()); - TEST_CALL(requireThatTensorIsNotConverted()); - TEST_CALL(requireThatNonEmptyReferenceIsConvertedToStringWithId()); - TEST_CALL(requireThatEmptyReferenceIsConvertedToEmptyString()); - TEST_CALL(requireThatReferenceInCompositeTypeEmitsSlimeData()); - - TEST_DONE(); -} - -void Test::setUp() { - _schema = std::make_unique<Schema>(); -} - -void Test::tearDown() { -} - -const DataType &Test::getDataType(const string &name) const { - const DataType *type = _documentRepo->getDataType(*_documentType, name); - ASSERT_TRUE(type); - return *type; -} - -StringFieldValue Test::makeAnnotatedString() { - auto span_list_up = std::make_unique<SpanList>(); - auto span_list = span_list_up.get(); - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); - // Annotations don't have to be added sequentially. - tree->annotate(span_list->add(std::make_unique<Span>(8, 3)), - Annotation(*TERM, std::make_unique<StringFieldValue>("Annotation"))); - tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), - Annotation(*TERM, std::make_unique<StringFieldValue>("Multiple"))); - tree->annotate(span_list->add(std::make_unique<Span>(1, 2)), - Annotation(*TERM, std::make_unique<StringFieldValue>("Overlap"))); - StringFieldValue value("Foo Bar Baz"); - setSpanTree(value, std::move(tree)); - return value; -} - -StringFieldValue Test::annotateTerm(const string &term) { - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::make_unique<Span>(0, term.size())); - tree->annotate(tree->getRoot(), *TERM); - StringFieldValue value(term); - setSpanTree(value, std::move(tree)); - return value; -} - -void Test::setSpanTree(StringFieldValue & value, SpanTree::UP tree) { - StringFieldValue::SpanTrees trees; - trees.push_back(std::move(tree)); - value.setSpanTrees(trees, _fixedRepo); -} - -StringFieldValue Test::makeAnnotatedChineseString() { - auto span_list_up = std::make_unique<SpanList>(); - auto span_list = span_list_up.get(); - auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up)); - // These chinese characters each use 3 bytes in their UTF8 encoding. - tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM); - tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM); - StringFieldValue value("我就是那个大灰狼"); - setSpanTree(value, std::move(tree)); - return value; -} - -Document Test::makeDocument() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("string", makeAnnotatedString()); - - doc.setValue("plain_string", StringFieldValue("Plain")); - - ArrayFieldValue array(getDataType("Array<String>")); - array.add(annotateTerm("\"foO\"")); - array.add(annotateTerm("ba\\R")); - doc.setValue("string_array", array); - - WeightedSetFieldValue wset(getDataType("WeightedSet<String>")); - wset.add(annotateTerm("\"foo\""), 2); - wset.add(annotateTerm("ba\\r"), 4); - doc.setValue("string_wset", wset); - - doc.setValue("position1", IntFieldValue(5)); - - doc.setValue("position2", LongFieldValue(ZCurve::encode(4, 2))); - - StructFieldValue uri(getDataType("url")); - uri.setValue("all", annotateTerm("http://www.example.com:42/foobar?q#frag")); - uri.setValue("scheme", annotateTerm("http")); - uri.setValue("host", annotateTerm("www.example.com")); - uri.setValue("port", annotateTerm("42")); - uri.setValue("path", annotateTerm("foobar")); - uri.setValue("query", annotateTerm("q")); - uri.setValue("fragment", annotateTerm("frag")); - doc.setValue("uri", uri); - - ArrayFieldValue uri_array(getDataType("Array<url>")); - uri.setValue("all", annotateTerm("http://www.example.com:80/foobar?q#frag")); - uri.setValue("port", annotateTerm("80")); - uri_array.add(uri); - uri.setValue("all", annotateTerm("https://www.example.com:443/foo?q#frag")); - uri.setValue("scheme", annotateTerm("https")); - uri.setValue("path", annotateTerm("foo")); - uri.setValue("port", annotateTerm("443")); - uri_array.add(uri); - doc.setValue("uri_array", uri_array); - - ArrayFieldValue position2_array(getDataType("Array<Long>")); - position2_array.add(LongFieldValue(ZCurve::encode(4, 2))); - position2_array.add(LongFieldValue(ZCurve::encode(4, 4))); - doc.setValue("position2_array", position2_array); - - WeightedSetFieldValue position2_wset(getDataType("WeightedSet<Long>")); - position2_wset.add(LongFieldValue(ZCurve::encode(4, 2)), 4); - position2_wset.add(LongFieldValue(ZCurve::encode(4, 4)), 2); - doc.setValue("position2_wset", position2_wset); - - doc.setValue("int", IntFieldValue(42)); - doc.setValue("long", LongFieldValue(84)); - doc.setValue("short", ShortFieldValue(21)); - doc.setValue("byte", ByteFieldValue(11)); - doc.setValue("double", DoubleFieldValue(0.4)); - doc.setValue("float", FloatFieldValue(0.2f)); - - doc.setValue("chinese", makeAnnotatedChineseString()); - return doc; -} - -template <typename T> -T Test::getValueAs(const string &field_name, const Document &doc) { - FieldValue::UP fv(doc.getValue(field_name)); - const T *value = dynamic_cast<const T *>(fv.get()); - ASSERT_TRUE(value); - return *value; -} - -template <typename T> -T -Test::cvtValueAs(const FieldValue::UP &fv) -{ - ASSERT_TRUE(fv.get() != NULL); - const T *value = dynamic_cast<const T *>(fv.get()); - ASSERT_TRUE(value); - return *value; -} - -template <typename T> -T -Test::cvtAttributeAs(const FieldValue::UP &fv) -{ - ASSERT_TRUE(fv.get() != NULL); - return cvtValueAs<T>(fv); -} - -template <typename T> -T -Test::cvtSummaryAs(bool markup, const FieldValue::UP &fv) -{ - ASSERT_TRUE(fv.get() != NULL); - FieldValue::UP r = SFC::convertSummaryField(markup, *fv); - return cvtValueAs<T>(r); -} - -void Test::checkString(const string &str, const FieldValue *value) { - ASSERT_TRUE(value); - const StringFieldValue *s = dynamic_cast<const StringFieldValue *>(value); - ASSERT_TRUE(s); - // fprintf(stderr, ">>>%s<<< >>>%s<<<\n", str.c_str(), s->getValue().c_str()); - EXPECT_EQUAL(str, s->getValue()); -} - -void Test::checkData(const search::RawBuf &buf, const FieldValue *value) { - ASSERT_TRUE(value); - const RawFieldValue *s = dynamic_cast<const RawFieldValue *>(value); - ASSERT_TRUE(s); - auto got = s->getAsRaw(); - ASSERT_EQUAL(buf.GetUsedLen(), got.second); - EXPECT_TRUE(memcmp(buf.GetDrainPos(), got.first, got.second) == 0); -} - -void Test::checkTensor(const Value::UP &tensor, const FieldValue *value) { - ASSERT_TRUE(value); - const TensorFieldValue *s = dynamic_cast<const TensorFieldValue *>(value); - ASSERT_TRUE(s); - auto tvalue = s->getAsTensorPtr(); - EXPECT_EQUAL(tensor.get() != nullptr, tvalue != nullptr); - if (tensor) { - EXPECT_EQUAL(*tensor, *tvalue); - } -} - -template <unsigned int N> -void Test::checkArray(const char *(&str)[N], const FieldValue *value) { - ASSERT_TRUE(value); - const ArrayFieldValue *a = dynamic_cast<const ArrayFieldValue *>(value); - ASSERT_TRUE(a); - EXPECT_EQUAL(N, a->size()); - for (size_t i = 0; i < a->size() && i < N; ++i) { - checkString(str[i], &(*a)[i]); - } -} - -void Test::setSummaryField(const string &field) { - _schema->addSummaryField(Schema::Field(field, search::index::schema::DataType::STRING)); -} - -void Test::setAttributeField(const string &field) { - _schema->addAttributeField(Schema::Field(field, search::index::schema::DataType::STRING)); -} - -void Test::requireThatSummaryIsAnUnmodifiedString() { - setSummaryField("string"); - Document summary = makeDocument(); - checkString("Foo Bar Baz", SFC::convertSummaryField(false, - *summary.getValue("string")).get()); -} - -void Test::requireThatAttributeIsAnUnmodifiedString() { - setAttributeField("string"); - Document attribute = makeDocument(); - checkString("Foo Bar Baz", - attribute.getValue("string").get()); -} - -void Test::requireThatArrayIsFlattenedInSummaryField() { - setSummaryField("string_array"); - Document summary = makeDocument(); - FieldBlock expect("[\"\\\"foO\\\"\",\"ba\\\\R\"]"); - checkData(expect.binary, - SFC::convertSummaryField(false, - *summary.getValue("string_array")).get()); -} - -void Test::requireThatWeightedSetIsFlattenedInSummaryField() { - setSummaryField("string_wset"); - Document summary = makeDocument(); - FieldBlock expect("[{\"item\":\"\\\"foo\\\"\",\"weight\":2},{\"item\":\"ba\\\\r\",\"weight\":4}]"); - checkData(expect.binary, - SFC::convertSummaryField(false, - *summary.getValue("string_wset")).get()); -} - -void Test::requireThatPositionsAreTransformedInSummary() { - setSummaryField("position1"); - setSummaryField("position2"); - Document summary = makeDocument(); - FieldValue::UP fv = summary.getValue("position1"); - EXPECT_EQUAL(5, cvtSummaryAs<IntFieldValue>(false, fv).getValue()); - FieldValue::UP fv2 = summary.getValue("position2"); - EXPECT_EQUAL(24, cvtSummaryAs<LongFieldValue>(false, fv2).getValue()); -} - -void Test::requireThatArrayIsPreservedInAttributeField() { - setAttributeField("string_array"); - Document attribute = makeDocument(); - const char *array[] = { "\"foO\"", "ba\\R" }; - checkArray(array, - attribute.getValue("string_array").get()); -} - -void Test::requireThatPositionsAreTransformedInAttributeField() { - setAttributeField("position1"); - setAttributeField("position2"); - Document attr = makeDocument(); - FieldValue::UP fv = attr.getValue("position1"); - EXPECT_EQUAL(5, cvtAttributeAs<IntFieldValue>(fv).getValue()); - fv = attr.getValue("position2"); - EXPECT_EQUAL(24, cvtAttributeAs<LongFieldValue>(fv).getValue()); -} - -void Test::requireThatPositionArrayIsTransformedInAttributeField() { - setAttributeField("position2_array"); - Document attr = makeDocument(); - FieldValue::UP fv = attr.getValue("position2_array"); - ArrayFieldValue a = cvtAttributeAs<ArrayFieldValue>(fv); - EXPECT_EQUAL(2u, a.size()); - EXPECT_EQUAL(24, dynamic_cast<LongFieldValue &>(a[0]).getValue()); - EXPECT_EQUAL(48, dynamic_cast<LongFieldValue &>(a[1]).getValue()); -} - -void Test::requireThatPositionWeightedSetIsTransformedInAttributeField() { - setAttributeField("position2_wset"); - Document attr = makeDocument(); - FieldValue::UP fv = attr.getValue("position2_wset"); - WeightedSetFieldValue w = cvtAttributeAs<WeightedSetFieldValue>(fv); - EXPECT_EQUAL(2u, w.size()); - WeightedSetFieldValue::iterator it = w.begin(); - EXPECT_EQUAL(24, dynamic_cast<const LongFieldValue&>(*it->first).getValue()); - EXPECT_EQUAL(4, dynamic_cast<IntFieldValue &>(*it->second).getValue()); - ++it; - EXPECT_EQUAL(48, dynamic_cast<const LongFieldValue&>(*it->first).getValue()); - EXPECT_EQUAL(2, dynamic_cast<IntFieldValue &>(*it->second).getValue()); -} - -void Test::requireThatAttributeCanBePrimitiveTypes() { - setAttributeField("int"); - setAttributeField("long"); - setAttributeField("short"); - setAttributeField("byte"); - setAttributeField("double"); - setAttributeField("float"); - Document attribute = makeDocument(); - FieldValue::UP fv = attribute.getValue("int"); - EXPECT_EQUAL(42, cvtAttributeAs<IntFieldValue>(fv).getValue()); - fv = attribute.getValue("long"); - EXPECT_EQUAL(84, cvtAttributeAs<LongFieldValue>(fv).getValue()); - fv = attribute.getValue("short"); - EXPECT_EQUAL(21, cvtAttributeAs<ShortFieldValue>(fv).getValue()); - fv = attribute.getValue("byte"); - EXPECT_EQUAL(11, cvtAttributeAs<ByteFieldValue>(fv).getValue()); - fv = attribute.getValue("double"); - EXPECT_EQUAL(0.4, cvtAttributeAs<DoubleFieldValue>(fv).getValue()); - fv = attribute.getValue("float"); - EXPECT_EQUAL(0.2f, cvtAttributeAs<FloatFieldValue>(fv).getValue()); -} - -void Test::requireThatSummaryCanBePrimitiveTypes() { - setSummaryField("int"); - setSummaryField("long"); - setSummaryField("short"); - setSummaryField("byte"); - setSummaryField("double"); - setSummaryField("float"); - Document summary = makeDocument(); - FieldValue::UP fv = summary.getValue("int"); - EXPECT_EQUAL(42, cvtSummaryAs<IntFieldValue>(false, fv).getValue()); - fv = summary.getValue("long"); - EXPECT_EQUAL(84, cvtSummaryAs<LongFieldValue>(false, fv).getValue()); - fv = summary.getValue("short"); - EXPECT_EQUAL(21, cvtSummaryAs<ShortFieldValue>(false, fv).getValue()); - fv = summary.getValue("byte"); - EXPECT_EQUAL(11, cvtSummaryAs<ShortFieldValue>(false, fv).getValue()); - fv = summary.getValue("double"); - EXPECT_EQUAL(0.4, cvtSummaryAs<DoubleFieldValue>(false, fv).getValue()); - fv = summary.getValue("float"); - EXPECT_EQUAL(0.2f, cvtSummaryAs<FloatFieldValue>(false, fv).getValue()); -} - -void Test::requireThatSummaryHandlesCjk() { - Document summary = makeDocument(); - FieldValue::UP fv = summary.getValue("chinese"); - EXPECT_EQUAL("我就是那个\037大灰狼\037", - cvtSummaryAs<StringFieldValue>(true, fv).getValue()); -} - -void Test::requireThatSearchDataTypeUsesDefaultDataTypes() { - const StructDataType *uri = - dynamic_cast<const StructDataType *>(SearchDataType::URI); - ASSERT_TRUE(uri); - ASSERT_TRUE(uri->hasField("all")); - ASSERT_TRUE(uri->hasField("scheme")); - ASSERT_TRUE(uri->hasField("host")); - ASSERT_TRUE(uri->hasField("port")); - ASSERT_TRUE(uri->hasField("path")); - ASSERT_TRUE(uri->hasField("query")); - ASSERT_TRUE(uri->hasField("fragment")); - EXPECT_EQUAL(*DataType::STRING, uri->getField("all").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("scheme").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("host").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("port").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("path").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("query").getDataType()); - EXPECT_EQUAL(*DataType::STRING, uri->getField("fragment").getDataType()); -} - -void Test::requireThatLinguisticsAnnotationUsesDefaultDataTypes() { - EXPECT_EQUAL(*AnnotationType::TERM, *search::linguistics::TERM); - ASSERT_TRUE(AnnotationType::TERM->getDataType()); - ASSERT_TRUE(search::linguistics::TERM->getDataType()); - EXPECT_EQUAL(*AnnotationType::TERM->getDataType(), - *search::linguistics::TERM->getDataType()); -} - -void -Test::requireThatPredicateIsPrinted() -{ - auto input = std::make_unique<Slime>(); - Cursor &obj = input->setObject(); - obj.setLong(Predicate::NODE_TYPE, Predicate::TYPE_FEATURE_SET); - obj.setString(Predicate::KEY, "foo"); - Cursor &arr = obj.setArray(Predicate::SET); - arr.addString("bar"); - - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("predicate", PredicateFieldValue(std::move(input))); - - checkString("'foo' in ['bar']\n", - SFC::convertSummaryField(false, *doc.getValue("predicate")).get()); -} - -Value::UP make_tensor(const TensorSpec &spec) { - return SimpleValue::from_spec(spec); -} - -void -Test::requireThatTensorIsNotConverted() -{ - TensorDataType tensorDataType(ValueType::from_spec("tensor(x{},y{})")); - TensorFieldValue tensorFieldValue(tensorDataType); - tensorFieldValue = make_tensor(TensorSpec("tensor(x{},y{})") - .add({{"x", "4"}, {"y", "5"}}, 7)); - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("tensor", tensorFieldValue); - - TEST_CALL(checkTensor(make_tensor(TensorSpec("tensor(x{},y{})") - .add({{"x", "4"}, {"y", "5"}}, 7)), - SFC::convertSummaryField(false, - *doc.getValue("tensor")).get())); - doc.setValue("tensor", TensorFieldValue()); - - TEST_CALL(checkTensor(Value::UP(), - SFC::convertSummaryField(false, - *doc.getValue("tensor")).get())); -} - -void Test::checkStringForAllConversions(const string& expected, const FieldValue* fv) { - ASSERT_TRUE(fv != nullptr); - checkString(expected, SFC::convertSummaryField(false, *fv).get()); -} - -const ReferenceDataType& Test::getAsRefType(const string& name) const { - return dynamic_cast<const ReferenceDataType&>(getDataType(name)); -} - -void Test::requireThatNonEmptyReferenceIsConvertedToStringWithId() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("ref", ReferenceFieldValue( - getAsRefType("Reference<target_dummy_document>"), - DocumentId("id:ns:target_dummy_document::foo"))); - - checkStringForAllConversions("id:ns:target_dummy_document::foo", - doc.getValue("ref").get()); -} - -void Test::requireThatEmptyReferenceIsConvertedToEmptyString() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - doc.setValue("ref", ReferenceFieldValue( - getAsRefType("Reference<target_dummy_document>"))); - - checkStringForAllConversions("", doc.getValue("ref").get()); - -} - -// Own test for this to ensure that SlimeFiller code path is executed, -// as this only triggers for composite field types. -void Test::requireThatReferenceInCompositeTypeEmitsSlimeData() { - Document doc(getDocType(), DocumentId("id:ns:indexingdocument::")); - doc.setRepo(*_documentRepo); - - StructFieldValue sfv(getDataType("indexingdocument.header.nested")); - sfv.setValue("inner_ref", ReferenceFieldValue( - getAsRefType("Reference<target_dummy_document>"), - DocumentId("id:ns:target_dummy_document::foo"))); - doc.setValue("nested", sfv); - - FieldBlock expect(R"({"inner_ref":"id:ns:target_dummy_document::foo"})"); - checkData(expect.binary, - SFC::convertSummaryField(false, *doc.getValue("nested")).get()); -} - -} // namespace - -TEST_APPHOOK(Test); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index 6aba9614e73..37ee0697149 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -9,6 +9,7 @@ vespa_add_library(searchsummary_docsummary OBJECT check_undefined_value_visitor.cpp copy_dfw.cpp docsum_field_writer.cpp + docsum_field_writer_commands.cpp docsum_field_writer_factory.cpp docsum_store_document.cpp docsumstate.cpp @@ -34,6 +35,7 @@ vespa_add_library(searchsummary_docsummary OBJECT searchdatatype.cpp simple_dfw.cpp slime_filler.cpp + slime_filler_filter.cpp struct_fields_resolver.cpp struct_map_attribute_combiner_dfw.cpp summaryfeaturesdfw.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp index 82f3d086b79..b36a2f8383e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp @@ -1,6 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "annotation_converter.h" +#include "i_juniper_converter.h" #include "linguisticsannotation.h" #include <vespa/document/annotation/alternatespanlist.h> #include <vespa/document/annotation/annotation.h> @@ -12,8 +13,8 @@ #include <vespa/vespalib/util/exceptions.h> #include <utility> -using document::Annotation; using document::AlternateSpanList; +using document::Annotation; using document::FieldValue; using document::SimpleSpanList; using document::Span; @@ -27,10 +28,10 @@ namespace search::docsummary { namespace { -vespalib::string -getSpanString(const vespalib::string &s, const Span &span) +vespalib::stringref +getSpanString(vespalib::stringref s, const Span &span) { - return vespalib::string(&s[span.from()], &s[span.from() + span.length()]); + return {s.data() + span.from(), static_cast<size_t>(span.length())}; } struct SpanFinder : SpanTreeVisitor { @@ -78,6 +79,16 @@ const StringFieldValue &ensureStringFieldValue(const FieldValue &value) { } +AnnotationConverter::AnnotationConverter(IJuniperConverter& juniper_converter) + : IStringFieldConverter(), + _juniper_converter(juniper_converter), + _text(), + _out() +{ +} + +AnnotationConverter::~AnnotationConverter() = default; + template <typename ForwardIt> void AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last) { @@ -85,28 +96,28 @@ AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it, if (annCnt > 1 || (annCnt == 1 && it->second)) { annotateSpans(span, it, last); } else { - out << getSpanString(text, span) << juniper::separators::unit_separator_string; + _out << getSpanString(_text, span) << juniper::separators::unit_separator_string; } } template <typename ForwardIt> void AnnotationConverter::annotateSpans(const document::Span& span, ForwardIt it, ForwardIt last) { - out << juniper::separators::interlinear_annotation_anchor_string // ANCHOR - << (getSpanString(text, span)) - << juniper::separators::interlinear_annotation_separator_string; // SEPARATOR + _out << juniper::separators::interlinear_annotation_anchor_string // ANCHOR + << (getSpanString(_text, span)) + << juniper::separators::interlinear_annotation_separator_string; // SEPARATOR while (it != last) { if (it->second) { - out << ensureStringFieldValue(*it->second).getValue(); + _out << ensureStringFieldValue(*it->second).getValue(); } else { - out << getSpanString(text, span); + _out << getSpanString(_text, span); } if (++it != last) { - out << " "; + _out << " "; } } - out << juniper::separators::interlinear_annotation_terminator_string // TERMINATOR - << juniper::separators::unit_separator_string; + _out << juniper::separators::interlinear_annotation_terminator_string // TERMINATOR + << juniper::separators::unit_separator_string; } void @@ -114,11 +125,11 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value) { StringFieldValue::SpanTrees trees = value.getSpanTrees(); const SpanTree *tree = StringFieldValue::findTree(trees, linguistics::SPANTREE_NAME); - typedef std::pair<Span, const FieldValue *> SpanTerm; - typedef std::vector<SpanTerm> SpanTermVector; + using SpanTerm = std::pair<Span, const FieldValue *>; + using SpanTermVector = std::vector<SpanTerm>; if (!tree) { // Treat a string without annotations as a single span. - SpanTerm str(Span(0, text.size()), + SpanTerm str(Span(0, _text.size()), static_cast<const FieldValue*>(nullptr)); handleAnnotations(str.first, &str, &str + 1); return; @@ -126,7 +137,7 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value) SpanTermVector terms; for (const Annotation& annotation : *tree) { // For now, skip any composite spans. - const Span *span = dynamic_cast<const Span*>(annotation.getSpanNode()); + const auto *span = dynamic_cast<const Span*>(annotation.getSpanNode()); if ((span != nullptr) && annotation.valid() && (annotation.getType() == *linguistics::TERM)) { terms.push_back(std::make_pair(getSpan(*span), @@ -148,11 +159,20 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value) handleAnnotations(it_begin->first, it_begin, it); endPos = it_begin->first.from() + it_begin->first.length(); } - int32_t wantEndPos = text.size(); + int32_t wantEndPos = _text.size(); if (endPos < wantEndPos) { Span tmpSpan(endPos, wantEndPos - endPos); handleAnnotations(tmpSpan, ite, ite); } } +void +AnnotationConverter::convert(const StringFieldValue &input, vespalib::slime::Inserter& inserter) +{ + _out.clear(); + _text = input.getValueRef(); + handleIndexingTerms(input); + _juniper_converter.convert(_out.str(), inserter); +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h index 37e3c18606e..59b03c64540 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h @@ -2,34 +2,37 @@ #pragma once -#include <vespa/vespalib/stllike/string.h> +#include "i_string_field_converter.h" +#include <vespa/vespalib/stllike/asciistream.h> -namespace document -{ -class Span; -class StringFieldValue; -} +namespace document { class Span; } namespace vespalib { class asciistream; } namespace search::docsummary { +class IJuniperConverter; + /* * Class converting a string field value with annotations into a string - * with interlinear annotations used by juniper. + * with interlinear annotations used by juniper before passing it to + * the juniper converter. */ -struct AnnotationConverter { - const vespalib::string text; - vespalib::asciistream& out; +class AnnotationConverter : public IStringFieldConverter +{ + IJuniperConverter& _juniper_converter; + vespalib::stringref _text; + vespalib::asciistream _out; template <typename ForwardIt> void handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last); template <typename ForwardIt> void annotateSpans(const document::Span& span, ForwardIt it, ForwardIt last); -public: - AnnotationConverter(const vespalib::string& s, vespalib::asciistream& stream) - : text(s), out(stream) {} void handleIndexingTerms(const document::StringFieldValue& value); +public: + AnnotationConverter(IJuniperConverter& juniper_converter); + ~AnnotationConverter() override; + void convert(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.cpp index 6e00511398c..889169f8888 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.cpp @@ -41,7 +41,7 @@ AttributeCombinerDFW::create(const vespalib::string &fieldName, IAttributeContex { StructFieldsResolver structFields(fieldName, attrCtx, true); if (structFields.has_error()) { - return std::unique_ptr<DocsumFieldWriter>(); + return {}; } else if (structFields.is_map_of_struct()) { return std::make_unique<StructMapAttributeCombinerDFW>(fieldName, structFields, filter_elements, std::move(matching_elems_fields)); } @@ -49,15 +49,15 @@ AttributeCombinerDFW::create(const vespalib::string &fieldName, IAttributeContex } void -AttributeCombinerDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) const +AttributeCombinerDFW::insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const { - auto& fieldWriterState = state->_fieldWriterStates[_stateIndex]; + auto& fieldWriterState = state._fieldWriterStates[_stateIndex]; if (!fieldWriterState) { const MatchingElements *matching_elements = nullptr; if (_filter_elements) { - matching_elements = &state->get_matching_elements(*_matching_elems_fields); + matching_elements = &state.get_matching_elements(*_matching_elems_fields); } - fieldWriterState = allocFieldWriterState(*state->_attrCtx, state->get_stash(), matching_elements); + fieldWriterState = allocFieldWriterState(*state._attrCtx, state.get_stash(), matching_elements); } fieldWriterState->insertField(docid, target); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.h index 33b61718392..0e1163df5e2 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/attribute_combiner_dfw.h @@ -39,7 +39,7 @@ public: bool setFieldWriterStateIndex(uint32_t fieldWriterStateIndex) override; static std::unique_ptr<DocsumFieldWriter> create(const vespalib::string &fieldName, search::attribute::IAttributeContext &attrCtx, bool filter_elements, std::shared_ptr<MatchingElementsFields> matching_elems_fields); - void insertField(uint32_t docid, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp index ce08da7f7f1..74d67aabe88 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp @@ -25,12 +25,12 @@ using search::attribute::IAttributeContext; using search::attribute::IAttributeVector; using search::attribute::IMultiValueAttribute; using search::attribute::IMultiValueReadView; +using vespalib::Issue; using vespalib::Memory; +using vespalib::eval::Value; using vespalib::slime::Cursor; using vespalib::slime::Inserter; using vespalib::slime::Symbol; -using vespalib::eval::Value; -using vespalib::Issue; namespace search::docsummary { @@ -53,16 +53,16 @@ public: explicit SingleAttrDFW(const vespalib::string & attrName) : AttrDFW(attrName) { } - void insertField(uint32_t docid, GetDocsumsState *state, ResType, Inserter &target) const override; - bool isDefaultValue(uint32_t docid, const GetDocsumsState * state) const override { - return get_attribute(*state).isUndefined(docid); + void insertField(uint32_t docid, GetDocsumsState& state, Inserter &target) const override; + bool isDefaultValue(uint32_t docid, const GetDocsumsState& state) const override { + return get_attribute(state).isUndefined(docid); } }; void -SingleAttrDFW::insertField(uint32_t docid, GetDocsumsState * state, ResType, Inserter &target) const +SingleAttrDFW::insertField(uint32_t docid, GetDocsumsState& state, Inserter &target) const { - const auto& v = get_attribute(*state); + const auto& v = get_attribute(state); switch (v.getBasicType()) { case BasicType::Type::UINT2: case BasicType::Type::UINT4: @@ -253,7 +253,7 @@ public: } } bool setFieldWriterStateIndex(uint32_t fieldWriterStateIndex) override; - void insertField(uint32_t docid, GetDocsumsState* state, ResType, Inserter& target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, Inserter& target) const override; }; bool @@ -301,16 +301,16 @@ make_field_writer_state(const vespalib::string& field_name, const IAttributeVect } void -MultiAttrDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) const +MultiAttrDFW::insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const { - auto& field_writer_state = state->_fieldWriterStates[_state_index]; + auto& field_writer_state = state._fieldWriterStates[_state_index]; if (!field_writer_state) { const MatchingElements *matching_elements = nullptr; if (_filter_elements) { - matching_elements = &state->get_matching_elements(*_matching_elems_fields); + matching_elements = &state.get_matching_elements(*_matching_elems_fields); } - const auto& attr = get_attribute(*state); - field_writer_state = make_field_writer_state(getAttributeName(), attr, state->get_stash(), matching_elements); + const auto& attr = get_attribute(state); + field_writer_state = make_field_writer_state(getAttributeName(), attr, state.get_stash(), matching_elements); } field_writer_state->insertField(docid, target); } @@ -347,7 +347,7 @@ AttributeDFWFactory::create(const IAttributeManager& attr_mgr, const auto* attr = ctx->getAttribute(attr_name); if (attr == nullptr) { Issue::report("No valid attribute vector found: '%s'", attr_name.c_str()); - return std::unique_ptr<DocsumFieldWriter>(); + return {}; } if (attr->hasMultiValue()) { return create_multi_writer(*attr, filter_elements, std::move(matching_elems_fields)); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.cpp index 2dc04c03845..94e5420881b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.cpp @@ -17,7 +17,7 @@ CopyDFW::CopyDFW(const vespalib::string& inputField) CopyDFW::~CopyDFW() = default; void -CopyDFW::insertField(uint32_t, const IDocsumStoreDocument* doc, GetDocsumsState *, ResType, +CopyDFW::insertField(uint32_t, const IDocsumStoreDocument* doc, GetDocsumsState&, vespalib::slime::Inserter &target) const { if (doc != nullptr) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.h index 76c10f47bf1..175bc5b3246 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/copy_dfw.h @@ -21,8 +21,7 @@ public: ~CopyDFW() override; bool IsGenerated() const override { return false; } - void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, ResType type, - vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.cpp index c698f0603c6..452ca98ea0b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.cpp @@ -13,7 +13,7 @@ DocsumFieldWriter::getAttributeName() const } bool -DocsumFieldWriter::isDefaultValue(uint32_t, const GetDocsumsState*) const +DocsumFieldWriter::isDefaultValue(uint32_t, const GetDocsumsState&) const { return false; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.h index a1af91a2b3f..77dc5d5d2d6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer.h @@ -2,7 +2,6 @@ #pragma once -#include "res_type_utils.h" #include <vespa/vespalib/stllike/string.h> namespace vespalib::slime { struct Inserter; } @@ -24,9 +23,9 @@ public: } virtual ~DocsumFieldWriter() = default; virtual bool IsGenerated() const = 0; - virtual void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const = 0; + virtual void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, vespalib::slime::Inserter &target) const = 0; virtual const vespalib::string & getAttributeName() const; - virtual bool isDefaultValue(uint32_t docid, const GetDocsumsState * state) const; + virtual bool isDefaultValue(uint32_t docid, const GetDocsumsState& state) const; void setIndex(size_t v) { _index = v; } size_t getIndex() const { return _index; } virtual bool setFieldWriterStateIndex(uint32_t fieldWriterStateIndex); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp new file mode 100644 index 00000000000..b04963a5907 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "docsum_field_writer_commands.h" + +namespace search::docsummary::command { + +const vespalib::string abs_distance("absdist"); +const vespalib::string attribute("attribute"); +const vespalib::string attribute_combiner("attributecombiner"); +const vespalib::string copy("copy"); +const vespalib::string documentid("documentid"); +const vespalib::string dynamic_teaser("dynamicteaser"); +const vespalib::string empty("empty"); +const vespalib::string geo_position("geopos"); +const vespalib::string matched_attribute_elements_filter("matchedattributeelementsfilter"); +const vespalib::string matched_elements_filter("matchedelementsfilter"); +const vespalib::string positions("positions"); +const vespalib::string rank_features("rankfeatures"); +const vespalib::string summary_features("summaryfeatures"); + +} + diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h new file mode 100644 index 00000000000..8ca508a6b60 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h @@ -0,0 +1,27 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace search::docsummary::command { + +/** + * This contains all commands that map to specific docsum field writer(s) when setting up a summary result class. + */ + +extern const vespalib::string abs_distance; +extern const vespalib::string attribute; +extern const vespalib::string attribute_combiner; +extern const vespalib::string copy; +extern const vespalib::string documentid; +extern const vespalib::string dynamic_teaser; +extern const vespalib::string empty; +extern const vespalib::string geo_position; +extern const vespalib::string matched_attribute_elements_filter; +extern const vespalib::string matched_elements_filter; +extern const vespalib::string positions; +extern const vespalib::string rank_features; +extern const vespalib::string summary_features; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp index b3fa6c68b87..dc215d9c2ba 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp @@ -1,8 +1,9 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -#include "docsum_field_writer_factory.h" #include "attribute_combiner_dfw.h" #include "copy_dfw.h" +#include "docsum_field_writer_commands.h" +#include "docsum_field_writer_factory.h" #include "document_id_dfw.h" #include "empty_dfw.h" #include "geoposdfw.h" @@ -34,65 +35,83 @@ DocsumFieldWriterFactory::has_attribute_manager() const noexcept return getEnvironment().getAttributeManager() != nullptr; } +namespace { + +void +throw_if_nullptr(const std::unique_ptr<DocsumFieldWriter>& writer, + const vespalib::string& command) +{ + if (writer.get() == nullptr) { + throw IllegalArgumentException("Failed to create docsum field writer for command '" + command + "'."); + } +} + +void +throw_missing_source(const vespalib::string& command) +{ + throw IllegalArgumentException("Missing source for command '" + command + "'."); +} + +} + std::unique_ptr<DocsumFieldWriter> -DocsumFieldWriterFactory::create_docsum_field_writer(const vespalib::string& fieldName, const vespalib::string& overrideName, const vespalib::string& argument, bool& rc) +DocsumFieldWriterFactory::create_docsum_field_writer(const vespalib::string& field_name, + const vespalib::string& command, + const vespalib::string& source) { - rc = false; std::unique_ptr<DocsumFieldWriter> fieldWriter; - if (overrideName == "dynamicteaser") { - if ( ! argument.empty() ) { + if (command == command::dynamic_teaser) { + if ( ! source.empty() ) { auto fw = std::make_unique<DynamicTeaserDFW>(getEnvironment().getJuniper()); auto fw_ptr = fw.get(); fieldWriter = std::move(fw); - rc = fw_ptr->Init(fieldName.c_str(), argument); + if (!fw_ptr->Init(field_name.c_str(), source)) { + throw IllegalArgumentException("Failed to initialize DynamicTeaserDFW."); + } } else { - throw IllegalArgumentException("Missing argument"); + throw_missing_source(command); } - } else if (overrideName == "summaryfeatures") { + } else if (command == command::summary_features) { fieldWriter = std::make_unique<SummaryFeaturesDFW>(); - rc = true; - } else if (overrideName == "rankfeatures") { + } else if (command == command::rank_features) { fieldWriter = std::make_unique<RankFeaturesDFW>(); - rc = true; - } else if (overrideName == "empty") { + } else if (command == command::empty) { fieldWriter = std::make_unique<EmptyDFW>(); - rc = true; - } else if (overrideName == "copy") { - if ( ! argument.empty() ) { - fieldWriter = std::make_unique<CopyDFW>(argument); - rc = true; + } else if (command == command::copy) { + if ( ! source.empty() ) { + fieldWriter = std::make_unique<CopyDFW>(source); } else { - throw IllegalArgumentException("Missing argument"); + throw_missing_source(command); } - } else if (overrideName == "absdist") { + } else if (command == command::abs_distance) { if (has_attribute_manager()) { - fieldWriter = AbsDistanceDFW::create(argument.c_str(), getEnvironment().getAttributeManager()); - rc = static_cast<bool>(fieldWriter); + fieldWriter = AbsDistanceDFW::create(source.c_str(), getEnvironment().getAttributeManager()); + throw_if_nullptr(fieldWriter, command); } - } else if (overrideName == "positions") { + } else if (command == command::positions) { if (has_attribute_manager()) { - fieldWriter = PositionsDFW::create(argument.c_str(), getEnvironment().getAttributeManager(), _use_v8_geo_positions); - rc = static_cast<bool>(fieldWriter); + fieldWriter = PositionsDFW::create(source.c_str(), getEnvironment().getAttributeManager(), _use_v8_geo_positions); + throw_if_nullptr(fieldWriter, command); } - } else if (overrideName == "geopos") { + } else if (command == command::geo_position) { if (has_attribute_manager()) { - fieldWriter = GeoPositionDFW::create(argument.c_str(), getEnvironment().getAttributeManager(), _use_v8_geo_positions); - rc = static_cast<bool>(fieldWriter); + fieldWriter = GeoPositionDFW::create(source.c_str(), getEnvironment().getAttributeManager(), _use_v8_geo_positions); + throw_if_nullptr(fieldWriter, command); } - } else if (overrideName == "attribute") { + } else if (command == command::attribute) { if (has_attribute_manager()) { - fieldWriter = AttributeDFWFactory::create(*getEnvironment().getAttributeManager(), argument); - rc = true; // Allow missing attribute vector + fieldWriter = AttributeDFWFactory::create(*getEnvironment().getAttributeManager(), source); + // Missing attribute vector is allowed, so throw_if_nullptr() is NOT used. } - } else if (overrideName == "attributecombiner") { + } else if (command == command::attribute_combiner) { if (has_attribute_manager()) { auto attr_ctx = getEnvironment().getAttributeManager()->createContext(); - const vespalib::string& source_field = argument.empty() ? fieldName : argument; + const vespalib::string& source_field = source.empty() ? field_name : source; fieldWriter = AttributeCombinerDFW::create(source_field, *attr_ctx, false, std::shared_ptr<MatchingElementsFields>()); - rc = static_cast<bool>(fieldWriter); + throw_if_nullptr(fieldWriter, command); } - } else if (overrideName == "matchedattributeelementsfilter") { - const vespalib::string& source_field = argument.empty() ? fieldName : argument; + } else if (command == command::matched_attribute_elements_filter) { + const vespalib::string& source_field = source.empty() ? field_name : source; if (has_attribute_manager()) { auto attr_ctx = getEnvironment().getAttributeManager()->createContext(); if (attr_ctx->getAttribute(source_field) != nullptr) { @@ -100,20 +119,19 @@ DocsumFieldWriterFactory::create_docsum_field_writer(const vespalib::string& fie } else { fieldWriter = AttributeCombinerDFW::create(source_field, *attr_ctx, true, _matching_elems_fields); } - rc = static_cast<bool>(fieldWriter); + throw_if_nullptr(fieldWriter, command); } - } else if (overrideName == "matchedelementsfilter") { - const vespalib::string& source_field = argument.empty() ? fieldName : argument; + } else if (command == command::matched_elements_filter) { + const vespalib::string& source_field = source.empty() ? field_name : source; if (has_attribute_manager()) { auto attr_ctx = getEnvironment().getAttributeManager()->createContext(); fieldWriter = MatchedElementsFilterDFW::create(source_field,*attr_ctx, _matching_elems_fields); - rc = static_cast<bool>(fieldWriter); + throw_if_nullptr(fieldWriter, command); } - } else if (overrideName == "documentid") { + } else if (command == command::documentid) { fieldWriter = std::make_unique<DocumentIdDFW>(); - rc = true; } else { - throw IllegalArgumentException("unknown override operation '" + overrideName + "' for field '" + fieldName + "'."); + throw IllegalArgumentException("Unknown command '" + command + "'."); } return fieldWriter; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h index bab7153009d..e341f49c25b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.h @@ -24,7 +24,9 @@ protected: public: DocsumFieldWriterFactory(bool use_v8_geo_positions, const IDocsumEnvironment& env); ~DocsumFieldWriterFactory() override; - std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string& fieldName, const vespalib::string& overrideName, const vespalib::string& argument, bool& rc) override; + std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string& field_name, + const vespalib::string& command, + const vespalib::string& source) override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp index dca6e6f8bd3..35db818ac58 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "docsum_store_document.h" -#include "check_undefined_value_visitor.h" +#include "annotation_converter.h" #include "summaryfieldconverter.h" #include <vespa/document/base/exceptions.h> #include <vespa/document/datatype/datatype.h> @@ -50,7 +50,8 @@ DocsumStoreDocument::insert_juniper_field(const vespalib::string& field_name, ve { auto field_value = get_field_value(field_name); if (field_value) { - SummaryFieldConverter::insert_juniper_field(*field_value, inserter, true, converter); + AnnotationConverter stacked_converter(converter); + SummaryFieldConverter::insert_juniper_field(*field_value, inserter, stacked_converter); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h index b112b7ab0bf..7f3a88b05eb 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumstore.h @@ -15,10 +15,7 @@ class IDocsumStoreDocument; class IDocsumStore { public: - /** - * Convenience typedef. - */ - typedef std::unique_ptr<IDocsumStore> UP; + using UP = std::unique_ptr<IDocsumStore>; /** * Destructor. No cleanup needed for base class. diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp index 422400dc2ef..b4b663718bd 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.cpp @@ -14,8 +14,8 @@ LOG_SETUP(".searchlib.docsummary.docsumwriter"); using vespalib::Issue; -using vespalib::slime::ObjectInserter; using vespalib::Memory; +using vespalib::slime::ObjectInserter; namespace search::docsummary { @@ -48,7 +48,7 @@ DynamicDocsumWriter::resolveOutputClass(vespalib::stringref summaryClass) const } void -DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState *state, +DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter& topInserter) { if (rci.outputClass == nullptr) { @@ -61,10 +61,10 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) { const ResConfigEntry *resCfg = rci.outputClass->GetEntry(i); const DocsumFieldWriter *writer = resCfg->_docsum_field_writer.get(); - if (state->_args.needField(resCfg->_name) && ! writer->isDefaultValue(docid, state)) { + if (state._args.needField(resCfg->_name) && ! writer->isDefaultValue(docid, state)) { const Memory field_name(resCfg->_name.data(), resCfg->_name.size()); ObjectInserter inserter(docsum, field_name); - writer->insertField(docid, nullptr, state, resCfg->_type, inserter); + writer->insertField(docid, nullptr, state, inserter); } } } else { @@ -77,13 +77,15 @@ DynamicDocsumWriter::insertDocsum(const ResolveClassInfo & rci, uint32_t docid, vespalib::slime::Cursor & docsum = topInserter.insertObject(); for (uint32_t i = 0; i < rci.outputClass->GetNumEntries(); ++i) { const ResConfigEntry *outCfg = rci.outputClass->GetEntry(i); - if ( ! state->_args.needField(outCfg->_name)) continue; + if (!state._args.needField(outCfg->_name)) { + continue; + } const DocsumFieldWriter *writer = outCfg->_docsum_field_writer.get(); const Memory field_name(outCfg->_name.data(), outCfg->_name.size()); ObjectInserter inserter(docsum, field_name); if (writer != nullptr) { if (! writer->isDefaultValue(docid, state)) { - writer->insertField(docid, doc.get(), state, outCfg->_type, inserter); + writer->insertField(docid, doc.get(), state, inserter); } } else { if (doc) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h index c0579638593..52ddeebb0d6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsumwriter.h @@ -37,7 +37,7 @@ public: virtual ~IDocsumWriter() = default; virtual void InitState(const search::IAttributeManager & attrMan, GetDocsumsState& state, const ResolveClassInfo& rci) = 0; - virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState *state, + virtual void insertDocsum(const ResolveClassInfo & rci, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter & target) = 0; virtual ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const = 0; }; @@ -61,7 +61,7 @@ public: const ResultConfig *GetResultConfig() { return _resultConfig.get(); } void InitState(const search::IAttributeManager & attrMan, GetDocsumsState& state, const ResolveClassInfo& rci) override; - void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid, GetDocsumsState *state, + void insertDocsum(const ResolveClassInfo & outputClassInfo, uint32_t docid, GetDocsumsState& state, IDocsumStore *docinfos, Inserter & inserter) override; ResolveClassInfo resolveClassInfo(vespalib::stringref outputClassName) const override; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.cpp index 3d28dbdc6fb..911cd1acc0d 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.cpp @@ -10,7 +10,7 @@ DocumentIdDFW::DocumentIdDFW() = default; DocumentIdDFW::~DocumentIdDFW() = default; void -DocumentIdDFW::insertField(uint32_t, const IDocsumStoreDocument* doc, GetDocsumsState *, ResType, +DocumentIdDFW::insertField(uint32_t, const IDocsumStoreDocument* doc, GetDocsumsState&, vespalib::slime::Inserter &target) const { if (doc != nullptr) { diff --git a/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.h index 27766a86e83..b6a89ff7828 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/document_id_dfw.h @@ -16,7 +16,7 @@ public: DocumentIdDFW(); ~DocumentIdDFW() override; bool IsGenerated() const override { return false; } - void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index fcdb81defbf..0ea5bc9a604 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -114,8 +114,7 @@ class JuniperConverter : public IJuniperConverter public: JuniperConverter(const DynamicTeaserDFW& writer, uint32_t doc_id, GetDocsumsState& state); ~JuniperConverter() override; - void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) override; - void insert_juniper_field(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) override; + void convert(vespalib::stringref input, vespalib::slime::Inserter& inserter) override; }; JuniperConverter::JuniperConverter(const DynamicTeaserDFW& writer, uint32_t doc_id, GetDocsumsState& state) @@ -129,25 +128,19 @@ JuniperConverter::JuniperConverter(const DynamicTeaserDFW& writer, uint32_t doc_ JuniperConverter::~JuniperConverter() = default; void -JuniperConverter::insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) +JuniperConverter::convert(vespalib::stringref input, vespalib::slime::Inserter& inserter) { _writer.insert_juniper_field(_doc_id, input, _state, inserter); } -void -JuniperConverter::insert_juniper_field(const document::StringFieldValue& input, vespalib::slime::Inserter& inserter) -{ - _writer.insert_juniper_field(_doc_id, input.getValueRef(), _state, inserter); -} - } void -DynamicTeaserDFW::insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, ResType, +DynamicTeaserDFW::insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, vespalib::slime::Inserter &target) const { if (doc != nullptr) { - JuniperConverter converter(*this, docid, *state); + JuniperConverter converter(*this, docid, state); doc->insert_juniper_field(_input_field_name, target, converter); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.cpp index 37d2785ffa7..d7d59c06791 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.cpp @@ -9,7 +9,7 @@ EmptyDFW::EmptyDFW() = default; EmptyDFW::~EmptyDFW() = default; void -EmptyDFW::insertField(uint32_t, GetDocsumsState *, ResType, vespalib::slime::Inserter &target) const +EmptyDFW::insertField(uint32_t, GetDocsumsState&, vespalib::slime::Inserter &target) const { // insert explicitly-empty field? // target.insertNix(); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.h index 9a250450b1f..3e05e029722 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/empty_dfw.h @@ -16,7 +16,7 @@ public: ~EmptyDFW() override; bool IsGenerated() const override { return true; } - void insertField(uint32_t docid, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp index 474f329799b..6d668561651 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.cpp @@ -55,14 +55,14 @@ void fmtZcurve(int64_t zval, vespalib::slime::Inserter &target, bool useV8geoPos } void -GeoPositionDFW::insertField(uint32_t docid, GetDocsumsState * dsState, ResType, vespalib::slime::Inserter &target) const +GeoPositionDFW::insertField(uint32_t docid, GetDocsumsState& dsState, vespalib::slime::Inserter &target) const { using vespalib::slime::Cursor; using vespalib::slime::ObjectSymbolInserter; using vespalib::slime::Symbol; using vespalib::slime::ArrayInserter; - const auto& attribute = get_attribute(*dsState); + const auto& attribute = get_attribute(dsState); if (attribute.hasMultiValue()) { uint32_t entries = attribute.getValueCount(docid); if (entries == 0 && _useV8geoPositions) return; @@ -104,21 +104,20 @@ GeoPositionDFW::create(const char *attribute_name, const IAttributeManager *attribute_manager, bool useV8geoPositions) { - GeoPositionDFW::UP ret; if (attribute_manager != nullptr) { if (!attribute_name) { LOG(warning, "create: missing attribute name '%p'", attribute_name); - return ret; + return {}; } IAttributeContext::UP context = attribute_manager->createContext(); if (!context.get()) { LOG(warning, "create: could not create context from attribute manager"); - return ret; + return {}; } const IAttributeVector *attribute = context->getAttribute(attribute_name); if (!attribute) { Issue::report("GeoPositionDFW::create: could not get attribute '%s' from context", attribute_name); - return ret; + return {}; } } return std::make_unique<GeoPositionDFW>(attribute_name, useV8geoPositions); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h index 1bc8b523160..6e470d479ff 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/geoposdfw.h @@ -14,9 +14,9 @@ class GeoPositionDFW : public AttrDFW private: bool _useV8geoPositions; public: - typedef std::unique_ptr<GeoPositionDFW> UP; + using UP = std::unique_ptr<GeoPositionDFW>; GeoPositionDFW(const vespalib::string & attrName, bool useV8geoPositions); - void insertField(uint32_t docid, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; static UP create(const char *attribute_name, const IAttributeManager *attribute_manager, bool useV8geoPositions); }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h b/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h index 927fef26d1a..6a5cd691857 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h @@ -16,7 +16,12 @@ class IDocsumFieldWriterFactory { public: virtual ~IDocsumFieldWriterFactory() = default; - virtual std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string& fieldName, const vespalib::string& overrideName, const vespalib::string& argument, bool& rc) = 0; + /** + * Implementations can throw vespalib::IllegalArgumentException if setup of field writer fails. + */ + virtual std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string& field_name, + const vespalib::string& command, + const vespalib::string& source) = 0; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h index 00751082567..a52002d37f5 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h @@ -12,17 +12,12 @@ namespace search::docsummary { /** * Interface class for inserting a dynamic string based on an * annotated full string and query context. - * - * For streaming search we use the same interface in an adapter that - * calls a snippet modifier (vsm::SnippetModifier) to add the annotation - * needed by juniper. */ class IJuniperConverter { public: virtual ~IJuniperConverter() = default; - virtual void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) = 0; - virtual void insert_juniper_field(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) = 0; + virtual void convert(vespalib::stringref input, vespalib::slime::Inserter& inserter) = 0; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_string_field_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/i_string_field_converter.h new file mode 100644 index 00000000000..0e80fc28ded --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_string_field_converter.h @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace document { class StringFieldValue; } +namespace vespalib::slime { struct Inserter; } + +namespace search::docsummary { + +/** + * Interface class for inserting a dynamic string. + */ +class IStringFieldConverter +{ +public: + virtual ~IStringFieldConverter() = default; + virtual void convert(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) = 0; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp index 814fe0aafe4..a13f65db5ce 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniper_query_adapter.cpp @@ -27,8 +27,9 @@ JuniperQueryAdapter::SkipItem(search::SimpleQueryStackDumpIterator *iterator) co uint32_t skipCount = iterator->getArity(); while (skipCount > 0) { - if (!iterator->next()) + if (!iterator->next()) { return false; // stack too small + } skipCount = skipCount - 1 + iterator->getArity(); } return true; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h index 7dcf3d16e26..24c99873f58 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h @@ -48,8 +48,8 @@ class DynamicTeaserDFW : public JuniperTeaserDFW public: explicit DynamicTeaserDFW(const juniper::Juniper * juniper) : JuniperTeaserDFW(juniper) { } - void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, - ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, + vespalib::slime::Inserter &target) const override; void insert_juniper_field(uint32_t docid, vespalib::stringref input, GetDocsumsState& state, vespalib::slime::Inserter& inserter) const; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp index 0256965e7f4..e8ff3068a4c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.cpp @@ -100,8 +100,9 @@ KeywordExtractor::GetLegalIndexSpec() } for (const auto & index : _legalIndexes) { - if (!spec.empty()) + if (!spec.empty()) { spec.append(';'); + } spec.append(index); } return spec; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h index 5f87de762f9..9d46f0c8d89 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/keywordextractor.h @@ -23,7 +23,7 @@ public: }; private: - typedef vespalib::hash_set<vespalib::string> Set; + using Set = vespalib::hash_set<vespalib::string>; const IDocsumEnvironment *_env; std::vector<IndexPrefix> _legalPrefixes; Set _legalIndexes; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp index fe06212bcd2..1a029cfd16f 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.cpp @@ -54,7 +54,7 @@ MatchedElementsFilterDFW::create(const std::string& input_field_name, { StructFieldsResolver resolver(input_field_name, attr_ctx, false); if (resolver.has_error()) { - return std::unique_ptr<DocsumFieldWriter>(); + return {}; } resolver.apply_to(*matching_elems_fields); return std::make_unique<MatchedElementsFilterDFW>(input_field_name, std::move(matching_elems_fields)); @@ -63,12 +63,12 @@ MatchedElementsFilterDFW::create(const std::string& input_field_name, MatchedElementsFilterDFW::~MatchedElementsFilterDFW() = default; void -MatchedElementsFilterDFW::insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, - ResType, vespalib::slime::Inserter& target) const +MatchedElementsFilterDFW::insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, + vespalib::slime::Inserter& target) const { auto field_value = doc->get_field_value(_input_field_name); if (field_value) { - SummaryFieldConverter::insert_summary_field_with_filter(*field_value, target, get_matching_elements(docid, *state)); + SummaryFieldConverter::insert_summary_field_with_filter(*field_value, target, get_matching_elements(docid, state)); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h index 18d608440d3..7dafdbc9e6b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/matched_elements_filter_dfw.h @@ -34,8 +34,8 @@ public: std::shared_ptr<MatchingElementsFields> matching_elems_fields); ~MatchedElementsFilterDFW() override; bool IsGenerated() const override { return false; } - void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, - ResType, vespalib::slime::Inserter& target) const override; + void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, + vespalib::slime::Inserter& target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp index 2dd19d8d9ea..5aba321b540 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.cpp @@ -26,24 +26,24 @@ double to_degrees(int32_t microDegrees) { } +using search::attribute::BasicType; using search::attribute::IAttributeContext; using search::attribute::IAttributeVector; -using search::attribute::BasicType; using search::attribute::IntegerContent; -using search::common::Location; using search::common::GeoGcd; +using search::common::Location; LocationAttrDFW::AllLocations -LocationAttrDFW::getAllLocations(GetDocsumsState *state) const +LocationAttrDFW::getAllLocations(GetDocsumsState& state) const { AllLocations retval; - if (! state->_args.locations_possible()) { + if (! state._args.locations_possible()) { return retval; } - if (state->_parsedLocations.empty()) { - state->parse_locations(); + if (state._parsedLocations.empty()) { + state.parse_locations(); } - for (const auto & loc : state->_parsedLocations) { + for (const auto & loc : state._parsedLocations) { if (loc.location.valid()) { LOG(debug, "found location(field %s) for DFW(field %s)\n", loc.field_name.c_str(), getAttributeName().c_str()); @@ -56,7 +56,7 @@ LocationAttrDFW::getAllLocations(GetDocsumsState *state) const } if (retval.empty()) { // avoid doing things twice - state->_args.locations_possible(false); + state._args.locations_possible(false); } return retval; } @@ -69,13 +69,13 @@ AbsDistanceDFW::AbsDistanceDFW(const vespalib::string & attrName) { } uint64_t -AbsDistanceDFW::findMinDistance(uint32_t docid, GetDocsumsState *state, +AbsDistanceDFW::findMinDistance(uint32_t docid, GetDocsumsState& state, const std::vector<const GeoLoc *> &locations) const { // ensure result fits in Java "int" uint64_t absdist = std::numeric_limits<int32_t>::max(); uint64_t sqdist = absdist*absdist; - const auto& attribute = get_attribute(*state); + const auto& attribute = get_attribute(state); for (auto location : locations) { int32_t docx = 0; int32_t docy = 0; @@ -95,7 +95,7 @@ AbsDistanceDFW::findMinDistance(uint32_t docid, GetDocsumsState *state, } void -AbsDistanceDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) const +AbsDistanceDFW::insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const { const auto & all_locations = getAllLocations(state); if (all_locations.empty()) { @@ -220,33 +220,32 @@ void insertV8FromAttr(const attribute::IAttributeVector &attribute, uint32_t doc } // namespace void -PositionsDFW::insertField(uint32_t docid, GetDocsumsState * dsState, ResType, vespalib::slime::Inserter &target) const +PositionsDFW::insertField(uint32_t docid, GetDocsumsState& dsState, vespalib::slime::Inserter &target) const { if (_useV8geoPositions) { - insertV8FromAttr(get_attribute(*dsState), docid, target); + insertV8FromAttr(get_attribute(dsState), docid, target); } else { - insertFromAttr(get_attribute(*dsState), docid, target); + insertFromAttr(get_attribute(dsState), docid, target); } } //-------------------------------------------------------------------------- PositionsDFW::UP PositionsDFW::create(const char *attribute_name, const IAttributeManager *attribute_manager, bool useV8geoPositions) { - PositionsDFW::UP ret; if (attribute_manager != nullptr) { if (!attribute_name) { LOG(debug, "createPositionsDFW: missing attribute name '%p'", attribute_name); - return ret; + return {}; } IAttributeContext::UP context = attribute_manager->createContext(); if (!context.get()) { LOG(debug, "createPositionsDFW: could not create context from attribute manager"); - return ret; + return {}; } const IAttributeVector *attribute = context->getAttribute(attribute_name); if (!attribute) { LOG(debug, "createPositionsDFW: could not get attribute '%s' from context", attribute_name); - return ret; + return {}; } } return std::make_unique<PositionsDFW>(attribute_name, useV8geoPositions); @@ -254,21 +253,20 @@ PositionsDFW::UP PositionsDFW::create(const char *attribute_name, const IAttribu std::unique_ptr<DocsumFieldWriter> AbsDistanceDFW::create(const char *attribute_name, const IAttributeManager *attribute_manager) { - std::unique_ptr<DocsumFieldWriter> ret; if (attribute_manager != nullptr) { if (!attribute_name) { LOG(debug, "createAbsDistanceDFW: missing attribute name '%p'", attribute_name); - return ret; + return {}; } IAttributeContext::UP context = attribute_manager->createContext(); if (!context.get()) { LOG(debug, "createAbsDistanceDFW: could not create context from attribute manager"); - return ret; + return {}; } const IAttributeVector *attribute = context->getAttribute(attribute_name); if (!attribute) { LOG(debug, "createAbsDistanceDFW: could not get attribute '%s' from context", attribute_name); - return ret; + return {}; } } return std::make_unique<AbsDistanceDFW>(attribute_name); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h index 67fe0bba5fe..5ac5f0fe051 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/positionsdfw.h @@ -31,20 +31,20 @@ public: return matching.empty() ? other : matching; } }; - AllLocations getAllLocations(GetDocsumsState *state) const; + AllLocations getAllLocations(GetDocsumsState& state) const; }; class AbsDistanceDFW : public LocationAttrDFW { private: - uint64_t findMinDistance(uint32_t docid, GetDocsumsState *state, + uint64_t findMinDistance(uint32_t docid, GetDocsumsState& state, const std::vector<const GeoLoc *> &locations) const; public: explicit AbsDistanceDFW(const vespalib::string & attrName); bool IsGenerated() const override { return true; } - void insertField(uint32_t docid, GetDocsumsState *state, - ResType, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, + vespalib::slime::Inserter &target) const override; static std::unique_ptr<DocsumFieldWriter> create(const char *attribute_name, const IAttributeManager *index_man); @@ -57,10 +57,10 @@ class PositionsDFW : public AttrDFW private: bool _useV8geoPositions; public: - typedef std::unique_ptr<PositionsDFW> UP; + using UP = std::unique_ptr<PositionsDFW>; PositionsDFW(const vespalib::string & attrName, bool useV8geoPositions); bool IsGenerated() const override { return true; } - void insertField(uint32_t docid, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; static UP create(const char *attribute_name, const IAttributeManager *index_man, bool useV8geoPositions); }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp index b7b10d9c1ea..08fba307e8f 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.cpp @@ -12,17 +12,17 @@ RankFeaturesDFW::RankFeaturesDFW() = default; RankFeaturesDFW::~RankFeaturesDFW() = default; void -RankFeaturesDFW::insertField(uint32_t docid, GetDocsumsState *state, - ResType, vespalib::slime::Inserter &target) const +RankFeaturesDFW::insertField(uint32_t docid, GetDocsumsState& state, + vespalib::slime::Inserter &target) const { - if ( !state->_rankFeatures ) { - state->_callback.FillRankFeatures(*state); - if (state->_rankFeatures.get() == nullptr) { // still no rank features to write + if ( !state._rankFeatures ) { + state._callback.FillRankFeatures(state); + if (state._rankFeatures.get() == nullptr) { // still no rank features to write return; } } - const FeatureSet::StringVector & names = state->_rankFeatures->getNames(); - const FeatureSet::Value * values = state->_rankFeatures->getFeaturesByDocId(docid); + const FeatureSet::StringVector & names = state._rankFeatures->getNames(); + const FeatureSet::Value * values = state._rankFeatures->getFeaturesByDocId(docid); if (values == nullptr) { return; } vespalib::slime::Cursor& obj = target.insertObject(); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h index 7302d162b65..dbd5f3ce0b6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/rankfeaturesdfw.h @@ -14,7 +14,7 @@ public: RankFeaturesDFW & operator=(const RankFeaturesDFW &) = delete; ~RankFeaturesDFW() override; bool IsGenerated() const override { return true; } - void insertField(uint32_t docid, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp index 781cd62a818..d19a111080f 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultclass.cpp @@ -23,15 +23,16 @@ ResultClass::~ResultClass() = default; int ResultClass::GetIndexFromName(const char* name) const { - NameIdMap::const_iterator found(_nameMap.find(name)); + auto found = _nameMap.find(name); return (found != _nameMap.end()) ? found->second : -1; } bool ResultClass::AddConfigEntry(const char *name, ResType type, std::unique_ptr<DocsumFieldWriter> docsum_field_writer) { - if (_nameMap.find(name) != _nameMap.end()) + if (_nameMap.find(name) != _nameMap.end()) { return false; + } _nameMap[name] = _entries.size(); ResConfigEntry e; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp index 77714ddd98f..4f5b5db841c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/resultconfig.cpp @@ -4,8 +4,9 @@ #include "docsum_field_writer.h" #include "docsum_field_writer_factory.h" #include "resultclass.h" -#include <vespa/vespalib/stllike/hash_map.hpp> #include <vespa/config-summary.h> +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <vespa/vespalib/util/exceptions.h> #include <atomic> #include <vespa/log/log.h> @@ -70,14 +71,14 @@ ResultConfig::set_default_result_class_id(uint32_t id) const ResultClass* ResultConfig::LookupResultClass(uint32_t id) const { - IdMap::const_iterator it(_classLookup.find(id)); + auto it = _classLookup.find(id); return (it != _classLookup.end()) ? it->second.get() : nullptr; } uint32_t ResultConfig::LookupResultClassId(const vespalib::string &name) const { - NameMap::const_iterator found(_nameLookup.find(name)); + auto found = _nameLookup.find(name); return (found != _nameLookup.end()) ? found->second : ((name.empty() || (name == "default")) ? _defaultSummaryId : NoClassID()); } @@ -126,16 +127,20 @@ ResultConfig::ReadConfig(const SummaryConfig &cfg, const char *configId, IDocsum for (unsigned int j = 0; rc && (j < cfg_class.fields.size()); j++) { const char *fieldtype = cfg_class.fields[j].type.c_str(); const char *fieldname = cfg_class.fields[j].name.c_str(); - vespalib::string override_name = cfg_class.fields[j].command; + vespalib::string command = cfg_class.fields[j].command; vespalib::string source_name = cfg_class.fields[j].source; auto res_type = ResTypeUtils::get_res_type(fieldtype); LOG(debug, "Reconfiguring class '%s' field '%s' of type '%s'", cfg_class.name.c_str(), fieldname, fieldtype); if (res_type != RES_BAD) { std::unique_ptr<DocsumFieldWriter> docsum_field_writer; - if (!override_name.empty()) { - docsum_field_writer = docsum_field_writer_factory.create_docsum_field_writer(fieldname, override_name, source_name, rc); - if (!rc) { - LOG(error, "%s override operation failed during initialization", override_name.c_str()); + if (!command.empty()) { + try { + docsum_field_writer = docsum_field_writer_factory.create_docsum_field_writer(fieldname, + command, + source_name); + } catch (const vespalib::IllegalArgumentException& ex) { + LOG(error, "Exception during setup of summary result class '%s': field='%s', command='%s', source='%s': %s", + cfg_class.name.c_str(), fieldname, command.c_str(), source_name.c_str(), ex.getMessage().c_str()); break; } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.cpp index 01e306161e7..b2a05d98f5b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.cpp @@ -5,9 +5,9 @@ namespace search::docsummary { void -SimpleDFW::insertField(uint32_t docid, const IDocsumStoreDocument *, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const +SimpleDFW::insertField(uint32_t docid, const IDocsumStoreDocument *, GetDocsumsState& state, vespalib::slime::Inserter &target) const { - insertField(docid, state, type, target); + insertField(docid, state, target); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.h index 52a45754c1f..4c7a4be517e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/simple_dfw.h @@ -13,8 +13,8 @@ namespace search::docsummary { class SimpleDFW : public DocsumFieldWriter { public: - virtual void insertField(uint32_t docid, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const = 0; - void insertField(uint32_t docid, const IDocsumStoreDocument*, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + virtual void insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const = 0; + void insertField(uint32_t docid, const IDocsumStoreDocument*, GetDocsumsState& state, vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp index b3d3fde7150..94774c1bee4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp @@ -1,10 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "slime_filler.h" -#include "annotation_converter.h" #include "i_juniper_converter.h" +#include "i_string_field_converter.h" #include "resultconfig.h" #include "searchdatatype.h" +#include "slime_filler_filter.h" #include <vespa/document/datatype/positiondatatype.h> #include <vespa/document/fieldvalue/arrayfieldvalue.h> #include <vespa/document/fieldvalue/boolfieldvalue.h> @@ -65,51 +66,53 @@ private: Cursor& _array; Symbol _key_sym; Symbol _val_sym; - bool _tokenize; + std::optional<const SlimeFillerFilter*> _filter; public: - MapFieldValueInserter(Inserter& parent_inserter, bool tokenize) + MapFieldValueInserter(Inserter& parent_inserter, std::optional<const SlimeFillerFilter*> filter) : _array(parent_inserter.insertArray()), _key_sym(_array.resolve("key")), _val_sym(_array.resolve("value")), - _tokenize(tokenize) + _filter(std::move(filter)) { } void insert_entry(const FieldValue& key, const FieldValue& value) { Cursor& c = _array.addObject(); ObjectSymbolInserter ki(c, _key_sym); - ObjectSymbolInserter vi(c, _val_sym); - SlimeFiller key_conv(ki, _tokenize); - SlimeFiller val_conv(vi, _tokenize); + SlimeFiller key_conv(ki); key.accept(key_conv); - value.accept(val_conv); + if (_filter.has_value()) { + ObjectSymbolInserter vi(c, _val_sym); + SlimeFiller val_conv(vi, nullptr, _filter.value()); + value.accept(val_conv); + } } }; } -SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize) +SlimeFiller::SlimeFiller(Inserter& inserter) : _inserter(inserter), - _tokenize(tokenize), _matching_elems(nullptr), - _juniper_converter(nullptr) + _string_converter(nullptr), + _filter(nullptr) { } -SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems) +SlimeFiller::SlimeFiller(Inserter& inserter, const std::vector<uint32_t>* matching_elems) : _inserter(inserter), - _tokenize(tokenize), _matching_elems(matching_elems), - _juniper_converter(nullptr) + _string_converter(nullptr), + _filter(nullptr) { } -SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, IJuniperConverter* juniper_converter) +SlimeFiller::SlimeFiller(Inserter& inserter, IStringFieldConverter* string_converter, const SlimeFillerFilter* filter) : _inserter(inserter), - _tokenize(tokenize), _matching_elems(nullptr), - _juniper_converter(juniper_converter) + _string_converter(string_converter), + _filter(filter) { } @@ -141,7 +144,7 @@ SlimeFiller::visit(const MapFieldValue& v) if (empty_or_empty_after_filtering(v)) { return; } - MapFieldValueInserter map_inserter(_inserter, _tokenize); + MapFieldValueInserter map_inserter(_inserter, SlimeFillerFilter::get_filter(_filter, "value")); if (filter_matching_elements()) { assert(v.has_no_erased_keys()); for (uint32_t id_to_keep : (*_matching_elems)) { @@ -163,7 +166,7 @@ SlimeFiller::visit(const ArrayFieldValue& value) } Cursor& a = _inserter.insertArray(); ArrayInserter ai(a); - SlimeFiller conv(ai, _tokenize, _juniper_converter); + SlimeFiller conv(ai, _string_converter, _filter); if (filter_matching_elements()) { for (uint32_t id_to_keep : (*_matching_elems)) { value[id_to_keep].accept(conv); @@ -178,21 +181,10 @@ SlimeFiller::visit(const ArrayFieldValue& value) void SlimeFiller::visit(const StringFieldValue& value) { - if (_tokenize) { - asciistream tmp; - AnnotationConverter converter(value.getValue(), tmp); - converter.handleIndexingTerms(value); - if (_juniper_converter != nullptr) { - _juniper_converter->insert_juniper_field(tmp.str(), _inserter); - } else { - _inserter.insertString(Memory(tmp.str())); - } + if (_string_converter != nullptr) { + _string_converter->convert(value, _inserter); } else { - if (_juniper_converter != nullptr) { - _juniper_converter->insert_juniper_field(value, _inserter); - } else { - _inserter.insertString(Memory(value.getValueRef())); - } + _inserter.insertString(Memory(value.getValueRef())); } } @@ -282,11 +274,15 @@ SlimeFiller::visit(const StructFieldValue& value) } Cursor& c = _inserter.insertObject(); for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) { - Memory keymem(itr.field().getName()); - ObjectInserter vi(c, keymem); - SlimeFiller conv(vi, _tokenize); - FieldValue::UP nextValue(value.getValue(itr.field())); - (*nextValue).accept(conv); + auto& name = itr.field().getName(); + auto sub_filter = SlimeFillerFilter::get_filter(_filter, name); + if (sub_filter.has_value()) { + Memory keymem(name); + ObjectInserter vi(c, keymem); + SlimeFiller conv(vi, nullptr, sub_filter.value()); + FieldValue::UP nextValue(value.getValue(itr.field())); + (*nextValue).accept(conv); + } } } @@ -318,7 +314,7 @@ SlimeFiller::visit(const WeightedSetFieldValue& value) } Cursor& o = a.addObject(); ObjectSymbolInserter ki(o, isym); - SlimeFiller conv(ki, _tokenize); + SlimeFiller conv(ki); entry.first->accept(conv); int weight = static_cast<const IntFieldValue&>(*entry.second).getValue(); o.setLong(wsym, weight); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h index ebade8aa711..a81a20814c4 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h @@ -10,7 +10,8 @@ namespace vespalib::slime { struct Inserter; } namespace search::docsummary { -class IJuniperConverter; +class IStringFieldConverter; +class SlimeFillerFilter; /* * Class inserting a field value into a slime object. @@ -18,9 +19,9 @@ class IJuniperConverter; class SlimeFiller : public document::ConstFieldValueVisitor { vespalib::slime::Inserter& _inserter; - bool _tokenize; const std::vector<uint32_t>* _matching_elems; - IJuniperConverter* _juniper_converter; + IStringFieldConverter* _string_converter; + const SlimeFillerFilter* _filter; bool filter_matching_elements() const { return _matching_elems != nullptr; @@ -50,9 +51,9 @@ class SlimeFiller : public document::ConstFieldValueVisitor { void visit(const document::TensorFieldValue& value) override; void visit(const document::ReferenceFieldValue& value) override; public: - SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize); - SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems); - SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter* juniper_converter); + SlimeFiller(vespalib::slime::Inserter& inserter); + SlimeFiller(vespalib::slime::Inserter& inserter, const std::vector<uint32_t>* matching_elems); + SlimeFiller(vespalib::slime::Inserter& inserter, IStringFieldConverter* string_converter, const SlimeFillerFilter* filter); ~SlimeFiller() override; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp new file mode 100644 index 00000000000..db28a1ae5cf --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.cpp @@ -0,0 +1,67 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "slime_filler_filter.h" +#include <vespa/vespalib/stllike/hash_map.hpp> +#include <cassert> + +namespace search::docsummary { + +SlimeFillerFilter::SlimeFillerFilter() + : _filter() +{ +} + +SlimeFillerFilter::~SlimeFillerFilter() = default; + +std::optional<const SlimeFillerFilter*> +SlimeFillerFilter::get_filter(vespalib::stringref field_name) const +{ + auto itr = _filter.find(field_name); + if (itr == _filter.end()) { + return std::nullopt; + } + return itr->second.get(); +} + +std::optional<const SlimeFillerFilter*> +SlimeFillerFilter::get_filter(const SlimeFillerFilter* filter, vespalib::stringref field_name) +{ + return (filter != nullptr) ? filter->get_filter(field_name) : nullptr; +} + +bool +SlimeFillerFilter::empty() const { return _filter.empty(); } + +SlimeFillerFilter& +SlimeFillerFilter::add(vespalib::stringref field_path) +{ + vespalib::stringref field_name; + vespalib::stringref remaining_path; + auto dot_pos = field_path.find('.'); + if (dot_pos != vespalib::string::npos) { + field_name = field_path.substr(0, dot_pos); + remaining_path = field_path.substr(dot_pos + 1); + } else { + field_name = field_path; + } + auto itr = _filter.find(field_name); + if (itr != _filter.end()) { + if (itr->second) { + if (remaining_path.empty()) { + itr->second.reset(); + } else { + itr->second->add(remaining_path); + } + } + } else { + auto insres = _filter.insert(std::make_pair(field_name, std::unique_ptr<SlimeFillerFilter>())); + assert(insres.second); + if (!remaining_path.empty()) { + insres.first->second = std::make_unique<SlimeFillerFilter>(); + insres.first->second->add(remaining_path); + } + } + return *this; +} + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h new file mode 100644 index 00000000000..ba7ba6fe159 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler_filter.h @@ -0,0 +1,29 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> +#include <vespa/vespalib/stllike/hash_map.h> +#include <optional> + +namespace search::docsummary { + +/* + * Class filtering which fields to render in a struct field. + */ +class SlimeFillerFilter { + vespalib::hash_map<vespalib::string, std::unique_ptr<SlimeFillerFilter>> _filter; + std::optional<const SlimeFillerFilter*> get_filter(vespalib::stringref field_name) const; +public: + SlimeFillerFilter(); + ~SlimeFillerFilter(); + /* + * If field is blocked by the filter then the return value is not set, + * otherwise it is set to the filter for the next level. + */ + static std::optional<const SlimeFillerFilter*> get_filter(const SlimeFillerFilter* filter, vespalib::stringref field_name); + bool empty() const; + SlimeFillerFilter& add(vespalib::stringref field_path); +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp index 76bae0cee97..13a3a345bf6 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.cpp @@ -18,19 +18,19 @@ SummaryFeaturesDFW::~SummaryFeaturesDFW() = default; static vespalib::Memory _M_cached("vespa.summaryFeatures.cached"); void -SummaryFeaturesDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) const +SummaryFeaturesDFW::insertField(uint32_t docid, GetDocsumsState& state, vespalib::slime::Inserter &target) const { - if (state->_omit_summary_features) { + if (state._omit_summary_features) { return; } - if ( ! state->_summaryFeatures) { - state->_callback.FillSummaryFeatures(*state); - if ( !state->_summaryFeatures) { // still no summary features to write + if ( ! state._summaryFeatures) { + state._callback.FillSummaryFeatures(state); + if ( !state._summaryFeatures) { // still no summary features to write return; } } - const FeatureSet::StringVector &names = state->_summaryFeatures->getNames(); - const FeatureSet::Value *values = state->_summaryFeatures->getFeaturesByDocId(docid); + const FeatureSet::StringVector &names = state._summaryFeatures->getNames(); + const FeatureSet::Value *values = state._summaryFeatures->getFeaturesByDocId(docid); if (values == nullptr) { return; } vespalib::slime::Cursor& obj = target.insertObject(); @@ -42,7 +42,7 @@ SummaryFeaturesDFW::insertField(uint32_t docid, GetDocsumsState *state, ResType, obj.setDouble(name, values[i].as_double()); } } - if (state->_summaryFeaturesCached) { + if (state._summaryFeaturesCached) { obj.setDouble(_M_cached, 1.0); } else { obj.setDouble(_M_cached, 0.0); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h index ec14dc45055..661d23c2d64 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfeaturesdfw.h @@ -14,8 +14,8 @@ public: SummaryFeaturesDFW & operator=(const SummaryFeaturesDFW &) = delete; ~SummaryFeaturesDFW() override; bool IsGenerated() const override { return true; } - void insertField(uint32_t docid, GetDocsumsState *state, - ResType type, vespalib::slime::Inserter &target) const override; + void insertField(uint32_t docid, GetDocsumsState& state, + vespalib::slime::Inserter &target) const override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp index 1a21c1d3eab..dd5a59e46af 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp @@ -1,214 +1,21 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "summaryfieldconverter.h" -#include "annotation_converter.h" #include "check_undefined_value_visitor.h" -#include "searchdatatype.h" #include "slime_filler.h" -#include <vespa/document/fieldvalue/arrayfieldvalue.h> -#include <vespa/document/fieldvalue/boolfieldvalue.h> -#include <vespa/document/fieldvalue/bytefieldvalue.h> -#include <vespa/document/fieldvalue/document.h> -#include <vespa/document/fieldvalue/doublefieldvalue.h> -#include <vespa/document/fieldvalue/floatfieldvalue.h> -#include <vespa/document/fieldvalue/intfieldvalue.h> -#include <vespa/document/fieldvalue/longfieldvalue.h> -#include <vespa/document/fieldvalue/predicatefieldvalue.h> -#include <vespa/document/fieldvalue/rawfieldvalue.h> -#include <vespa/document/fieldvalue/shortfieldvalue.h> -#include <vespa/document/fieldvalue/stringfieldvalue.h> -#include <vespa/document/fieldvalue/weightedsetfieldvalue.h> -#include <vespa/document/fieldvalue/annotationreferencefieldvalue.h> -#include <vespa/document/fieldvalue/tensorfieldvalue.h> -#include <vespa/document/fieldvalue/referencefieldvalue.h> -#include <vespa/vespalib/stllike/asciistream.h> -#include <vespa/vespalib/util/size_literals.h> -#include <vespa/vespalib/data/slime/slime.h> -#include <vespa/vespalib/data/smart_buffer.h> +#include <vespa/document/fieldvalue/fieldvalue.h> -using document::AnnotationReferenceFieldValue; -using document::ArrayFieldValue; -using document::BoolFieldValue; -using document::ByteFieldValue; -using document::Document; -using document::DoubleFieldValue; using document::FieldValue; -using document::ConstFieldValueVisitor; -using document::FloatFieldValue; -using document::IntFieldValue; -using document::LongFieldValue; -using document::MapFieldValue; -using document::PredicateFieldValue; -using document::RawFieldValue; -using document::ShortFieldValue; -using document::StringFieldValue; -using document::StructFieldValue; -using document::WeightedSetFieldValue; -using document::TensorFieldValue; -using document::ReferenceFieldValue; namespace search::docsummary { -namespace { - -struct FieldValueConverter { - virtual FieldValue::UP convert(const FieldValue &input) = 0; - virtual ~FieldValueConverter() = default; -}; - - -class SummaryFieldValueConverter : protected ConstFieldValueVisitor -{ - vespalib::asciistream _str; - bool _tokenize; - FieldValue::UP _field_value; - FieldValueConverter &_structuredFieldConverter; - - template <typename T> - void visitPrimitive(const T &t) { - _field_value.reset(t.clone()); - } - void visit(const IntFieldValue &value) override { visitPrimitive(value); } - void visit(const LongFieldValue &value) override { visitPrimitive(value); } - void visit(const ShortFieldValue &value) override { visitPrimitive(value); } - void visit(const BoolFieldValue &value) override { visitPrimitive(value); } - void visit(const ByteFieldValue &value) override { - int8_t signedValue = value.getAsByte(); - _field_value = std::make_unique<ShortFieldValue>(signedValue); - } - void visit(const DoubleFieldValue &value) override { visitPrimitive(value); } - void visit(const FloatFieldValue &value) override { visitPrimitive(value); } - - void visit(const StringFieldValue &value) override { - if (_tokenize) { - AnnotationConverter converter(value.getValue(), _str); - converter.handleIndexingTerms(value); - } else { - _str << value.getValue(); - } - } - - void visit(const AnnotationReferenceFieldValue & v ) override { - _field_value = _structuredFieldConverter.convert(v); - } - void visit(const Document & v) override { - _field_value = _structuredFieldConverter.convert(v); - } - - void visit(const PredicateFieldValue &value) override { - _str << value.toString(); - } - - void visit(const RawFieldValue &value) override { - visitPrimitive(value); - } - - void visit(const ArrayFieldValue &value) override { - if (value.size() > 0) { - _field_value = _structuredFieldConverter.convert(value); - } // else: implicit empty string - } - - void visit(const MapFieldValue & value) override { - if (value.size() > 0) { - _field_value = _structuredFieldConverter.convert(value); - } // else: implicit empty string - } - - void visit(const StructFieldValue &value) override { - if (*value.getDataType() == *SearchDataType::URI) { - FieldValue::UP uriAllValue = value.getValue("all"); - if (uriAllValue && uriAllValue->isA(FieldValue::Type::STRING)) { - uriAllValue->accept(*this); - return; - } - } - _field_value = _structuredFieldConverter.convert(value); - } - - void visit(const WeightedSetFieldValue &value) override { - if (value.size() > 0) { - _field_value = _structuredFieldConverter.convert(value); - } // else: implicit empty string - } - - void visit(const TensorFieldValue &value) override { - visitPrimitive(value); - } - - void visit(const ReferenceFieldValue& value) override { - if (value.hasValidDocumentId()) { - _str << value.getDocumentId().toString(); - } // else: implicit empty string - } - -public: - SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter); - ~SummaryFieldValueConverter() override; - - FieldValue::UP convert(const FieldValue &input) { - input.accept(*this); - if (_field_value.get()) { - return std::move(_field_value); - } - return StringFieldValue::make(_str.str()); - } -}; - -SummaryFieldValueConverter::SummaryFieldValueConverter(bool tokenize, FieldValueConverter &subConverter) - : _str(), _tokenize(tokenize), - _structuredFieldConverter(subConverter) -{} -SummaryFieldValueConverter::~SummaryFieldValueConverter() = default; - -using namespace vespalib::slime::convenience; - -class SlimeConverter : public FieldValueConverter { -private: - bool _tokenize; - const std::vector<uint32_t>* _matching_elems; - -public: - explicit SlimeConverter(bool tokenize) - : _tokenize(tokenize), - _matching_elems() - {} - - SlimeConverter(bool tokenize, const std::vector<uint32_t>& matching_elems) - : _tokenize(tokenize), - _matching_elems(&matching_elems) - {} - - FieldValue::UP convert(const FieldValue &input) override { - vespalib::Slime slime; - SlimeInserter inserter(slime); - SlimeFiller visitor(inserter, _tokenize, _matching_elems); - input.accept(visitor); - vespalib::SmartBuffer buffer(4_Ki); - vespalib::slime::BinaryFormat::encode(slime, buffer); - vespalib::Memory mem = buffer.obtain(); - return std::make_unique<RawFieldValue>(mem.data, mem.size); - } -}; - - -} // namespace - -FieldValue::UP -SummaryFieldConverter::convertSummaryField(bool markup, - const FieldValue &value) -{ - SlimeConverter subConv(markup); - return SummaryFieldValueConverter(markup, subConv).convert(value); -} - void SummaryFieldConverter::insert_summary_field(const FieldValue& value, vespalib::slime::Inserter& inserter) { CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, false); + SlimeFiller visitor(inserter); value.accept(visitor); } } @@ -219,18 +26,18 @@ SummaryFieldConverter::insert_summary_field_with_filter(const FieldValue& value, CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, false, &matching_elems); + SlimeFiller visitor(inserter, &matching_elems); value.accept(visitor); } } void -SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter) +SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, IStringFieldConverter& converter) { CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, tokenize, &converter); + SlimeFiller visitor(inserter, &converter, nullptr); value.accept(visitor); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h index 924ec6f402e..ce3bf80b365 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h @@ -2,13 +2,16 @@ #pragma once -#include <vespa/document/fieldvalue/fieldvalue.h> +#include <cstdint> +#include <vector> + +namespace document { class FieldValue; } namespace vespalib::slime { struct Inserter; } namespace search::docsummary { -class IJuniperConverter; +class IStringFieldConverter; /** * This class converts a summary field for docsum fetching. @@ -16,18 +19,12 @@ class IJuniperConverter; class SummaryFieldConverter { public: - static document::FieldValue::UP convertSummaryField(bool markup, const document::FieldValue &value); - - static document::FieldValue::UP convert_field_with_filter(bool markup, - const document::FieldValue& value, - const std::vector<uint32_t>& matching_elems); - static void insert_summary_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter); /** * Insert the given field value, but only the elements that are contained in the matching_elems vector. */ static void insert_summary_field_with_filter(const document::FieldValue& value, vespalib::slime::Inserter& inserter, const std::vector<uint32_t>& matching_elems); - static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter); + static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, IStringFieldConverter& converter); }; } |