summary refs log tree commit diff stats
path: root/searchsummary
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@yahooinc.com> 2022-09-16 16:07:08 +0200
committer GitHub <noreply@github.com> 2022-09-16 16:07:08 +0200
commit 658dcaff63003a3d9797c3e78a51a5058206fe9f (patch)
tree 2dcf9c0fbc9cba644b91430a9395fd2e5bef21b9 /searchsummary
parent 0bf612ad7f44520fb19e27f55f31419cbf102851 (diff)
parent 1e34475bbf067d11a618f8e1f74a70613a06fa58 (diff)
Merge pull request #24098 from vespa-engine/toregge/add-annotation-converter-unit-test
Add annotation converter unit test.
Diffstat (limited to 'searchsummary')
-rw-r--r-- searchsummary/CMakeLists.txt 1
-rw-r--r-- searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt 9
-rw-r--r-- searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp 176
-rw-r--r-- searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp 176
4 files changed, 203 insertions, 159 deletions
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index 6e970bdfc00..a5dc62da5c0 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -16,6 +16,7 @@ vespa_define_module(
TESTS
src/tests/docsummary
+ src/tests/docsummary/annotation_converter
src/tests/docsummary/attribute_combiner
src/tests/docsummary/attributedfw
src/tests/docsummary/document_id_dfw
diff --git a/searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt b/searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt
new file mode 100644
index 00000000000..22e0d3e6477
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/annotation_converter/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchsummary_annotation_converter_test_app TEST # Test executable for the annotation converter unit test.
+ SOURCES
+ annotation_converter_test.cpp # The single source file added by this commit.
+ DEPENDS
+ searchsummary # Module under test.
+ GTest::GTest # The test source uses the gtest TEST_F/EXPECT_EQ macros.
+)
+vespa_add_test(NAME searchsummary_annotation_converter_test_app COMMAND searchsummary_annotation_converter_test_app) # Registers the executable above as a test with the same name.
diff --git a/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp
new file mode 100644
index 00000000000..753ae8d9044
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/annotation_converter/annotation_converter_test.cpp
@@ -0,0 +1,176 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/document/annotation/annotation.h>
+#include <vespa/document/annotation/span.h>
+#include <vespa/document/annotation/spanlist.h>
+#include <vespa/document/annotation/spantree.h>
+#include <vespa/document/fieldvalue/stringfieldvalue.h>
+#include <vespa/document/repo/configbuilder.h>
+#include <vespa/document/repo/fixedtyperepo.h>
+#include <vespa/juniper/juniper_separators.h>
+#include <vespa/searchsummary/docsummary/annotation_converter.h>
+#include <vespa/searchsummary/docsummary/i_juniper_converter.h>
+#include <vespa/searchsummary/docsummary/linguisticsannotation.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/gtest/gtest.h>
+#include <vespa/vespalib/stllike/asciistream.h>
+
+using document::Annotation;
+using document::DocumentType;
+using document::DocumentTypeRepo;
+using document::Span;
+using document::SpanList;
+using document::SpanTree;
+using document::StringFieldValue;
+using search::docsummary::AnnotationConverter;
+using search::docsummary::IJuniperConverter;
+using search::linguistics::SPANTREE_NAME;
+using search::linguistics::TERM;
+using vespalib::Slime;
+using vespalib::slime::SlimeInserter;
+
+namespace {
+
+DocumenttypesConfig
+get_document_types_config() // Builds the minimal document types config needed by the test fixture.
+{
+ using namespace document::config_builder;
+ DocumenttypesConfigBuilderHelper builder;
+ builder.document(42, "indexingdocument", // Same name the fixture constructor passes to getDocumentType(); 42 is presumably the type id (confirm against the builder API).
+ Struct("indexingdocument.header"),
+ Struct("indexingdocument.body"));
+ return builder.config();
+}
+
+class MockJuniperConverter : public IJuniperConverter // Test double that records the input handed to convert() instead of converting it.
+{
+ vespalib::string _result; // Copy of the most recent convert() input.
+public:
+ void convert(vespalib::stringref input, vespalib::slime::Inserter&) override { // The inserter argument is unnamed and ignored.
+ _result = input;
+ }
+ const vespalib::string& get_result() const noexcept { return _result; } // Returns the recorded input so tests can compare it with the expected string.
+};
+
+}
+
+class AnnotationConverterTest : public testing::Test // Fixture: owns the document type repo and provides helpers that build annotated strings and their expected output.
+{
+protected:
+ std::shared_ptr<const DocumentTypeRepo> _repo; // Created from get_document_types_config() in the constructor.
+ const DocumentType* _document_type; // Looked up in _repo by name ("indexingdocument") in the constructor.
+ document::FixedTypeRepo _fixed_repo; // Built from *_repo and *_document_type; passed to setSpanTrees() by set_span_tree().
+
+ AnnotationConverterTest();
+ ~AnnotationConverterTest() override;
+ void set_span_tree(StringFieldValue& value, std::unique_ptr<SpanTree> tree); // Attaches a span tree to a string field value.
+ StringFieldValue make_annotated_string(); // "foo bar" with two TERM annotations.
+ StringFieldValue make_annotated_chinese_string(); // Multi-byte UTF-8 string with two TERM annotations.
+ vespalib::string make_exp_il_annotated_string(); // Expected converter output for make_annotated_string().
+ vespalib::string make_exp_il_annotated_chinese_string(); // Expected converter output for make_annotated_chinese_string().
+ void expect_annotated(const vespalib::string& exp, const StringFieldValue& fv); // Converts fv and checks what reached the mock juniper converter.
+};
+
+AnnotationConverterTest::AnnotationConverterTest() // Sets up the repo, document type and fixed repo used by the helper functions.
+ : testing::Test(),
+ _repo(std::make_unique<DocumentTypeRepo>(get_document_types_config())),
+ _document_type(_repo->getDocumentType("indexingdocument")), // Depends on _repo being initialized first; members are declared in that order.
+ _fixed_repo(*_repo, *_document_type) // Dereferences both members initialized above.
+{
+}
+
+AnnotationConverterTest::~AnnotationConverterTest() = default; // Defined out of line; no custom teardown.
+
+void
+AnnotationConverterTest::set_span_tree(StringFieldValue & value, std::unique_ptr<SpanTree> tree) // Stores `tree` on `value` through a single-element SpanTrees container.
+{
+ StringFieldValue::SpanTrees trees;
+ trees.push_back(std::move(tree)); // The container takes ownership of the tree.
+ value.setSpanTrees(trees, _fixed_repo); // Uses the fixture's fixed type repo.
+}
+
+StringFieldValue
+AnnotationConverterTest::make_annotated_string() // Builds "foo bar" with one TERM annotation per word; the second one carries the value "baz".
+{
+ auto span_list_up = std::make_unique<SpanList>();
+ auto span_list = span_list_up.get(); // Raw pointer kept because ownership moves into the tree on the next line while spans are still added below.
+ auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up));
+ tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM); // Span over "foo", annotation without a value.
+ tree->annotate(span_list->add(std::make_unique<Span>(4, 3)), // Span over "bar"; the space at offset 3 is left unannotated.
+ Annotation(*TERM, std::make_unique<StringFieldValue>("baz"))); // Annotation value "baz" shows up in the expected output built by make_exp_il_annotated_string().
+ StringFieldValue value("foo bar");
+ set_span_tree(value, std::move(tree));
+ return value;
+}
+
+StringFieldValue
+AnnotationConverterTest::make_annotated_chinese_string() // Builds an 8-character Chinese string split into two TERM-annotated spans.
+{
+ auto span_list_up = std::make_unique<SpanList>();
+ auto span_list = span_list_up.get(); // Raw pointer kept because ownership moves into the tree on the next line.
+ auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up));
+ // These chinese characters each use 3 bytes in their UTF8 encoding, so the span arguments below are byte counts, not character counts.
+ tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM); // 15 bytes = the first five characters.
+ tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM); // 9 bytes = the last three characters.
+ StringFieldValue value("我就是那个大灰狼");
+ set_span_tree(value, std::move(tree));
+ return value;
+}
+
+vespalib::string
+AnnotationConverterTest::make_exp_il_annotated_string() // Expected mock input for the string built by make_annotated_string().
+{
+ using namespace juniper::separators;
+ vespalib::asciistream exp;
+ exp << "foo" << unit_separator_string << // First annotated word, no annotation value.
+ " " << unit_separator_string << interlinear_annotation_anchor_string << // Unannotated space, then the start of the "bar"/"baz" group.
+ "bar" << interlinear_annotation_separator_string << // Original text of the second span.
+ "baz" << interlinear_annotation_terminator_string << unit_separator_string; // Annotation value of the second span.
+ return exp.str();
+}
+
+vespalib::string
+AnnotationConverterTest::make_exp_il_annotated_chinese_string() // Expected mock input for the string built by make_annotated_chinese_string().
+{
+ using namespace juniper::separators;
+ vespalib::asciistream exp;
+ exp << "我就是那个" << unit_separator_string << // Text of the first span (five characters).
+ "大灰狼" << unit_separator_string; // Text of the second span (three characters).
+ return exp.str();
+}
+
+void
+AnnotationConverterTest::expect_annotated(const vespalib::string& exp, const StringFieldValue& fv) // Runs AnnotationConverter on `fv` and expects the mock juniper converter to have received `exp`.
+{
+ MockJuniperConverter juniper_converter; // Records the string the annotation converter forwards.
+ AnnotationConverter annotation_converter(juniper_converter);
+ Slime slime; // Only supplies the inserter that convert() requires; its content is not checked here.
+ SlimeInserter inserter(slime);
+ annotation_converter.convert(fv, inserter);
+ EXPECT_EQ(exp, juniper_converter.get_result());
+}
+
+
+TEST_F(AnnotationConverterTest, convert_plain_string) // A string with no span tree attached.
+{
+ using namespace juniper::separators;
+ vespalib::string exp("Foo Bar Baz");
+ StringFieldValue plain_string("Foo Bar Baz");
+ expect_annotated(exp + unit_separator_string, plain_string); // Expected: the unchanged text followed by one unit separator.
+}
+
+TEST_F(AnnotationConverterTest, convert_annotated_string) // ASCII string with two TERM annotations, one of which has a value.
+{
+ auto exp = make_exp_il_annotated_string();
+ auto annotated_string = make_annotated_string();
+ expect_annotated(exp, annotated_string);
+}
+
+TEST_F(AnnotationConverterTest, convert_annotated_chinese_string) // Multi-byte UTF-8 string whose spans are given in bytes.
+{
+ auto exp = make_exp_il_annotated_chinese_string();
+ auto annotated_chinese_string = make_annotated_chinese_string();
+ expect_annotated(exp, annotated_chinese_string);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
index 6f3faeb69d5..505386f5b91 100644
--- a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
+++ b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
@@ -1,12 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-#include <vespa/document/annotation/annotation.h>
-#include <vespa/document/annotation/span.h>
-#include <vespa/document/annotation/spanlist.h>
-#include <vespa/document/annotation/spantree.h>
#include <vespa/document/base/documentid.h>
#include <vespa/document/datatype/documenttype.h>
-#include <vespa/document/datatype/urldatatype.h>
#include <vespa/document/datatype/referencedatatype.h>
#include <vespa/document/datatype/tensor_data_type.h>
#include <vespa/document/fieldvalue/arrayfieldvalue.h>
@@ -31,11 +26,6 @@
#include <vespa/eval/eval/tensor_spec.h>
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/value_codec.h>
-#include <vespa/juniper/juniper_separators.h>
-#include <vespa/searchsummary/docsummary/annotation_converter.h>
-#include <vespa/searchsummary/docsummary/docsum_field_writer.h>
-#include <vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h>
-#include <vespa/searchsummary/docsummary/i_juniper_converter.h>
#include <vespa/searchsummary/docsummary/i_string_field_converter.h>
#include <vespa/searchsummary/docsummary/linguisticsannotation.h>
#include <vespa/searchsummary/docsummary/resultconfig.h>
@@ -47,10 +37,7 @@
#include <vespa/vespalib/data/simple_buffer.h>
#include <vespa/vespalib/gtest/gtest.h>
#include <vespa/vespalib/stllike/asciistream.h>
-#include <vespa/vespalib/util/size_literals.h>
-#include <vespa/config-summary.h>
-using document::Annotation;
using document::ArrayFieldValue;
using document::BoolFieldValue;
using document::ByteFieldValue;
@@ -72,21 +59,13 @@ using document::RawFieldValue;
using document::ReferenceDataType;
using document::ReferenceFieldValue;
using document::ShortFieldValue;
-using document::Span;
-using document::SpanList;
-using document::SpanTree;
using document::StringFieldValue;
using document::StructDataType;
using document::StructFieldValue;
using document::TensorDataType;
using document::TensorFieldValue;
-using document::UrlDataType;
using document::WeightedSetFieldValue;
-using search::docsummary::AnnotationConverter;
-using search::docsummary::IDocsumFieldWriterFactory;
-using search::docsummary::IJuniperConverter;
using search::docsummary::IStringFieldConverter;
-using search::docsummary::DocsumFieldWriter;
using search::docsummary::ResultConfig;
using search::docsummary::SlimeFiller;
using search::docsummary::SlimeFillerFilter;
@@ -101,7 +80,6 @@ using vespalib::eval::ValueType;
using vespalib::slime::Cursor;
using vespalib::slime::JsonFormat;
using vespalib::slime::SlimeInserter;
-using vespa::config::search::SummaryConfigBuilder;
namespace {
@@ -120,15 +98,6 @@ slime_to_string(const Slime& slime)
}
vespalib::string
-make_slime_string(vespalib::stringref value)
-{
- Slime slime;
- SlimeInserter inserter(slime);
- inserter.insertString({value});
- return slime_to_string(slime);
-}
-
-vespalib::string
make_slime_data_string(vespalib::stringref data)
{
Slime slime;
@@ -145,15 +114,6 @@ make_slime_tensor_string(const Value& value)
return make_slime_data_string({s.peek(), s.size()});
}
-class MockDocsumFieldWriterFactory : public IDocsumFieldWriterFactory
-{
-public:
- std::unique_ptr<DocsumFieldWriter> create_docsum_field_writer(const vespalib::string&, const vespalib::string&, const vespalib::string&, bool&) override {
- return {};
- }
-
-};
-
DocumenttypesConfig
get_document_types_config()
{
@@ -186,29 +146,20 @@ get_document_types_config()
return builder.config();
}
-class MockJuniperConverter : public IJuniperConverter
+class MockStringFieldConverter : public IStringFieldConverter
{
vespalib::string _result;
public:
- void convert(vespalib::stringref input, vespalib::slime::Inserter&) override {
- _result = input;
- }
- const vespalib::string& get_result() const noexcept { return _result; }
-};
-
-class PassThroughStringFieldConverter : public IStringFieldConverter
-{
- IJuniperConverter& _juniper_converter;
-public:
- PassThroughStringFieldConverter(IJuniperConverter& juniper_converter)
+ MockStringFieldConverter()
: IStringFieldConverter(),
- _juniper_converter(juniper_converter)
+ _result()
{
}
- ~PassThroughStringFieldConverter() override = default;
- void convert(const document::StringFieldValue& input, vespalib::slime::Inserter& inserter) override {
- _juniper_converter.convert(input.getValueRef(), inserter);
+ ~MockStringFieldConverter() override = default;
+ void convert(const document::StringFieldValue& input, vespalib::slime::Inserter&) override {
+ _result = input.getValueRef();
}
+ const vespalib::string& get_result() const noexcept { return _result; }
};
}
@@ -218,17 +169,11 @@ class SlimeFillerTest : public testing::Test
protected:
std::shared_ptr<const DocumentTypeRepo> _repo;
const DocumentType* _document_type;
- document::FixedTypeRepo _fixed_repo;
SlimeFillerTest();
~SlimeFillerTest() override;
const DataType& get_data_type(const vespalib::string& name) const;
const ReferenceDataType& get_as_ref_type(const vespalib::string& name) const;
- void set_span_tree(StringFieldValue& value, std::unique_ptr<SpanTree> tree);
- StringFieldValue make_annotated_string();
- StringFieldValue make_annotated_chinese_string();
- vespalib::string make_exp_il_annotated_string();
- vespalib::string make_exp_il_annotated_chinese_string();
ArrayFieldValue make_array();
WeightedSetFieldValue make_weighted_set();
MapFieldValue make_map();
@@ -237,14 +182,13 @@ protected:
void expect_insert(const vespalib::string& exp, const FieldValue& fv);
void expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>& matching_elems);
void expect_insert(const vespalib::string& exp, const FieldValue& fv, SlimeFillerFilter& filter);
- void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize);
+ void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv);
};
SlimeFillerTest::SlimeFillerTest()
: testing::Test(),
_repo(std::make_unique<DocumentTypeRepo>(get_document_types_config())),
- _document_type(_repo->getDocumentType("indexingdocument")),
- _fixed_repo(*_repo, *_document_type)
+ _document_type(_repo->getDocumentType("indexingdocument"))
{
}
@@ -263,64 +207,6 @@ SlimeFillerTest::get_as_ref_type(const vespalib::string& name) const {
return dynamic_cast<const ReferenceDataType&>(get_data_type(name));
}
-void
-SlimeFillerTest::set_span_tree(StringFieldValue & value, std::unique_ptr<SpanTree> tree)
-{
- StringFieldValue::SpanTrees trees;
- trees.push_back(std::move(tree));
- value.setSpanTrees(trees, _fixed_repo);
-}
-
-StringFieldValue
-SlimeFillerTest::make_annotated_string()
-{
- auto span_list_up = std::make_unique<SpanList>();
- auto span_list = span_list_up.get();
- auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up));
- tree->annotate(span_list->add(std::make_unique<Span>(0, 3)), *TERM);
- tree->annotate(span_list->add(std::make_unique<Span>(4, 3)),
- Annotation(*TERM, std::make_unique<StringFieldValue>("baz")));
- StringFieldValue value("foo bar");
- set_span_tree(value, std::move(tree));
- return value;
-}
-
-StringFieldValue
-SlimeFillerTest::make_annotated_chinese_string()
-{
- auto span_list_up = std::make_unique<SpanList>();
- auto span_list = span_list_up.get();
- auto tree = std::make_unique<SpanTree>(SPANTREE_NAME, std::move(span_list_up));
- // These chinese characters each use 3 bytes in their UTF8 encoding.
- tree->annotate(span_list->add(std::make_unique<Span>(0, 15)), *TERM);
- tree->annotate(span_list->add(std::make_unique<Span>(15, 9)), *TERM);
- StringFieldValue value("我就是那个大灰狼");
- set_span_tree(value, std::move(tree));
- return value;
-}
-
-vespalib::string
-SlimeFillerTest::make_exp_il_annotated_string()
-{
- using namespace juniper::separators;
- vespalib::asciistream exp;
- exp << "foo" << unit_separator_string <<
- " " << unit_separator_string << interlinear_annotation_anchor_string <<
- "bar" << interlinear_annotation_separator_string <<
- "baz" << interlinear_annotation_terminator_string << unit_separator_string;
- return exp.str();
-}
-
-vespalib::string
-SlimeFillerTest::make_exp_il_annotated_chinese_string()
-{
- using namespace juniper::separators;
- vespalib::asciistream exp;
- exp << "我就是那个" << unit_separator_string <<
- "大灰狼" << unit_separator_string;
- return exp.str();
-}
-
ArrayFieldValue
SlimeFillerTest::make_array()
{
@@ -401,14 +287,12 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv
}
void
-SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize)
+SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const FieldValue& fv)
{
Slime slime;
SlimeInserter inserter(slime);
- MockJuniperConverter converter;
- AnnotationConverter annotation_converter(converter);
- PassThroughStringFieldConverter passthrough_converter(converter);
- SlimeFiller filler(inserter, tokenize ? (IStringFieldConverter*) &annotation_converter : (IStringFieldConverter*) &passthrough_converter, nullptr);
+ MockStringFieldConverter converter;
+ SlimeFiller filler(inserter, &converter, nullptr);
fv.accept(filler);
auto act_null = slime_to_string(slime);
EXPECT_EQ("null", act_null);
@@ -456,15 +340,8 @@ TEST_F(SlimeFillerTest, insert_string)
expect_insert(R"("Foo Bar Baz")", StringFieldValue("Foo Bar Baz"));
}
{
- SCOPED_TRACE("annotated string");
- auto exp = make_exp_il_annotated_string();
- expect_insert(R"("foo bar")", make_annotated_string());
- }
- {
- SCOPED_TRACE("annotated chinese string");
- auto annotated_chinese_string = make_annotated_chinese_string();
- auto exp = annotated_chinese_string.getValue();
- expect_insert(make_slime_string(exp), annotated_chinese_string);
+ SCOPED_TRACE("empty string");
+ expect_insert(R"("")", StringFieldValue());
}
}
@@ -647,28 +524,9 @@ TEST_F(SlimeFillerTest, insert_struct_map)
TEST_F(SlimeFillerTest, insert_string_with_callback)
{
- {
- SCOPED_TRACE("plain string");
- using namespace juniper::separators;
- vespalib::string exp("Foo Bar Baz");
- StringFieldValue plain_string("Foo Bar Baz");
- expect_insert_callback(exp + unit_separator_string, plain_string, true);
- expect_insert_callback(exp, plain_string, false);
- }
- {
- SCOPED_TRACE("annotated string");
- auto exp = make_exp_il_annotated_string();
- auto annotated_string = make_annotated_string();
- expect_insert_callback(exp, annotated_string, true);
- expect_insert_callback("foo bar", annotated_string, false);
- }
- {
- SCOPED_TRACE("annotated chinese string");
- auto exp = make_exp_il_annotated_chinese_string();
- auto annotated_chinese_string = make_annotated_chinese_string();
- expect_insert_callback(exp, annotated_chinese_string, true);
- expect_insert_callback(annotated_chinese_string.getValueRef(), annotated_chinese_string, false);
- }
+ vespalib::string exp("Foo Bar Baz");
+ StringFieldValue plain_string(exp);
+ expect_insert_callback(exp, plain_string);
}
GTEST_MAIN_RUN_ALL_TESTS()