From df058bea840b257ed181054b813315fd1aa31052 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 15 Sep 2022 20:14:20 +0200 Subject: Remove tokenize support in slime filler. --- .../docsummary/slime_filler/slime_filler_test.cpp | 29 ++++++------ .../docsummary/annotation_converter.cpp | 54 ++++++++++++++++------ .../docsummary/annotation_converter.h | 28 +++++------ .../docsummary/docsum_store_document.cpp | 5 +- .../searchsummary/docsummary/slime_filler.cpp | 44 ++++++------------ .../vespa/searchsummary/docsummary/slime_filler.h | 7 ++- .../docsummary/summaryfieldconverter.cpp | 8 ++-- .../docsummary/summaryfieldconverter.h | 2 +- 8 files changed, 93 insertions(+), 84 deletions(-) (limited to 'searchsummary') diff --git a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp index cf4006c5e67..777e42198f7 100644 --- a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp +++ b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -79,6 +80,7 @@ using document::TensorDataType; using document::TensorFieldValue; using document::UrlDataType; using document::WeightedSetFieldValue; +using search::docsummary::AnnotationConverter; using search::docsummary::IDocsumFieldWriterFactory; using search::docsummary::IJuniperConverter; using search::docsummary::DocsumFieldWriter; @@ -212,8 +214,7 @@ protected: ArrayFieldValue make_array(); WeightedSetFieldValue make_weighted_set(); MapFieldValue make_map(); - void expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize, const std::vector* matching_elems); - void expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize); + void expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector* matching_elems); void expect_insert(const vespalib::string& exp, const FieldValue& fv); void expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector& matching_elems); void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize); @@ -331,11 +332,11 @@ SlimeFillerTest::make_map() } void -SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize, const std::vector* matching_elems) +SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector* matching_elems) { Slime slime; SlimeInserter inserter(slime); - SlimeFiller filler(inserter, tokenize, matching_elems); + SlimeFiller filler(inserter, matching_elems); fv.accept(filler); SimpleBuffer buf; JsonFormat::encode(slime, buf, true); @@ -346,19 +347,13 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv void SlimeFillerTest::expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector& matching_elems) { - expect_insert(exp, fv, false, &matching_elems); -} - -void -SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize) -{ - expect_insert(exp, fv, tokenize, nullptr); + expect_insert(exp, fv, &matching_elems); } void SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv) { - expect_insert(exp, fv, false); + expect_insert(exp, fv, nullptr); } void @@ -367,7 +362,8 @@ SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const Field Slime slime; SlimeInserter inserter(slime); MockJuniperConverter converter; - SlimeFiller filler(inserter, tokenize, &converter); + AnnotationConverter stacked_converter(converter); + SlimeFiller filler(inserter, tokenize ? (IJuniperConverter*)&stacked_converter : (IJuniperConverter *) &converter); fv.accept(filler); auto act_null = slime_to_string(slime); EXPECT_EQ("null", act_null); @@ -417,12 +413,13 @@ TEST_F(SlimeFillerTest, insert_string) { SCOPED_TRACE("annotated string"); auto exp = make_exp_il_annotated_string(); - expect_insert(make_slime_string(exp), make_annotated_string(), true); + expect_insert(R"("foo bar")", make_annotated_string()); } { SCOPED_TRACE("annotated chinese string"); - auto exp = make_exp_il_annotated_chinese_string(); - expect_insert(make_slime_string(exp), make_annotated_chinese_string(), true); + auto annotated_chinese_string = make_annotated_chinese_string(); + auto exp = annotated_chinese_string.getValue(); + expect_insert(make_slime_string(exp), annotated_chinese_string); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp index 82f3d086b79..0682f39c602 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp @@ -27,10 +27,10 @@ namespace search::docsummary { namespace { -vespalib::string -getSpanString(const vespalib::string &s, const Span &span) +vespalib::stringref +getSpanString(vespalib::stringref s, const Span &span) { - return vespalib::string(&s[span.from()], &s[span.from() + span.length()]); + return {s.data() + span.from(), static_cast(span.length())}; } struct SpanFinder : SpanTreeVisitor { @@ -78,6 +78,16 @@ const StringFieldValue &ensureStringFieldValue(const FieldValue &value) { } +AnnotationConverter::AnnotationConverter(IJuniperConverter& orig_converter) + : IJuniperConverter(), + _orig_converter(orig_converter), + _text(), + _out() +{ +} + +AnnotationConverter::~AnnotationConverter() = default; + template void AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last) { @@ -85,28 +95,28 @@ AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it, if (annCnt > 1 || (annCnt == 1 && it->second)) { annotateSpans(span, it, last); } else { - out << getSpanString(text, span) << juniper::separators::unit_separator_string; + _out << getSpanString(_text, span) << juniper::separators::unit_separator_string; } } template void AnnotationConverter::annotateSpans(const document::Span& span, ForwardIt it, ForwardIt last) { - out << juniper::separators::interlinear_annotation_anchor_string // ANCHOR - << (getSpanString(text, span)) - << juniper::separators::interlinear_annotation_separator_string; // SEPARATOR + _out << juniper::separators::interlinear_annotation_anchor_string // ANCHOR + << (getSpanString(_text, span)) + << juniper::separators::interlinear_annotation_separator_string; // SEPARATOR while (it != last) { if (it->second) { - out << ensureStringFieldValue(*it->second).getValue(); + _out << ensureStringFieldValue(*it->second).getValue(); } else { - out << getSpanString(text, span); + _out << getSpanString(_text, span); } if (++it != last) { - out << " "; + _out << " "; } } - out << juniper::separators::interlinear_annotation_terminator_string // TERMINATOR - << juniper::separators::unit_separator_string; + _out << juniper::separators::interlinear_annotation_terminator_string // TERMINATOR + << juniper::separators::unit_separator_string; } void @@ -118,7 +128,7 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value) typedef std::vector SpanTermVector; if (!tree) { // Treat a string without annotations as a single span. - SpanTerm str(Span(0, text.size()), + SpanTerm str(Span(0, _text.size()), static_cast(nullptr)); handleAnnotations(str.first, &str, &str + 1); return; @@ -148,11 +158,27 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value) handleAnnotations(it_begin->first, it_begin, it); endPos = it_begin->first.from() + it_begin->first.length(); } - int32_t wantEndPos = text.size(); + int32_t wantEndPos = _text.size(); if (endPos < wantEndPos) { Span tmpSpan(endPos, wantEndPos - endPos); handleAnnotations(tmpSpan, ite, ite); } } +void +AnnotationConverter::insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) +{ + StringFieldValue value(input); + insert_juniper_field(value, inserter); +} + +void +AnnotationConverter::insert_juniper_field(const StringFieldValue &input, vespalib::slime::Inserter& inserter) +{ + _out.clear(); + _text = input.getValueRef(); + handleIndexingTerms(input); + _orig_converter.insert_juniper_field(_out.str(), inserter); +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h index 37e3c18606e..67c5a17aeaf 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h @@ -2,13 +2,10 @@ #pragma once -#include +#include "i_juniper_converter.h" +#include -namespace document -{ -class Span; -class StringFieldValue; -} +namespace document { class Span; } namespace vespalib { class asciistream; } @@ -16,20 +13,25 @@ namespace search::docsummary { /* * Class converting a string field value with annotations into a string - * with interlinear annotations used by juniper. + * with interlinear annotations used by juniper before passing it to + * the original juniper converter. */ -struct AnnotationConverter { - const vespalib::string text; - vespalib::asciistream& out; +class AnnotationConverter : public IJuniperConverter +{ + IJuniperConverter& _orig_converter; + vespalib::stringref _text; + vespalib::asciistream _out; template void handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last); template void annotateSpans(const document::Span& span, ForwardIt it, ForwardIt last); -public: - AnnotationConverter(const vespalib::string& s, vespalib::asciistream& stream) - : text(s), out(stream) {} void handleIndexingTerms(const document::StringFieldValue& value); +public: + AnnotationConverter(IJuniperConverter& orig_converter); + ~AnnotationConverter() override; + void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) override; + void insert_juniper_field(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) override; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp index dca6e6f8bd3..35db818ac58 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp @@ -1,7 +1,7 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "docsum_store_document.h" -#include "check_undefined_value_visitor.h" +#include "annotation_converter.h" #include "summaryfieldconverter.h" #include #include @@ -50,7 +50,8 @@ DocsumStoreDocument::insert_juniper_field(const vespalib::string& field_name, ve { auto field_value = get_field_value(field_name); if (field_value) { - SummaryFieldConverter::insert_juniper_field(*field_value, inserter, true, converter); + AnnotationConverter stacked_converter(converter); + SummaryFieldConverter::insert_juniper_field(*field_value, inserter, stacked_converter); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp index b3d3fde7150..f78a2c12691 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp @@ -65,22 +65,20 @@ private: Cursor& _array; Symbol _key_sym; Symbol _val_sym; - bool _tokenize; public: - MapFieldValueInserter(Inserter& parent_inserter, bool tokenize) + MapFieldValueInserter(Inserter& parent_inserter) : _array(parent_inserter.insertArray()), _key_sym(_array.resolve("key")), - _val_sym(_array.resolve("value")), - _tokenize(tokenize) + _val_sym(_array.resolve("value")) { } void insert_entry(const FieldValue& key, const FieldValue& value) { Cursor& c = _array.addObject(); ObjectSymbolInserter ki(c, _key_sym); ObjectSymbolInserter vi(c, _val_sym); - SlimeFiller key_conv(ki, _tokenize); - SlimeFiller val_conv(vi, _tokenize); + SlimeFiller key_conv(ki); + SlimeFiller val_conv(vi); key.accept(key_conv); value.accept(val_conv); @@ -89,25 +87,22 @@ public: } -SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize) +SlimeFiller::SlimeFiller(Inserter& inserter) : _inserter(inserter), - _tokenize(tokenize), _matching_elems(nullptr), _juniper_converter(nullptr) { } -SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, const std::vector* matching_elems) +SlimeFiller::SlimeFiller(Inserter& inserter, const std::vector* matching_elems) : _inserter(inserter), - _tokenize(tokenize), _matching_elems(matching_elems), _juniper_converter(nullptr) { } -SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, IJuniperConverter* juniper_converter) +SlimeFiller::SlimeFiller(Inserter& inserter, IJuniperConverter* juniper_converter) : _inserter(inserter), - _tokenize(tokenize), _matching_elems(nullptr), _juniper_converter(juniper_converter) { @@ -141,7 +136,7 @@ SlimeFiller::visit(const MapFieldValue& v) if (empty_or_empty_after_filtering(v)) { return; } - MapFieldValueInserter map_inserter(_inserter, _tokenize); + MapFieldValueInserter map_inserter(_inserter); if (filter_matching_elements()) { assert(v.has_no_erased_keys()); for (uint32_t id_to_keep : (*_matching_elems)) { @@ -163,7 +158,7 @@ SlimeFiller::visit(const ArrayFieldValue& value) } Cursor& a = _inserter.insertArray(); ArrayInserter ai(a); - SlimeFiller conv(ai, _tokenize, _juniper_converter); + SlimeFiller conv(ai, _juniper_converter); if (filter_matching_elements()) { for (uint32_t id_to_keep : (*_matching_elems)) { value[id_to_keep].accept(conv); @@ -178,21 +173,10 @@ SlimeFiller::visit(const ArrayFieldValue& value) void SlimeFiller::visit(const StringFieldValue& value) { - if (_tokenize) { - asciistream tmp; - AnnotationConverter converter(value.getValue(), tmp); - converter.handleIndexingTerms(value); - if (_juniper_converter != nullptr) { - _juniper_converter->insert_juniper_field(tmp.str(), _inserter); - } else { - _inserter.insertString(Memory(tmp.str())); - } + if (_juniper_converter != nullptr) { + _juniper_converter->insert_juniper_field(value, _inserter); } else { - if (_juniper_converter != nullptr) { - _juniper_converter->insert_juniper_field(value, _inserter); - } else { - _inserter.insertString(Memory(value.getValueRef())); - } + _inserter.insertString(Memory(value.getValueRef())); } } @@ -284,7 +268,7 @@ SlimeFiller::visit(const StructFieldValue& value) for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) { Memory keymem(itr.field().getName()); ObjectInserter vi(c, keymem); - SlimeFiller conv(vi, _tokenize); + SlimeFiller conv(vi); FieldValue::UP nextValue(value.getValue(itr.field())); (*nextValue).accept(conv); } @@ -318,7 +302,7 @@ SlimeFiller::visit(const WeightedSetFieldValue& value) } Cursor& o = a.addObject(); ObjectSymbolInserter ki(o, isym); - SlimeFiller conv(ki, _tokenize); + SlimeFiller conv(ki); entry.first->accept(conv); int weight = static_cast(*entry.second).getValue(); o.setLong(wsym, weight); diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h index ebade8aa711..11b62c95a4d 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h @@ -18,7 +18,6 @@ class IJuniperConverter; class SlimeFiller : public document::ConstFieldValueVisitor { vespalib::slime::Inserter& _inserter; - bool _tokenize; const std::vector* _matching_elems; IJuniperConverter* _juniper_converter; @@ -50,9 +49,9 @@ class SlimeFiller : public document::ConstFieldValueVisitor { void visit(const document::TensorFieldValue& value) override; void visit(const document::ReferenceFieldValue& value) override; public: - SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize); - SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, const std::vector* matching_elems); - SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter* juniper_converter); + SlimeFiller(vespalib::slime::Inserter& inserter); + SlimeFiller(vespalib::slime::Inserter& inserter, const std::vector* matching_elems); + SlimeFiller(vespalib::slime::Inserter& inserter, IJuniperConverter* juniper_converter); ~SlimeFiller() override; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp index d55e00a1905..9031a0299aa 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp @@ -15,7 +15,7 @@ SummaryFieldConverter::insert_summary_field(const FieldValue& value, vespalib::s CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, false); + SlimeFiller visitor(inserter); value.accept(visitor); } } @@ -26,18 +26,18 @@ SummaryFieldConverter::insert_summary_field_with_filter(const FieldValue& value, CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, false, &matching_elems); + SlimeFiller visitor(inserter, &matching_elems); value.accept(visitor); } } void -SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter) +SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, IJuniperConverter& converter) { CheckUndefinedValueVisitor check_undefined; value.accept(check_undefined); if (!check_undefined.is_undefined()) { - SlimeFiller visitor(inserter, tokenize, &converter); + SlimeFiller visitor(inserter, &converter); value.accept(visitor); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h index 425b357d355..3524a1c0e02 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h @@ -24,7 +24,7 @@ public: * Insert the given field value, but only the elements that are contained in the matching_elems vector. */ static void insert_summary_field_with_filter(const document::FieldValue& value, vespalib::slime::Inserter& inserter, const std::vector& matching_elems); - static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter); + static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, IJuniperConverter& converter); }; } -- cgit v1.2.3