summaryrefslogtreecommitdiffstats
path: root/searchsummary
diff options
context:
space:
mode:
authorTor Egge <Tor.Egge@online.no>2022-09-15 20:14:20 +0200
committerTor Egge <Tor.Egge@online.no>2022-09-15 20:14:20 +0200
commitdf058bea840b257ed181054b813315fd1aa31052 (patch)
treeb23377b23a228ab38687132793350d4998c81c65 /searchsummary
parentd7c4706c5ab23025d6fb33a0a8f2193943ae4be9 (diff)
Remove tokenize support in slime filler.
Diffstat (limited to 'searchsummary')
-rw-r--r--searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp29
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp54
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h28
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp5
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp44
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h7
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp8
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h2
8 files changed, 93 insertions, 84 deletions
diff --git a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
index cf4006c5e67..777e42198f7 100644
--- a/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
+++ b/searchsummary/src/tests/docsummary/slime_filler/slime_filler_test.cpp
@@ -32,6 +32,7 @@
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/value_codec.h>
#include <vespa/juniper/juniper_separators.h>
+#include <vespa/searchsummary/docsummary/annotation_converter.h>
#include <vespa/searchsummary/docsummary/docsum_field_writer.h>
#include <vespa/searchsummary/docsummary/i_docsum_field_writer_factory.h>
#include <vespa/searchsummary/docsummary/i_juniper_converter.h>
@@ -79,6 +80,7 @@ using document::TensorDataType;
using document::TensorFieldValue;
using document::UrlDataType;
using document::WeightedSetFieldValue;
+using search::docsummary::AnnotationConverter;
using search::docsummary::IDocsumFieldWriterFactory;
using search::docsummary::IJuniperConverter;
using search::docsummary::DocsumFieldWriter;
@@ -212,8 +214,7 @@ protected:
ArrayFieldValue make_array();
WeightedSetFieldValue make_weighted_set();
MapFieldValue make_map();
- void expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize, const std::vector<uint32_t>* matching_elems);
- void expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize);
+ void expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>* matching_elems);
void expect_insert(const vespalib::string& exp, const FieldValue& fv);
void expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>& matching_elems);
void expect_insert_callback(const vespalib::string& exp, const FieldValue& fv, bool tokenize);
@@ -331,11 +332,11 @@ SlimeFillerTest::make_map()
}
void
-SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize, const std::vector<uint32_t>* matching_elems)
+SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>* matching_elems)
{
Slime slime;
SlimeInserter inserter(slime);
- SlimeFiller filler(inserter, tokenize, matching_elems);
+ SlimeFiller filler(inserter, matching_elems);
fv.accept(filler);
SimpleBuffer buf;
JsonFormat::encode(slime, buf, true);
@@ -346,19 +347,13 @@ SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv
void
SlimeFillerTest::expect_insert_filtered(const vespalib::string& exp, const FieldValue& fv, const std::vector<uint32_t>& matching_elems)
{
- expect_insert(exp, fv, false, &matching_elems);
-}
-
-void
-SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv, bool tokenize)
-{
- expect_insert(exp, fv, tokenize, nullptr);
+ expect_insert(exp, fv, &matching_elems);
}
void
SlimeFillerTest::expect_insert(const vespalib::string& exp, const FieldValue& fv)
{
- expect_insert(exp, fv, false);
+ expect_insert(exp, fv, nullptr);
}
void
@@ -367,7 +362,8 @@ SlimeFillerTest::expect_insert_callback(const vespalib::string& exp, const Field
Slime slime;
SlimeInserter inserter(slime);
MockJuniperConverter converter;
- SlimeFiller filler(inserter, tokenize, &converter);
+ AnnotationConverter stacked_converter(converter);
+ SlimeFiller filler(inserter, tokenize ? (IJuniperConverter*)&stacked_converter : (IJuniperConverter *) &converter);
fv.accept(filler);
auto act_null = slime_to_string(slime);
EXPECT_EQ("null", act_null);
@@ -417,12 +413,13 @@ TEST_F(SlimeFillerTest, insert_string)
{
SCOPED_TRACE("annotated string");
auto exp = make_exp_il_annotated_string();
- expect_insert(make_slime_string(exp), make_annotated_string(), true);
+ expect_insert(R"("foo bar")", make_annotated_string());
}
{
SCOPED_TRACE("annotated chinese string");
- auto exp = make_exp_il_annotated_chinese_string();
- expect_insert(make_slime_string(exp), make_annotated_chinese_string(), true);
+ auto annotated_chinese_string = make_annotated_chinese_string();
+ auto exp = annotated_chinese_string.getValue();
+ expect_insert(make_slime_string(exp), annotated_chinese_string);
}
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
index 82f3d086b79..0682f39c602 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.cpp
@@ -27,10 +27,10 @@ namespace search::docsummary {
namespace {
-vespalib::string
-getSpanString(const vespalib::string &s, const Span &span)
+vespalib::stringref
+getSpanString(vespalib::stringref s, const Span &span)
{
- return vespalib::string(&s[span.from()], &s[span.from() + span.length()]);
+ return {s.data() + span.from(), static_cast<size_t>(span.length())};
}
struct SpanFinder : SpanTreeVisitor {
@@ -78,6 +78,16 @@ const StringFieldValue &ensureStringFieldValue(const FieldValue &value) {
}
+AnnotationConverter::AnnotationConverter(IJuniperConverter& orig_converter)
+ : IJuniperConverter(),
+ _orig_converter(orig_converter),
+ _text(),
+ _out()
+{
+}
+
+AnnotationConverter::~AnnotationConverter() = default;
+
template <typename ForwardIt>
void
AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last) {
@@ -85,28 +95,28 @@ AnnotationConverter::handleAnnotations(const document::Span& span, ForwardIt it,
if (annCnt > 1 || (annCnt == 1 && it->second)) {
annotateSpans(span, it, last);
} else {
- out << getSpanString(text, span) << juniper::separators::unit_separator_string;
+ _out << getSpanString(_text, span) << juniper::separators::unit_separator_string;
}
}
template <typename ForwardIt>
void
AnnotationConverter::annotateSpans(const document::Span& span, ForwardIt it, ForwardIt last) {
- out << juniper::separators::interlinear_annotation_anchor_string // ANCHOR
- << (getSpanString(text, span))
- << juniper::separators::interlinear_annotation_separator_string; // SEPARATOR
+ _out << juniper::separators::interlinear_annotation_anchor_string // ANCHOR
+ << (getSpanString(_text, span))
+ << juniper::separators::interlinear_annotation_separator_string; // SEPARATOR
while (it != last) {
if (it->second) {
- out << ensureStringFieldValue(*it->second).getValue();
+ _out << ensureStringFieldValue(*it->second).getValue();
} else {
- out << getSpanString(text, span);
+ _out << getSpanString(_text, span);
}
if (++it != last) {
- out << " ";
+ _out << " ";
}
}
- out << juniper::separators::interlinear_annotation_terminator_string // TERMINATOR
- << juniper::separators::unit_separator_string;
+ _out << juniper::separators::interlinear_annotation_terminator_string // TERMINATOR
+ << juniper::separators::unit_separator_string;
}
void
@@ -118,7 +128,7 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value)
typedef std::vector<SpanTerm> SpanTermVector;
if (!tree) {
// Treat a string without annotations as a single span.
- SpanTerm str(Span(0, text.size()),
+ SpanTerm str(Span(0, _text.size()),
static_cast<const FieldValue*>(nullptr));
handleAnnotations(str.first, &str, &str + 1);
return;
@@ -148,11 +158,27 @@ AnnotationConverter::handleIndexingTerms(const StringFieldValue& value)
handleAnnotations(it_begin->first, it_begin, it);
endPos = it_begin->first.from() + it_begin->first.length();
}
- int32_t wantEndPos = text.size();
+ int32_t wantEndPos = _text.size();
if (endPos < wantEndPos) {
Span tmpSpan(endPos, wantEndPos - endPos);
handleAnnotations(tmpSpan, ite, ite);
}
}
+void
+AnnotationConverter::insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter)
+{
+ StringFieldValue value(input);
+ insert_juniper_field(value, inserter);
+}
+
+void
+AnnotationConverter::insert_juniper_field(const StringFieldValue &input, vespalib::slime::Inserter& inserter)
+{
+ _out.clear();
+ _text = input.getValueRef();
+ handleIndexingTerms(input);
+ _orig_converter.insert_juniper_field(_out.str(), inserter);
+}
+
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h
index 37e3c18606e..67c5a17aeaf 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/annotation_converter.h
@@ -2,13 +2,10 @@
#pragma once
-#include <vespa/vespalib/stllike/string.h>
+#include "i_juniper_converter.h"
+#include <vespa/vespalib/stllike/asciistream.h>
-namespace document
-{
-class Span;
-class StringFieldValue;
-}
+namespace document { class Span; }
namespace vespalib { class asciistream; }
@@ -16,20 +13,25 @@ namespace search::docsummary {
/*
* Class converting a string field value with annotations into a string
- * with interlinear annotations used by juniper.
+ * with interlinear annotations used by juniper before passing it to
+ * the original juniper converter.
*/
-struct AnnotationConverter {
- const vespalib::string text;
- vespalib::asciistream& out;
+class AnnotationConverter : public IJuniperConverter
+{
+ IJuniperConverter& _orig_converter;
+ vespalib::stringref _text;
+ vespalib::asciistream _out;
template <typename ForwardIt>
void handleAnnotations(const document::Span& span, ForwardIt it, ForwardIt last);
template <typename ForwardIt>
void annotateSpans(const document::Span& span, ForwardIt it, ForwardIt last);
-public:
- AnnotationConverter(const vespalib::string& s, vespalib::asciistream& stream)
- : text(s), out(stream) {}
void handleIndexingTerms(const document::StringFieldValue& value);
+public:
+ AnnotationConverter(IJuniperConverter& orig_converter);
+ ~AnnotationConverter() override;
+ void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) override;
+ void insert_juniper_field(const document::StringFieldValue &input, vespalib::slime::Inserter& inserter) override;
};
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp
index dca6e6f8bd3..35db818ac58 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp
@@ -1,7 +1,7 @@
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "docsum_store_document.h"
-#include "check_undefined_value_visitor.h"
+#include "annotation_converter.h"
#include "summaryfieldconverter.h"
#include <vespa/document/base/exceptions.h>
#include <vespa/document/datatype/datatype.h>
@@ -50,7 +50,8 @@ DocsumStoreDocument::insert_juniper_field(const vespalib::string& field_name, ve
{
auto field_value = get_field_value(field_name);
if (field_value) {
- SummaryFieldConverter::insert_juniper_field(*field_value, inserter, true, converter);
+ AnnotationConverter stacked_converter(converter);
+ SummaryFieldConverter::insert_juniper_field(*field_value, inserter, stacked_converter);
}
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp
index b3d3fde7150..f78a2c12691 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp
@@ -65,22 +65,20 @@ private:
Cursor& _array;
Symbol _key_sym;
Symbol _val_sym;
- bool _tokenize;
public:
- MapFieldValueInserter(Inserter& parent_inserter, bool tokenize)
+ MapFieldValueInserter(Inserter& parent_inserter)
: _array(parent_inserter.insertArray()),
_key_sym(_array.resolve("key")),
- _val_sym(_array.resolve("value")),
- _tokenize(tokenize)
+ _val_sym(_array.resolve("value"))
{
}
void insert_entry(const FieldValue& key, const FieldValue& value) {
Cursor& c = _array.addObject();
ObjectSymbolInserter ki(c, _key_sym);
ObjectSymbolInserter vi(c, _val_sym);
- SlimeFiller key_conv(ki, _tokenize);
- SlimeFiller val_conv(vi, _tokenize);
+ SlimeFiller key_conv(ki);
+ SlimeFiller val_conv(vi);
key.accept(key_conv);
value.accept(val_conv);
@@ -89,25 +87,22 @@ public:
}
-SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize)
+SlimeFiller::SlimeFiller(Inserter& inserter)
: _inserter(inserter),
- _tokenize(tokenize),
_matching_elems(nullptr),
_juniper_converter(nullptr)
{
}
-SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems)
+SlimeFiller::SlimeFiller(Inserter& inserter, const std::vector<uint32_t>* matching_elems)
: _inserter(inserter),
- _tokenize(tokenize),
_matching_elems(matching_elems),
_juniper_converter(nullptr)
{
}
-SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, IJuniperConverter* juniper_converter)
+SlimeFiller::SlimeFiller(Inserter& inserter, IJuniperConverter* juniper_converter)
: _inserter(inserter),
- _tokenize(tokenize),
_matching_elems(nullptr),
_juniper_converter(juniper_converter)
{
@@ -141,7 +136,7 @@ SlimeFiller::visit(const MapFieldValue& v)
if (empty_or_empty_after_filtering(v)) {
return;
}
- MapFieldValueInserter map_inserter(_inserter, _tokenize);
+ MapFieldValueInserter map_inserter(_inserter);
if (filter_matching_elements()) {
assert(v.has_no_erased_keys());
for (uint32_t id_to_keep : (*_matching_elems)) {
@@ -163,7 +158,7 @@ SlimeFiller::visit(const ArrayFieldValue& value)
}
Cursor& a = _inserter.insertArray();
ArrayInserter ai(a);
- SlimeFiller conv(ai, _tokenize, _juniper_converter);
+ SlimeFiller conv(ai, _juniper_converter);
if (filter_matching_elements()) {
for (uint32_t id_to_keep : (*_matching_elems)) {
value[id_to_keep].accept(conv);
@@ -178,21 +173,10 @@ SlimeFiller::visit(const ArrayFieldValue& value)
void
SlimeFiller::visit(const StringFieldValue& value)
{
- if (_tokenize) {
- asciistream tmp;
- AnnotationConverter converter(value.getValue(), tmp);
- converter.handleIndexingTerms(value);
- if (_juniper_converter != nullptr) {
- _juniper_converter->insert_juniper_field(tmp.str(), _inserter);
- } else {
- _inserter.insertString(Memory(tmp.str()));
- }
+ if (_juniper_converter != nullptr) {
+ _juniper_converter->insert_juniper_field(value, _inserter);
} else {
- if (_juniper_converter != nullptr) {
- _juniper_converter->insert_juniper_field(value, _inserter);
- } else {
- _inserter.insertString(Memory(value.getValueRef()));
- }
+ _inserter.insertString(Memory(value.getValueRef()));
}
}
@@ -284,7 +268,7 @@ SlimeFiller::visit(const StructFieldValue& value)
for (StructFieldValue::const_iterator itr = value.begin(); itr != value.end(); ++itr) {
Memory keymem(itr.field().getName());
ObjectInserter vi(c, keymem);
- SlimeFiller conv(vi, _tokenize);
+ SlimeFiller conv(vi);
FieldValue::UP nextValue(value.getValue(itr.field()));
(*nextValue).accept(conv);
}
@@ -318,7 +302,7 @@ SlimeFiller::visit(const WeightedSetFieldValue& value)
}
Cursor& o = a.addObject();
ObjectSymbolInserter ki(o, isym);
- SlimeFiller conv(ki, _tokenize);
+ SlimeFiller conv(ki);
entry.first->accept(conv);
int weight = static_cast<const IntFieldValue&>(*entry.second).getValue();
o.setLong(wsym, weight);
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h
index ebade8aa711..11b62c95a4d 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h
@@ -18,7 +18,6 @@ class IJuniperConverter;
class SlimeFiller : public document::ConstFieldValueVisitor {
vespalib::slime::Inserter& _inserter;
- bool _tokenize;
const std::vector<uint32_t>* _matching_elems;
IJuniperConverter* _juniper_converter;
@@ -50,9 +49,9 @@ class SlimeFiller : public document::ConstFieldValueVisitor {
void visit(const document::TensorFieldValue& value) override;
void visit(const document::ReferenceFieldValue& value) override;
public:
- SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize);
- SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems);
- SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter* juniper_converter);
+ SlimeFiller(vespalib::slime::Inserter& inserter);
+ SlimeFiller(vespalib::slime::Inserter& inserter, const std::vector<uint32_t>* matching_elems);
+ SlimeFiller(vespalib::slime::Inserter& inserter, IJuniperConverter* juniper_converter);
~SlimeFiller() override;
};
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
index d55e00a1905..9031a0299aa 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp
@@ -15,7 +15,7 @@ SummaryFieldConverter::insert_summary_field(const FieldValue& value, vespalib::s
CheckUndefinedValueVisitor check_undefined;
value.accept(check_undefined);
if (!check_undefined.is_undefined()) {
- SlimeFiller visitor(inserter, false);
+ SlimeFiller visitor(inserter);
value.accept(visitor);
}
}
@@ -26,18 +26,18 @@ SummaryFieldConverter::insert_summary_field_with_filter(const FieldValue& value,
CheckUndefinedValueVisitor check_undefined;
value.accept(check_undefined);
if (!check_undefined.is_undefined()) {
- SlimeFiller visitor(inserter, false, &matching_elems);
+ SlimeFiller visitor(inserter, &matching_elems);
value.accept(visitor);
}
}
void
-SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter)
+SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, IJuniperConverter& converter)
{
CheckUndefinedValueVisitor check_undefined;
value.accept(check_undefined);
if (!check_undefined.is_undefined()) {
- SlimeFiller visitor(inserter, tokenize, &converter);
+ SlimeFiller visitor(inserter, &converter);
value.accept(visitor);
}
}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
index 425b357d355..3524a1c0e02 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h
@@ -24,7 +24,7 @@ public:
* Insert the given field value, but only the elements that are contained in the matching_elems vector.
*/
static void insert_summary_field_with_filter(const document::FieldValue& value, vespalib::slime::Inserter& inserter, const std::vector<uint32_t>& matching_elems);
- static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter);
+ static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, IJuniperConverter& converter);
};
}