diff options
author | Tor Egge <Tor.Egge@online.no> | 2022-09-12 14:20:07 +0200 |
---|---|---|
committer | Tor Egge <Tor.Egge@online.no> | 2022-09-12 14:20:07 +0200 |
commit | 3a2b527c645d3cb858bbe5f7f9b812e947e7cdce (patch) | |
tree | c50d6b7f8c7e5998e147a032d2ddac66c883a759 /searchsummary | |
parent | 5dc2018a6c910e94b44e415fadebda3f95267130 (diff) |
Add callback interface for rendering string field value via juniper.
Diffstat (limited to 'searchsummary')
13 files changed, 126 insertions, 101 deletions
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index ad65532b02f..6aba9614e73 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -20,7 +20,6 @@ vespa_add_library(searchsummary_docsummary OBJECT getdocsumargs.cpp juniper_dfw_query_item.cpp juniper_dfw_term_visitor.cpp - juniper_input.cpp juniper_query_adapter.cpp juniperproperties.cpp keywordextractor.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp index 85ee1aa302d..dca6e6f8bd3 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.cpp @@ -21,42 +21,36 @@ DocsumStoreFieldValue DocsumStoreDocument::get_field_value(const vespalib::string& field_name) const { if (_document) { - const document::Field& field = _document->getField(field_name); - auto value(field.getDataType().createFieldValue()); - if (value) { - try { + try { + const document::Field& field = _document->getField(field_name); + auto value(field.getDataType().createFieldValue()); + if (value) { if (_document->getValue(field, *value)) { return DocsumStoreFieldValue(std::move(value)); } - } catch (document::FieldNotFoundException&) { - // Field was not found in document type. Return empty value. } + } catch (document::FieldNotFoundException&) { + // Field was not found in document type. Return empty value. } } return DocsumStoreFieldValue(); } -JuniperInput -DocsumStoreDocument::get_juniper_input(const vespalib::string& field_name) const +void +DocsumStoreDocument::insert_summary_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter) const { auto field_value = get_field_value(field_name); if (field_value) { - auto field_value_with_markup = SummaryFieldConverter::convertSummaryField(true, *field_value); - return JuniperInput(DocsumStoreFieldValue(std::move(field_value_with_markup))); + SummaryFieldConverter::insert_summary_field(*field_value, inserter); } - return {}; } void -DocsumStoreDocument::insert_summary_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter) const +DocsumStoreDocument::insert_juniper_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter, IJuniperConverter& converter) const { - try { - auto field_value = get_field_value(field_name); - if (field_value) { - SummaryFieldConverter::insert_summary_field(*field_value, inserter); - } - } catch (document::FieldNotFoundException&) { - // Field was not found in document type. Don't insert anything. + auto field_value = get_field_value(field_name); + if (field_value) { + SummaryFieldConverter::insert_juniper_field(*field_value, inserter, true, converter); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.h index 76d152e1548..825b5fae81c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_store_document.h @@ -18,8 +18,8 @@ public: explicit DocsumStoreDocument(std::unique_ptr<document::Document> document); ~DocsumStoreDocument() override; DocsumStoreFieldValue get_field_value(const vespalib::string& field_name) const override; - JuniperInput get_juniper_input(const vespalib::string& field_name) const override; void insert_summary_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter) const override; + void insert_juniper_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter, IJuniperConverter& converter) const override; void insert_document_id(vespalib::slime::Inserter& inserter) const override; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index 591f053d1cb..456af5a5dd5 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -3,6 +3,7 @@ #include "juniperdfw.h" #include "docsumstate.h" #include "i_docsum_store_document.h" +#include "i_juniper_converter.h" #include "juniper_query_adapter.h" #include <vespa/vespalib/objects/hexdump.h> #include <vespa/juniper/config.h> @@ -50,14 +51,14 @@ JuniperTeaserDFW::Init( return JuniperDFW::Init(fieldName, inputField); } -vespalib::string -DynamicTeaserDFW::makeDynamicTeaser(uint32_t docid, vespalib::stringref input, GetDocsumsState *state) const +void +DynamicTeaserDFW::insert_juniper_field(uint32_t docid, vespalib::stringref input, GetDocsumsState& state, vespalib::slime::Inserter& inserter) const { - if (!state->_dynteaser._query) { - JuniperQueryAdapter iq(state->_kwExtractor, - state->_args.getStackDump(), - &state->_args.highlightTerms()); - state->_dynteaser._query = _juniper->CreateQueryHandle(iq, nullptr); + if (!state._dynteaser._query) { + JuniperQueryAdapter iq(state._kwExtractor, + state._args.getStackDump(), + &state._args.highlightTerms()); + state._dynteaser._query = _juniper->CreateQueryHandle(iq, nullptr); } LOG(debug, "makeDynamicTeaser: docid (%d)", @@ -65,7 +66,7 @@ DynamicTeaserDFW::makeDynamicTeaser(uint32_t docid, vespalib::stringref input, G std::unique_ptr<juniper::Result> result; - if (state->_dynteaser._query != nullptr) { + if (state._dynteaser._query != nullptr) { if (LOG_WOULD_LOG(spam)) { std::ostringstream hexDump; @@ -76,7 +77,7 @@ DynamicTeaserDFW::makeDynamicTeaser(uint32_t docid, vespalib::stringref input, G auto langid = static_cast<uint32_t>(-1); - result = juniper::Analyse(*_juniperConfig, *state->_dynteaser._query, + result = juniper::Analyse(*_juniperConfig, *state._dynteaser._query, input.data(), input.length(), docid, langid); } @@ -95,23 +96,49 @@ DynamicTeaserDFW::makeDynamicTeaser(uint32_t docid, vespalib::stringref input, G } if (teaser != nullptr) { - return {teaser->Text(), teaser->Length()}; - } else { - return {}; + inserter.insertString({teaser->Text(), teaser->Length()}); } } +namespace { + +class JuniperConverter : public IJuniperConverter +{ + const DynamicTeaserDFW& _writer; + uint32_t _doc_id; + GetDocsumsState& _state; + +public: + JuniperConverter(const DynamicTeaserDFW& writer, uint32_t doc_id, GetDocsumsState& state); + ~JuniperConverter() override; + void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) override; +}; + +JuniperConverter::JuniperConverter(const DynamicTeaserDFW& writer, uint32_t doc_id, GetDocsumsState& state) + : IJuniperConverter(), + _writer(writer), + _doc_id(doc_id), + _state(state) +{ +} + +JuniperConverter::~JuniperConverter() = default; + +void +JuniperConverter::insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) +{ + _writer.insert_juniper_field(_doc_id, input, _state, inserter); +} + +} + void DynamicTeaserDFW::insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) const { if (doc != nullptr) { - auto input = doc->get_juniper_input(_input_field_name); - if (!input.empty()) { - vespalib::string teaser = makeDynamicTeaser(docid, input.get_value(), state); - vespalib::Memory value(teaser.c_str(), teaser.size()); - target.insertString(value); - } + JuniperConverter converter(*this, docid, *state); + doc->insert_juniper_field(_input_field_name, target, converter); } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_store_document.h b/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_store_document.h index a4d1d7a0097..7d38ae18f37 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_store_document.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_docsum_store_document.h @@ -3,13 +3,14 @@ #pragma once #include "docsum_store_field_value.h" -#include "juniper_input.h" #include <vespa/vespalib/stllike/string.h> namespace vespalib::slime { struct Inserter; } namespace search::docsummary { +class IJuniperConverter; + /** * Interface class providing access to a document retrieved from an * IDocsumStore. Some implementations (e.g. DocsumStoreVsmDocument) might @@ -20,8 +21,8 @@ class IDocsumStoreDocument public: virtual ~IDocsumStoreDocument() = default; virtual DocsumStoreFieldValue get_field_value(const vespalib::string& field_name) const = 0; - virtual JuniperInput get_juniper_input(const vespalib::string& field_name) const = 0; virtual void insert_summary_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter) const = 0; + virtual void insert_juniper_field(const vespalib::string& field_name, vespalib::slime::Inserter& inserter, IJuniperConverter& converter) const = 0; virtual void insert_document_id(vespalib::slime::Inserter& inserter) const = 0; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h b/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h new file mode 100644 index 00000000000..f0b8ec4309f --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/i_juniper_converter.h @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include <vespa/vespalib/stllike/string.h> + +namespace vespalib::slime { struct Inserter; } + +namespace search::docsummary { + +/** + * Interface class for inserting a dynamic string based on an + * annotated full string and query context. + */ +class IJuniperConverter +{ +public: + virtual ~IJuniperConverter() = default; + virtual void insert_juniper_field(vespalib::stringref input, vespalib::slime::Inserter& inserter) = 0; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp b/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp deleted file mode 100644 index 3dfd5a7116a..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp +++ /dev/null @@ -1,27 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#include "juniper_input.h" -#include "summaryfieldconverter.h" -#include <vespa/document/fieldvalue/stringfieldvalue.h> - -namespace search::docsummary { - -JuniperInput::JuniperInput() - : _field_value_with_markup(), - _value() -{ -} - -JuniperInput::JuniperInput(DocsumStoreFieldValue value) - : _field_value_with_markup(std::move(value)), - _value() -{ - if (_field_value_with_markup && _field_value_with_markup->isA(document::FieldValue::Type::STRING)) { - const auto& string_field_value_with_markup = static_cast<const document::StringFieldValue&>(*_field_value_with_markup); - _value = string_field_value_with_markup.getValueRef(); - } -} - -JuniperInput::~JuniperInput() = default; - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h b/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h deleted file mode 100644 index be7da50f99a..00000000000 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -#pragma once - -#include "docsum_store_field_value.h" -#include <vespa/vespalib/stllike/string.h> - -namespace document { class FieldValue; } - -namespace search::docsummary { - -/* - * Class containing input for juniper processing. - */ -class JuniperInput { - DocsumStoreFieldValue _field_value_with_markup; - vespalib::stringref _value; -public: - JuniperInput(); - explicit JuniperInput(DocsumStoreFieldValue value); - ~JuniperInput(); - bool empty() const noexcept { return _value.empty(); } - vespalib::stringref get_value() const noexcept { return _value; }; -}; - -} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h index 63642ed7543..7dcf3d16e26 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h @@ -45,14 +45,12 @@ protected: class DynamicTeaserDFW : public JuniperTeaserDFW { - vespalib::string makeDynamicTeaser(uint32_t docid, - vespalib::stringref input, - GetDocsumsState *state) const; public: explicit DynamicTeaserDFW(const juniper::Juniper * juniper) : JuniperTeaserDFW(juniper) { } void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) const override; + void insert_juniper_field(uint32_t docid, vespalib::stringref input, GetDocsumsState& state, vespalib::slime::Inserter& inserter) const; }; } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp index 7d31af552b1..5e3bc22aa69 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.cpp @@ -2,6 +2,7 @@ #include "slime_filler.h" #include "annotation_converter.h" +#include "i_juniper_converter.h" #include "resultconfig.h" #include "searchdatatype.h" #include <vespa/document/datatype/positiondatatype.h> @@ -91,14 +92,24 @@ public: SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize) : _inserter(inserter), _tokenize(tokenize), - _matching_elems(nullptr) + _matching_elems(nullptr), + _juniper_converter(nullptr) { } SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems) : _inserter(inserter), _tokenize(tokenize), - _matching_elems(matching_elems) + _matching_elems(matching_elems), + _juniper_converter(nullptr) +{ +} + +SlimeFiller::SlimeFiller(Inserter& inserter, bool tokenize, IJuniperConverter& juniper_converter) + : _inserter(inserter), + _tokenize(tokenize), + _matching_elems(nullptr), + _juniper_converter(&juniper_converter) { } @@ -171,9 +182,17 @@ SlimeFiller::visit(const StringFieldValue& value) asciistream tmp; AnnotationConverter converter(value.getValue(), tmp); converter.handleIndexingTerms(value); - _inserter.insertString(Memory(tmp.str())); + if (_juniper_converter != nullptr) { + _juniper_converter->insert_juniper_field(tmp.str(), _inserter); + } else { + _inserter.insertString(Memory(tmp.str())); + } } else { - _inserter.insertString(Memory(value.getValue())); + if (_juniper_converter != nullptr) { + _juniper_converter->insert_juniper_field(value.getValue(), _inserter); + } else { + _inserter.insertString(Memory(value.getValue())); + } } } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h index e7d05ced3cf..4627c1be3c7 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/slime_filler.h @@ -10,6 +10,8 @@ namespace vespalib::slime { struct Inserter; } namespace search::docsummary { +class IJuniperConverter; + /* * Class inserting a field value into a slime object. */ @@ -18,6 +20,7 @@ class SlimeFiller : public document::ConstFieldValueVisitor { vespalib::slime::Inserter& _inserter; bool _tokenize; const std::vector<uint32_t>* _matching_elems; + IJuniperConverter* _juniper_converter; bool filter_matching_elements() const { return _matching_elems != nullptr; @@ -49,6 +52,7 @@ class SlimeFiller : public document::ConstFieldValueVisitor { public: SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize); SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, const std::vector<uint32_t>* matching_elems); + SlimeFiller(vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& juniper_converter); ~SlimeFiller() override; }; diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp index 78d9f7d10a1..4679c8d096c 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.cpp @@ -224,4 +224,15 @@ SummaryFieldConverter::insert_summary_field_with_filter(const FieldValue& value, } } +void +SummaryFieldConverter::insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter) +{ + CheckUndefinedValueVisitor check_undefined; + value.accept(check_undefined); + if (!check_undefined.is_undefined()) { + SlimeFiller visitor(inserter, tokenize, converter); + value.accept(visitor); + } +} + } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h index 2408e3c5b68..924ec6f402e 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/summaryfieldconverter.h @@ -8,6 +8,8 @@ namespace vespalib::slime { struct Inserter; } namespace search::docsummary { +class IJuniperConverter; + /** * This class converts a summary field for docsum fetching. */ @@ -25,6 +27,7 @@ public: * Insert the given field value, but only the elements that are contained in the matching_elems vector. */ static void insert_summary_field_with_filter(const document::FieldValue& value, vespalib::slime::Inserter& inserter, const std::vector<uint32_t>& matching_elems); + static void insert_juniper_field(const document::FieldValue& value, vespalib::slime::Inserter& inserter, bool tokenize, IJuniperConverter& converter); }; } |