From 1483005e6f4b93b4126fda31f5e21006ca68b923 Mon Sep 17 00:00:00 2001 From: Tor Egge Date: Thu, 14 Jul 2022 13:59:09 +0200 Subject: Handle juniper input not being stored in docsum blob. --- .../vespa/searchsummary/docsummary/CMakeLists.txt | 1 + .../searchsummary/docsummary/dynamicteaserdfw.cpp | 28 ++++++++++++-------- .../searchsummary/docsummary/juniper_input.cpp | 30 ++++++++++++++++++++++ .../vespa/searchsummary/docsummary/juniper_input.h | 26 +++++++++++++++++++ .../vespa/searchsummary/docsummary/juniperdfw.h | 9 ++++--- 5 files changed, 80 insertions(+), 14 deletions(-) create mode 100644 searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp create mode 100644 searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h (limited to 'searchsummary') diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt index 6f89e95c641..f27bac80d5b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt +++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt @@ -19,6 +19,7 @@ vespa_add_library(searchsummary_docsummary OBJECT general_result.cpp geoposdfw.cpp getdocsumargs.cpp + juniper_input.cpp juniperproperties.cpp keywordextractor.cpp linguisticsannotation.cpp diff --git a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp index 57adbcc8163..024046b679b 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/dynamicteaserdfw.cpp @@ -3,6 +3,8 @@ #include "juniperdfw.h" #include "docsumwriter.h" #include "docsumstate.h" +#include "i_docsum_store_document.h" +#include #include #include #include @@ -282,10 +284,11 @@ JuniperQueryAdapter::Traverse(juniper::IQueryVisitor *v) const } JuniperDFW::JuniperDFW(juniper::Juniper * juniper) - : _inputFieldEnumValue(static_cast(-1)) - , _juniperConfig() - , _langFieldEnumValue(static_cast(-1)) - , _juniper(juniper) + : _inputFieldEnumValue(static_cast(-1)), + _input_field_name(), + _juniperConfig(), + _langFieldEnumValue(static_cast(-1)), + _juniper(juniper) { } @@ -310,6 +313,7 @@ JuniperDFW::Init( } _inputFieldEnumValue = enums.Lookup(inputField); + _input_field_name = inputField; if (_inputFieldEnumValue >= enums.GetNumEntries()) { LOG(warning, "no docsum format contains field '%s'; dynamic teasers will be empty", @@ -344,7 +348,7 @@ JuniperTeaserDFW::Init( return rc; } -vespalib::stringref +JuniperInput DynamicTeaserDFW::getJuniperInput(GeneralResult *gres) { int idx = gres->GetClass()->GetIndexFromEnumValue(_inputFieldEnumValue); ResEntry *entry = gres->GetPresentEntry(idx); @@ -352,9 +356,13 @@ DynamicTeaserDFW::getJuniperInput(GeneralResult *gres) { const char *buf; uint32_t buflen; entry->_resolve_field(&buf, &buflen); - return vespalib::stringref(buf, buflen); + return JuniperInput(vespalib::stringref(buf, buflen)); } - return vespalib::stringref(); + const auto* document = gres->get_document(); + if (document != nullptr) { + return JuniperInput(document->get_field_value(_input_field_name).get()); + } + return JuniperInput(vespalib::stringref()); } vespalib::string @@ -428,9 +436,9 @@ void DynamicTeaserDFW::insertField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state, ResType, vespalib::slime::Inserter &target) { - vespalib::stringref input = getJuniperInput(gres); - if (input.length() > 0) { - vespalib::string teaser = makeDynamicTeaser(docid, input, state); + auto input = getJuniperInput(gres); + if (!input.empty()) { + vespalib::string teaser = makeDynamicTeaser(docid, input.get_value(), state); vespalib::Memory value(teaser.c_str(), teaser.size()); target.insertString(value); } diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp b/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp new file mode 100644 index 00000000000..a9f265b0842 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.cpp @@ -0,0 +1,30 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "juniper_input.h" +#include "summaryfieldconverter.h" +#include + +namespace search::docsummary { + +JuniperInput::JuniperInput(vespalib::stringref value) + : _field_value_with_markup(), + _value(value) +{ +} + +JuniperInput::JuniperInput(const document::FieldValue* value) + : _field_value_with_markup(), + _value() +{ + if (value != nullptr) { + _field_value_with_markup = SummaryFieldConverter::convertSummaryField(true, *value); + } + if (_field_value_with_markup && _field_value_with_markup->isA(document::FieldValue::Type::STRING)) { + const auto& string_field_value_with_markup = static_cast(*_field_value_with_markup); + _value = string_field_value_with_markup.getValueRef(); + } +} + +JuniperInput::~JuniperInput() = default; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h b/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h new file mode 100644 index 00000000000..b9c3ee169e6 --- /dev/null +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniper_input.h @@ -0,0 +1,26 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include +#include + +namespace document { class FieldValue; } + +namespace search::docsummary { + +/* + * Class containing input for juniper processing. + */ +class JuniperInput { + std::unique_ptr _field_value_with_markup; + vespalib::stringref _value; +public: + JuniperInput(vespalib::stringref value); + JuniperInput(const document::FieldValue* value); + ~JuniperInput(); + bool empty() const noexcept { return _value.empty(); } + vespalib::stringref get_value() const noexcept { return _value; }; +}; + +} diff --git a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h index 5e2ec517a47..790f07dd237 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h +++ b/searchsummary/src/vespa/searchsummary/docsummary/juniperdfw.h @@ -5,6 +5,7 @@ #include "general_result.h" #include "resultconfig.h" #include "docsum_field_writer.h" +#include "juniper_input.h" #include #include #include @@ -24,6 +25,7 @@ protected: ~JuniperDFW() override; uint32_t _inputFieldEnumValue; + vespalib::string _input_field_name; std::unique_ptr _juniperConfig; uint32_t _langFieldEnumValue; juniper::Juniper *_juniper; @@ -46,13 +48,12 @@ protected: class DynamicTeaserDFW : public JuniperTeaserDFW { -public: - DynamicTeaserDFW(juniper::Juniper * juniper) : JuniperTeaserDFW(juniper) { } - - vespalib::stringref getJuniperInput(GeneralResult *gres); + JuniperInput getJuniperInput(GeneralResult *gres); vespalib::string makeDynamicTeaser(uint32_t docid, vespalib::stringref input, GetDocsumsState *state); +public: + DynamicTeaserDFW(juniper::Juniper * juniper) : JuniperTeaserDFW(juniper) { } void insertField(uint32_t docid, GeneralResult *gres, GetDocsumsState *state, ResType type, vespalib::slime::Inserter &target) override; -- cgit v1.2.3