summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGeir Storli <geirst@yahooinc.com>2023-10-30 20:16:56 +0100
committerGitHub <noreply@github.com>2023-10-30 20:16:56 +0100
commit31ebd909589a19235d912feff443e46ac9a068f1 (patch)
tree2bb1587ea797477b5453266037c90123ca2a6579
parent4bd554338d34e4c4884b7314ab7b06f5f061aa46 (diff)
parentec13a0f724937be9df01896f3b7ef52071fb2a4c (diff)
Merge pull request #29157 from vespa-engine/toregge/add-attribute-tokens-dfw
Add attribute tokens dfw.
-rw-r--r--searchlib/src/vespa/searchcommon/attribute/iattributevector.h7
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.cpp6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.h1
-rw-r--r--searchsummary/CMakeLists.txt1
-rw-r--r--searchsummary/src/tests/docsummary/attribute_tokens_dfw/CMakeLists.txt10
-rw-r--r--searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp97
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt2
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp179
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.h30
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp11
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp1
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h1
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp18
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_state.h2
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.cpp18
-rw-r--r--searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.h21
-rw-r--r--searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.cpp17
-rw-r--r--searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.h7
18 files changed, 412 insertions, 17 deletions
diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
index d613bf61a16..b44b6a4baf2 100644
--- a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
+++ b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h
@@ -419,6 +419,13 @@ public:
*/
virtual bool isImported() const = 0;
+ /**
+ * Returns whether the match setting for the attribute is uncased.
+ * This is only relevant for string attributes (i.e. when isStringType()
+ * returns true). The default for string attributes is uncased matching.
+ * This base implementation always returns true.
+ */
+ virtual bool has_uncased_matching() const noexcept { return true; }
+
/**
* Will serialize the values for the documentid in ascending order. The serialized form can be used by memcmp and
* sortorder will be preserved.
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index ac8178f8afa..0e261059777 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -283,6 +283,12 @@ StringAttribute::get_match_is_cased() const noexcept {
return getConfig().get_match() == attribute::Config::Match::CASED;
}
+// Returns true exactly when get_match_is_cased() returns false.
+bool
+StringAttribute::has_uncased_matching() const noexcept
+{
+ return !get_match_is_cased();
+}
+
template bool AttributeVector::clearDoc(StringAttribute::ChangeVector& changes, DocId doc);
template bool AttributeVector::update(StringAttribute::ChangeVector& changes, DocId doc, const StringChangeData& v);
template bool AttributeVector::append(StringAttribute::ChangeVector& changes, DocId doc, const StringChangeData& v, int32_t w, bool doCount);
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index 1440b945428..e65b181f37d 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -72,6 +72,7 @@ protected:
vespalib::MemoryUsage getChangeVectorMemoryUsage() const override;
bool get_match_is_cased() const noexcept;
+ bool has_uncased_matching() const noexcept override;
long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override;
private:
diff --git a/searchsummary/CMakeLists.txt b/searchsummary/CMakeLists.txt
index f771a8e4494..a5d65721aeb 100644
--- a/searchsummary/CMakeLists.txt
+++ b/searchsummary/CMakeLists.txt
@@ -19,6 +19,7 @@ vespa_define_module(
src/tests/docsummary/annotation_converter
src/tests/docsummary/attribute_combiner
src/tests/docsummary/attributedfw
+ src/tests/docsummary/attribute_tokens_dfw
src/tests/docsummary/document_id_dfw
src/tests/docsummary/tokens_converter
src/tests/docsummary/matched_elements_filter
diff --git a/searchsummary/src/tests/docsummary/attribute_tokens_dfw/CMakeLists.txt b/searchsummary/src/tests/docsummary/attribute_tokens_dfw/CMakeLists.txt
new file mode 100644
index 00000000000..adcb18585d0
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/attribute_tokens_dfw/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+vespa_add_executable(searchsummary_attribute_tokens_dfw_test_app TEST
+ SOURCES
+ attribute_tokens_dfw_test.cpp
+ DEPENDS
+ searchsummary
+ searchsummary_test
+ GTest::GTest
+)
+vespa_add_test(NAME searchsummary_attribute_tokens_dfw_test_app COMMAND searchsummary_attribute_tokens_dfw_test_app)
diff --git a/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp b/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp
new file mode 100644
index 00000000000..bac817077c4
--- /dev/null
+++ b/searchsummary/src/tests/docsummary/attribute_tokens_dfw/attribute_tokens_dfw_test.cpp
@@ -0,0 +1,97 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include <vespa/searchsummary/docsummary/attribute_tokens_dfw.h>
+#include <vespa/searchsummary/test/mock_attribute_manager.h>
+#include <vespa/searchsummary/test/mock_state_callback.h>
+#include <vespa/searchsummary/test/slime_value.h>
+#include <vespa/vespalib/gtest/gtest.h>
+
+#include <vespa/log/log.h>
+LOG_SETUP("attribute_tokens_dfw_test");
+
+using search::attribute::CollectionType;
+using search::docsummary::AttributeTokensDFW;
+using search::docsummary::GetDocsumsState;
+using search::docsummary::DocsumFieldWriter;
+using search::docsummary::test::MockAttributeManager;
+using search::docsummary::test::MockStateCallback;
+using search::docsummary::test::SlimeValue;
+
+/*
+ * Test fixture that populates a mock attribute manager with string
+ * attributes (array, cased array, weighted set and single value) and
+ * compares the slime written by an AttributeTokensDFW with an expected
+ * JSON rendering.
+ */
+class AttributeTokensDFWTest : public ::testing::Test {
+protected:
+    MockAttributeManager _attrs;
+    std::unique_ptr<DocsumFieldWriter> _writer;
+    MockStateCallback _callback;
+    GetDocsumsState _state;
+    std::shared_ptr<search::MatchingElementsFields> _matching_elems_fields;
+    vespalib::string _field_name;
+
+public:
+    AttributeTokensDFWTest()
+        : _attrs(),
+          _writer(),
+          _callback(),
+          _state(_callback),
+          _matching_elems_fields(),
+          _field_name()
+    {
+        // Only "cased_array_str" overrides the match setting (uncased = false).
+        _attrs.build_string_attribute("array_str", { {"This", "is", "A TEST"}, {} });
+        _attrs.build_string_attribute("cased_array_str", { {"CASING", "Matters here" }, {} }, CollectionType::ARRAY, false);
+        _attrs.build_string_attribute("wset_str", { {"This is", "b", "C"}, {} }, CollectionType::WSET);
+        _attrs.build_string_attribute("single_str", { {"Hello World"}, {} }, CollectionType::SINGLE);
+        _state._attrCtx = _attrs.mgr().createContext();
+    }
+    ~AttributeTokensDFWTest() override = default;
+
+    /*
+     * Creates the writer under test for field_name and wires the looked up
+     * attribute into slot 0 of the docsum state, which is the slot the
+     * writer is told to use (field index 0, field writer state index 0).
+     */
+    void setup(const vespalib::string& field_name) {
+        _writer = std::make_unique<AttributeTokensDFW>(field_name);
+        _writer->setIndex(0);
+        auto attr = _state._attrCtx->getAttribute(field_name);
+        // Report a mistyped attribute name here rather than leaving it to surface
+        // later when the writer uses the stored pointer.
+        EXPECT_TRUE(attr != nullptr) << "attribute not found: " << field_name.c_str();
+        EXPECT_TRUE(_writer->setFieldWriterStateIndex(0));
+        _state._fieldWriterStates.resize(1);
+        _field_name = field_name;
+        _state._attributes.resize(1);
+        _state._attributes[0] = attr;
+    }
+
+    /*
+     * Writes the field for docid into a fresh slime object, unless the writer
+     * reports the value as default, and compares the result with the slime
+     * parsed from exp_slime_as_json.
+     */
+    void expect_field(const vespalib::string& exp_slime_as_json, uint32_t docid) {
+        vespalib::Slime act;
+        vespalib::slime::SlimeInserter inserter(act);
+        if (!_writer->isDefaultValue(docid, _state)) {
+            _writer->insertField(docid, nullptr, _state, inserter);
+        }
+
+        SlimeValue exp(exp_slime_as_json);
+        EXPECT_EQ(exp.slime, act);
+    }
+};
+
+// Array attribute built without an explicit match setting: every value is expected
+// lowercased and wrapped in its own single-element array. Docid 2 is expected to
+// leave the slime untouched (null).
+TEST_F(AttributeTokensDFWTest, outputs_slime_for_array_of_string)
+{
+    setup("array_str");
+    expect_field("[ ['this' ], [ 'is' ], [ 'a test' ] ]", 1);
+    expect_field("null", 2);
+}
+
+// Array attribute built with uncased = false: the values are expected to keep their
+// original casing. Docid 2 is expected to leave the slime untouched (null).
+TEST_F(AttributeTokensDFWTest, outputs_slime_for_cased_array_of_string)
+{
+    setup("cased_array_str");
+    expect_field("[ ['CASING' ], [ 'Matters here' ] ]", 1);
+    expect_field("null", 2);
+}
+
+// Weighted set attribute built without an explicit match setting: the expected output
+// has the same shape as for arrays (one lowercased single-element array per value).
+// Docid 2 is expected to leave the slime untouched (null).
+TEST_F(AttributeTokensDFWTest, outputs_slime_for_wset_of_string)
+{
+    setup("wset_str");
+    expect_field("[ ['this is'], [ 'b' ], [ 'c' ] ]", 1);
+    expect_field("null", 2);
+}
+
+// Single value attribute: the value is expected as one lowercased single-element array
+// without an enclosing outer array. Unlike the multi-value cases, docid 2 is expected
+// to produce an array holding an empty string rather than null.
+TEST_F(AttributeTokensDFWTest, single_string)
+{
+    setup("single_str");
+    expect_field("[ 'hello world' ]", 1);
+    expect_field("[ '' ]", 2);
+}
+
+GTEST_MAIN_RUN_ALL_TESTS()
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
index 0287517f830..84ceb18ef6f 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
+++ b/searchsummary/src/vespa/searchsummary/docsummary/CMakeLists.txt
@@ -5,6 +5,7 @@ vespa_add_library(searchsummary_docsummary OBJECT
array_attribute_combiner_dfw.cpp
attribute_combiner_dfw.cpp
attribute_field_writer.cpp
+ attribute_tokens_dfw.cpp
attributedfw.cpp
check_undefined_value_visitor.cpp
copy_dfw.cpp
@@ -17,6 +18,7 @@ vespa_add_library(searchsummary_docsummary OBJECT
document_id_dfw.cpp
dynamicteaserdfw.cpp
empty_dfw.cpp
+ empty_docsum_field_writer_state.cpp
geoposdfw.cpp
getdocsumargs.cpp
juniper_dfw_query_item.cpp
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp
new file mode 100644
index 00000000000..9e0dafc5e91
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.cpp
@@ -0,0 +1,179 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "attribute_tokens_dfw.h"
+#include "docsumstate.h"
+#include "empty_docsum_field_writer_state.h"
+#include <vespa/searchcommon/attribute/iattributevector.h>
+#include <vespa/searchcommon/attribute/i_multi_value_attribute.h>
+#include <vespa/vespalib/data/slime/slime.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/util/stash.h>
+
+using search::attribute::IAttributeVector;
+using search::attribute::BasicType;
+using search::attribute::IMultiValueAttribute;
+using search::attribute::IMultiValueReadView;
+using vespalib::LowerCase;
+using vespalib::Utf8Reader;
+using vespalib::Utf8Writer;
+using vespalib::slime::ArrayInserter;
+using vespalib::slime::Cursor;
+using vespalib::slime::Inserter;
+
+namespace search::docsummary {
+
+namespace {
+
+/*
+ * Asks the attribute for its multi-value interface and, when present,
+ * requests a read view over its values as const char* using the supplied
+ * stash. Returns nullptr when the attribute has no multi-value interface.
+ */
+const IMultiValueReadView<const char*>*
+make_read_view(const IAttributeVector& attribute, vespalib::Stash& stash)
+{
+    auto multi_value = attribute.as_multi_value_attribute();
+    if (multi_value == nullptr) {
+        return nullptr;
+    }
+    IMultiValueAttribute::MultiValueTag<const char*> string_tag;
+    return multi_value->make_read_view(string_tag, stash);
+}
+
+/*
+ * Inserts value as a single-element array of strings via inserter.
+ * When lowercase is set, the value is decoded as UTF-8, each character is
+ * lowercased and the result is re-encoded into scratch before insertion;
+ * scratch is cleared first, so its previous content is discarded.
+ * Otherwise the value is inserted unchanged and scratch is left untouched.
+ */
+void
+insert_value(vespalib::stringref value, Inserter& inserter, vespalib::string& scratch, bool lowercase)
+{
+    Cursor& token_array = inserter.insertArray(1);
+    ArrayInserter token_inserter(token_array);
+    if (!lowercase) {
+        token_inserter.insertString(value);
+        return;
+    }
+    scratch.clear();
+    Utf8Reader reader(value);
+    Utf8Writer writer(scratch);
+    while (reader.hasMore()) {
+        writer.putChar(LowerCase::convert(reader.getChar()));
+    }
+    token_inserter.insertString(scratch);
+}
+
+}
+
+/*
+ * Field writer state used for multi-value string attributes. Writes one
+ * single-element array per value of the document into an outer array.
+ */
+class MultiAttributeTokensDFWState : public DocsumFieldWriterState
+{
+    // Read view over the attribute values; nullptr when no view could be made.
+    const IMultiValueReadView<const char*>* _read_view;
+    // Buffer reused between values when lowercasing.
+    vespalib::string _lowercase_scratch;
+    // Whether values are lowercased before being written.
+    bool _lowercase;
+public:
+    MultiAttributeTokensDFWState(const IAttributeVector& attr, vespalib::Stash& stash);
+    ~MultiAttributeTokensDFWState() override;
+    void insertField(uint32_t docid, Inserter& target) override;
+};
+
+// Creates the read view for attr using stash and enables lowercasing when the
+// attribute reports uncased matching.
+MultiAttributeTokensDFWState::MultiAttributeTokensDFWState(const IAttributeVector& attr, vespalib::Stash& stash)
+    : DocsumFieldWriterState(),
+      _read_view(make_read_view(attr, stash)),
+      _lowercase_scratch(),
+      _lowercase(attr.has_uncased_matching())
+{
+}
+
+// Defaulted out of line.
+MultiAttributeTokensDFWState::~MultiAttributeTokensDFWState() = default;
+
+/*
+ * Writes all values for docid as an array where each value is wrapped in
+ * its own single-element array. Nothing is written when there is no read
+ * view or when the document has no values.
+ */
+void
+MultiAttributeTokensDFWState::insertField(uint32_t docid, Inserter& target)
+{
+    if (_read_view == nullptr) {
+        return;
+    }
+    auto values = _read_view->get_values(docid);
+    if (values.empty()) {
+        return;
+    }
+    Cursor& outer_array = target.insertArray(values.size());
+    ArrayInserter outer_inserter(outer_array);
+    for (const auto& value : values) {
+        insert_value(value, outer_inserter, _lowercase_scratch, _lowercase);
+    }
+}
+
+/*
+ * Field writer state used for single-value string attributes. Writes the
+ * value of the document as one single-element array.
+ */
+class SingleAttributeTokensDFWState : public DocsumFieldWriterState
+{
+    // Attribute the values are read from; held by reference, not owned.
+    const IAttributeVector& _attr;
+    // Buffer reused between documents when lowercasing.
+    vespalib::string _lowercase_scratch;
+    // Whether the value is lowercased before being written.
+    bool _lowercase;
+public:
+    // explicit: an attribute vector must never convert implicitly into a writer state.
+    explicit SingleAttributeTokensDFWState(const IAttributeVector& attr);
+    ~SingleAttributeTokensDFWState() override;
+    void insertField(uint32_t docid, Inserter& target) override;
+};
+
+// Keeps a reference to attr and enables lowercasing when the attribute reports
+// uncased matching.
+SingleAttributeTokensDFWState::SingleAttributeTokensDFWState(const IAttributeVector& attr)
+    : DocsumFieldWriterState(),
+      _attr(attr),
+      _lowercase_scratch(),
+      _lowercase(attr.has_uncased_matching())
+{
+}
+
+// Defaulted out of line.
+SingleAttributeTokensDFWState::~SingleAttributeTokensDFWState() = default;
+
+/*
+ * Reads the raw value for docid from the attribute and writes it as a
+ * single-element array. The value is written unconditionally; there is no
+ * check for an empty value.
+ */
+void
+SingleAttributeTokensDFWState::insertField(uint32_t docid, Inserter& target)
+{
+    auto raw = _attr.get_raw(docid);
+    vespalib::stringref value(raw.data(), raw.size());
+    insert_value(value, target, _lowercase_scratch, _lowercase);
+}
+
+/*
+ * Creates the field writer state matching attr in the given stash:
+ * a multi-value or single-value token state for string attributes, and
+ * the no-op EmptyDocsumFieldWriterState for every other basic type.
+ */
+DocsumFieldWriterState*
+make_field_writer_state(const IAttributeVector& attr, vespalib::Stash& stash)
+{
+    if (attr.getBasicType() != BasicType::Type::STRING) {
+        return &stash.create<EmptyDocsumFieldWriterState>();
+    }
+    if (attr.hasMultiValue()) {
+        return &stash.create<MultiAttributeTokensDFWState>(attr, stash);
+    }
+    return &stash.create<SingleAttributeTokensDFWState>(attr);
+}
+
+// Creates a writer that reads from the attribute named input_field_name.
+AttributeTokensDFW::AttributeTokensDFW(const vespalib::string& input_field_name)
+    : DocsumFieldWriter(),
+      _input_field_name(input_field_name),
+      // Defined value until setFieldWriterStateIndex() assigns the real slot;
+      // insertField() uses this member as an index and must never read garbage.
+      _state_index(0)
+{
+}
+
+// Defaulted out of line.
+AttributeTokensDFW::~AttributeTokensDFW() = default;
+
+// Returns the input field name given at construction.
+const vespalib::string&
+AttributeTokensDFW::getAttributeName() const
+{
+    return _input_field_name;
+}
+
+// Always returns true.
+bool
+AttributeTokensDFW::isGenerated() const
+{
+    return true;
+}
+
+// Stores the index later used by insertField() to look up this writer's entry in
+// GetDocsumsState::_fieldWriterStates. Always returns true.
+bool
+AttributeTokensDFW::setFieldWriterStateIndex(uint32_t fieldWriterStateIndex)
+{
+    _state_index = fieldWriterStateIndex;
+    return true;
+}
+
+/*
+ * Writes the tokens of the attribute value(s) for docid into target.
+ * The field writer state is created on first use and stored in
+ * state._fieldWriterStates at the index given to setFieldWriterStateIndex(),
+ * so later calls with the same state reuse it. The document parameter is
+ * not used.
+ */
+void
+AttributeTokensDFW::insertField(uint32_t docid, const IDocsumStoreDocument*, GetDocsumsState& state, vespalib::slime::Inserter& target) const
+{
+    auto& field_writer_state = state._fieldWriterStates[_state_index];
+    if (!field_writer_state) {
+        auto& stash = state.get_stash();
+        auto attr = state.getAttribute(getIndex());
+        if (attr != nullptr) {
+            field_writer_state = make_field_writer_state(*attr, stash);
+        } else {
+            // NOTE(review): assumes getAttribute() yields nullptr when the attribute
+            // is unavailable - confirm. Writing nothing beats dereferencing null.
+            field_writer_state = &stash.create<EmptyDocsumFieldWriterState>();
+        }
+    }
+    field_writer_state->insertField(docid, target);
+}
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.h b/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.h
new file mode 100644
index 00000000000..53bb3d73290
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/attribute_tokens_dfw.h
@@ -0,0 +1,30 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "docsum_field_writer.h"
+
+namespace search::docsummary {
+
+/*
+ * Class for writing values from a string attribute vector as arrays
+ * containing the tokens. The string values are not split but they are
+ * lowercased if the string attribute vector uses uncased matching.
+ */
+class AttributeTokensDFW : public DocsumFieldWriter
+{
+private:
+    vespalib::string _input_field_name; // name of the attribute to read from
+    uint32_t _state_index; // index into _fieldWriterStates in GetDocsumsState
+
+protected:
+    const vespalib::string & getAttributeName() const override;
+public:
+    // explicit: a field name must never convert implicitly into a field writer.
+    explicit AttributeTokensDFW(const vespalib::string& input_field_name);
+    ~AttributeTokensDFW() override;
+    bool isGenerated() const override;
+    bool setFieldWriterStateIndex(uint32_t fieldWriterStateIndex) override;
+    void insertField(uint32_t docid, const IDocsumStoreDocument* doc, GetDocsumsState& state, vespalib::slime::Inserter& target) const override;
+};
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp
index 4ec406b7cf0..0fc26387c00 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp
@@ -4,6 +4,7 @@
#include "docsumwriter.h"
#include "docsumstate.h"
#include "docsum_field_writer_state.h"
+#include "empty_docsum_field_writer_state.h"
#include <vespa/eval/eval/value.h>
#include <vespa/eval/eval/value_codec.h>
#include <vespa/searchcommon/attribute/i_multi_value_attribute.h>
@@ -129,14 +130,6 @@ make_read_view(const IAttributeVector& attribute, vespalib::Stash& stash)
return nullptr;
}
-class EmptyWriterState : public DocsumFieldWriterState
-{
-public:
- EmptyWriterState() = default;
- ~EmptyWriterState() = default;
- void insertField(uint32_t, Inserter&) override { }
-};
-
template <typename MultiValueType>
class MultiAttrDFWState : public DocsumFieldWriterState
{
@@ -302,7 +295,7 @@ make_field_writer_state(const vespalib::string& field_name, const IAttributeVect
default:
;
}
- return &stash.create<EmptyWriterState>();
+ return &stash.create<EmptyDocsumFieldWriterState>();
}
void
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp
index 2ac5d1babbf..d3fc71b3173 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.cpp
@@ -7,6 +7,7 @@ namespace search::docsummary::command {
const vespalib::string abs_distance("absdist");
const vespalib::string attribute("attribute");
const vespalib::string attribute_combiner("attributecombiner");
+const vespalib::string attribute_tokens("attribute-tokens");
const vespalib::string copy("copy");
const vespalib::string documentid("documentid");
const vespalib::string dynamic_teaser("dynamicteaser");
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h
index d53351d8b04..d77416f2df5 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_commands.h
@@ -13,6 +13,7 @@ namespace search::docsummary::command {
extern const vespalib::string abs_distance;
extern const vespalib::string attribute;
extern const vespalib::string attribute_combiner;
+extern const vespalib::string attribute_tokens;
extern const vespalib::string copy;
extern const vespalib::string documentid;
extern const vespalib::string dynamic_teaser;
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp
index 2f7d9acdb65..28a2c34ca87 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_factory.cpp
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "attribute_combiner_dfw.h"
+#include "attribute_tokens_dfw.h"
#include "copy_dfw.h"
#include "docsum_field_writer_commands.h"
#include "docsum_field_writer_factory.h"
@@ -16,8 +17,10 @@
#include "tokens_dfw.h"
#include <vespa/searchlib/common/matching_elements_fields.h>
#include <vespa/vespalib/util/exceptions.h>
+#include <vespa/vespalib/util/issue.h>
using vespalib::IllegalArgumentException;
+using vespalib::Issue;
namespace search::docsummary {
@@ -91,6 +94,21 @@ DocsumFieldWriterFactory::create_docsum_field_writer(const vespalib::string& fie
} else {
throw_missing_source(command);
}
+ } else if (command == command::attribute_tokens) {
+ if (!source.empty()) {
+ if (has_attribute_manager()) {
+ auto ctx = getEnvironment().getAttributeManager()->createContext();
+ const auto* attr = ctx->getAttribute(source);
+ if (attr == nullptr) {
+ Issue::report("No valid attribute vector found: field='%s', command='%s', source='%s'",
+ field_name.c_str(), command.c_str(), source.c_str());
+ } else {
+ fieldWriter = std::make_unique<AttributeTokensDFW>(source);
+ }
+ }
+ } else {
+ throw_missing_source(command);
+ }
} else if (command == command::abs_distance) {
if (has_attribute_manager()) {
fieldWriter = AbsDistanceDFW::create(source.c_str(), getEnvironment().getAttributeManager());
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_state.h b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_state.h
index efacd0b1a49..ce151d82fc0 100644
--- a/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_state.h
+++ b/searchsummary/src/vespa/searchsummary/docsummary/docsum_field_writer_state.h
@@ -2,6 +2,8 @@
#pragma once
+#include <cstdint>
+
namespace vespalib::slime { struct Inserter; }
namespace search::docsummary {
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.cpp b/searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.cpp
new file mode 100644
index 00000000000..b279430a367
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.cpp
@@ -0,0 +1,18 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "empty_docsum_field_writer_state.h"
+
+using vespalib::slime::Inserter;
+
+namespace search::docsummary {
+
+// Constructor and destructor are defaulted out of line.
+EmptyDocsumFieldWriterState::EmptyDocsumFieldWriterState() = default;
+
+EmptyDocsumFieldWriterState::~EmptyDocsumFieldWriterState() = default;
+
+// Intentionally empty: both arguments are ignored and nothing is written.
+void
+EmptyDocsumFieldWriterState::insertField(uint32_t, Inserter&)
+{
+}
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.h b/searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.h
new file mode 100644
index 00000000000..ff8319b52d1
--- /dev/null
+++ b/searchsummary/src/vespa/searchsummary/docsummary/empty_docsum_field_writer_state.h
@@ -0,0 +1,21 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include "docsum_field_writer_state.h"
+
+namespace search::docsummary {
+
+/*
+ * Class used as fallback when no suitable field writer state could be
+ * instantiated. insertField() is a noop.
+ */
+class EmptyDocsumFieldWriterState : public DocsumFieldWriterState
+{
+public:
+    EmptyDocsumFieldWriterState();
+    ~EmptyDocsumFieldWriterState() override;
+    // Ignores both arguments and writes nothing.
+    void insertField(uint32_t, vespalib::slime::Inserter&) override;
+};
+
+}
diff --git a/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.cpp b/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.cpp
index 557ba8f4cde..4d73420b523 100644
--- a/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.cpp
+++ b/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.cpp
@@ -20,9 +20,13 @@ template <typename AttributeType, typename ValueType>
void
MockAttributeManager::build_attribute(const vespalib::string& name, BasicType type,
CollectionType col_type,
- const std::vector<std::vector<ValueType>>& values)
+ const std::vector<std::vector<ValueType>>& values,
+ std::optional<bool> uncased)
{
Config cfg(type, col_type);
+ if (uncased.has_value()) {
+ cfg.set_match(uncased.value() ? Config::Match::UNCASED : Config::Match::CASED);
+ }
auto attr_base = AttributeFactory::createAttribute(name, cfg);
assert(attr_base);
auto attr = std::dynamic_pointer_cast<AttributeType>(attr_base);
@@ -55,9 +59,10 @@ MockAttributeManager::~MockAttributeManager() = default;
void
MockAttributeManager::build_string_attribute(const vespalib::string& name,
const std::vector<std::vector<vespalib::string>>& values,
- CollectionType col_type)
+ CollectionType col_type,
+ std::optional<bool> uncased)
{
- build_attribute<StringAttribute, vespalib::string>(name, BasicType::Type::STRING, col_type, values);
+ build_attribute<StringAttribute, vespalib::string>(name, BasicType::Type::STRING, col_type, values, uncased);
}
void
@@ -65,7 +70,7 @@ MockAttributeManager::build_float_attribute(const vespalib::string& name,
const std::vector<std::vector<double>>& values,
CollectionType col_type)
{
- build_attribute<FloatingPointAttribute, double>(name, BasicType::Type::DOUBLE, col_type, values);
+ build_attribute<FloatingPointAttribute, double>(name, BasicType::Type::DOUBLE, col_type, values, std::nullopt);
}
void
@@ -73,14 +78,14 @@ MockAttributeManager::build_int_attribute(const vespalib::string& name, BasicTyp
const std::vector<std::vector<int64_t>>& values,
CollectionType col_type)
{
- build_attribute<IntegerAttribute, int64_t>(name, type, col_type, values);
+ build_attribute<IntegerAttribute, int64_t>(name, type, col_type, values, std::nullopt);
}
void
MockAttributeManager::build_raw_attribute(const vespalib::string& name,
const std::vector<std::vector<std::vector<char>>>& values)
{
- build_attribute<SingleRawAttribute, std::vector<char>>(name, BasicType::Type::RAW, CollectionType::SINGLE, values);
+ build_attribute<SingleRawAttribute, std::vector<char>>(name, BasicType::Type::RAW, CollectionType::SINGLE, values, std::nullopt);
}
}
diff --git a/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.h b/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.h
index 69762a37f95..ae105025826 100644
--- a/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.h
+++ b/searchsummary/src/vespa/searchsummary/test/mock_attribute_manager.h
@@ -2,6 +2,7 @@
#include <vespa/searchcommon/attribute/basictype.h>
#include <vespa/searchlib/attribute/attributemanager.h>
+#include <optional>
namespace search::docsummary::test {
@@ -15,7 +16,8 @@ private:
template <typename AttributeType, typename ValueType>
void build_attribute(const vespalib::string& name, search::attribute::BasicType type,
search::attribute::CollectionType col_type,
- const std::vector<std::vector<ValueType>>& values);
+ const std::vector<std::vector<ValueType>>& values,
+ std::optional<bool> uncased);
public:
MockAttributeManager();
@@ -24,7 +26,8 @@ public:
void build_string_attribute(const vespalib::string& name,
const std::vector<std::vector<vespalib::string>>& values,
- search::attribute::CollectionType col_type = search::attribute::CollectionType::ARRAY);
+ search::attribute::CollectionType col_type = search::attribute::CollectionType::ARRAY,
+ std::optional<bool> uncased = std::nullopt);
void build_float_attribute(const vespalib::string& name,
const std::vector<std::vector<double>>& values,
search::attribute::CollectionType col_type = search::attribute::CollectionType::ARRAY);