diff options
field | value | date |
---|---|---|
author | Henning Baldersheim <balder@yahoo-inc.com> | 2023-03-07 13:01:06 +0100 |
committer | GitHub <noreply@github.com> | 2023-03-07 13:01:06 +0100 |
commit | c1285a18fc5629e4ded7d189401cb0fea5a646b7 (patch) | |
tree | c275bbf60c88d0204acf5a1263a4439313ed87a8 | |
parent | bdd1269927b38f8f72d9fa767110c5ed3eb35407 (diff) | |
parent | e2c778f1f53b8f60e6bd0b5260ce672e8bd1a062 (diff) | |
Merge pull request #26325 from vespa-engine/balder/handle-raw-attributes-in-grouping
- Handle single value raw attributes in grouping.
19 files changed, 71 insertions, 51 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 20373fbb3a9..e217e8c8533 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -339,6 +339,9 @@ testSingleValue(Attribute & svsa, Config &cfg) for (uint32_t j = i - 9; j <= i; ++j) { snprintf(tmp, sizeof(tmp), "enum%u", j % 10); EXPECT_TRUE( strcmp(t = v.get(j), tmp) == 0 ); + auto raw = v.get_raw(j); + EXPECT_EQUAL(strlen(tmp), raw.size()); + EXPECT_EQUAL(0, memcmp(raw.data(), tmp, raw.size())); e1 = v.getEnum(j); EXPECT_TRUE( v.findEnum(t, e2) ); EXPECT_TRUE( e1 == e2 ); diff --git a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h index 34bf6f49cba..884d34c78c6 100644 --- a/searchlib/src/vespa/searchcommon/attribute/iattributevector.h +++ b/searchlib/src/vespa/searchcommon/attribute/iattributevector.h @@ -377,7 +377,11 @@ public: virtual bool isPredicateType() const { return getBasicType() == BasicType::PREDICATE; } virtual bool isTensorType() const { return getBasicType() == BasicType::TENSOR; } virtual bool isReferenceType() const { return getBasicType() == BasicType::REFERENCE; } - virtual bool is_raw_type() const noexcept { return getBasicType() == BasicType::RAW; } + virtual bool is_raw_type() const noexcept { + BasicType::Type t = getBasicType(); + return t == BasicType::RAW || + t == BasicType::STRING; + } /** * Returns whether this is a multi value attribute. 
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 55c01964502..2800d7c3f6d 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -86,9 +86,10 @@ StringAttribute::getFloat(DocId doc) const { } vespalib::ConstArrayRef<char> -StringAttribute::get_raw(DocId) const +StringAttribute::get_raw(DocId doc) const { - return {}; + const char * s = get(doc); + return {s, s ? ::strlen(s) : 0u}; } uint32_t diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index f40a89f76b4..3de7df5aa28 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -51,6 +51,10 @@ public: static void generateOffsets(const char * bt, size_t sz, OffsetVector & offsets); virtual const char * getFromEnum(EnumHandle e) const = 0; virtual const char *get(DocId doc) const = 0; + largeint_t getInt(DocId doc) const override { return strtoll(get(doc), nullptr, 0); } + double getFloat(DocId doc) const override; + vespalib::ConstArrayRef<char> get_raw(DocId) const override; + const char * getString(DocId doc, char * v, size_t sz) const override { (void) v; (void) sz; return get(doc); } protected: StringAttribute(const vespalib::string & name); StringAttribute(const vespalib::string & name, const Config & c); @@ -79,11 +83,6 @@ private: virtual void load_enumerated_data(ReaderBase &attrReader, enumstore::EnumeratedLoader& loader); virtual void load_posting_lists_and_update_enum_store(enumstore::EnumeratedPostingsLoader& loader); - largeint_t getInt(DocId doc) const override { return strtoll(get(doc), nullptr, 0); } - double getFloat(DocId doc) const override; - vespalib::ConstArrayRef<char> get_raw(DocId) const override; - const char * getString(DocId doc, char * v, size_t sz) const override { (void) v; (void) sz; return 
get(doc); } - long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; }; diff --git a/searchlib/src/vespa/searchlib/common/identifiable.h b/searchlib/src/vespa/searchlib/common/identifiable.h index 1f5aff6d2d0..4b401633a2d 100644 --- a/searchlib/src/vespa/searchlib/common/identifiable.h +++ b/searchlib/src/vespa/searchlib/common/identifiable.h @@ -151,6 +151,7 @@ #define CID_search_expression_AttributeMapLookupNode SEARCHLIB_CID(145) #define CID_search_expression_BoolResultNode SEARCHLIB_CID(146) #define CID_search_expression_BoolResultNodeVector SEARCHLIB_CID(147) +#define CID_search_expression_RawAttributeResult SEARCHLIB_CID(148) #define CID_search_QueryNode SEARCHLIB_CID(150) diff --git a/searchlib/src/vespa/searchlib/expression/attributenode.cpp b/searchlib/src/vespa/searchlib/expression/attributenode.cpp index f8ae4bd698d..73f306fc708 100644 --- a/searchlib/src/vespa/searchlib/expression/attributenode.cpp +++ b/searchlib/src/vespa/searchlib/expression/attributenode.cpp @@ -77,7 +77,9 @@ createResult(const IAttributeVector * attribute) { IAttributeVector::EnumRefs enumRefs = attribute->make_enum_read_view(); return (enumRefs.empty()) - ? std::make_unique<AttributeResult>(attribute, 0) + ? attribute->is_raw_type() + ? 
std::make_unique<RawAttributeResult>(attribute, 0) + : std::make_unique<AttributeResult>(attribute, 0) : std::make_unique<EnumAttributeResult>(enumRefs, attribute, 0); } @@ -221,6 +223,13 @@ AttributeNode::onPrepare(bool preserveAccurateTypes) setResultType(std::make_unique<StringResultNode>()); } } + } else if (attribute->is_raw_type()) { + if (_hasMultiValue) { + throw std::runtime_error(make_string("Does not support multivalue raw attribute vector '%s'", + attribute->getName().c_str())); + } else { + setResultType(std::make_unique<RawResultNode>()); + } } else { throw std::runtime_error(make_string("Can not deduce correct resultclass for attribute vector '%s'", attribute->getName().c_str())); diff --git a/searchlib/src/vespa/searchlib/expression/attributenode.h b/searchlib/src/vespa/searchlib/expression/attributenode.h index 03b7909e581..d668bd3f662 100644 --- a/searchlib/src/vespa/searchlib/expression/attributenode.h +++ b/searchlib/src/vespa/searchlib/expression/attributenode.h @@ -46,7 +46,7 @@ public: AttributeNode(const search::attribute::IAttributeVector & attribute); AttributeNode(const AttributeNode & attribute); AttributeNode & operator = (const AttributeNode & attribute); - ~AttributeNode(); + ~AttributeNode() override; void setDocId(DocId docId) const { _scratchResult->setDocId(docId); } const search::attribute::IAttributeVector *getAttribute() const { return _scratchResult ? 
_scratchResult->getAttribute() : nullptr; @@ -59,7 +59,7 @@ public: class Handler { public: - virtual ~Handler() { } + virtual ~Handler() = default; virtual void handle(const AttributeResult & r) = 0; }; private: diff --git a/searchlib/src/vespa/searchlib/expression/attributeresult.cpp b/searchlib/src/vespa/searchlib/expression/attributeresult.cpp index 9eb8b35d83c..8a4574265c4 100644 --- a/searchlib/src/vespa/searchlib/expression/attributeresult.cpp +++ b/searchlib/src/vespa/searchlib/expression/attributeresult.cpp @@ -5,5 +5,6 @@ namespace search::expression { IMPLEMENT_RESULTNODE(AttributeResult, ResultNode); +IMPLEMENT_RESULTNODE(RawAttributeResult, ResultNode); } diff --git a/searchlib/src/vespa/searchlib/expression/attributeresult.h b/searchlib/src/vespa/searchlib/expression/attributeresult.h index 0501b6477cf..5fd271b6ae0 100644 --- a/searchlib/src/vespa/searchlib/expression/attributeresult.h +++ b/searchlib/src/vespa/searchlib/expression/attributeresult.h @@ -13,13 +13,18 @@ public: using UP = std::unique_ptr<AttributeResult>; DECLARE_RESULTNODE(AttributeResult); AttributeResult() : _attribute(nullptr), _docId(0) { } - AttributeResult(const attribute::IAttributeVector * attribute, DocId docId) : - _attribute(attribute), - _docId(docId) + AttributeResult(const attribute::IAttributeVector * attribute, DocId docId) + : _attribute(attribute), + _docId(docId) { } void setDocId(DocId docId) { _docId = docId; } const search::attribute::IAttributeVector *getAttribute() const { return _attribute; } DocId getDocId() const { return _docId; } +protected: + ConstBufferRef get_raw() const { + auto raw = getAttribute()->get_raw(_docId); + return {raw.data(), raw.size()}; + } private: int64_t onGetInteger(size_t index) const override { (void) index; return _attribute->getInt(_docId); } double onGetFloat(size_t index) const override { (void) index; return _attribute->getFloat(_docId); } @@ -36,4 +41,17 @@ private: DocId _docId; }; +class RawAttributeResult : public 
AttributeResult { +public: + DECLARE_RESULTNODE(RawAttributeResult); + RawAttributeResult() : AttributeResult() {} + RawAttributeResult(const attribute::IAttributeVector * attribute, DocId docId) + : AttributeResult(attribute, docId) + { } + ConstBufferRef onGetString(size_t index, BufferRef buf) const override { + (void) index; (void) buf; + return get_raw(); + } +}; + } diff --git a/searchlib/src/vespa/searchlib/expression/catfunctionnode.h b/searchlib/src/vespa/searchlib/expression/catfunctionnode.h index 0667b408500..33df55c891a 100644 --- a/searchlib/src/vespa/searchlib/expression/catfunctionnode.h +++ b/searchlib/src/vespa/searchlib/expression/catfunctionnode.h @@ -3,8 +3,7 @@ #include "multiargfunctionnode.h" -namespace search { -namespace expression { +namespace search::expression { class CatFunctionNode : public MultiArgFunctionNode { @@ -19,5 +18,3 @@ private: }; } -} - diff --git a/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp b/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp index f48be061d15..bd13c032a03 100644 --- a/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp +++ b/searchlib/src/vespa/searchlib/expression/documentfieldnode.cpp @@ -140,7 +140,8 @@ DocumentFieldNode::onPrepare(bool preserveAccurateTypes) } } -void DocumentFieldNode::onDocType(const DocumentType & docType) +void +DocumentFieldNode::onDocType(const DocumentType & docType) { LOG(debug, "DocumentFieldNode::onDocType(this=%p)", this); _fieldPath.clear(); @@ -173,12 +174,14 @@ private: char DefaultValue::null = 0; -void DefaultValue::set(const ResultNode&) +void +DefaultValue::set(const ResultNode&) { throw std::runtime_error("DefaultValue::set(const ResultNode&) is not possible."); } -void FieldValue2ResultNode::set(const ResultNode&) +void +FieldValue2ResultNode::set(const ResultNode&) { throw std::runtime_error("FieldValue2ResultNode::set(const ResultNode&) is not possible."); } @@ -192,7 +195,8 @@ void DocumentFieldNode::onDoc(const 
Document & doc) _handler->reset(); } -bool DocumentFieldNode::onExecute() const +bool +DocumentFieldNode::onExecute() const { _doc->iterateNested(_fieldPath.getFullRange(), *_handler); return true; @@ -237,12 +241,14 @@ DocumentFieldNode::Handler::onStructStart(const Content & c) } -Serializer & DocumentFieldNode::onSerialize(Serializer & os) const +Serializer & +DocumentFieldNode::onSerialize(Serializer & os) const { return os << _fieldName << _value; } -Deserializer & DocumentFieldNode::onDeserialize(Deserializer & is) +Deserializer & +DocumentFieldNode::onDeserialize(Deserializer & is) { return is >> _fieldName >> _value; } diff --git a/searchlib/src/vespa/searchlib/expression/enumresultnode.h b/searchlib/src/vespa/searchlib/expression/enumresultnode.h index 14dacd75651..6d201cb2b5d 100644 --- a/searchlib/src/vespa/searchlib/expression/enumresultnode.h +++ b/searchlib/src/vespa/searchlib/expression/enumresultnode.h @@ -3,8 +3,7 @@ #include "integerresultnode.h" -namespace search { -namespace expression { +namespace search::expression { class EnumResultNode : public IntegerResultNodeT<int64_t> { @@ -20,5 +19,3 @@ private: }; } -} - diff --git a/searchlib/src/vespa/searchlib/expression/floatresultnode.h b/searchlib/src/vespa/searchlib/expression/floatresultnode.h index c31f9a2de40..e79911fe985 100644 --- a/searchlib/src/vespa/searchlib/expression/floatresultnode.h +++ b/searchlib/src/vespa/searchlib/expression/floatresultnode.h @@ -4,8 +4,7 @@ #include "numericresultnode.h" #include <vespa/vespalib/util/sort.h> -namespace search { -namespace expression { +namespace search ::expression { class FloatResultNode final : public NumericResultNode { @@ -54,5 +53,3 @@ private: }; } -} - diff --git a/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h index 95a4555e6e4..ffd0fb11701 100644 --- a/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h +++ 
b/searchlib/src/vespa/searchlib/expression/integerbucketresultnode.h @@ -3,8 +3,7 @@ #include "bucketresultnode.h" -namespace search { -namespace expression { +namespace search::expression { class IntegerBucketResultNode : public BucketResultNode { @@ -48,5 +47,3 @@ public: }; } -} - diff --git a/searchlib/src/vespa/searchlib/expression/nullresultnode.h b/searchlib/src/vespa/searchlib/expression/nullresultnode.h index e873d85d0f1..b16fa2245de 100644 --- a/searchlib/src/vespa/searchlib/expression/nullresultnode.h +++ b/searchlib/src/vespa/searchlib/expression/nullresultnode.h @@ -3,8 +3,7 @@ #include "singleresultnode.h" -namespace search { -namespace expression { +namespace search::expression { class NullResultNode : public SingleResultNode { @@ -32,5 +31,3 @@ private: }; } -} - diff --git a/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h b/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h index 261b60b3613..a12bcaa0a32 100644 --- a/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h +++ b/searchlib/src/vespa/searchlib/expression/positiveinfinityresultnode.h @@ -3,8 +3,7 @@ #include "singleresultnode.h" -namespace search { -namespace expression { +namespace search::expression { class PositiveInfinityResultNode : public SingleResultNode { @@ -26,5 +25,3 @@ private: }; } -} - diff --git a/searchlib/src/vespa/searchlib/expression/resultnode.h b/searchlib/src/vespa/searchlib/expression/resultnode.h index 4c81259325b..6a62600b993 100644 --- a/searchlib/src/vespa/searchlib/expression/resultnode.h +++ b/searchlib/src/vespa/searchlib/expression/resultnode.h @@ -53,7 +53,6 @@ private: public: DECLARE_ABSTRACT_RESULTNODE(ResultNode); - ~ResultNode() { } using UP = std::unique_ptr<ResultNode>; using CP = vespalib::IdentifiablePtr<ResultNode>; virtual void set(const ResultNode & rhs) = 0; diff --git a/searchlib/src/vespa/searchlib/expression/stringresultnode.h 
b/searchlib/src/vespa/searchlib/expression/stringresultnode.h index 79d849bdd15..303d8778e99 100644 --- a/searchlib/src/vespa/searchlib/expression/stringresultnode.h +++ b/searchlib/src/vespa/searchlib/expression/stringresultnode.h @@ -3,8 +3,7 @@ #include "singleresultnode.h" -namespace search { -namespace expression { +namespace search::expression { class StringResultNode : public SingleResultNode { @@ -60,5 +59,3 @@ private: }; } -} - diff --git a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp index 74d67aabe88..e606c6f08bb 100644 --- a/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp +++ b/searchsummary/src/vespa/searchsummary/docsummary/attributedfw.cpp @@ -97,8 +97,8 @@ SingleAttrDFW::insertField(uint32_t docid, GetDocsumsState& state, Inserter &tar break; } case BasicType::STRING: { - const char *s = v.getString(docid, nullptr, 0); // no need to pass in a buffer, this attribute has a string storage. - target.insertString(vespalib::Memory(s)); + auto s = v.get_raw(docid); + target.insertString(vespalib::Memory(s.data(), s.size())); break; } case BasicType::REFERENCE: |