diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-04 11:07:41 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-04 11:07:53 +0000 |
commit | 9a9ec529aad8c0075bb32f814c856584e9099245 (patch) | |
tree | 40439d0be2dcbd78713ef5e02abe74ef782f17a7 /searchlib | |
parent | bb78e8e3ce543267c4af42e44df3aa56996e9471 (diff) |
Make the StringSearchContext smaller.
- Rearrange members.
- Skip std::optional on Regex.
Diffstat (limited to 'searchlib')
4 files changed, 58 insertions, 45 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 464a5f79937..a3a43e09ae5 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -386,7 +386,7 @@ testSingleValue(Attribute & svsa, Config &cfg) TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext)); - EXPECT_EQUAL(96u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); + EXPECT_EQUAL(80u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index b8edce00ba7..d8fcfaff958 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -186,7 +186,7 @@ private: using Parent::isRegex; using Parent::getRegex; bool useThis(const PostingListSearchContext::DictionaryConstIterator & it) const override { - return isRegex() ? (getRegex() ? getRegex()->partial_match(_enumStore.get_value(it.getKey())) : false ) : true; + return isRegex() ? (getRegex().valid() ? getRegex().partial_match(_enumStore.get_value(it.getKey())) : false ) : true; } public: StringPostingSearchContext(QueryTermSimpleUP qTerm, bool useBitVector, const AttrT &toBeSearched); diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index f113b99357f..78225a59b66 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -21,10 +21,9 @@ using attribute::LoadedEnumAttribute; using attribute::LoadedEnumAttributeVector; AttributeVector::SearchContext::UP -StringAttribute::getSearch(QueryTermSimple::UP term, const attribute::SearchContextParams & params) const +StringAttribute::getSearch(QueryTermSimple::UP term, const attribute::SearchContextParams &) const { - (void) params; - return SearchContext::UP(new StringSearchContext(std::move(term), *this)); + return std::make_unique<StringSearchContext>(std::move(term), *this); } class SortDataChar { @@ -94,7 +93,8 @@ public: } }; -size_t StringAttribute::countZero(const char * bt, size_t sz) +size_t +StringAttribute::countZero(const char * bt, size_t sz) { size_t size(0); for(size_t i(0); i < sz; i++) { @@ -105,7 +105,8 @@ size_t StringAttribute::countZero(const char * bt, size_t sz) return size; } -void StringAttribute::generateOffsets(const char * bt, size_t sz, OffsetVector & offsets) +void +StringAttribute::generateOffsets(const char * bt, size_t sz, OffsetVector & offsets) { offsets.clear(); uint32_t start(0); @@ -131,25 +132,27 @@ StringAttribute::StringAttribute(const vespalib::string & name, const Config & c { } -StringAttribute::~StringAttribute() {} +StringAttribute::~StringAttribute() = default; -uint32_t StringAttribute::get(DocId doc, WeightedInt * v, uint32_t sz) const +uint32_t +StringAttribute::get(DocId doc, WeightedInt * v, uint32_t sz) const { WeightedConstChar * s = new WeightedConstChar[sz]; uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz); for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) { - v[i] = WeightedInt(strtoll(s[i].getValue(), NULL, 0), s[i].getWeight()); + v[i] = WeightedInt(strtoll(s[i].getValue(), nullptr, 0), s[i].getWeight()); } delete [] s; return n; } -uint32_t StringAttribute::get(DocId doc, WeightedFloat * v, uint32_t sz) const +uint32_t +StringAttribute::get(DocId doc, WeightedFloat * v, uint32_t sz) const { WeightedConstChar * s = new WeightedConstChar[sz]; uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz); for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) { - v[i] = WeightedFloat(vespalib::locale::c::strtod(s[i].getValue(), NULL), s[i].getWeight()); + v[i] = WeightedFloat(vespalib::locale::c::strtod(s[i].getValue(), nullptr), s[i].getWeight()); } delete [] s; return n; @@ -157,32 +160,35 @@ uint32_t StringAttribute::get(DocId doc, WeightedFloat * v, uint32_t sz) const double StringAttribute::getFloat(DocId doc) const { - return vespalib::locale::c::strtod(get(doc), NULL); + return vespalib::locale::c::strtod(get(doc), nullptr); } -uint32_t StringAttribute::get(DocId doc, double * v, uint32_t sz) const +uint32_t +StringAttribute::get(DocId doc, double * v, uint32_t sz) const { const char ** s = new const char *[sz]; uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz); for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) { - v[i] = vespalib::locale::c::strtod(s[i], NULL); + v[i] = vespalib::locale::c::strtod(s[i], nullptr); } delete [] s; return n; } -uint32_t StringAttribute::get(DocId doc, largeint_t * v, uint32_t sz) const +uint32_t +StringAttribute::get(DocId doc, largeint_t * v, uint32_t sz) const { const char ** s = new const char *[sz]; uint32_t n = static_cast<const AttributeVector *>(this)->get(doc, s, sz); for(uint32_t i(0),m(std::min(n,sz)); i<m; i++) { - v[i] = strtoll(s[i], NULL, 0); + v[i] = strtoll(s[i], nullptr, 0); } delete [] s; return n; } -long StringAttribute::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const +long +StringAttribute::onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { unsigned char *dst = static_cast<unsigned char *>(serTo); const char *value(get(doc)); @@ -199,7 +205,8 @@ long StringAttribute::onSerializeForAscendingSort(DocId doc, void * serTo, long return buf.size(); } -long StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const +long +StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const { (void) bc; unsigned char *dst = static_cast<unsigned char *>(serTo); @@ -223,13 +230,13 @@ long StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qTerm, const StringAttribute & toBeSearched) : SearchContext(toBeSearched), - _isPrefix(qTerm->isPrefix()), - _isRegex(qTerm->isRegex()), - _queryTerm(std::move(qTerm)), + _queryTerm(static_cast<QueryTermUCS4 *>(qTerm.release())), _termUCS4(queryTerm()->getUCS4Term()), - _bufferLen(toBeSearched.getMaxValueCount()), _buffer(nullptr), - _regex() + _regex(), + _bufferLen(toBeSearched.getMaxValueCount()), + _isPrefix(_queryTerm->isPrefix()), + _isRegex(_queryTerm->isRegex()) { if (isRegex()) { _regex = vespalib::Regex::from_pattern(_queryTerm->getTerm(), vespalib::Regex::Options::IgnoreCase); @@ -246,16 +253,17 @@ StringAttribute::StringSearchContext::~StringSearchContext() bool StringAttribute::StringSearchContext::valid() const { - return (_queryTerm.get() && (!_queryTerm->empty())); + return (_queryTerm && (!_queryTerm->empty())); } const QueryTermUCS4 * StringAttribute::StringSearchContext::queryTerm() const { - return static_cast<const QueryTermUCS4 *>(_queryTerm.get()); + return _queryTerm.get(); } -uint32_t StringAttribute::clearDoc(DocId doc) +uint32_t +StringAttribute::clearDoc(DocId doc) { uint32_t removed(0); if (hasMultiValue() && (doc < getNumDocs())) { @@ -303,19 +311,22 @@ StringAttribute::StringSearchContext::onFind(DocId docId, int32_t elemId) const return -1; } -bool StringAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) +bool +StringAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) { vespalib::string v = fv.getAsString(); return AttributeVector::adjustWeight(_changes, doc, StringChangeData(v), wAdjust); } -bool StringAttribute::applyWeight(DocId doc, const FieldValue& fv, const document::AssignValueUpdate& wAdjust) +bool +StringAttribute::applyWeight(DocId doc, const FieldValue& fv, const document::AssignValueUpdate& wAdjust) { vespalib::string v = fv.getAsString(); return AttributeVector::adjustWeight(_changes, doc, StringChangeData(v), wAdjust); } -bool StringAttribute::apply(DocId, const ArithmeticValueUpdate & ) +bool +StringAttribute::apply(DocId, const ArithmeticValueUpdate & ) { return false; } @@ -358,7 +369,8 @@ StringAttribute::onLoadEnumerated(ReaderBase &attrReader) return true; } -bool StringAttribute::onLoad() +bool +StringAttribute::onLoad() { ReaderBase attrReader(*this); bool ok(attrReader.getHasLoadData()); @@ -379,15 +391,18 @@ StringAttribute::onAddDoc(DocId ) return false; } -void StringAttribute::load_posting_lists(LoadedVector&) +void +StringAttribute::load_posting_lists(LoadedVector&) { } -void StringAttribute::load_enum_store(LoadedVector&) +void +StringAttribute::load_enum_store(LoadedVector&) { } -void StringAttribute::fillValues(LoadedVector & ) +void +StringAttribute::fillValues(LoadedVector & ) { } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 5d21f48fc74..36c3d113b03 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -83,7 +83,7 @@ private: virtual void load_enumerated_data(ReaderBase &attrReader, enumstore::EnumeratedLoader& loader); virtual void load_posting_lists_and_update_enum_store(enumstore::EnumeratedPostingsLoader& loader); - largeint_t getInt(DocId doc) const override { return strtoll(get(doc), NULL, 0); } + largeint_t getInt(DocId doc) const override { return strtoll(get(doc), nullptr, 0); } double getFloat(DocId doc) const override; const char * getString(DocId doc, char * v, size_t sz) const override { (void) v; (void) sz; return get(doc); } @@ -95,16 +95,13 @@ protected: public: StringSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched); ~StringSearchContext() override; - private: - bool _isPrefix; - bool _isRegex; protected: bool valid() const override; const QueryTermUCS4 * queryTerm() const override; bool isMatch(const char *src) const { if (__builtin_expect(isRegex(), false)) { - return _regex ? _regex->partial_match(std::string_view(src)) : false; + return _regex.valid() ? _regex.partial_match(std::string_view(src)) : false; } vespalib::Utf8ReaderForZTS u8reader(src); uint32_t j = 0; @@ -161,9 +158,7 @@ protected: bool isPrefix() const { return _isPrefix; } bool isRegex() const { return _isRegex; } - QueryTermSimpleUP _queryTerm; - std::vector<ucs4_t> _termUCS4; - const std::optional<vespalib::Regex>& getRegex() const { return _regex; } + const vespalib::Regex & getRegex() const { return _regex; } private: WeightedConstChar * getBuffer() const { if (_buffer == nullptr) { @@ -171,11 +166,14 @@ protected: } return _buffer; } - unsigned _bufferLen; + std::unique_ptr<QueryTermUCS4> _queryTerm; + std::vector<ucs4_t> _termUCS4; mutable WeightedConstChar * _buffer; - std::optional<vespalib::Regex> _regex; + vespalib::Regex _regex; + unsigned _bufferLen; + bool _isPrefix; + bool _isRegex; }; -private: SearchContext::UP getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; }; |