diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-04 16:15:53 +0000 |
---|---|---|
committer | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-04 18:07:04 +0000 |
commit | 4d4b1024cbaa9ad537927d66309db8aa9f628c37 (patch) | |
tree | 834db36e1f9c1fa8804713ae049deb3d86c6264b /searchlib | |
parent | 311e77aad06f187c70864a80a0703082f72bb3d8 (diff) |
Keep only one ucs4 buffer, and create the buffer lazily.
Diffstat (limited to 'searchlib')
9 files changed, 53 insertions, 51 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 87d9f081ffc..aaae2772687 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -386,7 +386,7 @@ testSingleValue(Attribute & svsa, Config &cfg) TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext)); - EXPECT_EQUAL(72u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); + EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 9aa07570c0d..2db25da03f7 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -734,8 +734,8 @@ TEST("testSameElementEvaluate") { TEST("Control the size of query terms") { EXPECT_EQUAL(104u, sizeof(QueryTermSimple)); - EXPECT_EQUAL(136u, sizeof(QueryTermUCS4)); - EXPECT_EQUAL(280u, sizeof(QueryTerm)); + EXPECT_EQUAL(120u, sizeof(QueryTermUCS4)); + EXPECT_EQUAL(264u, sizeof(QueryTerm)); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index d64e03c67a4..56a644a68b1 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -225,13 +225,15 @@ StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qT const StringAttribute & toBeSearched) : SearchContext(toBeSearched), _queryTerm(static_cast<QueryTermUCS4 *>(qTerm.release())), - _termUCS4(queryTerm()->getUCS4Term()), + _termUCS4(nullptr), _regex(), 
_isPrefix(_queryTerm->isPrefix()), _isRegex(_queryTerm->isRegex()) { if (isRegex()) { _regex = vespalib::Regex::from_pattern(_queryTerm->getTerm(), vespalib::Regex::Options::IgnoreCase); + } else { + _queryTerm->term(_termUCS4); } } @@ -261,16 +263,6 @@ StringAttribute::clearDoc(DocId doc) return removed; } -namespace { - -class DirectAccessor { -public: - DirectAccessor() { } - const char * get(const char * v) const { return v; } -}; - -} - bool StringAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index d72f7002086..b8fef783d58 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -157,7 +157,7 @@ protected: const vespalib::Regex & getRegex() const { return _regex; } private: std::unique_ptr<QueryTermUCS4> _queryTerm; - std::vector<ucs4_t> _termUCS4; + const ucs4_t *_termUCS4; vespalib::Regex _regex; bool _isPrefix; bool _isRegex; diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp index b267ad9253e..7729c7ede36 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp @@ -3,37 +3,30 @@ #include "query_term_ucs4.h" #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/text/utf8.h> +#include <mutex> namespace search { -QueryTermUCS4::UCS4StringT -QueryTermUCS4::getUCS4Term() const { - UCS4StringT ucs4; - const string & term = getTermString(); - ucs4.reserve(term.size() + 1); - vespalib::Utf8Reader r(term); - while (r.hasMore()) { - ucs4_t u = r.getChar(); - ucs4.push_back(u); - } - ucs4.push_back(0); - return ucs4; +namespace { + std::mutex _globalMutex; } +ucs4_t QueryTermUCS4::ZERO_TERM(0); + QueryTermUCS4::QueryTermUCS4() : QueryTermSimple(), + _termUCS4(), 
_cachedTermLen(0), - _termUCS4() -{ - _termUCS4.push_back(0); -} + _filled(true) +{ } QueryTermUCS4::~QueryTermUCS4() = default; QueryTermUCS4::QueryTermUCS4(const string & termS, Type type) : QueryTermSimple(termS, type), + _termUCS4(), _cachedTermLen(0), - _termUCS4() + _filled(false) { vespalib::Utf8Reader r(termS); while (r.hasMore()) { @@ -44,6 +37,25 @@ QueryTermUCS4::QueryTermUCS4(const string & termS, Type type) : } void +QueryTermUCS4::fillUCS4() { + /* + * Double checked locking...... + * This is a 'dirty' optimisation, but this is done to avoid writing a lot of data and blow the cpu caches with something + * you do not really need most of the time. That matters when qps is very high and query is wide, and hits are few. + */ + std::lock_guard guard(_globalMutex); + if (_filled) return; + _termUCS4.reset(new ucs4_t[_cachedTermLen + 1]); + vespalib::Utf8Reader r(getTermString()); + uint32_t i(0); + while (r.hasMore()) { + _termUCS4.get()[i++] = r.getChar(); + } + _termUCS4.get()[_cachedTermLen] = 0; + _filled = true; +} + +void QueryTermUCS4::visitMembers(vespalib::ObjectVisitor & visitor) const { QueryTermSimple::visitMembers(visitor); diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h index 90f5c07b7ca..d5e92acb378 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h @@ -14,29 +14,30 @@ namespace search { */ class QueryTermUCS4 : public QueryTermSimple { public: - typedef std::vector<ucs4_t> UCS4StringT; typedef std::unique_ptr<QueryTermUCS4> UP; - QueryTermUCS4(const QueryTermUCS4 &) = default; - QueryTermUCS4 & operator = (const QueryTermUCS4 &) = default; + QueryTermUCS4(const QueryTermUCS4 &) = delete; + QueryTermUCS4 & operator = (const QueryTermUCS4 &) = delete; QueryTermUCS4(QueryTermUCS4 &&) = default; QueryTermUCS4 & operator = (QueryTermUCS4 &&) = default; QueryTermUCS4(); QueryTermUCS4(const string & 
term_, Type type); ~QueryTermUCS4(); - size_t getTermLen() const { return _cachedTermLen; } - size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } - UCS4StringT getUCS4Term() const; + uint32_t getTermLen() const { return _cachedTermLen; } + uint32_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } void visitMembers(vespalib::ObjectVisitor &visitor) const override; - size_t term(const ucs4_t * & t) { - if (_termUCS4.empty()) { - _termUCS4 = getUCS4Term(); + uint32_t term(const ucs4_t * & t) { + if (!_filled) { + fillUCS4(); } - t = &_termUCS4[0]; + t = (_termUCS4) ? _termUCS4.get() : &ZERO_TERM; return _cachedTermLen; } private: - size_t _cachedTermLen; - UCS4StringT _termUCS4; + void fillUCS4(); + static ucs4_t ZERO_TERM; + std::unique_ptr<ucs4_t[]> _termUCS4; + uint32_t _cachedTermLen; + bool _filled; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 3caa47bf55d..a50c5a8bf8b 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -51,8 +51,6 @@ QueryTerm::QueryTerm() : _fieldInfo() { } -QueryTerm::QueryTerm(const QueryTerm &) = default; -QueryTerm & QueryTerm::operator = (const QueryTerm &) = default; QueryTerm::QueryTerm(QueryTerm &&) noexcept = default; QueryTerm & QueryTerm::operator = (QueryTerm &&) noexcept = default; diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 6daa60a317a..4f323b7f9f1 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -26,7 +26,7 @@ public: class EncodingBitMap { public: - EncodingBitMap(unsigned bm=0) : _enc(bm) { } + EncodingBitMap(uint8_t bm=0) : _enc(bm) { } bool isFloat() const { return _enc & Float; } bool isBase10Integer() const { 
return _enc & Base10Integer; } bool isAscii7Bit() const { return _enc & Ascii7Bit; } @@ -35,7 +35,7 @@ public: void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; } private: enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 }; - unsigned _enc; + uint8_t _enc; }; class FieldInfo { public: @@ -55,8 +55,6 @@ public: }; QueryTerm(); QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, const string & term, const string & index, Type type); - QueryTerm(const QueryTerm &); - QueryTerm & operator = (const QueryTerm &); QueryTerm(QueryTerm &&) noexcept; QueryTerm & operator = (QueryTerm &&) noexcept; ~QueryTerm(); diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp index 02b1063cc37..61eaae40e90 100644 --- a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp +++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp @@ -2,6 +2,7 @@ #include "imported_attribute_fixture.h" #include "mock_gid_to_lid_mapping.h" +#include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/vespalib/util/stringfmt.h> #include <future> @@ -55,7 +56,7 @@ GlobalId dummy_gid(uint32_t doc_index) { } std::unique_ptr<QueryTermSimple> word_term(vespalib::stringref term) { - return std::make_unique<QueryTermSimple>(term, QueryTermSimple::Type::WORD); + return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); } |