From 9a916767e8e2b1083b519ac1214852ed724bce89 Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Tue, 23 May 2023 10:17:42 +0000 Subject: Extract ucs4 and take ownership to avoid the dirty code using a mutext protected lazy construction. --- .../stringattribute/stringattribute_test.cpp | 4 ++-- .../searchlib/attribute/string_search_helper.cpp | 10 +++++----- .../vespa/searchlib/attribute/string_search_helper.h | 11 +++++------ .../src/vespa/searchlib/query/query_term_ucs4.cpp | 19 ++++++++++++++----- searchlib/src/vespa/searchlib/query/query_term_ucs4.h | 1 + 5 files changed, 27 insertions(+), 18 deletions(-) (limited to 'searchlib') diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 5ba90d2b077..6e5971ea81d 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -389,8 +389,8 @@ testSingleValue(Attribute & svsa, Config &cfg) TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(SearchContext)); - EXPECT_EQUAL(32u, sizeof(StringSearchHelper)); - EXPECT_EQUAL(88u, sizeof(attribute::SingleStringEnumSearchContext)); + EXPECT_EQUAL(40u, sizeof(StringSearchHelper)); + EXPECT_EQUAL(96u, sizeof(attribute::SingleStringEnumSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp index 206c2bcbd69..17a0e6256d4 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp @@ -29,10 +29,10 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased) term.getFuzzyPrefixLength(), isCased()); } else if (isCased()) { - _term._char = term.getTerm(); + _term = term.getTerm(); _termLen = term.getTermLen(); } else { - term.term(_term._ucs4); + _ucs4 = term.asUcs4(); } } @@ -49,7 +49,7 @@ StringSearchHelper::isMatch(const char *src) const { return getFuzzyMatcher().isMatch(src); } if (__builtin_expect(isCased(), false)) { - int res = strncmp(_term._char, src, _termLen); + int res = strncmp(_term, src, _termLen); return (res == 0) && (src[_termLen] == 0 || isPrefix()); } vespalib::Utf8ReaderForZTS u8reader(src); @@ -58,11 +58,11 @@ StringSearchHelper::isMatch(const char *src) const { for (;; ++j) { val = u8reader.getChar(); val = vespalib::LowerCase::convert(val); - if (_term._ucs4[j] == 0 || _term._ucs4[j] != val) { + if (_ucs4[j] == 0 || _ucs4[j] != val) { break; } } - return (_term._ucs4[j] == 0 && (val == 0 || isPrefix())); + return (_ucs4[j] == 0 && (val == 0 || isPrefix())); } } diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h index 4d69b61449e..7bfcf0e4292 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h +++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h @@ -16,6 +16,7 @@ namespace search::attribute { */ class StringSearchHelper { public: + using FuzzyMatcher = vespalib::FuzzyMatcher; StringSearchHelper(QueryTermUCS4 & qTerm, bool cased); StringSearchHelper(StringSearchHelper&&) noexcept; StringSearchHelper(const StringSearchHelper &) = delete; @@ -27,14 +28,12 @@ public: bool isCased() const noexcept { return _isCased; } bool isFuzzy() const noexcept { return _isFuzzy; } const vespalib::Regex & getRegex() const noexcept { return _regex; } - const vespalib::FuzzyMatcher & getFuzzyMatcher() const noexcept { return *_fuzzyMatcher; } + const FuzzyMatcher & getFuzzyMatcher() const noexcept { return *_fuzzyMatcher; } private: vespalib::Regex _regex; - std::unique_ptr _fuzzyMatcher; - union { - const ucs4_t *_ucs4; - const char *_char; - } _term; + std::unique_ptr _fuzzyMatcher; + std::unique_ptr _ucs4; + const char * _term; uint32_t _termLen; bool _isPrefix; bool _isRegex; diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp index e68685bd78c..8c3c2514877 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp @@ -38,17 +38,26 @@ QueryTermUCS4::fillUCS4() { * This is a 'dirty' optimisation, but this is done to avoid writing a lot of data and blow the cpu caches with something * you do not really need most of the time. That matters when qps is very high and query is wide, and hits are few. */ - std::lock_guard guard(_globalMutex); - ucs4_t * ucs4 = _termUCS4.load(std::memory_order_relaxed); - if (ucs4 != nullptr) return ucs4; - ucs4 = new ucs4_t[_cachedTermLen + 1]; + std::unique_ptr ucs4 = asUcs4(); + ucs4_t * next = ucs4.get(); + { + std::lock_guard guard(_globalMutex); + ucs4_t *prev = _termUCS4.load(std::memory_order_relaxed); + if (prev != nullptr) return prev; + _termUCS4.store(ucs4.release(), std::memory_order_relaxed); + } + return next; +} + +std::unique_ptr +QueryTermUCS4::asUcs4() const { + auto ucs4 = std::make_unique(_cachedTermLen + 1); vespalib::Utf8Reader r(getTermString()); uint32_t i(0); while (r.hasMore()) { ucs4[i++] = r.getChar(); } ucs4[_cachedTermLen] = 0; - _termUCS4.store(ucs4); return ucs4; } diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h index 0639ce8a74c..673927cf685 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h @@ -21,6 +21,7 @@ public: uint32_t getTermLen() const { return _cachedTermLen; } uint32_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } void visitMembers(vespalib::ObjectVisitor &visitor) const override; + std::unique_ptr asUcs4() const; uint32_t term(const ucs4_t * & t) { t = _termUCS4.load(std::memory_order_relaxed); if (t == nullptr) { -- cgit v1.2.3