summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2023-05-23 12:51:48 +0200
committerGitHub <noreply@github.com>2023-05-23 12:51:48 +0200
commitbe124652c6f20677ecf07aad887d0cf02944c83e (patch)
tree4418127a0166f799eeba3bdddedb9d93e88b6f2f
parent70170e8444a718cd2b81ae1e6f7dc77ad858c122 (diff)
parent9a916767e8e2b1083b519ac1214852ed724bce89 (diff)
Merge pull request #27183 from vespa-engine/balder/extract-ucs4-and-take-ownership
Extract ucs4 and take ownership to avoid the dirty code using a mutex…
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_search_helper.h11
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp19
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_ucs4.h1
5 files changed, 27 insertions, 18 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 5ba90d2b077..6e5971ea81d 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -389,8 +389,8 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(SearchContext));
- EXPECT_EQUAL(32u, sizeof(StringSearchHelper));
- EXPECT_EQUAL(88u, sizeof(attribute::SingleStringEnumSearchContext));
+ EXPECT_EQUAL(40u, sizeof(StringSearchHelper));
+ EXPECT_EQUAL(96u, sizeof(attribute::SingleStringEnumSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 206c2bcbd69..17a0e6256d4 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -29,10 +29,10 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
term.getFuzzyPrefixLength(),
isCased());
} else if (isCased()) {
- _term._char = term.getTerm();
+ _term = term.getTerm();
_termLen = term.getTermLen();
} else {
- term.term(_term._ucs4);
+ _ucs4 = term.asUcs4();
}
}
@@ -49,7 +49,7 @@ StringSearchHelper::isMatch(const char *src) const {
return getFuzzyMatcher().isMatch(src);
}
if (__builtin_expect(isCased(), false)) {
- int res = strncmp(_term._char, src, _termLen);
+ int res = strncmp(_term, src, _termLen);
return (res == 0) && (src[_termLen] == 0 || isPrefix());
}
vespalib::Utf8ReaderForZTS u8reader(src);
@@ -58,11 +58,11 @@ StringSearchHelper::isMatch(const char *src) const {
for (;; ++j) {
val = u8reader.getChar();
val = vespalib::LowerCase::convert(val);
- if (_term._ucs4[j] == 0 || _term._ucs4[j] != val) {
+ if (_ucs4[j] == 0 || _ucs4[j] != val) {
break;
}
}
- return (_term._ucs4[j] == 0 && (val == 0 || isPrefix()));
+ return (_ucs4[j] == 0 && (val == 0 || isPrefix()));
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
index 4d69b61449e..7bfcf0e4292 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
@@ -16,6 +16,7 @@ namespace search::attribute {
*/
class StringSearchHelper {
public:
+ using FuzzyMatcher = vespalib::FuzzyMatcher;
StringSearchHelper(QueryTermUCS4 & qTerm, bool cased);
StringSearchHelper(StringSearchHelper&&) noexcept;
StringSearchHelper(const StringSearchHelper &) = delete;
@@ -27,14 +28,12 @@ public:
bool isCased() const noexcept { return _isCased; }
bool isFuzzy() const noexcept { return _isFuzzy; }
const vespalib::Regex & getRegex() const noexcept { return _regex; }
- const vespalib::FuzzyMatcher & getFuzzyMatcher() const noexcept { return *_fuzzyMatcher; }
+ const FuzzyMatcher & getFuzzyMatcher() const noexcept { return *_fuzzyMatcher; }
private:
vespalib::Regex _regex;
- std::unique_ptr<vespalib::FuzzyMatcher> _fuzzyMatcher;
- union {
- const ucs4_t *_ucs4;
- const char *_char;
- } _term;
+ std::unique_ptr<FuzzyMatcher> _fuzzyMatcher;
+ std::unique_ptr<ucs4_t[]> _ucs4;
+ const char * _term;
uint32_t _termLen;
bool _isPrefix;
bool _isRegex;
diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp
index e68685bd78c..8c3c2514877 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp
+++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp
@@ -38,17 +38,26 @@ QueryTermUCS4::fillUCS4() {
* This is a 'dirty' optimisation, but this is done to avoid writing a lot of data and blow the cpu caches with something
* you do not really need most of the time. That matters when qps is very high and query is wide, and hits are few.
*/
- std::lock_guard guard(_globalMutex);
- ucs4_t * ucs4 = _termUCS4.load(std::memory_order_relaxed);
- if (ucs4 != nullptr) return ucs4;
- ucs4 = new ucs4_t[_cachedTermLen + 1];
+ std::unique_ptr<ucs4_t[]> ucs4 = asUcs4();
+ ucs4_t * next = ucs4.get();
+ {
+ std::lock_guard guard(_globalMutex);
+ ucs4_t *prev = _termUCS4.load(std::memory_order_relaxed);
+ if (prev != nullptr) return prev;
+ _termUCS4.store(ucs4.release(), std::memory_order_relaxed);
+ }
+ return next;
+}
+
+std::unique_ptr<ucs4_t[]>
+QueryTermUCS4::asUcs4() const {
+ auto ucs4 = std::make_unique<ucs4_t[]>(_cachedTermLen + 1);
vespalib::Utf8Reader r(getTermString());
uint32_t i(0);
while (r.hasMore()) {
ucs4[i++] = r.getChar();
}
ucs4[_cachedTermLen] = 0;
- _termUCS4.store(ucs4);
return ucs4;
}
diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h
index 0639ce8a74c..673927cf685 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h
+++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h
@@ -21,6 +21,7 @@ public:
uint32_t getTermLen() const { return _cachedTermLen; }
uint32_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; }
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
+ std::unique_ptr<ucs4_t[]> asUcs4() const;
uint32_t term(const ucs4_t * & t) {
t = _termUCS4.load(std::memory_order_relaxed);
if (t == nullptr) {