summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author Tor Egge <Tor.Egge@online.no> 2022-03-31 20:50:45 +0200
committer Tor Egge <Tor.Egge@online.no> 2022-03-31 20:50:45 +0200
commit 7e13424bc67011269899bcdb5dd4307974c69957 (patch)
tree 9dd948082ccc75ff9fbbbc44456a067c9b83df36 /searchlib
parent a7e3bc6fd0c771cfb1643d4b04874ecd6f289cc3 (diff)
Move StringSearchHelper out from stringbase.{h,cpp}.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp 65
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.h 43
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.cpp 60
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.h 38
6 files changed, 112 insertions, 96 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 576a2ddb467..fd10bfcf47c 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -18,6 +18,7 @@ LOG_SETUP("stringattribute_test");
using search::attribute::CollectionType;
using search::attribute::IAttributeVector;
using search::attribute::SearchContext;
+using search::attribute::StringSearchHelper;
using vespalib::datastore::EntryRef;
using namespace search;
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index e86f04db56d..fde1e4cdc4f 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -109,5 +109,6 @@ vespa_add_library(searchlib_attribute OBJECT
sourceselector.cpp
stringattribute.cpp
stringbase.cpp
+ string_search_helper.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
new file mode 100644
index 00000000000..d876d80e73f
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -0,0 +1,65 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#include "string_search_helper.h"
+#include <vespa/searchlib/query/query_term_ucs4.h>
+#include <vespa/vespalib/text/lowercase.h>
+#include <vespa/vespalib/text/utf8.h>
+
+namespace search::attribute {
+
+StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
+ : _regex(),
+ _fuzzyMatcher(),
+ _term(),
+ _termLen(),
+ _isPrefix(term.isPrefix()),
+ _isRegex(term.isRegex()),
+ _isCased(cased),
+ _isFuzzy(term.isFuzzy())
+{
+ if (isRegex()) {
+ if (isCased()) {
+ _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None);
+ } else {
+ _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
+ }
+ } else if (isCased()) {
+ _term._char = term.getTerm();
+ _termLen = term.getTermLen();
+ } else if (isFuzzy()) {
+ _fuzzyMatcher = vespalib::FuzzyMatcher::from_term(term.getTerm());
+ } else {
+ term.term(_term._ucs4);
+ }
+}
+
+StringSearchHelper::StringSearchHelper(StringSearchHelper&&) noexcept = default;
+
+StringSearchHelper::~StringSearchHelper() = default;
+
+bool
+StringSearchHelper::isMatch(const char *src) const {
+ if (__builtin_expect(isRegex(), false)) {
+ return getRegex().valid() ? getRegex().partial_match(std::string_view(src)) : false;
+ }
+ if (__builtin_expect(isCased(), false)) {
+ int res = strncmp(_term._char, src, _termLen);
+ return (res == 0) && (src[_termLen] == 0 || isPrefix());
+ }
+ if (__builtin_expect(isFuzzy(), false)) {
+ return getFuzzyMatcher().isMatch(src);
+ }
+ vespalib::Utf8ReaderForZTS u8reader(src);
+ uint32_t j = 0;
+ uint32_t val;
+ for (;; ++j) {
+ val = u8reader.getChar();
+ val = vespalib::LowerCase::convert(val);
+ if (_term._ucs4[j] == 0 || _term._ucs4[j] != val) {
+ break;
+ }
+ }
+ return (_term._ucs4[j] == 0 && (val == 0 || isPrefix()));
+}
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
new file mode 100644
index 00000000000..5947f397359
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
@@ -0,0 +1,43 @@
+// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+
+#pragma once
+
+#include <vespa/fastlib/text/unicodeutil.h>
+#include <vespa/vespalib/regex/regex.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
+
+namespace search { class QueryTermUCS4; }
+
+namespace search::attribute {
+
+/**
+ * Helper class for search context when scanning string fields
+ * It handles different search settings like prefix, regex and cased/uncased.
+ */
+class StringSearchHelper {
+public:
+ StringSearchHelper(QueryTermUCS4 & qTerm, bool cased);
+ StringSearchHelper(StringSearchHelper&&) noexcept;
+ ~StringSearchHelper();
+ bool isMatch(const char *src) const;
+ bool isPrefix() const { return _isPrefix; }
+ bool isRegex() const { return _isRegex; }
+ bool isCased() const { return _isCased; }
+ bool isFuzzy() const { return _isFuzzy; }
+ const vespalib::Regex & getRegex() const { return _regex; }
+ const vespalib::FuzzyMatcher & getFuzzyMatcher() const { return _fuzzyMatcher; }
+private:
+ vespalib::Regex _regex;
+ vespalib::FuzzyMatcher _fuzzyMatcher;
+ union {
+ const ucs4_t *_ucs4;
+ const char *_char;
+ } _term;
+ uint32_t _termLen;
+ bool _isPrefix;
+ bool _isRegex;
+ bool _isCased;
+ bool _isFuzzy;
+};
+
+}
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index 52daea9593e..d8ab577c6d3 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -16,66 +16,6 @@ LOG_SETUP(".searchlib.attribute.stringbase");
namespace search {
-StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
- : _regex(),
- _fuzzyMatcher(),
- _term(),
- _termLen(),
- _isPrefix(term.isPrefix()),
- _isRegex(term.isRegex()),
- _isCased(cased),
- _isFuzzy(term.isFuzzy())
-{
- if (isRegex()) {
- if (isCased()) {
- _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None);
- } else {
- _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
- }
- } else if (isCased()) {
- _term._char = term.getTerm();
- _termLen = term.getTermLen();
- } else if (isFuzzy()) {
- _fuzzyMatcher = vespalib::FuzzyMatcher::from_term(term.getTerm());
- } else {
- term.term(_term._ucs4);
- }
-}
-
-StringSearchHelper::StringSearchHelper(StringSearchHelper&&) noexcept = default;
-
-StringSearchHelper::~StringSearchHelper()
-{
- if (isRegex()) {
-
- }
-}
-
-bool
-StringSearchHelper::isMatch(const char *src) const {
- if (__builtin_expect(isRegex(), false)) {
- return getRegex().valid() ? getRegex().partial_match(std::string_view(src)) : false;
- }
- if (__builtin_expect(isCased(), false)) {
- int res = strncmp(_term._char, src, _termLen);
- return (res == 0) && (src[_termLen] == 0 || isPrefix());
- }
- if (__builtin_expect(isFuzzy(), false)) {
- return getFuzzyMatcher().isMatch(src);
- }
- vespalib::Utf8ReaderForZTS u8reader(src);
- uint32_t j = 0;
- uint32_t val;
- for (;; ++j) {
- val = u8reader.getChar();
- val = vespalib::LowerCase::convert(val);
- if (_term._ucs4[j] == 0 || _term._ucs4[j] != val) {
- break;
- }
- }
- return (_term._ucs4[j] == 0 && (val == 0 || isPrefix()));
-}
-
IMPLEMENT_IDENTIFIABLE_ABSTRACT(StringAttribute, AttributeVector);
class SortDataChar {
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index a3ae160dee3..ffd285a9a73 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -8,44 +8,10 @@
#include "i_enum_store.h"
#include "loadedenumvalue.h"
#include "search_context.h"
-#include <vespa/vespalib/regex/regex.h>
-#include <vespa/vespalib/text/lowercase.h>
-#include <vespa/vespalib/text/utf8.h>
-#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
-#include <optional>
+#include "string_search_helper.h"
namespace search {
-/**
- * Helper class for search context when scanning string fields
- * It handles different search settings like prefix, regex and cased/uncased.
- */
-class StringSearchHelper {
-public:
- StringSearchHelper(QueryTermUCS4 & qTerm, bool cased);
- StringSearchHelper(StringSearchHelper&&) noexcept;
- ~StringSearchHelper();
- bool isMatch(const char *src) const;
- bool isPrefix() const { return _isPrefix; }
- bool isRegex() const { return _isRegex; }
- bool isCased() const { return _isCased; }
- bool isFuzzy() const { return _isFuzzy; }
- const vespalib::Regex & getRegex() const { return _regex; }
- const vespalib::FuzzyMatcher & getFuzzyMatcher() const { return _fuzzyMatcher; }
-private:
- vespalib::Regex _regex;
- vespalib::FuzzyMatcher _fuzzyMatcher;
- union {
- const ucs4_t *_ucs4;
- const char *_char;
- } _term;
- uint32_t _termLen;
- bool _isPrefix;
- bool _isRegex;
- bool _isCased;
- bool _isFuzzy;
-};
-
class ReaderBase;
/**
@@ -176,7 +142,7 @@ protected:
}
private:
std::unique_ptr<QueryTermUCS4> _queryTerm;
- StringSearchHelper _helper;
+ attribute::StringSearchHelper _helper;
};
};