summaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
blob: 0e7a116a874d5e98d095b743cc5e52b1318e0d96 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <vespa/vespalib/fuzzy/fuzzy_matching_algorithm.h>
#include <vespa/vespalib/regex/regex.h>

namespace vespalib { class FuzzyMatcher; }
namespace search { class QueryTermUCS4; }

namespace search::attribute {

/**
 * Helper class for search context when scanning string fields
 * It handles different search settings like prefix, regex and cased/uncased.
 * It also exposes a fuzzy-matching mode (see isFuzzy() / getFuzzyMatcher()).
 *
 * Instances are move-constructible but neither copy-constructible nor
 * copy-assignable. No move assignment operator is declared, so assignment
 * between instances is not available at all.
 *
 * The constructors, the destructor and isMatch() are defined out of line;
 * their exact behavior is not visible from this header.
 */
class StringSearchHelper {
public:
    using FuzzyMatcher = vespalib::FuzzyMatcher;
    /**
     * @param qTerm the query term to build the helper from. Taken by non-const
     *              reference; NOTE(review): whether it is mutated or must outlive
     *              this helper is decided in the .cpp - confirm there.
     * @param cased requested case handling; presumably true means case-sensitive
     *              matching - confirm against the implementation.
     * @param fuzzy_matching_algorithm algorithm selection for fuzzy matching;
     *              defaults to FuzzyMatchingAlgorithm::BruteForce.
     */
    StringSearchHelper(QueryTermUCS4 & qTerm, bool cased,
                       vespalib::FuzzyMatchingAlgorithm fuzzy_matching_algorithm = vespalib::FuzzyMatchingAlgorithm::BruteForce);
    // Movable (noexcept), but not copyable.
    StringSearchHelper(StringSearchHelper&&) noexcept;
    StringSearchHelper(const StringSearchHelper &) = delete;
    StringSearchHelper & operator =(const StringSearchHelper &) = delete;
    // Declared here and defined out of line.
    ~StringSearchHelper();
    /**
     * Tests a candidate string against the search term.
     * @param src candidate string; presumably NUL-terminated since no length is
     *            passed - TODO confirm in the implementation.
     * @return true if src is considered a match.
     */
    bool isMatch(const char *src) const noexcept;
    // Simple accessors for the search-mode flags stored in this helper.
    bool isPrefix() const noexcept { return _isPrefix; }
    bool isRegex() const noexcept { return _isRegex; }
    bool isCased() const noexcept { return _isCased; }
    bool isFuzzy() const noexcept { return _isFuzzy; }
    // Returns the regex member by reference; valid for the lifetime of this helper.
    const vespalib::Regex & getRegex() const noexcept { return _regex; }
    // Dereferences _fuzzyMatcher unconditionally: calling this while the pointer
    // is null is undefined behavior. NOTE(review): presumably the matcher is only
    // created for fuzzy terms, so callers should check isFuzzy() first - confirm.
    const FuzzyMatcher & getFuzzyMatcher() const noexcept { return *_fuzzyMatcher; }
private:
    using ucs4_t = uint32_t;
    // NOTE: member order is significant for initialization order in the
    // out-of-line constructors; do not reorder without updating them.
    vespalib::Regex                _regex;
    std::unique_ptr<FuzzyMatcher>  _fuzzyMatcher;
    // Owned array of 32-bit code units; presumably the UCS-4 form of the term - confirm.
    std::unique_ptr<ucs4_t[]>      _ucs4;
    // Raw pointer to the term bytes; ownership/lifetime is not visible in this header.
    const char *                   _term;
    uint32_t                       _termLen; // measured in bytes
    bool                           _isPrefix;
    bool                           _isRegex;
    bool                           _isCased;
    bool                           _isFuzzy;
};

}