diff options
author | Harald Musum <musum@verizonmedia.com> | 2023-03-20 15:17:55 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-20 15:17:55 +0100 |
commit | 30dcb2dfffe023b818633a4c38ee5f6f73d889b9 (patch) | |
tree | b1f256dc8d00a28136e5ab13741e0f6958cbbf5e | |
parent | 7604c8c3974329802228bd41487102e4e53409d0 (diff) | |
parent | c95c354230bcff2880d9d22ae2a9041bb068e255 (diff) |
Merge pull request #26503 from vespa-engine/revert-26494-vekterli/add-possible-match-prefix-regex-range-util
Revert "Add utility wrapper around RE2 possible regex prefix match range"
-rw-r--r-- | vespalib/src/tests/regex/regex.cpp | 31 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/regex/regex.cpp | 14 | ||||
-rw-r--r-- | vespalib/src/vespa/vespalib/regex/regex.h | 15 |
3 files changed, 3 insertions, 57 deletions
diff --git a/vespalib/src/tests/regex/regex.cpp b/vespalib/src/tests/regex/regex.cpp index 1ab4a24a1b3..471ba84a68f 100644 --- a/vespalib/src/tests/regex/regex.cpp +++ b/vespalib/src/tests/regex/regex.cpp @@ -150,35 +150,4 @@ TEST("Test that default constructed regex is invalid.") { ASSERT_FALSE(dummy.valid()); } -TEST("Can extract min/max prefix range from anchored regex") { - auto min_max = Regex::from_pattern("^.*").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, ""); - EXPECT_EQUAL(min_max.second, "\xf4\x8f\xbf\xc0"); // Highest possible Unicode char (U+10FFFF) as UTF-8, plus 1 - - min_max = Regex::from_pattern("^hello").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, "hello"); - EXPECT_EQUAL(min_max.second, "hello"); - - min_max = Regex::from_pattern("^hello|^world").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, "hello"); - EXPECT_EQUAL(min_max.second, "world"); - - min_max = Regex::from_pattern("(^hello|^world|^zoidberg)").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, "hello"); - EXPECT_EQUAL(min_max.second, "zoidberg"); - - min_max = Regex::from_pattern("^hello (foo|bar|zoo)").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, "hello bar"); - EXPECT_EQUAL(min_max.second, "hello zoo"); - - min_max = Regex::from_pattern("^(hello|world)+").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, "hello"); - EXPECT_EQUAL(min_max.second, "worldwp"); - - // Bad regex; no range - min_max = Regex::from_pattern("*hello").possible_anchored_match_prefix_range(); - EXPECT_EQUAL(min_max.first, ""); - EXPECT_EQUAL(min_max.second, ""); -} - TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/vespalib/src/vespa/vespalib/regex/regex.cpp b/vespalib/src/vespa/vespalib/regex/regex.cpp index c73df1182d6..a904ed4d4ee 100644 --- a/vespalib/src/vespa/vespalib/regex/regex.cpp +++ b/vespalib/src/vespa/vespalib/regex/regex.cpp @@ -49,16 +49,6 @@ public: } return RE2::FullMatch(StringPiece(input.data(), input.size()), _regex); } - - std::pair<std::string, std::string> possible_anchored_match_prefix_range() const { - constexpr int max_len = 128; // TODO determine a "reasonable" value. RE2 docs are not clear on this. - std::string min_prefix, max_prefix; - - if (!_regex.PossibleMatchRange(&min_prefix, &max_prefix, max_len)) { - return {}; - } - return {std::move(min_prefix), std::move(max_prefix)}; - } }; Regex Regex::from_pattern(std::string_view pattern, uint32_t opt_mask) { @@ -86,10 +76,6 @@ bool Regex::full_match(std::string_view input) const noexcept { return _impl->full_match(input); } -std::pair<std::string, std::string> Regex::possible_anchored_match_prefix_range() const { - return _impl->possible_anchored_match_prefix_range(); -} - bool Regex::partial_match(std::string_view input, std::string_view pattern) noexcept { assert(pattern.size() <= INT32_MAX); Impl impl(pattern, RE2::Quiet); diff --git a/vespalib/src/vespa/vespalib/regex/regex.h b/vespalib/src/vespa/vespalib/regex/regex.h index 50eb76b14d4..6a4d6bc47fc 100644 --- a/vespalib/src/vespa/vespalib/regex/regex.h +++ b/vespalib/src/vespa/vespalib/regex/regex.h @@ -4,7 +4,6 @@ #include <memory> #include <string> #include <string_view> -#include <utility> namespace vespalib { @@ -38,7 +37,7 @@ namespace vespalib { */ class Regex { class Impl; - std::unique_ptr<const Impl> _impl; + std::unique_ptr<const Impl> _impl; // shared_ptr to allow for cheap copying. explicit Regex(std::unique_ptr<const Impl> impl); public: @@ -56,22 +55,14 @@ public: Regex(Regex&&) noexcept; Regex& operator=(Regex&&) noexcept; - [[nodiscard]] bool valid() const noexcept { return bool(_impl); } + bool valid() const { return bool(_impl); } [[nodiscard]] bool parsed_ok() const noexcept; [[nodiscard]] bool partial_match(std::string_view input) const noexcept; [[nodiscard]] bool full_match(std::string_view input) const noexcept; - // Returns a pair of <lower bound, upper bound> prefix strings that constrain the possible - // match-able range of inputs for this regex. If there is no shared prefix, or if extracting - // the range fails, the strings will be empty. - // Important: this is _only_ semantically valid if the regex is strictly start-anchored, i.e. - // all possible matching paths start with '^'. - // This method does _not_ validate that the regex is strictly start-anchored. - [[nodiscard]] std::pair<std::string, std::string> possible_anchored_match_prefix_range() const; - - [[nodiscard]] static Regex from_pattern(std::string_view pattern, uint32_t opt_flags = Options::None); + static Regex from_pattern(std::string_view pattern, uint32_t opt_flags = Options::None); // Utility matchers for non-precompiled expressions. [[nodiscard]] static bool partial_match(std::string_view input, std::string_view pattern) noexcept; |