summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Harald Musum <musum@verizonmedia.com> 2023-03-20 15:17:55 +0100
committer GitHub <noreply@github.com> 2023-03-20 15:17:55 +0100
commit 30dcb2dfffe023b818633a4c38ee5f6f73d889b9 (patch)
tree b1f256dc8d00a28136e5ab13741e0f6958cbbf5e
parent 7604c8c3974329802228bd41487102e4e53409d0 (diff)
parent c95c354230bcff2880d9d22ae2a9041bb068e255 (diff)
Merge pull request #26503 from vespa-engine/revert-26494-vekterli/add-possible-match-prefix-regex-range-util
Revert "Add utility wrapper around RE2 possible regex prefix match range"
-rw-r--r-- vespalib/src/tests/regex/regex.cpp 31
-rw-r--r-- vespalib/src/vespa/vespalib/regex/regex.cpp 14
-rw-r--r-- vespalib/src/vespa/vespalib/regex/regex.h 15
3 files changed, 3 insertions, 57 deletions
diff --git a/vespalib/src/tests/regex/regex.cpp b/vespalib/src/tests/regex/regex.cpp
index 1ab4a24a1b3..471ba84a68f 100644
--- a/vespalib/src/tests/regex/regex.cpp
+++ b/vespalib/src/tests/regex/regex.cpp
@@ -150,35 +150,4 @@ TEST("Test that default constructed regex is invalid.") {
ASSERT_FALSE(dummy.valid());
}
-TEST("Can extract min/max prefix range from anchored regex") {
- auto min_max = Regex::from_pattern("^.*").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "");
- EXPECT_EQUAL(min_max.second, "\xf4\x8f\xbf\xc0"); // Highest possible Unicode char (U+10FFFF) as UTF-8, plus 1
-
- min_max = Regex::from_pattern("^hello").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "hello");
- EXPECT_EQUAL(min_max.second, "hello");
-
- min_max = Regex::from_pattern("^hello|^world").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "hello");
- EXPECT_EQUAL(min_max.second, "world");
-
- min_max = Regex::from_pattern("(^hello|^world|^zoidberg)").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "hello");
- EXPECT_EQUAL(min_max.second, "zoidberg");
-
- min_max = Regex::from_pattern("^hello (foo|bar|zoo)").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "hello bar");
- EXPECT_EQUAL(min_max.second, "hello zoo");
-
- min_max = Regex::from_pattern("^(hello|world)+").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "hello");
- EXPECT_EQUAL(min_max.second, "worldwp");
-
- // Bad regex; no range
- min_max = Regex::from_pattern("*hello").possible_anchored_match_prefix_range();
- EXPECT_EQUAL(min_max.first, "");
- EXPECT_EQUAL(min_max.second, "");
-}
-
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/vespalib/src/vespa/vespalib/regex/regex.cpp b/vespalib/src/vespa/vespalib/regex/regex.cpp
index c73df1182d6..a904ed4d4ee 100644
--- a/vespalib/src/vespa/vespalib/regex/regex.cpp
+++ b/vespalib/src/vespa/vespalib/regex/regex.cpp
@@ -49,16 +49,6 @@ public:
}
return RE2::FullMatch(StringPiece(input.data(), input.size()), _regex);
}
-
- std::pair<std::string, std::string> possible_anchored_match_prefix_range() const {
- constexpr int max_len = 128; // TODO determine a "reasonable" value. RE2 docs are not clear on this.
- std::string min_prefix, max_prefix;
-
- if (!_regex.PossibleMatchRange(&min_prefix, &max_prefix, max_len)) {
- return {};
- }
- return {std::move(min_prefix), std::move(max_prefix)};
- }
};
Regex Regex::from_pattern(std::string_view pattern, uint32_t opt_mask) {
@@ -86,10 +76,6 @@ bool Regex::full_match(std::string_view input) const noexcept {
return _impl->full_match(input);
}
-std::pair<std::string, std::string> Regex::possible_anchored_match_prefix_range() const {
- return _impl->possible_anchored_match_prefix_range();
-}
-
bool Regex::partial_match(std::string_view input, std::string_view pattern) noexcept {
assert(pattern.size() <= INT32_MAX);
Impl impl(pattern, RE2::Quiet);
diff --git a/vespalib/src/vespa/vespalib/regex/regex.h b/vespalib/src/vespa/vespalib/regex/regex.h
index 50eb76b14d4..6a4d6bc47fc 100644
--- a/vespalib/src/vespa/vespalib/regex/regex.h
+++ b/vespalib/src/vespa/vespalib/regex/regex.h
@@ -4,7 +4,6 @@
#include <memory>
#include <string>
#include <string_view>
-#include <utility>
namespace vespalib {
@@ -38,7 +37,7 @@ namespace vespalib {
*/
class Regex {
class Impl;
- std::unique_ptr<const Impl> _impl;
+ std::unique_ptr<const Impl> _impl; // shared_ptr to allow for cheap copying.
explicit Regex(std::unique_ptr<const Impl> impl);
public:
@@ -56,22 +55,14 @@ public:
Regex(Regex&&) noexcept;
Regex& operator=(Regex&&) noexcept;
- [[nodiscard]] bool valid() const noexcept { return bool(_impl); }
+ bool valid() const { return bool(_impl); }
[[nodiscard]] bool parsed_ok() const noexcept;
[[nodiscard]] bool partial_match(std::string_view input) const noexcept;
[[nodiscard]] bool full_match(std::string_view input) const noexcept;
- // Returns a pair of <lower bound, upper bound> prefix strings that constrain the possible
- // match-able range of inputs for this regex. If there is no shared prefix, or if extracting
- // the range fails, the strings will be empty.
- // Important: this is _only_ semantically valid if the regex is strictly start-anchored, i.e.
- // all possible matching paths start with '^'.
- // This method does _not_ validate that the regex is strictly start-anchored.
- [[nodiscard]] std::pair<std::string, std::string> possible_anchored_match_prefix_range() const;
-
- [[nodiscard]] static Regex from_pattern(std::string_view pattern, uint32_t opt_flags = Options::None);
+ static Regex from_pattern(std::string_view pattern, uint32_t opt_flags = Options::None);
// Utility matchers for non-precompiled expressions.
[[nodiscard]] static bool partial_match(std::string_view input, std::string_view pattern) noexcept;