summary refs log tree commit diff stats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2024-01-16 12:18:58 +0100
committer GitHub <noreply@github.com> 2024-01-16 12:18:58 +0100
commit 324d3edb7de008da284af9b6e664298538dae0f4 (patch)
tree d259ba4ae76722f6e64e2e92bca6dd882c86387f /searchlib
parent 6cd973bc62c4550ef3f1d4abe789a22edda17eaf (diff)
parent ae88431f3770388afd22c6856b2ad17c994783ee (diff)
Merge pull request #29913 from vespa-engine/vekterli/streaming-search-regex-support
Add regular expression support to streaming search
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/querynode.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp 6
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/queryterm.h 2
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp 27
-rw-r--r-- searchlib/src/vespa/searchlib/query/streaming/regexp_term.h 25
6 files changed, 68 insertions, 1 deletions
diff --git a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
index 6b9be2e3269..05a75f4662e 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/query/streaming/CMakeLists.txt
@@ -11,5 +11,6 @@ vespa_add_library(searchlib_query_streaming OBJECT
queryterm.cpp
wand_term.cpp
weighted_set_term.cpp
+ regexp_term.cpp
DEPENDS
)
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 1ce80660d46..2ee515f062a 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -2,6 +2,7 @@
#include "query.h"
#include "nearest_neighbor_query_node.h"
+#include "regexp_term.h"
#include <vespa/searchlib/parsequery/stackdumpiterator.h>
#include <vespa/searchlib/query/streaming/dot_product_term.h>
#include <vespa/searchlib/query/streaming/in_term.h>
@@ -145,7 +146,12 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
qn = std::make_unique<TrueNode>();
} else {
Normalizing normalize_mode = factory.normalizing_mode(ssIndex);
- auto qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode);
+ std::unique_ptr<QueryTerm> qt;
+ if (sTerm != TermType::REGEXP) {
+ qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode);
+ } else {
+ qt = std::make_unique<RegexpTerm>(factory.create(), ssTerm, ssIndex, TermType::REGEXP, normalize_mode);
+ }
qt->setWeight(queryRep.GetWeight());
qt->setUniqueId(queryRep.getUniqueId());
if (qt->isFuzzy()) {
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index 3950a179d67..3e05d381ee2 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -179,4 +179,10 @@ QueryTerm::as_multi_term() noexcept
return nullptr;
}
+RegexpTerm*
+QueryTerm::as_regexp_term() noexcept
+{
+ return nullptr; // base-class default: a plain QueryTerm is not a RegexpTerm; RegexpTerm overrides this to return itself
+}
+
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 743998a630e..cd2bdd7eaec 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -13,6 +13,7 @@ namespace search::streaming {
class NearestNeighborQueryNode;
class MultiTerm;
+class RegexpTerm;
/**
This is a leaf in the Query tree. All terms are leafs.
@@ -93,6 +94,7 @@ public:
void setFuzzyPrefixLength(uint32_t fuzzyPrefixLength) { _fuzzyPrefixLength = fuzzyPrefixLength; }
virtual NearestNeighborQueryNode* as_nearest_neighbor_query_node() noexcept;
virtual MultiTerm* as_multi_term() noexcept;
+ virtual RegexpTerm* as_regexp_term() noexcept;
protected:
using QueryNodeResultBaseContainer = std::unique_ptr<QueryNodeResultBase>;
string _index;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp
new file mode 100644
index 00000000000..4508caa7072
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.cpp
@@ -0,0 +1,27 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#include "regexp_term.h"
+
+namespace search::streaming {
+
+using vespalib::Regex;
+
+namespace {
+
+constexpr Regex::Options normalize_mode_to_regex_opts(Normalizing norm) noexcept { // maps a term's Normalizing mode to the options used when building its regex
+ return ((norm == Normalizing::NONE)
+ ? Regex::Options::None // Normalizing::NONE: no extra regex options
+ : Regex::Options::IgnoreCase); // every other Normalizing mode: ignore case when matching
+}
+
+}
+
+RegexpTerm::RegexpTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term,
+ const string& index, Type type, Normalizing normalizing)
+ : QueryTerm(std::move(result_base), term, index, type, normalizing), // all arguments are forwarded unchanged to the QueryTerm base
+ _regexp(Regex::from_pattern({term.data(), term.size()}, normalize_mode_to_regex_opts(normalizing))) // regex is built from the term text; its options are derived from `normalizing`
+{
+}
+
+RegexpTerm::~RegexpTerm() = default; // out-of-line definition of the destructor declared in regexp_term.h
+
+}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/regexp_term.h b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.h
new file mode 100644
index 00000000000..96d14eeb0bd
--- /dev/null
+++ b/searchlib/src/vespa/searchlib/query/streaming/regexp_term.h
@@ -0,0 +1,25 @@
+// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
+#pragma once
+
+#include "queryterm.h"
+#include <vespa/vespalib/regex/regex.h>
+
+namespace search::streaming {
+
+/**
+ * Query term that matches fields using a regular expression, with case sensitivity
+ * controlled by the provided Normalizing mode (NONE -> Regex::Options::None, otherwise IgnoreCase).
+ */
+class RegexpTerm : public QueryTerm {
+ vespalib::Regex _regexp; // built once in the constructor from the term text
+public:
+ RegexpTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term,
+ const string& index, Type type, Normalizing normalizing);
+ ~RegexpTerm() override;
+
+ RegexpTerm* as_regexp_term() noexcept override { return this; } // replaces the QueryTerm default, which returns nullptr
+
+ [[nodiscard]] const vespalib::Regex& regexp() const noexcept { return _regexp; } // read-only access to this term's regex
+};
+
+}