summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp54
-rw-r--r--searchlib/src/tests/query/customtypevisitor_test.cpp2
-rw-r--r--searchlib/src/tests/query/query_visitor_test.cpp2
-rw-r--r--searchlib/src/tests/query/querybuilder_test.cpp47
-rw-r--r--searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp12
-rw-r--r--searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h6
-rw-r--r--searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp10
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/parse.h2
-rw-r--r--searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h21
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.cpp5
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_simple.h12
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp11
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h2
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/querynode.cpp3
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.h5
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/querybuilder.h9
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/queryreplicator.h3
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/simplequery.h6
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp7
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h10
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/term.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/term.h8
-rw-r--r--searchlib/src/vespa/searchlib/query/tree/termnodes.h23
23 files changed, 171 insertions, 93 deletions
diff --git a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
index 8ba8c62c5ff..55810c02550 100644
--- a/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
+++ b/searchlib/src/tests/attribute/dfa_fuzzy_matcher/dfa_fuzzy_matcher_test.cpp
@@ -120,11 +120,12 @@ struct MatchStats {
template <bool collect_matches>
void
-brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words)
+brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size,
+ bool cased, bool prefix_match, MatchStats& stats, StringVector& matched_words)
{
auto view = store.get_dictionary().get_posting_dictionary().getFrozenView();
vespalib::Timer timer;
- FuzzyMatcher matcher(target, 2, prefix_size, cased);
+ FuzzyMatcher matcher(target, 2, prefix_size, cased, prefix_match);
auto itr = view.begin();
size_t matches = 0;
size_t seeks = 0;
@@ -144,11 +145,12 @@ brute_force_fuzzy_match_in_dictionary(std::string_view target, const StringEnumS
template <bool collect_matches>
void
-dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words)
+dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& store, uint32_t prefix_size,
+ bool cased, bool prefix_match, MatchStats& stats, StringVector& matched_words)
{
auto view = store.get_dictionary().get_posting_dictionary().getFrozenView();
vespalib::Timer timer;
- DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, LevenshteinDfa::DfaType::Explicit);
+ DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, prefix_match, LevenshteinDfa::DfaType::Explicit);
Utf8Reader reader(vespalib::stringref(target.data(), target.size()));
std::string target_copy;
Utf8Writer<std::string> writer(target_copy);
@@ -187,11 +189,12 @@ dfa_fuzzy_match_in_dictionary(std::string_view target, const StringEnumStore& st
template <bool collect_matches>
void
-dfa_fuzzy_match_in_dictionary_no_skip(std::string_view target, const StringEnumStore& store, uint32_t prefix_size, bool cased, MatchStats& stats, StringVector& matched_words)
+dfa_fuzzy_match_in_dictionary_no_skip(std::string_view target, const StringEnumStore& store, uint32_t prefix_size,
+ bool cased, bool prefix_match, MatchStats& stats, StringVector& matched_words)
{
auto view = store.get_dictionary().get_posting_dictionary().getFrozenView();
vespalib::Timer timer;
- DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, LevenshteinDfa::DfaType::Explicit);
+ DfaFuzzyMatcher matcher(target, 2, prefix_size, cased, prefix_match, LevenshteinDfa::DfaType::Explicit);
auto itr = view.begin();
size_t matches = 0;
size_t seeks = 0;
@@ -247,20 +250,23 @@ struct DfaFuzzyMatcherTest : public ::testing::TestWithParam<TestParam> {
updater.commit();
store.freeze_dictionary();
}
- void expect_prefix_matches(std::string_view target, uint32_t prefix_size, const StringVector& exp_matches) {
+ void expect_prefix_matches(std::string_view target, uint32_t prefix_size, bool prefix_match, const StringVector& exp_matches) {
MatchStats stats;
StringVector brute_force_matches;
StringVector dfa_matches;
StringVector dfa_no_skip_matches;
bool cased = GetParam()._cased;
SCOPED_TRACE(target);
- brute_force_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, stats, brute_force_matches);
- dfa_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, stats, dfa_matches);
- dfa_fuzzy_match_in_dictionary_no_skip<true>(target, store, prefix_size, cased, stats, dfa_no_skip_matches);
+ brute_force_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, prefix_match, stats, brute_force_matches);
+ dfa_fuzzy_match_in_dictionary<true>(target, store, prefix_size, cased, prefix_match, stats, dfa_matches);
+ dfa_fuzzy_match_in_dictionary_no_skip<true>(target, store, prefix_size, cased, prefix_match, stats, dfa_no_skip_matches);
EXPECT_EQ(exp_matches, brute_force_matches);
EXPECT_EQ(exp_matches, dfa_matches);
EXPECT_EQ(exp_matches, dfa_no_skip_matches);
}
+ void expect_prefix_matches(std::string_view target, uint32_t prefix_size, const StringVector& exp_matches) {
+ expect_prefix_matches(target, prefix_size, false, exp_matches);
+ }
void expect_matches(std::string_view target, const StringVector& exp_matches) {
expect_prefix_matches(target, 0, exp_matches);
}
@@ -284,11 +290,12 @@ TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary)
expect_matches("forcecast", {"forecast"});
}
-TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary_with_prefix_size)
+TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary_with_prefix_lock_length)
{
bool cased = GetParam()._cased;
StringVector words = { "board", "boat", "bob", "door", "food", "foot", "football", "foothill",
- "for", "forbid", "force", "ford", "forearm", "forecast", "forest", "H", "HA", "h", "ha", char_from_u8(u8"Ørn"), char_from_u8(u8"øre"), char_from_u8(u8"Ås"), char_from_u8(u8"ås")};
+ "for", "forbid", "force", "ford", "forearm", "forecast", "forest", "H", "HA", "h", "ha",
+ char_from_u8(u8"Ørn"), char_from_u8(u8"øre"), char_from_u8(u8"Ås"), char_from_u8(u8"ås")};
populate_dictionary(words);
expect_prefix_matches("a", 1, {});
expect_prefix_matches("b", 1, {"bob"});
@@ -321,6 +328,25 @@ TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary_with_prefix_size)
}
}
+TEST_P(DfaFuzzyMatcherTest, fuzzy_match_in_dictionary_with_prefix_matching) {
+ // We include the empty string to make "everything matches"-checks easier since
+ // the dictionary implicitly returns such an empty string sentinel already.
+ StringVector words = {"", "board", "boat", "bob", "door", "food", "foot", "football", "foothill",
+ "for", "forbid", "force", "ford", "forearm", "forecast", "forest"};
+ populate_dictionary(words);
+ expect_prefix_matches("bo", 0, true, words); // matches everything
+ expect_prefix_matches("bo", 2, true, {"board", "boat", "bob"});
+ expect_prefix_matches("boar", 0, true, {"board", "boat", "bob", "door", "for", "forbid",
+ "force", "ford", "forearm", "forecast", "forest"});
+ expect_prefix_matches("board", 0, true, {"board", "boat", "ford"});
+ expect_prefix_matches("footb", 0, true, {"food", "foot", "football", "foothill", "forbid"});
+ expect_prefix_matches("footba", 0, true, {"foot", "football", "foothill"});
+ expect_prefix_matches("footbal", 0, true, {"football", "foothill"});
+ expect_prefix_matches("football", 0, true, {"football", "foothill"});
+ expect_prefix_matches("footballs", 0, true, {"football"});
+ expect_prefix_matches("z", 1, true, {});
+}
+
void
benchmark_fuzzy_match_in_dictionary(const StringEnumStore& store, const RawDictionary& dict, size_t words_to_match, bool cased, bool dfa_algorithm)
{
@@ -329,9 +355,9 @@ benchmark_fuzzy_match_in_dictionary(const StringEnumStore& store, const RawDicti
for (size_t i = 0; i < std::min(words_to_match, dict.size()); ++i) {
const auto& entry = dict[i];
if (dfa_algorithm) {
- dfa_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, stats, dummy);
+ dfa_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, false, stats, dummy);
} else {
- brute_force_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, stats, dummy);
+ brute_force_fuzzy_match_in_dictionary<false>(entry.first, store, 0, cased, false, stats, dummy);
}
}
std::cout << (dfa_algorithm ? "DFA:" : "Brute force:") << " samples=" << stats.samples << ", avg_matches=" << stats.avg_matches() << ", avg_seeks=" << stats.avg_seeks() << ", avg_elapsed_ms=" << stats.avg_elapsed_ms() << std::endl;
diff --git a/searchlib/src/tests/query/customtypevisitor_test.cpp b/searchlib/src/tests/query/customtypevisitor_test.cpp
index 815ff3bf1fb..fb89f2ef061 100644
--- a/searchlib/src/tests/query/customtypevisitor_test.cpp
+++ b/searchlib/src/tests/query/customtypevisitor_test.cpp
@@ -37,7 +37,7 @@ struct MyRangeTerm : InitTerm<RangeTerm> {};
struct MyStringTerm : InitTerm<StringTerm> {};
struct MySubstrTerm : InitTerm<SubstringTerm> {};
struct MySuffixTerm : InitTerm<SuffixTerm> {};
-struct MyFuzzyTerm : FuzzyTerm { MyFuzzyTerm(): FuzzyTerm("term", "view", 0, Weight(0), 2, 0) {} };
+struct MyFuzzyTerm : FuzzyTerm { MyFuzzyTerm(): FuzzyTerm("term", "view", 0, Weight(0), 2, 0, false) {} };
struct MyWeakAnd : WeakAnd { MyWeakAnd() : WeakAnd(1234, "view") {} };
struct MyWeightedSetTerm : WeightedSetTerm { MyWeightedSetTerm() : WeightedSetTerm(0, "view", 0, Weight(42)) {} };
struct MyDotProduct : DotProduct { MyDotProduct() : DotProduct(0, "view", 0, Weight(42)) {} };
diff --git a/searchlib/src/tests/query/query_visitor_test.cpp b/searchlib/src/tests/query/query_visitor_test.cpp
index b0c9c4e570e..bfce382b684 100644
--- a/searchlib/src/tests/query/query_visitor_test.cpp
+++ b/searchlib/src/tests/query/query_visitor_test.cpp
@@ -88,7 +88,7 @@ TEST("requireThatAllNodesCanBeVisited") {
checkVisit<NearestNeighborTerm>(new SimpleNearestNeighborTerm("query_tensor", "doc_tensor", 0, Weight(0), 123, true, 321, 100100.25));
checkVisit<TrueQueryNode>(new SimpleTrue());
checkVisit<FalseQueryNode>(new SimpleFalse());
- checkVisit<FuzzyTerm>(new SimpleFuzzyTerm("t", "field", 0, Weight(0), 2, 0));
+ checkVisit<FuzzyTerm>(new SimpleFuzzyTerm("t", "field", 0, Weight(0), 2, 0, false));
checkVisit<InTerm>(new SimpleInTerm(std::make_unique<StringTermVector>(0), MultiTerm::Type::STRING, "field", 0, Weight(0)));
}
diff --git a/searchlib/src/tests/query/querybuilder_test.cpp b/searchlib/src/tests/query/querybuilder_test.cpp
index be795c8c94b..c70fe5b510b 100644
--- a/searchlib/src/tests/query/querybuilder_test.cpp
+++ b/searchlib/src/tests/query/querybuilder_test.cpp
@@ -117,7 +117,7 @@ Node::UP createQueryTree() {
builder.add_true_node();
builder.add_false_node();
}
- builder.addFuzzyTerm(str[5], view[5], id[5], weight[5], 3, 1);
+ builder.addFuzzyTerm(str[5], view[5], id[5], weight[5], 3, 1, false);
}
Node::UP node = builder.build();
ASSERT_TRUE(node.get());
@@ -326,8 +326,8 @@ void checkQueryTreeTypes(Node *node) {
auto* fuzzy_term = as_node<FuzzyTerm>(and_node->getChildren()[12]);
EXPECT_TRUE(checkTerm(fuzzy_term, str[5], view[5], id[5], weight[5]));
- EXPECT_EQUAL(3u, fuzzy_term->getMaxEditDistance());
- EXPECT_EQUAL(1u, fuzzy_term->getPrefixLength());
+ EXPECT_EQUAL(3u, fuzzy_term->max_edit_distance());
+ EXPECT_EQUAL(1u, fuzzy_term->prefix_lock_length());
}
struct AbstractTypes {
@@ -452,8 +452,9 @@ struct MyTrue : TrueQueryNode {};
struct MyFalse : FalseQueryNode {};
struct MyFuzzyTerm : FuzzyTerm {
MyFuzzyTerm(const Type &t, const string &f, int32_t i, Weight w,
- uint32_t m, uint32_t p)
- : FuzzyTerm(t, f, i, w, m, p) {
+ uint32_t m, uint32_t p, bool prefix_match)
+ : FuzzyTerm(t, f, i, w, m, p, prefix_match)
+ {
}
};
struct MyInTerm : InTerm {
@@ -645,23 +646,27 @@ TEST("require that All Range Syntaxes Work") {
EXPECT_TRUE(range2 == range_term->getTerm());
}
-TEST("require that fuzzy node can be created") {
- QueryBuilder<SimpleQueryNodeTypes> builder;
- builder.addFuzzyTerm("term", "view", 0, Weight(0), 3, 1);
- Node::UP node = builder.build();
+TEST("fuzzy node can be created") {
+ for (bool prefix_match : {false, true}) {
+ QueryBuilder<SimpleQueryNodeTypes> builder;
+ builder.addFuzzyTerm("term", "view", 0, Weight(0), 3, 1, prefix_match);
+ Node::UP node = builder.build();
- string stackDump = StackDumpCreator::create(*node);
- {
- SimpleQueryStackDumpIterator iterator(stackDump);
- Node::UP new_node = QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator);
- FuzzyTerm *fuzzy_node = as_node<FuzzyTerm>(new_node.get());
- EXPECT_EQUAL(3u, fuzzy_node->getMaxEditDistance());
- EXPECT_EQUAL(1u, fuzzy_node->getPrefixLength());
- }
- {
- search::QueryTermSimple::UP queryTermSimple = search::QueryTermDecoder::decodeTerm(stackDump);
- EXPECT_EQUAL(3u, queryTermSimple->getFuzzyMaxEditDistance());
- EXPECT_EQUAL(1u, queryTermSimple->getFuzzyPrefixLength());
+ string stackDump = StackDumpCreator::create(*node);
+ {
+ SimpleQueryStackDumpIterator iterator(stackDump);
+ Node::UP new_node = QueryTreeCreator<SimpleQueryNodeTypes>::create(iterator);
+ auto *fuzzy_node = as_node<FuzzyTerm>(new_node.get());
+ EXPECT_EQUAL(3u, fuzzy_node->max_edit_distance());
+ EXPECT_EQUAL(1u, fuzzy_node->prefix_lock_length());
+ EXPECT_EQUAL(prefix_match, fuzzy_node->prefix_match());
+ }
+ {
+ search::QueryTermSimple::UP queryTermSimple = search::QueryTermDecoder::decodeTerm(stackDump);
+ EXPECT_EQUAL(3u, queryTermSimple->fuzzy_max_edit_distance());
+ EXPECT_EQUAL(1u, queryTermSimple->fuzzy_prefix_lock_length());
+ EXPECT_EQUAL(prefix_match, queryTermSimple->fuzzy_prefix_match());
+ }
}
}
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
index 5f3ab9cd3d8..3ae2bdd3598 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.cpp
@@ -44,8 +44,12 @@ extract_suffix(std::string_view target, uint32_t prefix_size)
}
-DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, LevenshteinDfa::DfaType dfa_type)
- : _dfa(vespalib::fuzzy::LevenshteinDfa::build(extract_suffix(target, prefix_size), max_edits, (cased ? LevenshteinDfa::Casing::Cased : LevenshteinDfa::Casing::Uncased), dfa_type)),
+DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size,
+ bool cased, bool prefix_match, LevenshteinDfa::DfaType dfa_type)
+ : _dfa(vespalib::fuzzy::LevenshteinDfa::build(extract_suffix(target, prefix_size), max_edits,
+ (cased ? LevenshteinDfa::Casing::Cased : LevenshteinDfa::Casing::Uncased),
+ dfa_type, // TODO reorder args
+ (prefix_match ? LevenshteinDfa::Matching::Prefix : LevenshteinDfa::Matching::FullString))),
_successor(),
_prefix(extract_prefix(target, prefix_size, cased)),
_prefix_size(prefix_size),
@@ -54,8 +58,8 @@ DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uin
_successor = _prefix;
}
-DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased)
- : DfaFuzzyMatcher(target, max_edits, prefix_size, cased, LevenshteinDfa::DfaType::Table)
+DfaFuzzyMatcher::DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, bool prefix_match)
+ : DfaFuzzyMatcher(target, max_edits, prefix_size, cased, prefix_match, LevenshteinDfa::DfaType::Table)
{
}
diff --git a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
index 653af602c0d..ee57e9e6583 100644
--- a/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
+++ b/searchlib/src/vespa/searchlib/attribute/dfa_fuzzy_matcher.h
@@ -26,8 +26,10 @@ private:
const char* skip_prefix(const char* word) const;
public:
- DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
- DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased); // Defaults to table-based DFA
+ DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, bool prefix_match,
+ vespalib::fuzzy::LevenshteinDfa::DfaType dfa_type);
+ // Defaults to table-based DFA:
+ DfaFuzzyMatcher(std::string_view target, uint8_t max_edits, uint32_t prefix_size, bool cased, bool prefix_match);
~DfaFuzzyMatcher();
[[nodiscard]] static constexpr bool supports_max_edits(uint8_t edits) noexcept {
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index f1a643dc376..c6cb6e4a637 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -49,17 +49,19 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased, vespali
? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
: vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
} else if (isFuzzy()) {
- auto max_edit_dist = term.getFuzzyMaxEditDistance();
+ const auto max_edit_dist = term.fuzzy_max_edit_distance();
_fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
max_edit_dist,
- term.getFuzzyPrefixLength(),
- isCased());
+ term.fuzzy_prefix_lock_length(),
+ isCased(),
+ term.fuzzy_prefix_match());
if ((fuzzy_matching_algorithm != FMA::BruteForce) &&
(max_edit_dist > 0 && max_edit_dist <= 2)) {
_dfa_fuzzy_matcher = std::make_unique<DfaFuzzyMatcher>(term.getTerm(),
max_edit_dist,
- term.getFuzzyPrefixLength(),
+ term.fuzzy_prefix_lock_length(),
isCased(),
+ term.fuzzy_prefix_match(),
to_dfa_type(fuzzy_matching_algorithm));
}
} else if (isCased()) {
diff --git a/searchlib/src/vespa/searchlib/parsequery/parse.h b/searchlib/src/vespa/searchlib/parsequery/parse.h
index 5e3b1dffe3a..a05c68bea46 100644
--- a/searchlib/src/vespa/searchlib/parsequery/parse.h
+++ b/searchlib/src/vespa/searchlib/parsequery/parse.h
@@ -87,6 +87,8 @@ public:
IFLAG_NORANK = 0x00000001, // this term should not be ranked (not exposed to rank framework)
IFLAG_SPECIALTOKEN = 0x00000002,
IFLAG_NOPOSITIONDATA = 0x00000004, // we should not use position data when ranking this term
+ IFLAG_FILTER = 0x00000008, // see GetCreator(flags) below
+ IFLAG_PREFIX_MATCH = 0x00000010
};
/** Extra information on each item (creator id) coded in bit 3 of flags */
diff --git a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
index d15e1ca0512..bf6e3abf1ea 100644
--- a/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
+++ b/searchlib/src/vespa/searchlib/parsequery/stackdumpiterator.h
@@ -113,9 +113,18 @@ public:
uint32_t getUniqueId() const noexcept { return _currUniqueId; }
// Get the flags of the current item.
- bool hasNoRankFlag() const noexcept { return (_currFlags & ParseItem::IFLAG_NORANK) != 0; }
- bool hasSpecialTokenFlag() const noexcept { return (_currFlags & ParseItem::IFLAG_SPECIALTOKEN) != 0; }
- bool hasNoPositionDataFlag() const noexcept { return (_currFlags & ParseItem::IFLAG_NOPOSITIONDATA) != 0; }
+ [[nodiscard]] bool hasNoRankFlag() const noexcept {
+ return (_currFlags & ParseItem::IFLAG_NORANK) != 0;
+ }
+ [[nodiscard]] bool hasSpecialTokenFlag() const noexcept {
+ return (_currFlags & ParseItem::IFLAG_SPECIALTOKEN) != 0;
+ }
+ [[nodiscard]] bool hasNoPositionDataFlag() const noexcept {
+ return (_currFlags & ParseItem::IFLAG_NOPOSITIONDATA) != 0;
+ }
+ [[nodiscard]] bool has_prefix_match_semantics() const noexcept {
+ return (_currFlags & ParseItem::IFLAG_PREFIX_MATCH) != 0;
+ }
uint32_t getArity() const noexcept { return _currArity; }
@@ -127,9 +136,9 @@ public:
bool getAllowApproximate() const noexcept { return (_extraIntArg2 != 0); }
uint32_t getExploreAdditionalHits() const noexcept { return _extraIntArg3; }
- // fuzzy match arguments
- uint32_t getFuzzyMaxEditDistance() const noexcept { return _extraIntArg1; }
- uint32_t getFuzzyPrefixLength() const noexcept { return _extraIntArg2; }
+ // fuzzy match arguments (see also: has_prefix_match_semantics() for fuzzy prefix matching)
+ [[nodiscard]] uint32_t fuzzy_max_edit_distance() const noexcept { return _extraIntArg1; }
+ [[nodiscard]] uint32_t fuzzy_prefix_lock_length() const noexcept { return _extraIntArg2; }
std::unique_ptr<query::PredicateQueryTerm> getPredicateQueryTerm();
std::unique_ptr<query::TermVector> get_terms();
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
index 060cd5015b3..09fc443cf0e 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp
@@ -248,10 +248,11 @@ QueryTermSimple::QueryTermSimple(const string & term_, Type type)
_type(type),
_diversityCutoffStrict(false),
_valid(true),
+ _fuzzy_prefix_match(false),
_term(term_),
_diversityAttribute(),
- _fuzzyMaxEditDistance(2),
- _fuzzyPrefixLength(0)
+ _fuzzy_max_edit_distance(2),
+ _fuzzy_prefix_lock_length(0)
{
if (isFullRange(_term)) {
stringref rest(_term.c_str() + 1, _term.size() - 2);
diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h
index a740afb0340..b0ee7aa54bc 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_simple.h
+++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h
@@ -44,8 +44,9 @@ public:
size_t getDiversityCutoffGroups() const noexcept { return _diversityCutoffGroups; }
bool getDiversityCutoffStrict() const noexcept { return _diversityCutoffStrict; }
vespalib::stringref getDiversityAttribute() const noexcept { return _diversityAttribute; }
- size_t getFuzzyMaxEditDistance() const noexcept { return _fuzzyMaxEditDistance; }
- size_t getFuzzyPrefixLength() const noexcept { return _fuzzyPrefixLength; }
+ [[nodiscard]] size_t fuzzy_max_edit_distance() const noexcept { return _fuzzy_max_edit_distance; }
+ [[nodiscard]] size_t fuzzy_prefix_lock_length() const noexcept { return _fuzzy_prefix_lock_length; }
+ [[nodiscard]] bool fuzzy_prefix_match() const noexcept { return _fuzzy_prefix_match; }
bool getAsIntegerTerm(int64_t & lower, int64_t & upper) const noexcept;
bool getAsFloatTerm(double & lower, double & upper) const noexcept;
bool getAsFloatTerm(float & lower, float & upper) const noexcept;
@@ -77,14 +78,17 @@ private:
Type _type;
bool _diversityCutoffStrict;
bool _valid;
+protected:
+ bool _fuzzy_prefix_match; // set in QueryTerm
+private:
string _term;
stringref _diversityAttribute;
template <typename T, typename D>
bool getAsNumericTerm(T & lower, T & upper, D d) const noexcept;
protected:
- uint32_t _fuzzyMaxEditDistance; // set in QueryTerm
- uint32_t _fuzzyPrefixLength;
+ uint32_t _fuzzy_max_edit_distance; // set in QueryTerm
+ uint32_t _fuzzy_prefix_lock_length;
};
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp
index f33fe44369a..8a4ec184de8 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.cpp
@@ -13,20 +13,21 @@ constexpr bool normalizing_implies_cased(Normalizing norm) noexcept {
FuzzyTerm::FuzzyTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term,
const string& index, Type type, Normalizing normalizing,
- uint8_t max_edits, uint32_t prefix_size)
+ uint8_t max_edits, uint32_t prefix_lock_length, bool prefix_match)
: QueryTerm(std::move(result_base), term, index, type, normalizing),
_dfa_matcher(),
_fallback_matcher()
{
- setFuzzyMaxEditDistance(max_edits);
- setFuzzyPrefixLength(prefix_size);
+ set_fuzzy_max_edit_distance(max_edits);
+ set_fuzzy_prefix_lock_length(prefix_lock_length);
+ set_fuzzy_prefix_match(prefix_match);
std::string_view term_view(term.data(), term.size());
const bool cased = normalizing_implies_cased(normalizing);
if (attribute::DfaFuzzyMatcher::supports_max_edits(max_edits)) {
- _dfa_matcher = std::make_unique<attribute::DfaFuzzyMatcher>(term_view, max_edits, prefix_size, cased);
+ _dfa_matcher = std::make_unique<attribute::DfaFuzzyMatcher>(term_view, max_edits, prefix_lock_length, cased, prefix_match);
} else {
- _fallback_matcher = std::make_unique<vespalib::FuzzyMatcher>(term_view, max_edits, prefix_size, cased);
+ _fallback_matcher = std::make_unique<vespalib::FuzzyMatcher>(term_view, max_edits, prefix_lock_length, cased, prefix_match);
}
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h
index c6c88b18969..552a839cb3b 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/fuzzy_term.h
@@ -23,7 +23,7 @@ class FuzzyTerm : public QueryTerm {
public:
FuzzyTerm(std::unique_ptr<QueryNodeResultBase> result_base, stringref term,
const string& index, Type type, Normalizing normalizing,
- uint8_t max_edits, uint32_t prefix_size);
+ uint8_t max_edits, uint32_t prefix_lock_length, bool prefix_match);
~FuzzyTerm() override;
[[nodiscard]] FuzzyTerm* as_fuzzy_term() noexcept override { return this; }
diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
index 94a479fd2d3..68e276d0fde 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp
@@ -130,7 +130,8 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor
qt = std::make_unique<RegexpTerm>(factory.create(), ssTerm, ssIndex, TermType::REGEXP, normalize_mode);
} else if (sTerm == TermType::FUZZYTERM) {
qt = std::make_unique<FuzzyTerm>(factory.create(), ssTerm, ssIndex, TermType::FUZZYTERM, normalize_mode,
- queryRep.getFuzzyMaxEditDistance(), queryRep.getFuzzyPrefixLength());
+ queryRep.fuzzy_max_edit_distance(), queryRep.fuzzy_prefix_lock_length(),
+ queryRep.has_prefix_match_semantics());
} else [[likely]] {
qt = std::make_unique<QueryTerm>(factory.create(), ssTerm, ssIndex, sTerm, normalize_mode);
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 05b12804d52..0e0ab98295b 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -100,8 +100,9 @@ public:
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
void setIndex(const string & index_) override { _index = index_; }
const string & getIndex() const override { return _index; }
- void setFuzzyMaxEditDistance(uint32_t fuzzyMaxEditDistance) { _fuzzyMaxEditDistance = fuzzyMaxEditDistance; }
- void setFuzzyPrefixLength(uint32_t fuzzyPrefixLength) { _fuzzyPrefixLength = fuzzyPrefixLength; }
+ void set_fuzzy_max_edit_distance(uint32_t fuzzy_max_edit_distance) noexcept { _fuzzy_max_edit_distance = fuzzy_max_edit_distance; }
+ void set_fuzzy_prefix_lock_length(uint32_t fuzzy_prefix_length) noexcept { _fuzzy_prefix_lock_length = fuzzy_prefix_length; }
+ void set_fuzzy_prefix_match(bool prefix_match) noexcept { _fuzzy_prefix_match = prefix_match; }
virtual NearestNeighborQueryNode* as_nearest_neighbor_query_node() noexcept;
virtual MultiTerm* as_multi_term() noexcept;
virtual const MultiTerm* as_multi_term() const noexcept;
diff --git a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
index 41990af6908..a498076bb09 100644
--- a/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
+++ b/searchlib/src/vespa/searchlib/query/tree/querybuilder.h
@@ -226,8 +226,8 @@ create_nearest_neighbor_term(vespalib::stringref query_tensor_name, vespalib::st
template <class NodeTypes>
typename NodeTypes::FuzzyTerm *
createFuzzyTerm(vespalib::stringref term, vespalib::stringref view, int32_t id, Weight weight,
- uint32_t maxEditDistance, uint32_t prefixLength) {
- return new typename NodeTypes::FuzzyTerm(term, view, id, weight, maxEditDistance, prefixLength);
+ uint32_t max_edit_distance, uint32_t prefix_lock_length, bool prefix_match) {
+ return new typename NodeTypes::FuzzyTerm(term, view, id, weight, max_edit_distance, prefix_lock_length, prefix_match);
}
template <class NodeTypes>
@@ -343,9 +343,10 @@ public:
return addTerm(createRegExpTerm<NodeTypes>(term, view, id, weight));
}
typename NodeTypes::FuzzyTerm &addFuzzyTerm(stringref term, stringref view, int32_t id, Weight weight,
- uint32_t maxEditDistance, uint32_t prefixLength) {
+ uint32_t max_edit_distance, uint32_t prefix_lock_length,
+ bool prefix_match) {
adjustWeight(weight);
- return addTerm(createFuzzyTerm<NodeTypes>(term, view, id, weight, maxEditDistance, prefixLength));
+ return addTerm(createFuzzyTerm<NodeTypes>(term, view, id, weight, max_edit_distance, prefix_lock_length, prefix_match));
}
typename NodeTypes::NearestNeighborTerm &add_nearest_neighbor_term(stringref query_tensor_name, stringref field_name,
int32_t id, Weight weight, uint32_t target_num_hits,
diff --git a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
index f578bb57e21..116e677d439 100644
--- a/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
+++ b/searchlib/src/vespa/searchlib/query/tree/queryreplicator.h
@@ -201,7 +201,8 @@ private:
replicate(node, _builder.addFuzzyTerm(
node.getTerm(), node.getView(),
node.getId(), node.getWeight(),
- node.getMaxEditDistance(), node.getPrefixLength()));
+ node.max_edit_distance(), node.prefix_lock_length(),
+ node.prefix_match()));
}
std::unique_ptr<TermVector> replicate_subterms(const InTerm& original) {
diff --git a/searchlib/src/vespa/searchlib/query/tree/simplequery.h b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
index 535402bf14d..34bc39ae504 100644
--- a/searchlib/src/vespa/searchlib/query/tree/simplequery.h
+++ b/searchlib/src/vespa/searchlib/query/tree/simplequery.h
@@ -163,8 +163,10 @@ struct SimpleNearestNeighborTerm : NearestNeighborTerm {
struct SimpleFuzzyTerm : FuzzyTerm {
SimpleFuzzyTerm(const Type &term, vespalib::stringref view,
int32_t id, Weight weight,
- uint32_t maxEditDistance, uint32_t prefixLength)
- : FuzzyTerm(term, view, id, weight, maxEditDistance, prefixLength) {
+ uint32_t max_edit_distance, uint32_t prefix_lock_length,
+ bool prefix_match)
+ : FuzzyTerm(term, view, id, weight, max_edit_distance, prefix_lock_length, prefix_match)
+ {
}
~SimpleFuzzyTerm() override;
};
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
index 6707712ee69..b71a86a214a 100644
--- a/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpcreator.cpp
@@ -225,6 +225,9 @@ class QueryNodeConverter : public QueryVisitor {
if (!node.usePositionData()) {
flags |= ParseItem::IFLAG_NOPOSITIONDATA;
}
+ if (node.prefix_match()) {
+ flags |= ParseItem::IFLAG_PREFIX_MATCH;
+ }
if (flags != 0) {
features |= ParseItem::IF_FLAGS;
}
@@ -289,8 +292,8 @@ class QueryNodeConverter : public QueryVisitor {
void visit(FuzzyTerm &node) override {
createTerm(node, ParseItem::ITEM_FUZZY);
- appendCompressedPositiveNumber(node.getMaxEditDistance());
- appendCompressedPositiveNumber(node.getPrefixLength());
+ appendCompressedPositiveNumber(node.max_edit_distance());
+ appendCompressedPositiveNumber(node.prefix_lock_length());
}
void visit(NearestNeighborTerm &node) override {
diff --git a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
index 7c743a72567..6f7d7ddd9ad 100644
--- a/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
+++ b/searchlib/src/vespa/searchlib/query/tree/stackdumpquerycreator.h
@@ -42,6 +42,9 @@ public:
if (queryStack.hasNoPositionDataFlag()) {
t->setPositionData(false);
}
+ if (queryStack.has_prefix_match_semantics()) [[unlikely]] {
+ t->set_prefix_match(true);
+ }
}
}
if (builder.hasError()) {
@@ -185,9 +188,10 @@ private:
} else if (type == ParseItem::ITEM_REGEXP) {
t = &builder.addRegExpTerm(term, view, id, weight);
} else if (type == ParseItem::ITEM_FUZZY) {
- uint32_t maxEditDistance = queryStack.getFuzzyMaxEditDistance();
- uint32_t prefixLength = queryStack.getFuzzyPrefixLength();
- t = &builder.addFuzzyTerm(term, view, id, weight, maxEditDistance, prefixLength);
+ uint32_t max_edit_distance = queryStack.fuzzy_max_edit_distance();
+ uint32_t prefix_lock_length = queryStack.fuzzy_prefix_lock_length();
+ bool prefix_match = queryStack.has_prefix_match_semantics();
+ t = &builder.addFuzzyTerm(term, view, id, weight, max_edit_distance, prefix_lock_length, prefix_match);
} else if (type == ParseItem::ITEM_STRING_IN) {
t = &builder.add_in_term(queryStack.get_terms(), MultiTerm::Type::STRING, view, id, weight);
} else if (type == ParseItem::ITEM_NUMERIC_IN) {
diff --git a/searchlib/src/vespa/searchlib/query/tree/term.cpp b/searchlib/src/vespa/searchlib/query/tree/term.cpp
index 45ba259a799..35f6ea5c8f5 100644
--- a/searchlib/src/vespa/searchlib/query/tree/term.cpp
+++ b/searchlib/src/vespa/searchlib/query/tree/term.cpp
@@ -12,12 +12,14 @@ Term::Term(vespalib::stringref view, int32_t id, Weight weight) :
_id(id),
_weight(weight),
_ranked(true),
- _position_data(true)
+ _position_data(true),
+ _prefix_match(false)
{ }
void Term::setStateFrom(const Term& other) {
setRanked(other.isRanked());
setPositionData(other.usePositionData());
+ set_prefix_match(other.prefix_match());
// too late to copy this state:
assert(_view == other.getView());
assert(_id == other.getId());
diff --git a/searchlib/src/vespa/searchlib/query/tree/term.h b/searchlib/src/vespa/searchlib/query/tree/term.h
index 2f57c3cb06d..678d018d111 100644
--- a/searchlib/src/vespa/searchlib/query/tree/term.h
+++ b/searchlib/src/vespa/searchlib/query/tree/term.h
@@ -18,6 +18,7 @@ class Term
Weight _weight;
bool _ranked;
bool _position_data;
+ bool _prefix_match;
public:
virtual ~Term() = 0;
@@ -25,14 +26,17 @@ public:
void setView(const vespalib::string & view) { _view = view; }
void setRanked(bool ranked) noexcept { _ranked = ranked; }
void setPositionData(bool position_data) noexcept { _position_data = position_data; }
+ // Used for fuzzy prefix matching. Not to be confused with distinct Prefix query term type
+ void set_prefix_match(bool prefix_match) noexcept { _prefix_match = prefix_match; }
void setStateFrom(const Term& other);
const vespalib::string & getView() const noexcept { return _view; }
Weight getWeight() const noexcept { return _weight; }
int32_t getId() const noexcept { return _id; }
- bool isRanked() const noexcept { return _ranked; }
- bool usePositionData() const noexcept { return _position_data; }
+ [[nodiscard]] bool isRanked() const noexcept { return _ranked; }
+ [[nodiscard]] bool usePositionData() const noexcept { return _position_data; }
+ [[nodiscard]] bool prefix_match() const noexcept { return _prefix_match; }
static bool isPossibleRangeTerm(vespalib::stringref term) noexcept {
return (term[0] == '[' || term[0] == '<' || term[0] == '>');
diff --git a/searchlib/src/vespa/searchlib/query/tree/termnodes.h b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
index 97e659dcc77..42dc0717368 100644
--- a/searchlib/src/vespa/searchlib/query/tree/termnodes.h
+++ b/searchlib/src/vespa/searchlib/query/tree/termnodes.h
@@ -119,19 +119,22 @@ public:
//-----------------------------------------------------------------------------
class FuzzyTerm : public QueryNodeMixin<FuzzyTerm, StringBase> {
-private:
- uint32_t _maxEditDistance;
- uint32_t _prefixLength;
+ uint32_t _max_edit_distance;
+ uint32_t _prefix_lock_length;
+ // Prefix match mode is stored in parent Term
public:
FuzzyTerm(const Type &term, vespalib::stringref view,
- int32_t id, Weight weight, uint32_t maxEditDistance, uint32_t prefixLength)
- : QueryNodeMixinType(term, view, id, weight),
- _maxEditDistance(maxEditDistance),
- _prefixLength(prefixLength)
- {}
+ int32_t id, Weight weight, uint32_t max_edit_distance,
+ uint32_t prefix_lock_length, bool prefix_match)
+ : QueryNodeMixinType(term, view, id, weight),
+ _max_edit_distance(max_edit_distance),
+ _prefix_lock_length(prefix_lock_length)
+ {
+ set_prefix_match(prefix_match);
+ }
- uint32_t getMaxEditDistance() const { return _maxEditDistance; }
- uint32_t getPrefixLength() const { return _prefixLength; }
+ [[nodiscard]] uint32_t max_edit_distance() const { return _max_edit_distance; }
+ [[nodiscard]] uint32_t prefix_lock_length() const { return _prefix_lock_length; }
virtual ~FuzzyTerm() = 0;
};