summary refs log tree commit diff stats
path: root/searchlib/src/tests/attribute
diff options
context:
space:
mode:
author Alexey Chernyshev <aleksei@spotify.com> 2022-03-10 16:33:07 +0100
committer Alexey Chernyshev <aleksei@spotify.com> 2022-03-23 16:20:59 +0100
commit d9805209e3b0e33be3c0cc454c4604043663c1c4 (patch)
tree 7446c79f68acd8775233ace4d5a70058f90c8406 /searchlib/src/tests/attribute
parent a2b1e6654cabc90ddf7422e58adf641876e5201c (diff)
Introducing fuzzy search
Diffstat (limited to 'searchlib/src/tests/attribute')
-rw-r--r-- searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp 72
-rw-r--r-- searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp 20
2 files changed, 90 insertions, 2 deletions
diff --git a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
index 4f037415b35..65de302ae04 100644
--- a/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
+++ b/searchlib/src/tests/attribute/searchcontext/searchcontext_test.cpp
@@ -242,6 +242,12 @@ private:
void testPrefixSearch(const AttributePtr & ptr);
void testPrefixSearch();
+ // test fuzzy search
+ void performFuzzySearch(const StringAttribute & vec, const vespalib::string & term,
+ const DocSet & expected, TermType termType);
+ void testFuzzySearch(const AttributePtr & ptr);
+ void testFuzzySearch();
+
// test that search is working after clear doc
template <typename VectorType, typename ValueType>
void requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name, const Config & cfg,
@@ -402,6 +408,7 @@ SearchContextTest::buildTermQuery(std::vector<char> & buffer, const vespalib::st
switch (termType) {
case TermType::PREFIXTERM: buffer[p++] = ParseItem::ITEM_PREFIXTERM; break;
case TermType::REGEXP: buffer[p++] = ParseItem::ITEM_REGEXP; break;
+ case TermType::FUZZYTERM: buffer[p++] = ParseItem::ITEM_FUZZY; break;
default:
buffer[p++] = ParseItem::ITEM_TERM;
break;
@@ -1498,6 +1505,70 @@ SearchContextTest::testPrefixSearch()
}
}
+//-----------------------------------------------------------------------------
+// Test fuzzy search
+//-----------------------------------------------------------------------------
+
+// Fuzzy-search entry point used by testFuzzySearch(). It forwards every
+// argument unchanged to performSearch(); the query flavour is whatever the
+// caller passes in `termType`.
+// NOTE(review): `expected` is presumably the set of doc ids the search must
+// return - confirm against performSearch().
+void SearchContextTest::performFuzzySearch(const StringAttribute & vec,
+                                           const vespalib::string & term,
+                                           const DocSet & expected,
+                                           TermType termType)
+{
+    performSearch(vec, term, expected, termType);
+}
+
+// Fills the given string attribute with two documents ("fuzzysearch" and
+// "FUZZYSEARCH") and runs fuzzy queries against it. Each row of `terms` holds
+// a lower-case and an upper-case variant of the same query:
+//   row 0: the stored value itself            -> both documents expected
+//   row 1: one character substituted          -> both documents expected
+//   row 2: four characters substituted        -> no documents expected
+// Expecting both documents for either casing means the match is expected to
+// be case-insensitive for every configuration this is run with.
+void
+SearchContextTest::testFuzzySearch(const AttributePtr & ptr)
+{
+    LOG(info, "testFuzzySearch: vector '%s'", ptr->getName().c_str());
+
+    auto & vec = dynamic_cast<StringAttribute &>(*ptr.get());
+
+    uint32_t numDocs = 2;
+    addDocs(*ptr.get(), numDocs);
+
+    const char * strings[] = {"fuzzysearch", "FUZZYSEARCH"};
+    const char * terms[][2] = {
+        {"fuzzysearch", "FUZZYSEARCH"},
+        {"fuzzysearck", "FUZZYSEARCK"},
+        {"fuzzysekkkk", "FUZZYSEKKKK"}
+    };
+
+    // Values are written to doc ids 1..numDocs; doc id 0 is never touched here.
+    // NOTE(review): presumably doc id 0 is reserved - confirm against addDocs().
+    for (uint32_t doc = 1; doc <= numDocs; ++doc) {
+        ASSERT_TRUE(doc < vec.getNumDocs());
+        EXPECT_TRUE(vec.update(doc, strings[doc - 1]));
+    }
+
+    ptr->commit(true);
+
+    // expected[row] is the hit set for every term in terms[row].
+    uint32_t allDocs[] = {1, 2};
+    std::vector<DocSet> expected;
+    expected.emplace_back(std::begin(allDocs), std::end(allDocs)); // row 0: exact terms
+    expected.emplace_back(std::begin(allDocs), std::end(allDocs)); // row 1: one substitution
+    expected.emplace_back();                                       // row 2: four substitutions, no hits
+
+    // Walk the term table itself instead of hard-coded bounds, so that adding
+    // a row or a variant cannot silently be skipped.
+    uint32_t row = 0;
+    for (const auto & variants : terms) {
+        ASSERT_TRUE(row < expected.size());
+        for (const char * term : variants) {
+            performFuzzySearch(vec, term, expected[row], TermType::FUZZYTERM);
+        }
+        ++row;
+    }
+}
+
+// Runs the per-attribute fuzzy search test once for every entry in
+// _stringCfg, creating a fresh attribute from the entry's (first, second)
+// pair through AttributeFactory for each run.
+void
+SearchContextTest::testFuzzySearch()
+{
+    for (const auto & entry : _stringCfg) {
+        const AttributePtr attribute = AttributeFactory::createAttribute(entry.first, entry.second);
+        testFuzzySearch(attribute);
+    }
+}
+
+
template <typename VectorType, typename ValueType>
void
SearchContextTest::requireThatSearchIsWorkingAfterClearDoc(const vespalib::string & name,
@@ -2028,6 +2099,7 @@ SearchContextTest::Main()
testPrefixSearch();
testSearchIteratorConformance();
testSearchIteratorUnpacking();
+ testFuzzySearch();
TEST_DO(requireThatSearchIsWorkingAfterClearDoc());
TEST_DO(requireThatSearchIsWorkingAfterLoadAndClearDoc());
TEST_DO(requireThatSearchIsWorkingAfterUpdates());
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 8a17114057c..2f0f0d5a6ae 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -386,8 +386,8 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext));
- EXPECT_EQUAL(24u, sizeof(StringSearchHelper));
- EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
+ EXPECT_EQUAL(32u, sizeof(StringSearchHelper));
+ EXPECT_EQUAL(64u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
@@ -494,4 +494,20 @@ TEST("test cased regex match") {
EXPECT_FALSE(helper.isMatch("xY"));
}
+// Exercises StringSearchHelper built from a FUZZYTERM query for "xyz": the
+// helper must report itself as fuzzy only, accept candidates that are up to
+// two single-character edits away from the term, and reject one that is
+// three edits away.
+// NOTE(review): the cut-off between two and three edits is presumably the
+// helper's default maximum edit distance - confirm in StringSearchHelper.
+TEST("test fuzzy match") {
+    QueryTermUCS4 fuzzyTerm("xyz", QueryTermSimple::Type::FUZZYTERM);
+    StringSearchHelper helper(fuzzyTerm, false);
+
+    // Mode flags: fuzzy and nothing else.
+    EXPECT_FALSE(helper.isCased());
+    EXPECT_FALSE(helper.isPrefix());
+    EXPECT_FALSE(helper.isRegex());
+    EXPECT_TRUE(helper.isFuzzy());
+
+    // Candidates that must match.
+    EXPECT_TRUE(helper.isMatch("xyz"));   // identical
+    EXPECT_TRUE(helper.isMatch("xyza"));  // one character appended
+    EXPECT_TRUE(helper.isMatch("xyv"));   // one character substituted
+    EXPECT_TRUE(helper.isMatch("xy"));    // one character removed
+    EXPECT_TRUE(helper.isMatch("x"));     // two characters removed
+    EXPECT_TRUE(helper.isMatch("xvv"));   // two characters substituted
+
+    // Candidate that must not match.
+    EXPECT_FALSE(helper.isMatch("vvv"));  // three characters substituted
+}
+
TEST_MAIN() { TEST_RUN_ALL(); }