summary | refs | log | tree | commit | diff | stats
path: root/searchlib
diff options
context:
space:
mode:
author: Henning Baldersheim <balder@yahoo-inc.com> 2022-10-17 17:19:43 +0000
committer: Henning Baldersheim <balder@yahoo-inc.com> 2022-10-17 17:27:08 +0000
commit: a13c1f486ea9470a72a5dd564929216370f91951 (patch)
tree: 4bb3c9283a3035846ef2691cadcd0d0dcb0487b1 /searchlib
parent: 169187771dfc974f58238fc19db3b534c4b6c9f4 (diff)
- Use unique_ptr to hide FuzzyMatcher to make most common SearchContext smaller.
- GC unused stringattribute files.
Diffstat (limited to 'searchlib')
-rw-r--r-- searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp 14
-rw-r--r-- searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp 4
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/CMakeLists.txt 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/single_enum_search_context.h 5
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp 1
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_context.cpp 3
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp 21
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/string_search_helper.h 18
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringattribute.cpp 8
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringattribute.h 11
-rw-r--r-- searchlib/src/vespa/searchlib/attribute/stringbase.h 3
14 files changed, 34 insertions, 60 deletions
diff --git a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
index 3fa74b78d2a..9b6187bcc77 100644
--- a/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
+++ b/searchlib/src/tests/attribute/attributemanager/attributemanager_test.cpp
@@ -7,7 +7,6 @@
#include <vespa/searchlib/attribute/configconverter.h>
#include <vespa/searchlib/attribute/multinumericattribute.h>
#include <vespa/searchlib/attribute/multinumericattribute.hpp>
-#include <vespa/searchlib/attribute/stringattribute.h>
#include <vespa/vespalib/testkit/testapp.h>
#include <vespa/log/log.h>
@@ -31,7 +30,7 @@ using TestAttributeBase = MultiValueNumericAttribute< IntegerAttributeTemplate<i
class TestAttribute : public TestAttributeBase
{
public:
- TestAttribute(const std::string &name)
+ explicit TestAttribute(const std::string &name)
: TestAttributeBase(name)
{}
@@ -44,18 +43,17 @@ public:
TEST("Test attribute guards")
{
- AttributeVector::SP vec(new TestAttribute("mvint") );
- TestAttribute * v = static_cast<TestAttribute *> (vec.get());
+ auto v = std::make_shared<TestAttribute>("mvint");
EXPECT_EQUAL(v->getGen(), unsigned(0));
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
{
- AttributeGuard g0(vec);
+ AttributeGuard g0(v);
EXPECT_EQUAL(v->getGen(), unsigned(0));
EXPECT_EQUAL(v->getRefCount(0), unsigned(1));
EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
{
- AttributeGuard g1(vec);
+ AttributeGuard g1(v);
EXPECT_EQUAL(v->getGen(), unsigned(0));
EXPECT_EQUAL(v->getRefCount(0), unsigned(2));
EXPECT_EQUAL(v->oldest_used_gen(), unsigned(0));
@@ -72,14 +70,14 @@ TEST("Test attribute guards")
EXPECT_EQUAL(v->getRefCount(1), unsigned(0));
EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
{
- AttributeGuard g0(vec);
+ AttributeGuard g0(v);
EXPECT_EQUAL(v->getGen(), unsigned(1));
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
EXPECT_EQUAL(v->oldest_used_gen(), unsigned(1));
{
v->incGen();
- AttributeGuard g1(vec);
+ AttributeGuard g1(v);
EXPECT_EQUAL(v->getGen(), unsigned(2));
EXPECT_EQUAL(v->getRefCount(0), unsigned(0));
EXPECT_EQUAL(v->getRefCount(1), unsigned(1));
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 96039bee15b..2289000a273 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -386,8 +386,8 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(SearchContext));
- EXPECT_EQUAL(96u, sizeof(StringSearchHelper));
- EXPECT_EQUAL(144u, sizeof(attribute::SingleStringEnumSearchContext));
+ EXPECT_EQUAL(32u, sizeof(StringSearchHelper));
+ EXPECT_EQUAL(80u, sizeof(attribute::SingleStringEnumSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
index 704db67aa03..72c7efe3094 100644
--- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
+++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt
@@ -127,7 +127,6 @@ vespa_add_library(searchlib_attribute OBJECT
single_string_enum_search_context.cpp
single_string_enum_hint_search_context.cpp
sourceselector.cpp
- stringattribute.cpp
stringbase.cpp
string_matcher.cpp
string_search_context.cpp
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
index 5e2d279a756..c34f64a7da4 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp
@@ -3,7 +3,6 @@
#pragma once
#include "multistringattribute.h"
-#include "stringattribute.h"
#include "enumattribute.hpp"
#include "enumerated_multi_value_read_view.h"
#include "multienumattribute.hpp"
diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
index cfd00f84636..1c1ad909da6 100644
--- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp
@@ -3,7 +3,6 @@
#pragma once
#include "multistringpostattribute.h"
-#include "stringattribute.h"
#include "multistringattribute.hpp"
#include "multi_string_enum_search_context.h"
#include <vespa/searchlib/query/query_term_simple.h>
diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
index 61bd8e2e2bb..40682002818 100644
--- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
+++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h
@@ -10,6 +10,7 @@
#include <vespa/searchcommon/attribute/search_context_params.h>
#include <vespa/searchcommon/common/range.h>
#include <vespa/vespalib/util/regexp.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
#include <regex>
namespace search::attribute {
@@ -52,7 +53,7 @@ protected:
PostingListSearchContext(const IEnumStoreDictionary& dictionary, uint32_t docIdLimit, uint64_t numValues, bool hasWeight,
uint32_t minBvDocFreq, bool useBitVector, const ISearchContext &baseSearchCtx);
- ~PostingListSearchContext();
+ ~PostingListSearchContext() override;
void lookupTerm(const vespalib::datastore::EntryComparator &comp);
void lookupRange(const vespalib::datastore::EntryComparator &low, const vespalib::datastore::EntryComparator &high);
diff --git a/searchlib/src/vespa/searchlib/attribute/single_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_enum_search_context.h
index f0e6908fc14..83d6c696117 100644
--- a/searchlib/src/vespa/searchlib/attribute/single_enum_search_context.h
+++ b/searchlib/src/vespa/searchlib/attribute/single_enum_search_context.h
@@ -4,7 +4,6 @@
#include "search_context.h"
#include "enumstore.h"
-#include "multi_value_mapping.h"
namespace search::attribute {
@@ -21,11 +20,11 @@ protected:
const vespalib::datastore::AtomicEntryRef* _enum_indices;
const EnumStoreT<T>& _enum_store;
- int32_t onFind(DocId docId, int32_t elemId, int32_t & weight) const override final {
+ int32_t onFind(DocId docId, int32_t elemId, int32_t & weight) const final {
return find(docId, elemId, weight);
}
- int32_t onFind(DocId docId, int32_t elemId) const override final {
+ int32_t onFind(DocId docId, int32_t elemId) const final {
return find(docId, elemId);
}
diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
index 64abbf86108..82a4393fc91 100644
--- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
+++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp
@@ -3,7 +3,6 @@
#pragma once
#include "singlestringattribute.h"
-#include "stringattribute.h"
#include "singleenumattribute.hpp"
#include "attributevector.hpp"
#include "single_string_enum_hint_search_context.h"
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
index aa7aa016720..e548ab8078c 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp
@@ -3,8 +3,9 @@
#include "string_search_context.h"
#include "enumhintsearchcontext.h"
#include "enumstore.h"
-#include <vespa/vespalib/util/regexp.h>
#include <vespa/searchlib/query/query_term_ucs4.h>
+#include <vespa/vespalib/util/regexp.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
namespace search::attribute {
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
index 5df0efe256e..206c2bcbd69 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.cpp
@@ -4,6 +4,8 @@
#include <vespa/searchlib/query/query_term_ucs4.h>
#include <vespa/vespalib/text/lowercase.h>
#include <vespa/vespalib/text/utf8.h>
+#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
+
namespace search::attribute {
@@ -18,17 +20,14 @@ StringSearchHelper::StringSearchHelper(QueryTermUCS4 & term, bool cased)
_isFuzzy(term.isFuzzy())
{
if (isRegex()) {
- if (isCased()) {
- _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None);
- } else {
- _regex = vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
- }
+ _regex = (isCased())
+ ? vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::None)
+ : vespalib::Regex::from_pattern(term.getTerm(), vespalib::Regex::Options::IgnoreCase);
} else if (isFuzzy()) {
- _fuzzyMatcher = vespalib::FuzzyMatcher(
- term.getTerm(),
- term.getFuzzyMaxEditDistance(),
- term.getFuzzyPrefixLength(),
- isCased());
+ _fuzzyMatcher = std::make_unique<vespalib::FuzzyMatcher>(term.getTerm(),
+ term.getFuzzyMaxEditDistance(),
+ term.getFuzzyPrefixLength(),
+ isCased());
} else if (isCased()) {
_term._char = term.getTerm();
_termLen = term.getTermLen();
@@ -44,7 +43,7 @@ StringSearchHelper::~StringSearchHelper() = default;
bool
StringSearchHelper::isMatch(const char *src) const {
if (__builtin_expect(isRegex(), false)) {
- return getRegex().valid() ? getRegex().partial_match(std::string_view(src)) : false;
+ return getRegex().valid() && getRegex().partial_match(std::string_view(src));
}
if (__builtin_expect(isFuzzy(), false)) {
return getFuzzyMatcher().isMatch(src);
diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
index 5947f397359..146d1653a3e 100644
--- a/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
+++ b/searchlib/src/vespa/searchlib/attribute/string_search_helper.h
@@ -4,8 +4,8 @@
#include <vespa/fastlib/text/unicodeutil.h>
#include <vespa/vespalib/regex/regex.h>
-#include <vespa/vespalib/fuzzy/fuzzy_matcher.h>
+namespace vespalib { class FuzzyMatcher; }
namespace search { class QueryTermUCS4; }
namespace search::attribute {
@@ -18,17 +18,19 @@ class StringSearchHelper {
public:
StringSearchHelper(QueryTermUCS4 & qTerm, bool cased);
StringSearchHelper(StringSearchHelper&&) noexcept;
+ StringSearchHelper(const StringSearchHelper &) = delete;
+ StringSearchHelper & operator =(const StringSearchHelper &) = delete;
~StringSearchHelper();
bool isMatch(const char *src) const;
- bool isPrefix() const { return _isPrefix; }
- bool isRegex() const { return _isRegex; }
- bool isCased() const { return _isCased; }
- bool isFuzzy() const { return _isFuzzy; }
- const vespalib::Regex & getRegex() const { return _regex; }
- const vespalib::FuzzyMatcher & getFuzzyMatcher() const { return _fuzzyMatcher; }
+ bool isPrefix() const noexcept{ return _isPrefix; }
+ bool isRegex() const noexcept{ return _isRegex; }
+ bool isCased() const noexcept{ return _isCased; }
+ bool isFuzzy() const noexcept{ return _isFuzzy; }
+ const vespalib::Regex & getRegex() const noexcept { return _regex; }
+ const vespalib::FuzzyMatcher & getFuzzyMatcher() const noexcept { return *_fuzzyMatcher; }
private:
vespalib::Regex _regex;
- vespalib::FuzzyMatcher _fuzzyMatcher;
+ std::unique_ptr<vespalib::FuzzyMatcher> _fuzzyMatcher;
union {
const ucs4_t *_ucs4;
const char *_char;
diff --git a/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp b/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp
deleted file mode 100644
index 2f47f2f9862..00000000000
--- a/searchlib/src/vespa/searchlib/attribute/stringattribute.cpp
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-
-#include "stringattribute.h"
-
-namespace search {
-
-} // namespace search
diff --git a/searchlib/src/vespa/searchlib/attribute/stringattribute.h b/searchlib/src/vespa/searchlib/attribute/stringattribute.h
deleted file mode 100644
index 70508f14168..00000000000
--- a/searchlib/src/vespa/searchlib/attribute/stringattribute.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
-
-#pragma once
-
-#include "stringbase.h"
-#include "enumstore.h"
-
-namespace search {
-
-} // namespace search
-
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index b1e55c3d640..e20a40d2df3 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -4,11 +4,8 @@
#include "no_loaded_vector.h"
#include "attributevector.h"
-#include "changevector.h"
#include "i_enum_store.h"
#include "loadedenumvalue.h"
-#include "search_context.h"
-#include "string_matcher.h"
#include "string_search_context.h"
namespace search {