summaryrefslogtreecommitdiffstats
path: root/searchlib
diff options
context:
space:
mode:
author Henning Baldersheim <balder@yahoo-inc.com> 2021-03-04 16:15:53 +0000
committer Henning Baldersheim <balder@yahoo-inc.com> 2021-03-04 18:07:04 +0000
commit 4d4b1024cbaa9ad537927d66309db8aa9f628c37 (patch)
tree 834db36e1f9c1fa8804713ae049deb3d86c6264b /searchlib
parent 311e77aad06f187c70864a80a0703082f72bb3d8 (diff)
Keep only one UCS4 buffer, and create the buffer lazily.
Diffstat (limited to 'searchlib')
-rw-r--r--searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp2
-rw-r--r--searchlib/src/tests/query/streaming_query_test.cpp4
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.cpp14
-rw-r--r--searchlib/src/vespa/searchlib/attribute/stringbase.h2
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp46
-rw-r--r--searchlib/src/vespa/searchlib/query/query_term_ucs4.h25
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp2
-rw-r--r--searchlib/src/vespa/searchlib/query/streaming/queryterm.h6
-rw-r--r--searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp3
9 files changed, 53 insertions, 51 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
index 87d9f081ffc..aaae2772687 100644
--- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
+++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp
@@ -386,7 +386,7 @@ testSingleValue(Attribute & svsa, Config &cfg)
TEST("testSingleValue")
{
EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext));
- EXPECT_EQUAL(72u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
+ EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext));
{
Config cfg(BasicType::STRING, CollectionType::SINGLE);
SingleValueStringAttribute svsa("svsa", cfg);
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp
index 9aa07570c0d..2db25da03f7 100644
--- a/searchlib/src/tests/query/streaming_query_test.cpp
+++ b/searchlib/src/tests/query/streaming_query_test.cpp
@@ -734,8 +734,8 @@ TEST("testSameElementEvaluate") {
TEST("Control the size of query terms") {
EXPECT_EQUAL(104u, sizeof(QueryTermSimple));
- EXPECT_EQUAL(136u, sizeof(QueryTermUCS4));
- EXPECT_EQUAL(280u, sizeof(QueryTerm));
+ EXPECT_EQUAL(120u, sizeof(QueryTermUCS4));
+ EXPECT_EQUAL(264u, sizeof(QueryTerm));
}
TEST_MAIN() { TEST_RUN_ALL(); }
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
index d64e03c67a4..56a644a68b1 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp
@@ -225,13 +225,15 @@ StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qT
const StringAttribute & toBeSearched) :
SearchContext(toBeSearched),
_queryTerm(static_cast<QueryTermUCS4 *>(qTerm.release())),
- _termUCS4(queryTerm()->getUCS4Term()),
+ _termUCS4(nullptr),
_regex(),
_isPrefix(_queryTerm->isPrefix()),
_isRegex(_queryTerm->isRegex())
{
if (isRegex()) {
_regex = vespalib::Regex::from_pattern(_queryTerm->getTerm(), vespalib::Regex::Options::IgnoreCase);
+ } else {
+ _queryTerm->term(_termUCS4);
}
}
@@ -261,16 +263,6 @@ StringAttribute::clearDoc(DocId doc)
return removed;
}
-namespace {
-
-class DirectAccessor {
-public:
- DirectAccessor() { }
- const char * get(const char * v) const { return v; }
-};
-
-}
-
bool
StringAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust)
{
diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h
index d72f7002086..b8fef783d58 100644
--- a/searchlib/src/vespa/searchlib/attribute/stringbase.h
+++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h
@@ -157,7 +157,7 @@ protected:
const vespalib::Regex & getRegex() const { return _regex; }
private:
std::unique_ptr<QueryTermUCS4> _queryTerm;
- std::vector<ucs4_t> _termUCS4;
+ const ucs4_t *_termUCS4;
vespalib::Regex _regex;
bool _isPrefix;
bool _isRegex;
diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp
index b267ad9253e..7729c7ede36 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp
+++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp
@@ -3,37 +3,30 @@
#include "query_term_ucs4.h"
#include <vespa/vespalib/objects/visit.h>
#include <vespa/vespalib/text/utf8.h>
+#include <mutex>
namespace search {
-QueryTermUCS4::UCS4StringT
-QueryTermUCS4::getUCS4Term() const {
- UCS4StringT ucs4;
- const string & term = getTermString();
- ucs4.reserve(term.size() + 1);
- vespalib::Utf8Reader r(term);
- while (r.hasMore()) {
- ucs4_t u = r.getChar();
- ucs4.push_back(u);
- }
- ucs4.push_back(0);
- return ucs4;
+namespace {
+ std::mutex _globalMutex;
}
+ucs4_t QueryTermUCS4::ZERO_TERM(0);
+
QueryTermUCS4::QueryTermUCS4() :
QueryTermSimple(),
+ _termUCS4(),
_cachedTermLen(0),
- _termUCS4()
-{
- _termUCS4.push_back(0);
-}
+ _filled(true)
+{ }
QueryTermUCS4::~QueryTermUCS4() = default;
QueryTermUCS4::QueryTermUCS4(const string & termS, Type type) :
QueryTermSimple(termS, type),
+ _termUCS4(),
_cachedTermLen(0),
- _termUCS4()
+ _filled(false)
{
vespalib::Utf8Reader r(termS);
while (r.hasMore()) {
@@ -44,6 +37,25 @@ QueryTermUCS4::QueryTermUCS4(const string & termS, Type type) :
}
void
+QueryTermUCS4::fillUCS4() {
+ /*
+ * Double-checked locking.
+ * This is a 'dirty' optimisation, but it is done to avoid writing a lot of data and blowing the cpu caches with something
+ * you do not really need most of the time. That matters when qps is very high, the query is wide, and hits are few.
+ */
+ std::lock_guard guard(_globalMutex);
+ if (_filled) return;
+ _termUCS4.reset(new ucs4_t[_cachedTermLen + 1]);
+ vespalib::Utf8Reader r(getTermString());
+ uint32_t i(0);
+ while (r.hasMore()) {
+ _termUCS4.get()[i++] = r.getChar();
+ }
+ _termUCS4.get()[_cachedTermLen] = 0;
+ _filled = true;
+}
+
+void
QueryTermUCS4::visitMembers(vespalib::ObjectVisitor & visitor) const
{
QueryTermSimple::visitMembers(visitor);
diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h
index 90f5c07b7ca..d5e92acb378 100644
--- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h
+++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h
@@ -14,29 +14,30 @@ namespace search {
*/
class QueryTermUCS4 : public QueryTermSimple {
public:
- typedef std::vector<ucs4_t> UCS4StringT;
typedef std::unique_ptr<QueryTermUCS4> UP;
- QueryTermUCS4(const QueryTermUCS4 &) = default;
- QueryTermUCS4 & operator = (const QueryTermUCS4 &) = default;
+ QueryTermUCS4(const QueryTermUCS4 &) = delete;
+ QueryTermUCS4 & operator = (const QueryTermUCS4 &) = delete;
QueryTermUCS4(QueryTermUCS4 &&) = default;
QueryTermUCS4 & operator = (QueryTermUCS4 &&) = default;
QueryTermUCS4();
QueryTermUCS4(const string & term_, Type type);
~QueryTermUCS4();
- size_t getTermLen() const { return _cachedTermLen; }
- size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; }
- UCS4StringT getUCS4Term() const;
+ uint32_t getTermLen() const { return _cachedTermLen; }
+ uint32_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; }
void visitMembers(vespalib::ObjectVisitor &visitor) const override;
- size_t term(const ucs4_t * & t) {
- if (_termUCS4.empty()) {
- _termUCS4 = getUCS4Term();
+ uint32_t term(const ucs4_t * & t) {
+ if (!_filled) {
+ fillUCS4();
}
- t = &_termUCS4[0];
+ t = (_termUCS4) ? _termUCS4.get() : &ZERO_TERM;
return _cachedTermLen;
}
private:
- size_t _cachedTermLen;
- UCS4StringT _termUCS4;
+ void fillUCS4();
+ static ucs4_t ZERO_TERM;
+ std::unique_ptr<ucs4_t[]> _termUCS4;
+ uint32_t _cachedTermLen;
+ bool _filled;
};
}
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
index 3caa47bf55d..a50c5a8bf8b 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp
@@ -51,8 +51,6 @@ QueryTerm::QueryTerm() :
_fieldInfo()
{ }
-QueryTerm::QueryTerm(const QueryTerm &) = default;
-QueryTerm & QueryTerm::operator = (const QueryTerm &) = default;
QueryTerm::QueryTerm(QueryTerm &&) noexcept = default;
QueryTerm & QueryTerm::operator = (QueryTerm &&) noexcept = default;
diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
index 6daa60a317a..4f323b7f9f1 100644
--- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
+++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h
@@ -26,7 +26,7 @@ public:
class EncodingBitMap
{
public:
- EncodingBitMap(unsigned bm=0) : _enc(bm) { }
+ EncodingBitMap(uint8_t bm=0) : _enc(bm) { }
bool isFloat() const { return _enc & Float; }
bool isBase10Integer() const { return _enc & Base10Integer; }
bool isAscii7Bit() const { return _enc & Ascii7Bit; }
@@ -35,7 +35,7 @@ public:
void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; }
private:
enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 };
- unsigned _enc;
+ uint8_t _enc;
};
class FieldInfo {
public:
@@ -55,8 +55,6 @@ public:
};
QueryTerm();
QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, const string & term, const string & index, Type type);
- QueryTerm(const QueryTerm &);
- QueryTerm & operator = (const QueryTerm &);
QueryTerm(QueryTerm &&) noexcept;
QueryTerm & operator = (QueryTerm &&) noexcept;
~QueryTerm();
diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp
index 02b1063cc37..61eaae40e90 100644
--- a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp
+++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp
@@ -2,6 +2,7 @@
#include "imported_attribute_fixture.h"
#include "mock_gid_to_lid_mapping.h"
+#include <vespa/searchlib/query/query_term_ucs4.h>
#include <vespa/vespalib/util/stringfmt.h>
#include <future>
@@ -55,7 +56,7 @@ GlobalId dummy_gid(uint32_t doc_index) {
}
std::unique_ptr<QueryTermSimple> word_term(vespalib::stringref term) {
- return std::make_unique<QueryTermSimple>(term, QueryTermSimple::Type::WORD);
+ return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD);
}