diff options
author | Henning Baldersheim <balder@yahoo-inc.com> | 2021-03-05 08:58:41 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-05 08:58:41 +0100 |
commit | 0c6804dfc335357958dfcfce661f5e9c50037ee5 (patch) | |
tree | 995631cefb77e8f1b69fb29307f0547dcb662c21 /searchlib | |
parent | 80d13258f7d2afac139f4559decbb1646c8a20cd (diff) | |
parent | 17ff40f0ee5b35d76e6e3ed0318e1de6d9a3d162 (diff) |
Merge pull request #16804 from vespa-engine/balder/keep-one-ucs4-version
Keep only one ucs4 buffer, and create the buffer lazy.
Diffstat (limited to 'searchlib')
11 files changed, 312 insertions, 289 deletions
diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index 87d9f081ffc..aaae2772687 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -386,7 +386,7 @@ testSingleValue(Attribute & svsa, Config &cfg) TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(AttributeVector::SearchContext)); - EXPECT_EQUAL(72u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); + EXPECT_EQUAL(56u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 9aa07570c0d..5ce34cfcc3f 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -24,201 +24,255 @@ TEST("testQueryLanguage") { int64_t ia(0), ib(0); double da(0), db(0); - QueryTerm q(factory.create(), "7", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, 7); - EXPECT_EQUAL(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, 7); - EXPECT_EQUAL(db, 7); - - q = QueryTerm(factory.create(), "-7", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -7); - EXPECT_EQUAL(ib, -7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -7); - EXPECT_EQUAL(db, -7); - - q = QueryTerm(factory.create(), "7.5", "index", TermType::WORD); - EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, 7.5); - EXPECT_EQUAL(db, 7.5); - - q = QueryTerm(factory.create(), "-7.5", "index", TermType::WORD); - EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -7.5); - EXPECT_EQUAL(db, -7.5); - - q = QueryTerm(factory.create(), "<7", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, 6); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); - EXPECT_LESS(db, 7); - EXPECT_GREATER(db, 6.99); - - q = QueryTerm(factory.create(), "[;7]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); - EXPECT_EQUAL(db, 7); - - q = QueryTerm(factory.create(), ">7", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, 8); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_GREATER(da, 7); - EXPECT_LESS(da, 7.01); - EXPECT_EQUAL(db, std::numeric_limits<double>::max()); - - q = QueryTerm(factory.create(), "[7;]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, 7); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, 7); - EXPECT_EQUAL(db, std::numeric_limits<double>::max()); - - q = QueryTerm(factory.create(), "[-7;7]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -7); - EXPECT_EQUAL(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -7); - EXPECT_EQUAL(db, 7); - - q = QueryTerm(factory.create(), "[-7.1;7.1]", "index", TermType::WORD); - EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -7.1); - EXPECT_EQUAL(db, 7.1); - - q = QueryTerm(factory.create(), "[500.0;1.7976931348623157E308]", "index", TermType::WORD); - EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, 500.0); - EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + { + QueryTerm q(factory.create(), "7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 7); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7); + EXPECT_EQUAL(db, 7); + } + + { + QueryTerm q(factory.create(), "-7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, -7); + } + + { + QueryTerm q(factory.create(), "7.5", "index", TermType::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7.5); + EXPECT_EQUAL(db, 7.5); + } + + { + QueryTerm q(factory.create(), "-7.5", "index", TermType::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7.5); + EXPECT_EQUAL(db, -7.5); + } + + { + QueryTerm q(factory.create(), "<7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_LESS(db, 7); + EXPECT_GREATER(db, 6.99); + } + + { + QueryTerm q(factory.create(), "[;7]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_EQUAL(db, 7); + } + + { + QueryTerm q(factory.create(), ">7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 8); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_GREATER(da, 7); + EXPECT_LESS(da, 7.01); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + } + + { + QueryTerm q(factory.create(), "[7;]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 7); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 7); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + } + + { + QueryTerm q(factory.create(), "[-7;7]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, 7); + } + + { + QueryTerm q(factory.create(), "[-7.1;7.1]", "index", TermType::WORD); + EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7.1); + EXPECT_EQUAL(db, 7.1); + } + + { + QueryTerm q(factory.create(), "[500.0;1.7976931348623157E308]", "index", TermType::WORD); + EXPECT_FALSE(q.getAsIntegerTerm(ia, ib)); // This is dubious and perhaps a regression. + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, 500.0); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + } const double minusSeven(-7), seven(7); - q = QueryTerm(factory.create(), "<-7;7]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -6); - EXPECT_EQUAL(ib, 7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, std::nextafterf(minusSeven, seven)); - EXPECT_EQUAL(db, seven); - - q = QueryTerm(factory.create(), "<-7;7>", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -6); - EXPECT_EQUAL(ib, 6); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, std::nextafterf(minusSeven, seven)); - EXPECT_EQUAL(db, std::nextafterf(seven, minusSeven)); - - q = QueryTerm(factory.create(), "<1;2>", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, 2); - EXPECT_EQUAL(ib, 1); - - q = QueryTerm(factory.create(), "[-7;7>", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -7); - EXPECT_EQUAL(ib, 6); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, minusSeven); - EXPECT_EQUAL(db, std::nextafterf(seven, minusSeven)); - - q = QueryTerm(factory.create(), "<-7", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, -8); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); - EXPECT_LESS(db, -7); - EXPECT_GREATER(db, -7.01); - - q = QueryTerm(factory.create(), "[;-7]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, -7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); - EXPECT_EQUAL(db, -7); - - q = QueryTerm(factory.create(), "<;-7]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); - EXPECT_EQUAL(ib, -7); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); - EXPECT_EQUAL(db, -7); - - q = QueryTerm(factory.create(), ">-7", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -6); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_GREATER(da, -7); - EXPECT_LESS(da, -6.99); - EXPECT_EQUAL(db, std::numeric_limits<double>::max()); - - q = QueryTerm(factory.create(), "[-7;]", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -7); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -7); - EXPECT_EQUAL(db, std::numeric_limits<double>::max()); - - q = QueryTerm(factory.create(), "[-7;>", "index", TermType::WORD); - EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); - EXPECT_EQUAL(ia, -7); - EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); - EXPECT_TRUE(q.getAsDoubleTerm(da, db)); - EXPECT_EQUAL(da, -7); - EXPECT_EQUAL(db, std::numeric_limits<double>::max()); - - q = QueryTerm(factory.create(), "a", "index", TermType::WORD); - EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); - EXPECT_TRUE(!q.getAsDoubleTerm(da, db)); - - q = QueryTerm(factory.create(), "word", "index", TermType::WORD); - EXPECT_TRUE(!q.isPrefix()); - EXPECT_TRUE(!q.isSubstring()); - EXPECT_TRUE(!q.isSuffix()); - - q = QueryTerm(factory.create(), "prefix", "index", TermType::PREFIXTERM); - EXPECT_TRUE(q.isPrefix()); - EXPECT_TRUE(!q.isSubstring()); - EXPECT_TRUE(!q.isSuffix()); - - q = QueryTerm(factory.create(), "substring", "index", TermType::SUBSTRINGTERM); - EXPECT_TRUE(!q.isPrefix()); - EXPECT_TRUE(q.isSubstring()); - EXPECT_TRUE(!q.isSuffix()); - - q = QueryTerm(factory.create(), "suffix", "index", TermType::SUFFIXTERM); - EXPECT_TRUE(!q.isPrefix()); - EXPECT_TRUE(!q.isSubstring()); - EXPECT_TRUE(q.isSuffix()); - - q = QueryTerm(factory.create(), "regexp", "index", TermType::REGEXP); - EXPECT_TRUE(!q.isPrefix()); - EXPECT_TRUE(!q.isSubstring()); - EXPECT_TRUE(!q.isSuffix()); - EXPECT_TRUE(q.isRegex()); + { + QueryTerm q(factory.create(), "<-7;7]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, 7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, std::nextafterf(minusSeven, seven)); + EXPECT_EQUAL(db, seven); + } + + { + QueryTerm q(factory.create(), "<-7;7>", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, std::nextafterf(minusSeven, seven)); + EXPECT_EQUAL(db, std::nextafterf(seven, minusSeven)); + } + + { + QueryTerm q(factory.create(), "<1;2>", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, 2); + EXPECT_EQUAL(ib, 1); + } + + { + QueryTerm q(factory.create(), "[-7;7>", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, 6); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, minusSeven); + EXPECT_EQUAL(db, std::nextafterf(seven, minusSeven)); + } + + { + QueryTerm q(factory.create(), "<-7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, -8); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_LESS(db, -7); + EXPECT_GREATER(db, -7.01); + } + + { + QueryTerm q(factory.create(), "[;-7]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_EQUAL(db, -7); + } + + { + QueryTerm q(factory.create(), "<;-7]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, std::numeric_limits<int64_t>::min()); + EXPECT_EQUAL(ib, -7); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -std::numeric_limits<double>::max()); + EXPECT_EQUAL(db, -7); + } + + { + QueryTerm q(factory.create(), ">-7", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -6); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_GREATER(da, -7); + EXPECT_LESS(da, -6.99); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + } + + { + QueryTerm q(factory.create(), "[-7;]", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + } + + { + QueryTerm q(factory.create(), "[-7;>", "index", TermType::WORD); + EXPECT_TRUE(q.getAsIntegerTerm(ia, ib)); + EXPECT_EQUAL(ia, -7); + EXPECT_EQUAL(ib, std::numeric_limits<int64_t>::max()); + EXPECT_TRUE(q.getAsDoubleTerm(da, db)); + EXPECT_EQUAL(da, -7); + EXPECT_EQUAL(db, std::numeric_limits<double>::max()); + } + + { + QueryTerm q(factory.create(), "a", "index", TermType::WORD); + EXPECT_TRUE(!q.getAsIntegerTerm(ia, ib)); + EXPECT_TRUE(!q.getAsDoubleTerm(da, db)); + } + + { + QueryTerm q(factory.create(), "word", "index", TermType::WORD); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + } + + { + QueryTerm q(factory.create(), "prefix", "index", TermType::PREFIXTERM); + EXPECT_TRUE(q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + } + + { + QueryTerm q(factory.create(), "substring", "index", TermType::SUBSTRINGTERM); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + } + + { + QueryTerm q(factory.create(), "suffix", "index", TermType::SUFFIXTERM); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(q.isSuffix()); + } + + { + QueryTerm q(factory.create(), "regexp", "index", TermType::REGEXP); + EXPECT_TRUE(!q.isPrefix()); + EXPECT_TRUE(!q.isSubstring()); + EXPECT_TRUE(!q.isSuffix()); + EXPECT_TRUE(q.isRegex()); + } } class AllowRewrite : public QueryNodeResultFactory @@ -734,8 +788,8 @@ TEST("testSameElementEvaluate") { TEST("Control the size of query terms") { EXPECT_EQUAL(104u, sizeof(QueryTermSimple)); - EXPECT_EQUAL(136u, sizeof(QueryTermUCS4)); - EXPECT_EQUAL(280u, sizeof(QueryTerm)); + EXPECT_EQUAL(120u, sizeof(QueryTermUCS4)); + EXPECT_EQUAL(264u, sizeof(QueryTerm)); } TEST_MAIN() { TEST_RUN_ALL(); } diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index d64e03c67a4..56a644a68b1 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -225,13 +225,15 @@ StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qT const StringAttribute & toBeSearched) : SearchContext(toBeSearched), _queryTerm(static_cast<QueryTermUCS4 *>(qTerm.release())), - _termUCS4(queryTerm()->getUCS4Term()), + _termUCS4(nullptr), _regex(), _isPrefix(_queryTerm->isPrefix()), _isRegex(_queryTerm->isRegex()) { if (isRegex()) { _regex = vespalib::Regex::from_pattern(_queryTerm->getTerm(), vespalib::Regex::Options::IgnoreCase); + } else { + _queryTerm->term(_termUCS4); } } @@ -261,16 +263,6 @@ StringAttribute::clearDoc(DocId doc) return removed; } -namespace { - -class DirectAccessor { -public: - DirectAccessor() { } - const char * get(const char * v) const { return v; } -}; - -} - bool StringAttribute::applyWeight(DocId doc, const FieldValue & fv, const ArithmeticValueUpdate & wAdjust) { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index d72f7002086..b8fef783d58 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -157,7 +157,7 @@ protected: const vespalib::Regex & getRegex() const { return _regex; } private: std::unique_ptr<QueryTermUCS4> _queryTerm; - std::vector<ucs4_t> _termUCS4; + const ucs4_t *_termUCS4; vespalib::Regex _regex; bool _isPrefix; bool _isRegex; diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp index d1f616622ff..2c5e977928c 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_simple.cpp +++ b/searchlib/src/vespa/searchlib/query/query_term_simple.cpp @@ -189,17 +189,6 @@ bool QueryTermSimple::getAsDoubleTerm(double & lower, double & upper) const return getAsNumericTerm(lower, upper, DoubleDecoder()); } -QueryTermSimple::QueryTermSimple() - : _rangeLimit(0), - _maxPerGroup(0), - _diversityCutoffGroups(std::numeric_limits<uint32_t>::max()), - _type(Type::WORD), - _diversityCutoffStrict(false), - _valid(true), - _term(), - _diversityAttribute() -{ } - QueryTermSimple::~QueryTermSimple() = default; namespace { diff --git a/searchlib/src/vespa/searchlib/query/query_term_simple.h b/searchlib/src/vespa/searchlib/query/query_term_simple.h index 0ee97f9d43c..93b19212926 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_simple.h +++ b/searchlib/src/vespa/searchlib/query/query_term_simple.h @@ -34,11 +34,10 @@ public: bool isEqual() const { return low == high; } }; - QueryTermSimple(const QueryTermSimple &) = default; - QueryTermSimple & operator = (const QueryTermSimple &) = default; - QueryTermSimple(QueryTermSimple &&) = default; - QueryTermSimple & operator = (QueryTermSimple &&) = default; - QueryTermSimple(); + QueryTermSimple(const QueryTermSimple &) = delete; + QueryTermSimple & operator = (const QueryTermSimple &) = delete; + QueryTermSimple(QueryTermSimple &&) = delete; + QueryTermSimple & operator = (QueryTermSimple &&) = delete; QueryTermSimple(const string & term_, Type type); virtual ~QueryTermSimple(); /** diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp index b267ad9253e..be0398e1a50 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp @@ -3,37 +3,25 @@ #include "query_term_ucs4.h" #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/text/utf8.h> +#include <mutex> namespace search { -QueryTermUCS4::UCS4StringT -QueryTermUCS4::getUCS4Term() const { - UCS4StringT ucs4; - const string & term = getTermString(); - ucs4.reserve(term.size() + 1); - vespalib::Utf8Reader r(term); - while (r.hasMore()) { - ucs4_t u = r.getChar(); - ucs4.push_back(u); - } - ucs4.push_back(0); - return ucs4; +namespace { + std::mutex _globalMutex; } -QueryTermUCS4::QueryTermUCS4() : - QueryTermSimple(), - _cachedTermLen(0), - _termUCS4() -{ - _termUCS4.push_back(0); +QueryTermUCS4::~QueryTermUCS4() { + ucs4_t * ucs4 = _termUCS4.load(std::memory_order_relaxed); + if (ucs4 != nullptr) { + delete [] ucs4; + } } -QueryTermUCS4::~QueryTermUCS4() = default; - QueryTermUCS4::QueryTermUCS4(const string & termS, Type type) : QueryTermSimple(termS, type), - _cachedTermLen(0), - _termUCS4() + _termUCS4(nullptr), + _cachedTermLen(0) { vespalib::Utf8Reader r(termS); while (r.hasMore()) { @@ -43,6 +31,27 @@ QueryTermUCS4::QueryTermUCS4(const string & termS, Type type) : } } +const ucs4_t * +QueryTermUCS4::fillUCS4() { + /* + * Double checked locking...... + * This is a 'dirty' optimisation, but this is done to avoid writing a lot of data and blow the cpu caches with something + * you do not really need most of the time. That matters when qps is very high and query is wide, and hits are few. + */ + std::lock_guard guard(_globalMutex); + ucs4_t * ucs4 = _termUCS4.load(std::memory_order_relaxed); + if (ucs4 != nullptr) return ucs4; + ucs4 = new ucs4_t[_cachedTermLen + 1]; + vespalib::Utf8Reader r(getTermString()); + uint32_t i(0); + while (r.hasMore()) { + ucs4[i++] = r.getChar(); + } + ucs4[_cachedTermLen] = 0; + _termUCS4.store(ucs4); + return ucs4; +} + void QueryTermUCS4::visitMembers(vespalib::ObjectVisitor & visitor) const { diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h index 90f5c07b7ca..00ac59d729e 100644 --- a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h @@ -2,10 +2,8 @@ #pragma once #include "query_term_simple.h" -#include <vespa/vespalib/util/memory.h> -#include <vespa/vespalib/objects/objectvisitor.h> #include <vespa/fastlib/text/unicodeutil.h> -#include <vector> +#include <atomic> namespace search { @@ -14,29 +12,27 @@ namespace search { */ class QueryTermUCS4 : public QueryTermSimple { public: - typedef std::vector<ucs4_t> UCS4StringT; typedef std::unique_ptr<QueryTermUCS4> UP; - QueryTermUCS4(const QueryTermUCS4 &) = default; - QueryTermUCS4 & operator = (const QueryTermUCS4 &) = default; - QueryTermUCS4(QueryTermUCS4 &&) = default; - QueryTermUCS4 & operator = (QueryTermUCS4 &&) = default; - QueryTermUCS4(); + QueryTermUCS4(const QueryTermUCS4 &) = delete; + QueryTermUCS4 & operator = (const QueryTermUCS4 &) = delete; + QueryTermUCS4(QueryTermUCS4 &&) = delete; + QueryTermUCS4 & operator = (QueryTermUCS4 &&) = delete; QueryTermUCS4(const string & term_, Type type); - ~QueryTermUCS4(); - size_t getTermLen() const { return _cachedTermLen; } - size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } - UCS4StringT getUCS4Term() const; + ~QueryTermUCS4() override; + uint32_t getTermLen() const { return _cachedTermLen; } + uint32_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } void visitMembers(vespalib::ObjectVisitor &visitor) const override; - size_t term(const ucs4_t * & t) { - if (_termUCS4.empty()) { - _termUCS4 = getUCS4Term(); + uint32_t term(const ucs4_t * & t) { + t = _termUCS4.load(std::memory_order_relaxed); + if (t == nullptr) { + t = fillUCS4(); } - t = &_termUCS4[0]; return _cachedTermLen; } private: - size_t _cachedTermLen; - UCS4StringT _termUCS4; + const ucs4_t * fillUCS4(); + std::atomic<ucs4_t *> _termUCS4; + uint32_t _cachedTermLen; }; } diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp index 3caa47bf55d..69250d84cab 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.cpp @@ -40,22 +40,6 @@ static CharInfo _G_charTable; namespace search::streaming { -QueryTerm::QueryTerm() : - QueryTermUCS4(), - _index(), - _encoding(), - _result(), - _hitList(), - _weight(100), - _uniqueId(0), - _fieldInfo() -{ } - -QueryTerm::QueryTerm(const QueryTerm &) = default; -QueryTerm & QueryTerm::operator = (const QueryTerm &) = default; -QueryTerm::QueryTerm(QueryTerm &&) noexcept = default; -QueryTerm & QueryTerm::operator = (QueryTerm &&) noexcept = default; - QueryTerm::~QueryTerm() = default; void diff --git a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h index 6daa60a317a..134945e36d6 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/streaming/queryterm.h @@ -26,7 +26,7 @@ public: class EncodingBitMap { public: - EncodingBitMap(unsigned bm=0) : _enc(bm) { } + EncodingBitMap(uint8_t bm=0) : _enc(bm) { } bool isFloat() const { return _enc & Float; } bool isBase10Integer() const { return _enc & Base10Integer; } bool isAscii7Bit() const { return _enc & Ascii7Bit; } @@ -35,7 +35,7 @@ public: void setFloat(bool v) { if (v) _enc |= Float; else _enc &= ~Float; } private: enum { Ascii7Bit=0x01, Base10Integer=0x02, Float=0x04 }; - unsigned _enc; + uint8_t _enc; }; class FieldInfo { public: @@ -53,12 +53,11 @@ public: uint32_t _hitCount; uint32_t _fieldLength; }; - QueryTerm(); QueryTerm(std::unique_ptr<QueryNodeResultBase> resultBase, const string & term, const string & index, Type type); - QueryTerm(const QueryTerm &); - QueryTerm & operator = (const QueryTerm &); - QueryTerm(QueryTerm &&) noexcept; - QueryTerm & operator = (QueryTerm &&) noexcept; + QueryTerm(const QueryTerm &) = delete; + QueryTerm & operator = (const QueryTerm &) = delete; + QueryTerm(QueryTerm &&) = delete; + QueryTerm & operator = (QueryTerm &&) = delete; ~QueryTerm(); bool evaluate() const override; const HitList & evaluateHits(HitList & hl) const override; diff --git a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp index 02b1063cc37..61eaae40e90 100644 --- a/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp +++ b/searchlib/src/vespa/searchlib/test/imported_attribute_fixture.cpp @@ -2,6 +2,7 @@ #include "imported_attribute_fixture.h" #include "mock_gid_to_lid_mapping.h" +#include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/vespalib/util/stringfmt.h> #include <future> @@ -55,7 +56,7 @@ GlobalId dummy_gid(uint32_t doc_index) { } std::unique_ptr<QueryTermSimple> word_term(vespalib::stringref term) { - return std::make_unique<QueryTermSimple>(term, QueryTermSimple::Type::WORD); + return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::Type::WORD); } |