diff options
17 files changed, 121 insertions, 96 deletions
diff --git a/searchcommon/src/vespa/searchcommon/attribute/i_search_context.h b/searchcommon/src/vespa/searchcommon/attribute/i_search_context.h index f75bf8699c8..9a24ab82c9f 100644 --- a/searchcommon/src/vespa/searchcommon/attribute/i_search_context.h +++ b/searchcommon/src/vespa/searchcommon/attribute/i_search_context.h @@ -7,7 +7,7 @@ namespace search::fef { class TermFieldMatchData; } namespace search::queryeval { class SearchIterator; } -namespace search { class QueryTermBase; } +namespace search { class QueryTermUCS4; } namespace search::attribute { @@ -47,7 +47,7 @@ public: virtual bool valid() const = 0; virtual Int64Range getAsIntegerTerm() const = 0; - virtual const QueryTermBase * queryTerm() const = 0; + virtual const QueryTermUCS4 * queryTerm() const = 0; virtual const vespalib::string &attributeName() const = 0; int32_t find(DocId docId, int32_t elementId, int32_t &weight) const { return onFind(docId, elementId, weight); } diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp index 9940808bb8c..596e6894482 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_blueprint_factory.cpp @@ -10,7 +10,7 @@ #include <vespa/searchlib/common/location.h> #include <vespa/searchlib/common/locationiterators.h> #include <vespa/searchlib/query/query_term_decoder.h> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/searchlib/query/tree/stackdumpcreator.h> #include <vespa/searchlib/queryeval/andsearchstrict.h> #include <vespa/searchlib/queryeval/create_blueprint_visitor_helper.h> @@ -541,7 +541,7 @@ public: if (isInteger) { return std::make_unique<QueryTermSimple>(term, QueryTermSimple::WORD); } - return std::make_unique<QueryTermBase>(term, QueryTermSimple::WORD); + return std::make_unique<QueryTermUCS4>(term, QueryTermSimple::WORD); } template <typename WS, typename NODE> diff --git a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp index af1bcb19708..9195ce30c6e 100644 --- a/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp +++ b/searchlib/src/vespa/searchlib/attribute/attribute_weighted_set_blueprint.cpp @@ -4,7 +4,7 @@ #include <vespa/searchcommon/attribute/i_search_context.h> #include <vespa/searchlib/common/bitvector.h> #include <vespa/searchlib/fef/matchdatalayout.h> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/searchlib/queryeval/weighted_set_term_search.h> #include <vespa/vespalib/objects/visit.h> #include <vespa/vespalib/stllike/hash_map.h> diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 39423ef74c4..d45cd995ea5 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -47,7 +47,7 @@ namespace search { class IAttributeSaveTarget; struct IDocumentWeightAttribute; class QueryTermSimple; - class QueryTermBase; + class QueryTermUCS4; namespace fef { class TermFieldMatchData; @@ -525,8 +525,8 @@ public: void fetchPostings(bool strict) override; bool valid() const override { return false; } Int64Range getAsIntegerTerm() const override { return Int64Range(); } - const QueryTermBase * queryTerm() const override { - return static_cast<const QueryTermBase *>(nullptr); + const QueryTermUCS4 * queryTerm() const override { + return static_cast<const QueryTermUCS4 *>(nullptr); } const vespalib::string &attributeName() const override { return _attr.getName(); diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp index 3e683dee3c1..bc1ade979e6 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.cpp @@ -5,7 +5,7 @@ #include "imported_attribute_vector.h" #include "reference_attribute.h" #include <vespa/searchlib/common/bitvectoriterator.h> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/searchlib/queryeval/emptysearch.h> #include "attributeiterators.hpp" @@ -250,7 +250,7 @@ Int64Range ImportedSearchContext::getAsIntegerTerm() const { return _target_search_context->getAsIntegerTerm(); } -const QueryTermBase * ImportedSearchContext::queryTerm() const { +const QueryTermUCS4 * ImportedSearchContext::queryTerm() const { return _target_search_context->queryTerm(); } diff --git a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h index ae6ce181af0..2ceb61bc2b2 100644 --- a/searchlib/src/vespa/searchlib/attribute/imported_search_context.h +++ b/searchlib/src/vespa/searchlib/attribute/imported_search_context.h @@ -59,7 +59,7 @@ public: void fetchPostings(bool strict) override; bool valid() const override; Int64Range getAsIntegerTerm() const override; - const QueryTermBase * queryTerm() const override; + const QueryTermUCS4 * queryTerm() const override; const vespalib::string& attributeName() const override; using DocId = IAttributeVector::DocId; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index dd87237e22b..6cefc03dd70 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -11,7 +11,7 @@ #include <vespa/vespalib/text/lowercase.h> #include <vespa/vespalib/util/bufferwriter.h> #include <vespa/vespalib/util/regexp.h> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index 820b0d99908..214da6bf230 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -11,7 +11,7 @@ #include <vespa/vespalib/text/lowercase.h> #include <vespa/vespalib/util/bufferwriter.h> #include <vespa/vespalib/util/regexp.h> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index f3e6252f331..59a5a9b8cac 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -3,7 +3,7 @@ #pragma once #include <vespa/searchlib/attribute/singlestringpostattribute.h> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> namespace search { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 99ddfb5eaa9..d7523c86e29 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -5,7 +5,7 @@ #include "readerbase.h" #include <vespa/document/fieldvalue/fieldvalue.h> #include <vespa/searchlib/util/fileutil.hpp> -#include <vespa/searchlib/query/queryterm.h> +#include <vespa/searchlib/query/query_term_ucs4.h> #include <vespa/vespalib/locale/c.h> #include <vespa/vespalib/util/array.hpp> @@ -251,10 +251,10 @@ StringAttribute::StringSearchContext::valid() const return (_queryTerm.get() && (!_queryTerm->empty())); } -const QueryTermBase * +const QueryTermUCS4 * StringAttribute::StringSearchContext::queryTerm() const { - return static_cast<const QueryTermBase *>(_queryTerm.get()); + return static_cast<const QueryTermUCS4 *>(_queryTerm.get()); } uint32_t StringAttribute::clearDoc(DocId doc) diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index a5e2893f8e9..cf0a92253de 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -100,7 +100,7 @@ protected: protected: bool valid() const override; - const QueryTermBase * queryTerm() const override; + const QueryTermUCS4 * queryTerm() const override; bool isMatch(const char *src) const { if (__builtin_expect(isRegex(), false)) { return _regex ? std::regex_search(src, *_regex) : false; diff --git a/searchlib/src/vespa/searchlib/query/CMakeLists.txt b/searchlib/src/vespa/searchlib/query/CMakeLists.txt index 307e525ba9e..a8adb31f9e1 100644 --- a/searchlib/src/vespa/searchlib/query/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/query/CMakeLists.txt @@ -2,6 +2,7 @@ vespa_add_library(searchlib_query OBJECT SOURCES query_term_simple.cpp + query_term_ucs4.cpp queryterm.cpp querynode.cpp query.cpp diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp new file mode 100644 index 00000000000..86cda7e6786 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.cpp @@ -0,0 +1,53 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "query_term_ucs4.h" +#include <vespa/vespalib/objects/visit.h> +#include <vespa/vespalib/text/utf8.h> + +namespace search { + +QueryTermUCS4::UCS4StringT +QueryTermUCS4::getUCS4Term() const { + UCS4StringT ucs4; + const string & term = getTermString(); + ucs4.reserve(term.size() + 1); + vespalib::Utf8Reader r(term); + while (r.hasMore()) { + ucs4_t u = r.getChar(); + ucs4.push_back(u); + } + ucs4.push_back(0); + return ucs4; +} + +QueryTermUCS4::QueryTermUCS4() : + QueryTermSimple(), + _cachedTermLen(0), + _termUCS4() +{ + _termUCS4.push_back(0); +} + +QueryTermUCS4::~QueryTermUCS4() = default; + +QueryTermUCS4::QueryTermUCS4(const string & termS, SearchTerm type) : + QueryTermSimple(termS, type), + _cachedTermLen(0), + _termUCS4() +{ + vespalib::Utf8Reader r(termS); + while (r.hasMore()) { + ucs4_t u = r.getChar(); + (void) u; + _cachedTermLen++; + } +} + +void +QueryTermUCS4::visitMembers(vespalib::ObjectVisitor & visitor) const +{ + QueryTermSimple::visitMembers(visitor); + visit(visitor, "termlength", static_cast<uint64_t>(_cachedTermLen)); +} + +} diff --git a/searchlib/src/vespa/searchlib/query/query_term_ucs4.h b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h new file mode 100644 index 00000000000..8a270d47777 --- /dev/null +++ b/searchlib/src/vespa/searchlib/query/query_term_ucs4.h @@ -0,0 +1,43 @@ +// Copyright 2019 Oath Inc. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +#pragma once + +#include "query_term_simple.h" +#include <vespa/vespalib/util/memory.h> +#include <vespa/vespalib/objects/objectvisitor.h> +#include <vespa/fastlib/text/unicodeutil.h> +#include <vector> + +namespace search { + +/** + * Query term that can be returned in UCS-4 encoded form. + */ +class QueryTermUCS4 : public QueryTermSimple { +public: + typedef std::vector<ucs4_t> UCS4StringT; + typedef std::unique_ptr<QueryTermUCS4> UP; + QueryTermUCS4(const QueryTermUCS4 &) = default; + QueryTermUCS4 & operator = (const QueryTermUCS4 &) = default; + QueryTermUCS4(QueryTermUCS4 &&) = default; + QueryTermUCS4 & operator = (QueryTermUCS4 &&) = default; + QueryTermUCS4(); + QueryTermUCS4(const string & term_, SearchTerm type); + ~QueryTermUCS4(); + size_t getTermLen() const { return _cachedTermLen; } + size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } + UCS4StringT getUCS4Term() const; + void visitMembers(vespalib::ObjectVisitor &visitor) const override; + size_t term(const ucs4_t * & t) { + if (_termUCS4.empty()) { + _termUCS4 = getUCS4Term(); + } + t = &_termUCS4[0]; + return _cachedTermLen; + } +private: + size_t _cachedTermLen; + UCS4StringT _termUCS4; +}; + +} + diff --git a/searchlib/src/vespa/searchlib/query/queryterm.cpp b/searchlib/src/vespa/searchlib/query/queryterm.cpp index 0d3550a1ce2..ba6862c82ca 100644 --- a/searchlib/src/vespa/searchlib/query/queryterm.cpp +++ b/searchlib/src/vespa/searchlib/query/queryterm.cpp @@ -40,45 +40,8 @@ static CharInfo _G_charTable; namespace search { -QueryTermBase::UCS4StringT -QueryTermBase::getUCS4Term() const { - UCS4StringT ucs4; - const string & term = getTermString(); - ucs4.reserve(term.size() + 1); - vespalib::Utf8Reader r(term); - while (r.hasMore()) { - ucs4_t u = r.getChar(); - ucs4.push_back(u); - } - ucs4.push_back(0); - return ucs4; -} - -QueryTermBase::QueryTermBase() : - QueryTermSimple(), - _cachedTermLen(0), - _termUCS4() -{ - _termUCS4.push_back(0); -} - -QueryTermBase::~QueryTermBase() = default; - -QueryTermBase::QueryTermBase(const string & termS, SearchTerm type) : - QueryTermSimple(termS, type), - _cachedTermLen(0), - _termUCS4() -{ - vespalib::Utf8Reader r(termS); - while (r.hasMore()) { - ucs4_t u = r.getChar(); - (void) u; - _cachedTermLen++; - } -} - QueryTerm::QueryTerm() : - QueryTermBase(), + QueryTermUCS4(), _index(), _encoding(), _result(), @@ -96,16 +59,9 @@ QueryTerm & QueryTerm::operator = (QueryTerm &&) = default; QueryTerm::~QueryTerm() = default; void -QueryTermBase::visitMembers(vespalib::ObjectVisitor & visitor) const -{ - QueryTermSimple::visitMembers(visitor); - visit(visitor, "termlength", static_cast<uint64_t>(_cachedTermLen)); -} - -void QueryTerm::visitMembers(vespalib::ObjectVisitor & visitor) const { - QueryTermBase::visitMembers(visitor); + QueryTermUCS4::visitMembers(visitor); visit(visitor, "encoding.isBase10Integer", _encoding.isBase10Integer()); visit(visitor, "encoding.isFloat", _encoding.isFloat()); visit(visitor, "encoding.isAscii7Bit", _encoding.isAscii7Bit()); @@ -115,7 +71,7 @@ QueryTerm::visitMembers(vespalib::ObjectVisitor & visitor) const } QueryTerm::QueryTerm(std::unique_ptr<QueryNodeResultBase> org, const string & termS, const string & indexS, SearchTerm type) : - QueryTermBase(termS, type), + QueryTermUCS4(termS, type), _index(indexS), _encoding(0x01), _result(org.release()), diff --git a/searchlib/src/vespa/searchlib/query/queryterm.h b/searchlib/src/vespa/searchlib/query/queryterm.h index df4830604c8..105dc4bfab3 100644 --- a/searchlib/src/vespa/searchlib/query/queryterm.h +++ b/searchlib/src/vespa/searchlib/query/queryterm.h @@ -2,7 +2,7 @@ #pragma once #include "posocc.h" -#include "query_term_simple.h" +#include "query_term_ucs4.h" #include "querynode.h" #include "querynoderesultbase.h" #include "weight.h" @@ -13,41 +13,13 @@ namespace search { -class QueryTermBase : public QueryTermSimple -{ -public: - typedef std::vector<ucs4_t> UCS4StringT; - typedef std::unique_ptr<QueryTermBase> UP; - QueryTermBase(const QueryTermBase &) = default; - QueryTermBase & operator = (const QueryTermBase &) = default; - QueryTermBase(QueryTermBase &&) = default; - QueryTermBase & operator = (QueryTermBase &&) = default; - QueryTermBase(); - QueryTermBase(const string & term_, SearchTerm type); - ~QueryTermBase(); - size_t getTermLen() const { return _cachedTermLen; } - size_t term(const char * & t) const { t = getTerm(); return _cachedTermLen; } - UCS4StringT getUCS4Term() const; - void visitMembers(vespalib::ObjectVisitor &visitor) const override; - size_t term(const ucs4_t * & t) { - if (_termUCS4.empty()) { - _termUCS4 = getUCS4Term(); - } - t = &_termUCS4[0]; - return _cachedTermLen; - } -private: - size_t _cachedTermLen; - UCS4StringT _termUCS4; -}; - /** This is a leaf in the Query tree. All terms are leafs. A QueryTerm has the index for where to find the term. The term is a string, both char(utf8) and ucs4. There are flags indicating encoding. And there are flags indicating if it should be considered a prefix. */ -class QueryTerm : public QueryTermBase, public QueryNode +class QueryTerm : public QueryTermUCS4, public QueryNode { public: typedef std::unique_ptr<QueryTerm> UP; diff --git a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp index 042de113a35..4fb1f44e156 100644 --- a/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/leaf_blueprints.cpp @@ -90,7 +90,7 @@ struct FakeContext : attribute::ISearchContext { void fetchPostings(bool) override { } bool valid() const override { return true; } search::Int64Range getAsIntegerTerm() const override { abort(); } - const search::QueryTermBase * queryTerm() const override { abort(); } + const search::QueryTermUCS4 * queryTerm() const override { abort(); } const vespalib::string &attributeName() const override { return name; } }; |