diff options
38 files changed, 433 insertions, 492 deletions
diff --git a/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/ApplicationFileManager.java b/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/ApplicationFileManager.java index 79fa919fabe..8072dab978f 100644 --- a/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/ApplicationFileManager.java +++ b/configserver/src/main/java/com/yahoo/vespa/config/server/filedistribution/ApplicationFileManager.java @@ -16,6 +16,8 @@ import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; import java.nio.file.Files; +import java.util.List; +import java.util.Locale; /** * @author baldersheim @@ -42,35 +44,32 @@ public class ApplicationFileManager implements AddFileInterface { @Override public FileReference addUri(String uri, Path path) { - throw new UnsupportedOperationException("URI type is not supported"); - /* TODO: this needs to be super-restricted if the config server should ever do this. try (TmpDir tmp = new TmpDir()) { - return addFile(download(uri, tmp.dir, path.getRelative())); + return addFile(download(uri, tmp.dir, path)); } catch (IOException e) { throw new IllegalArgumentException(e); } - */ } @Override public FileReference addBlob(ByteBuffer blob, Path path) { try (TmpDir tmp = new TmpDir()) { - return addFile(writeBlob(blob, tmp.dir, path.getRelative())); + return addFile(writeBlob(blob, tmp.dir, path)); } catch (IOException e) { throw new IllegalArgumentException(e); } } - private File writeBlob(ByteBuffer blob, File tmpDir, String relativePath) { + private File writeBlob(ByteBuffer blob, File tmpDir, Path path) { FileOutputStream fos = null; File file = null; try { - file = new File(tmpDir, relativePath); + file = new File(tmpDir, path.getRelative()); Files.createDirectories(file.getParentFile().toPath()); fos = new FileOutputStream(file); - if (relativePath.endsWith(".lz4")) { + if (path.last().endsWith(".lz4")) { LZ4FrameOutputStream lz4 = new LZ4FrameOutputStream(fos); lz4.write(blob.array(), blob.arrayOffset(), blob.remaining()); lz4.close(); @@ -91,14 +90,16 @@ public class ApplicationFileManager implements AddFileInterface { } } - private File download(String uri, File tmpDir, String relativePath) { + private File download(String uri, File tmpDir, Path path) { File file = null; FileOutputStream fos = null; ReadableByteChannel rbc = null; try { - file = new File(tmpDir, relativePath); + file = new File(tmpDir, path.getRelative()); Files.createDirectories(file.getParentFile().toPath()); URL website = new URL(uri); + if ( ! List.of("http", "https").contains(website.getProtocol().toLowerCase(Locale.ROOT))) + throw new IllegalArgumentException("only HTTP(S) supported for URI type resources"); rbc = Channels.newChannel(website.openStream()); fos = new FileOutputStream(file); fos.getChannel().transferFrom(rbc, 0, Long.MAX_VALUE); diff --git a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp index e50c41e2e09..021b20149d1 100644 --- a/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp +++ b/eval/src/tests/tensor/onnx_wrapper/onnx_wrapper_test.cpp @@ -482,7 +482,11 @@ TEST(OnnxTest, inspect_float_to_int8_conversion) { TEST(OnnxTest, default_allocator_type) { Ort::AllocatorWithDefaultOptions default_alloc; +#if ORT_API_VERSION >= 10 + OrtAllocatorType res = OrtInvalidAllocator; +#else OrtAllocatorType res = Invalid; +#endif Ort::ThrowOnError(Ort::GetApi().MemoryInfoGetType(default_alloc.GetInfo(), &res)); fprintf(stderr, "default allocator type: %d\n", int(res)); } diff --git a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp index ac408cfb2de..f1243665636 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/match_thread.cpp @@ -18,7 +18,6 @@ LOG_SETUP(".proton.matching.match_thread"); namespace proton::matching { -using search::queryeval::OptimizedAndNotForBlackListing; using search::queryeval::SearchIterator; using search::fef::MatchData; using search::fef::RankProgram; @@ -48,17 +47,6 @@ struct SimpleStrategy { } }; -// seek_next maps to OptimizedAndNotForBlackListing::seekFast -struct FastBlackListingStrategy { - static bool can_use(bool do_rank, bool do_limit, SearchIterator &search) { - return (!do_rank && !do_limit && - (dynamic_cast<OptimizedAndNotForBlackListing *>(&search) != nullptr)); - } - static uint32_t seek_next(SearchIterator &search, uint32_t docid) { - return static_cast<OptimizedAndNotForBlackListing &>(search).seekFast(docid); - } -}; - LazyValue get_score_feature(const RankProgram &rankProgram) { FeatureResolver resolver(rankProgram.get_seeds()); assert(resolver.num_features() == 1u); @@ -222,11 +210,7 @@ template <bool do_rank, bool do_limit, bool do_share, bool use_rank_drop_limit> void MatchThread::match_loop_helper_rank_limit_share_drop(MatchTools &tools, HitCollector &hits) { - if (FastBlackListingStrategy::can_use(do_rank, do_limit, tools.search())) { - match_loop<FastBlackListingStrategy, do_rank, do_limit, do_share, use_rank_drop_limit>(tools, hits); - } else { - match_loop<SimpleStrategy, do_rank, do_limit, do_share, use_rank_drop_limit>(tools, hits); - } + match_loop<SimpleStrategy, do_rank, do_limit, do_share, use_rank_drop_limit>(tools, hits); } template <bool do_rank, bool do_limit, bool do_share> diff --git a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp index fd10bfcf47c..1f570e0a381 100644 --- a/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp +++ b/searchlib/src/tests/attribute/stringattribute/stringattribute_test.cpp @@ -389,7 +389,7 @@ TEST("testSingleValue") { EXPECT_EQUAL(24u, sizeof(SearchContext)); EXPECT_EQUAL(56u, sizeof(StringSearchHelper)); - EXPECT_EQUAL(88u, sizeof(SingleValueStringAttribute::StringSingleImplSearchContext)); + EXPECT_EQUAL(104u, sizeof(attribute::SingleStringEnumSearchContext)); { Config cfg(BasicType::STRING, CollectionType::SINGLE); SingleValueStringAttribute svsa("svsa", cfg); diff --git a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt index ba4ec2b3f12..b3aa3bd958b 100644 --- a/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt +++ b/searchlib/src/vespa/searchlib/attribute/CMakeLists.txt @@ -66,6 +66,8 @@ vespa_add_library(searchlib_attribute OBJECT loadedvalue.cpp multi_numeric_enum_search_context.cpp multi_numeric_search_context.cpp + multi_string_enum_search_context.cpp + multi_string_enum_hint_search_context.cpp multi_value_mapping.cpp multi_value_mapping_base.cpp multienumattribute.cpp @@ -108,10 +110,14 @@ vespa_add_library(searchlib_attribute OBJECT singlestringpostattribute.cpp single_numeric_enum_search_context.cpp single_numeric_search_context.cpp + single_small_numeric_search_context.cpp + single_string_enum_search_context.cpp + single_string_enum_hint_search_context.cpp sourceselector.cpp stringattribute.cpp stringbase.cpp string_matcher.cpp + string_search_context.cpp string_search_helper.cpp DEPENDS ) diff --git a/searchlib/src/vespa/searchlib/attribute/attributevector.h b/searchlib/src/vespa/searchlib/attribute/attributevector.h index 4e3bd259cc6..61f578d9f2b 100644 --- a/searchlib/src/vespa/searchlib/attribute/attributevector.h +++ b/searchlib/src/vespa/searchlib/attribute/attributevector.h @@ -386,7 +386,7 @@ public: /** Return the fixed length of the attribute. If 0 then you must inquire each document. */ size_t getFixedWidth() const override { return _config.basicType().fixedSize(); } - const Config &getConfig() const { return _config; } + const Config &getConfig() const noexcept { return _config; } void update_config(const Config& cfg); BasicType getInternalBasicType() const { return _config.basicType(); } CollectionType getInternalCollectionType() const { return _config.collectionType(); } diff --git a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h index 7042de9ddb8..0342976ffd6 100644 --- a/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/enumhintsearchcontext.h @@ -31,9 +31,11 @@ protected: uint64_t numValues); ~EnumHintSearchContext() override; +public: void lookupTerm(const vespalib::datastore::EntryComparator &comp); void lookupRange(const vespalib::datastore::EntryComparator &low, const vespalib::datastore::EntryComparator &high); +protected: std::unique_ptr<queryeval::SearchIterator> createPostingIterator(fef::TermFieldMatchData *matchData, bool strict) override; diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp new file mode 100644 index 00000000000..55886ac85fa --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.cpp @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_string_enum_hint_search_context.hpp" +#include <vespa/searchcommon/attribute/multivalue.h> + +using ValueRef = search::multivalue::Value<vespalib::datastore::AtomicEntryRef>; +using WeightedValueRef = search::multivalue::WeightedValue<vespalib::datastore::AtomicEntryRef>; + +namespace search::attribute { + +template class MultiStringEnumHintSearchContext<ValueRef>; + +template class MultiStringEnumHintSearchContext<WeightedValueRef>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h new file mode 100644 index 00000000000..92650851116 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.h @@ -0,0 +1,24 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_string_enum_search_context.h" +#include "enumhintsearchcontext.h" + +namespace search::attribute { + +/* + * MultiStringEnumHintSearchContext handles the creation of search iterators + * for a query term on a multi value string enumerated attribute vector using + * dictionary information to eliminate searches for nonexisting words. + */ +template <typename M> +class MultiStringEnumHintSearchContext : public MultiStringEnumSearchContext<M>, + public EnumHintSearchContext +{ +public: + MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values); + ~MultiStringEnumHintSearchContext() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp new file mode 100644 index 00000000000..a6b0f3f5eb9 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_hint_search_context.hpp @@ -0,0 +1,20 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_string_enum_hint_search_context.h" +#include <vespa/searchlib/query/query_term_ucs4.h> + +namespace search::attribute { + +template <typename M> +MultiStringEnumHintSearchContext<M>::MultiStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values) + : MultiStringEnumSearchContext<M>(std::move(qTerm), cased, toBeSearched, mv_mapping, enum_store), + EnumHintSearchContext(enum_store.get_dictionary(), + doc_id_limit, num_values) +{ + this->setup_enum_hint_sc(enum_store, *this); +} + +template <typename M> +MultiStringEnumHintSearchContext<M>::~MultiStringEnumHintSearchContext() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp new file mode 100644 index 00000000000..4abaf02e2e8 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.cpp @@ -0,0 +1,15 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "multi_string_enum_search_context.hpp" +#include <vespa/searchcommon/attribute/multivalue.h> + +using ValueRef = search::multivalue::Value<vespalib::datastore::AtomicEntryRef>; +using WeightedValueRef = search::multivalue::WeightedValue<vespalib::datastore::AtomicEntryRef>; + +namespace search::attribute { + +template class MultiStringEnumSearchContext<ValueRef>; + +template class MultiStringEnumSearchContext<WeightedValueRef>; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h new file mode 100644 index 00000000000..a4f05a5c9cc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.h @@ -0,0 +1,21 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_enum_search_context.h" +#include "string_search_context.h" + +namespace search::attribute { + +/* + * MultiStringEnumSearchContext handles the creation of search iterators for + * a query term on a multi value string enumerated attribute vector. + */ +template <typename M> +class MultiStringEnumSearchContext : public MultiEnumSearchContext<const char*, StringSearchContext, M> +{ +public: + MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp new file mode 100644 index 00000000000..02a740b06dc --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/multi_string_enum_search_context.hpp @@ -0,0 +1,17 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "multi_string_enum_search_context.h" +#include "multi_enum_search_context.hpp" +#include <vespa/searchlib/query/query_term_simple.h> + +namespace search::attribute { + +template <typename M> +MultiStringEnumSearchContext<M>::MultiStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const MultiValueMapping<M>& mv_mapping, const EnumStoreT<const char*>& enum_store) + : MultiEnumSearchContext<const char*, StringSearchContext, M>(StringMatcher(std::move(qTerm), cased), toBeSearched, mv_mapping, enum_store) +{ +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h index e832f53777b..cf4169138fe 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.h @@ -7,7 +7,6 @@ #include "enumstore.h" #include "multienumattribute.h" #include "multi_value_mapping.h" -#include "enumhintsearchcontext.h" #include <vespa/searchcommon/attribute/multivalue.h> namespace search { @@ -104,58 +103,6 @@ public: return getWeightedHelper(doc, v, sz); } - /* - * Specialization of SearchContext for weighted set type - */ - class StringImplSearchContext : public StringAttribute::StringSearchContext { - public: - StringImplSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringAttribute::StringSearchContext(std::move(qTerm), toBeSearched) - { } - protected: - const MultiValueStringAttributeT<B, M> & myAttribute() const { - return static_cast< const MultiValueStringAttributeT<B, M> & > (attribute()); - } - int32_t onFind(DocId docId, int32_t elemId) const override; - - template <typename Collector> - int32_t findNextWeight(DocId doc, int32_t elemId, int32_t & weight, Collector & collector) const; - }; - - /* - * Specialization of SearchContext for weighted set type - */ - class StringSetImplSearchContext : public StringImplSearchContext { - public: - StringSetImplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringImplSearchContext(std::move(qTerm), toBeSearched) - { } - protected: - int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override; - }; - - /* - * Specialization of SearchContext for array type - */ - class StringArrayImplSearchContext : public StringImplSearchContext { - public: - StringArrayImplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringImplSearchContext(std::move(qTerm), toBeSearched) - { } - protected: - int32_t onFind(DocId docId, int32_t elemId, int32_t &weight) const override; - }; - - template <typename BT> - class StringTemplSearchContext : public BT, - public attribute::EnumHintSearchContext - { - using BT::queryTerm; - using AttrType = MultiValueStringAttributeT<B, M>; - public: - StringTemplSearchContext(attribute::SearchContext::QueryTermSimpleUP qTerm, const AttrType & toBeSearched); - }; - std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; }; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp index 454913c3990..212a71dad74 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringattribute.hpp @@ -6,6 +6,7 @@ #include "multistringattribute.h" #include "enumattribute.hpp" #include "multienumattribute.hpp" +#include "multi_string_enum_hint_search_context.h" #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> @@ -41,99 +42,8 @@ std::unique_ptr<attribute::SearchContext> MultiValueStringAttributeT<B, M>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams &) const { - if (this->getCollectionType() == attribute::CollectionType::WSET) { - return std::make_unique<StringTemplSearchContext<StringSetImplSearchContext>>(std::move(qTerm), *this); - } else { - return std::make_unique<StringTemplSearchContext<StringArrayImplSearchContext>>(std::move(qTerm), *this); - } -} - -namespace { - -template <typename E> -class EnumAccessor { -public: - EnumAccessor(const E & enumStore) : _enumStore(enumStore) { } - const char * get(typename E::Index index) const { return _enumStore.get_value(index); } -private: - const E & _enumStore; -}; - -} - -template <typename B, typename M> -int32_t -MultiValueStringAttributeT<B, M>::StringSetImplSearchContext::onFind(DocId doc, int32_t elemId, int32_t &weight) const -{ - StringAttribute::StringSearchContext::CollectWeight collector; - return this->findNextWeight(doc, elemId, weight, collector); -} - -template <typename B, typename M> -int32_t -MultiValueStringAttributeT<B, M>::StringArrayImplSearchContext::onFind(DocId doc, int32_t elemId, int32_t &weight) const -{ - StringAttribute::StringSearchContext::CollectHitCount collector; - return this->findNextWeight(doc, elemId, weight, collector); -} - -template <typename B, typename M> -template <typename Collector> -int32_t -MultiValueStringAttributeT<B, M>::StringImplSearchContext::findNextWeight(DocId doc, int32_t elemId, int32_t & weight, Collector & collector) const -{ - WeightedIndexArrayRef indices(myAttribute()._mvMapping.get(doc)); - - EnumAccessor<typename B::EnumStore> accessor(myAttribute()._enumStore); - int32_t foundElem = findNextMatch(indices, elemId, accessor, collector); - weight = collector.getWeight(); - return foundElem; -} - -template <typename B, typename M> -int32_t -MultiValueStringAttributeT<B, M>::StringImplSearchContext::onFind(DocId doc, int32_t elemId) const -{ - const auto& attr = static_cast<const MultiValueStringAttributeT<B, M>&>(attribute()); - WeightedIndexArrayRef indices(attr._mvMapping.get(doc)); - for (uint32_t i(elemId); i < indices.size(); i++) { - if (isMatch(attr._enumStore.get_value(indices[i].value_ref().load_acquire()))) { - return i; - } - } - - return -1; -} - -template <typename B, typename M> -template <typename BT> -MultiValueStringAttributeT<B, M>::StringTemplSearchContext<BT>:: -StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched) : - BT(std::move(qTerm), toBeSearched), - EnumHintSearchContext(toBeSearched.getEnumStore().get_dictionary(), - toBeSearched.getCommittedDocIdLimit(), - toBeSearched.getStatus().getNumValues()) -{ - const EnumStore &enumStore(toBeSearched.getEnumStore()); - - this->_plsc = static_cast<attribute::IPostingListSearchContext *>(this); - if (this->valid()) { - if (this->isPrefix()) { - auto comp = enumStore.make_folded_comparator_prefix(queryTerm()->getTerm()); - lookupRange(comp, comp); - } else if (this->isRegex()) { - vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm())); - auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); - lookupRange(comp, comp); - } else if (this->isFuzzy()) { - vespalib::string prefix(this->getFuzzyMatcher().getPrefix()); - auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); - lookupRange(comp, comp); - } else { - auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm()); - lookupTerm(comp); - } - } + bool cased = this->get_match_is_cased(); + return std::make_unique<attribute::MultiStringEnumHintSearchContext<M>>(std::move(qTerm), cased, *this, this->_mvMapping, this->_enumStore, this->getCommittedDocIdLimit(), this->getStatus().getNumValues()); } } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h index 9f8827028cc..17a67a67ddf 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.h @@ -54,10 +54,6 @@ private: using PostingMap = typename PostingParent::PostingMap; using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP; using SelfType = MultiValueStringPostingAttributeT<B, T>; - using StringArrayImplSearchContext = typename MultiValueStringAttributeT<B, T>::StringArrayImplSearchContext; - using StringArrayPostingSearchContext = attribute::StringPostingSearchContext<StringArrayImplSearchContext, SelfType, int32_t>; - using StringSetImplSearchContext = typename MultiValueStringAttributeT<B, T>::StringSetImplSearchContext; - using StringSetPostingSearchContext = attribute::StringPostingSearchContext<StringSetImplSearchContext, SelfType, int32_t>; using WeightedIndex = typename MultiValueStringAttributeT<B, T>::WeightedIndex; using generation_t = typename MultiValueStringAttributeT<B, T>::generation_t; diff --git a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp index 13de3bc6493..2c2ac48979d 100644 --- a/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/multistringpostattribute.hpp @@ -5,6 +5,7 @@ #include "stringattribute.h" #include "multistringpostattribute.h" #include "multistringattribute.hpp" +#include "multi_string_enum_search_context.h" #include <vespa/searchlib/query/query_term_simple.h> namespace search { @@ -89,9 +90,10 @@ std::unique_ptr<attribute::SearchContext> MultiValueStringPostingAttributeT<B, T>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams & params) const { - using BaseSC = std::conditional_t<T::_hasWeight, StringSetImplSearchContext, StringArrayImplSearchContext>; - using SC = std::conditional_t<T::_hasWeight, StringSetPostingSearchContext, StringArrayPostingSearchContext>; - BaseSC base_sc(std::move(qTerm), *this); + using BaseSC = attribute::MultiStringEnumSearchContext<T>; + using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, int32_t>; + bool cased = this->get_match_is_cased(); + BaseSC base_sc(std::move(qTerm), cased, *this, this->_mvMapping, this->_enumStore); return std::make_unique<SC>(std::move(base_sc), params.useBitVector(), *this); } diff --git a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h index 78e372ed218..9ee56a27a85 100644 --- a/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h +++ b/searchlib/src/vespa/searchlib/attribute/postinglistsearchcontext.h @@ -301,7 +301,7 @@ StringPostingSearchContext<BaseSC, AttrT, DataT>::useThis(const PostingListSearc ? this->getRegex().partial_match(_enumStore.get_value(it.getKey().load_acquire())) : false; } else if ( this->isCased() ) { - return this->isMatch(_enumStore.get_value(it.getKey().load_acquire())); + return this->match(_enumStore.get_value(it.getKey().load_acquire())); } else if (this->isFuzzy()) { return this->getFuzzyMatcher().isMatch(_enumStore.get_value(it.getKey().load_acquire())); } diff --git a/searchlib/src/vespa/searchlib/attribute/single_small_numeric_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_small_numeric_search_context.cpp new file mode 100644 index 00000000000..5eeef7cd61a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_small_numeric_search_context.cpp @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_small_numeric_search_context.h" +#include "attributeiterators.hpp" +#include <vespa/searchlib/queryeval/emptysearch.h> + +namespace search::attribute { + +SingleSmallNumericSearchContext::SingleSmallNumericSearchContext(std::unique_ptr<QueryTermSimple> qTerm, const AttributeVector& toBeSearched, const Word* word_data, Word value_mask, uint32_t value_shift_shift, uint32_t value_shift_mask, uint32_t word_shift) + : NumericSearchContext<NumericRangeMatcher<T>>(toBeSearched, *qTerm, false), + _wordData(word_data), + _valueMask(value_mask), + _valueShiftShift(value_shift_shift), + _valueShiftMask(value_shift_mask), + _wordShift(word_shift) +{ +} + +std::unique_ptr<queryeval::SearchIterator> +SingleSmallNumericSearchContext::createFilterIterator(fef::TermFieldMatchData* matchData, bool strict) +{ + if (!valid()) { + return std::make_unique<queryeval::EmptySearch>(); + } + if (getIsFilter()) { + return strict + ? std::make_unique<FilterAttributeIteratorStrict<SingleSmallNumericSearchContext>>(*this, matchData) + : std::make_unique<FilterAttributeIteratorT<SingleSmallNumericSearchContext>>(*this, matchData); + } + return strict + ? std::make_unique<AttributeIteratorStrict<SingleSmallNumericSearchContext>>(*this, matchData) + : std::make_unique<AttributeIteratorT<SingleSmallNumericSearchContext>>(*this, matchData); +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_small_numeric_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_small_numeric_search_context.h new file mode 100644 index 00000000000..46ed02b3eca --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_small_numeric_search_context.h @@ -0,0 +1,58 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "numeric_search_context.h" +#include "numeric_range_matcher.h" +#include <vespa/vespalib/util/atomic.h> + +namespace search::attribute { + +/* + * SingleSmallNumericSearchContext handles the creation of search iterators for + * a query term on a single value small numeric attribute vector. + */ +class SingleSmallNumericSearchContext : public NumericSearchContext<NumericRangeMatcher<int8_t>> +{ +private: + using Word = uint32_t; + using T = int8_t; + const Word *_wordData; + Word _valueMask; + uint32_t _valueShiftShift; + uint32_t _valueShiftMask; + uint32_t _wordShift; + + int32_t onFind(DocId docId, int32_t elementId, int32_t & weight) const override { + return find(docId, elementId, weight); + } + + int32_t onFind(DocId docId, int32_t elementId) const override { + return find(docId, elementId); + } + +public: + SingleSmallNumericSearchContext(std::unique_ptr<QueryTermSimple> qTerm, const AttributeVector& toBeSearched, const Word* word_data, Word value_mask, uint32_t value_shift_shift, uint32_t value_shift_mask, uint32_t word_shift); + + int32_t find(DocId docId, int32_t elemId, int32_t & weight) const { + if ( elemId != 0) return -1; + const Word &word = _wordData[docId >> _wordShift]; + uint32_t valueShift = (docId & _valueShiftMask) << _valueShiftShift; + T v = (vespalib::atomic::load_ref_relaxed(word) >> valueShift) & _valueMask; + weight = 1; + return match(v) ? 0 : -1; + } + + int32_t find(DocId docId, int32_t elemId) const { + if ( elemId != 0) return -1; + const Word &word = _wordData[docId >> _wordShift]; + uint32_t valueShift = (docId & _valueShiftMask) << _valueShiftShift; + T v = (vespalib::atomic::load_ref_relaxed(word) >> valueShift) & _valueMask; + return match(v) ? 0 : -1; + } + + std::unique_ptr<queryeval::SearchIterator> + createFilterIterator(fef::TermFieldMatchData* matchData, bool strict) override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp new file mode 100644 index 00000000000..70023b27802 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.cpp @@ -0,0 +1,18 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_string_enum_hint_search_context.h" +#include <vespa/searchlib/query/query_term_ucs4.h> + +namespace search::attribute { + +SingleStringEnumHintSearchContext::SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values) + : SingleStringEnumSearchContext(std::move(qTerm), cased, toBeSearched, enum_indices, enum_store), + EnumHintSearchContext(enum_store.get_dictionary(), + doc_id_limit, num_values) +{ + setup_enum_hint_sc(enum_store, *this); +} + +SingleStringEnumHintSearchContext::~SingleStringEnumHintSearchContext() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h new file mode 100644 index 00000000000..f9d44454cd0 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_hint_search_context.h @@ -0,0 +1,23 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "single_string_enum_search_context.h" +#include "enumhintsearchcontext.h" + +namespace search::attribute { + +/* + * SingleStringEnumHintSearchContext handles the creation of search iterators + * for a query term on a single value string enumerated attribute vector using + * dictionary information to eliminate searches for nonexisting words. + */ +class SingleStringEnumHintSearchContext : public SingleStringEnumSearchContext, + public EnumHintSearchContext +{ +public: + SingleStringEnumHintSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store, uint32_t doc_id_limit, uint64_t num_values); + ~SingleStringEnumHintSearchContext() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp new file mode 100644 index 00000000000..cba1d207501 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.cpp @@ -0,0 +1,18 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "single_string_enum_search_context.h" +#include "single_enum_search_context.hpp" +#include <vespa/searchlib/query/query_term_simple.h> + +namespace search::attribute { + +SingleStringEnumSearchContext::SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store) + : SingleEnumSearchContext<const char*, StringSearchContext>(StringMatcher(std::move(qTerm), cased), toBeSearched, enum_indices, enum_store) +{ +} + +SingleStringEnumSearchContext::SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept = default; + +SingleStringEnumSearchContext::~SingleStringEnumSearchContext() = default; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h new file mode 100644 index 00000000000..6a9ed38b4ea --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/single_string_enum_search_context.h @@ -0,0 +1,22 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "single_enum_search_context.h" +#include "string_search_context.h" + +namespace search::attribute { + +/* + * SingleStringEnumSearchContext handles the creation of search iterators for + * a query term on a single value string enumerated attribute vector. + */ +class SingleStringEnumSearchContext : public SingleEnumSearchContext<const char*, StringSearchContext> +{ +public: + SingleStringEnumSearchContext(std::unique_ptr<QueryTermSimple> qTerm, bool cased, const AttributeVector& toBeSearched, const vespalib::datastore::AtomicEntryRef* enum_indices, const EnumStoreT<const char*>& enum_store); + SingleStringEnumSearchContext(SingleStringEnumSearchContext&&) noexcept; + ~SingleStringEnumSearchContext() override; +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp index 009078447dc..eca74255026 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.cpp @@ -1,12 +1,11 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "singlesmallnumericattribute.h" -#include "attributeiterators.hpp" #include "attributevector.hpp" #include "iattributesavetarget.h" #include "primitivereader.h" +#include "single_small_numeric_search_context.h" #include <vespa/searchlib/query/query_term_simple.h> -#include <vespa/searchlib/queryeval/emptysearch.h> #include <vespa/searchlib/util/file_settings.h> #include <vespa/vespalib/data/databuffer.h> #include <vespa/vespalib/util/size_literals.h> @@ -172,7 +171,7 @@ std::unique_ptr<attribute::SearchContext> SingleValueSmallNumericAttribute::getSearch(std::unique_ptr<QueryTermSimple> qTerm, const attribute::SearchContextParams &) const { - return std::make_unique<SingleSearchContext>(std::move(qTerm), *this); + return std::make_unique<attribute::SingleSmallNumericSearchContext>(std::move(qTerm), *this, &_wordData.acquire_elem_ref(0), _valueMask, _valueShiftShift, _valueShiftMask, _wordShift); } void @@ -208,40 +207,6 @@ SingleValueSmallNumericAttribute::getEstimatedSaveByteSize() const return headerSize + sz; } -bool SingleValueSmallNumericAttribute::SingleSearchContext::valid() const { return this->isValid(); } - - -SingleValueSmallNumericAttribute::SingleSearchContext::SingleSearchContext(std::unique_ptr<QueryTermSimple> qTerm, - const SingleValueSmallNumericAttribute & toBeSearched) - : attribute::NumericRangeMatcher<T>(*qTerm), - SearchContext(toBeSearched), _wordData(&toBeSearched._wordData.acquire_elem_ref(0)), - _valueMask(toBeSearched._valueMask), - _valueShiftShift(toBeSearched._valueShiftShift), - _valueShiftMask(toBeSearched._valueShiftMask), - _wordShift(toBeSearched._wordShift) -{ } - -Int64Range -SingleValueSmallNumericAttribute::SingleSearchContext::getAsIntegerTerm() const { - return this->getRange(); -} - -std::unique_ptr<queryeval::SearchIterator> -SingleValueSmallNumericAttribute::SingleSearchContext::createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) -{ - if (!valid()) { - return std::make_unique<queryeval::EmptySearch>(); - } - if (getIsFilter()) { - return strict - ? std::make_unique<FilterAttributeIteratorStrict<SingleSearchContext>>(*this, matchData) - : std::make_unique<FilterAttributeIteratorT<SingleSearchContext>>(*this, matchData); - } - return strict - ? std::make_unique<AttributeIteratorStrict<SingleSearchContext>>(*this, matchData) - : std::make_unique<AttributeIteratorT<SingleSearchContext>>(*this, matchData); -} - namespace { template <typename TT> diff --git a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h index 77c4133817c..f6059d3d510 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlesmallnumericattribute.h @@ -3,8 +3,6 @@ #pragma once #include "integerbase.h" -#include "floatbase.h" -#include "numeric_range_matcher.h" #include "search_context.h" #include <vespa/vespalib/util/atomic.h> #include <vespa/vespalib/util/rcuvector.h> @@ -15,7 +13,6 @@ namespace search { class SingleValueSmallNumericAttribute : public IntegerAttributeTemplate<int8_t> { private: -// friend class attribute::SearchContext; typedef IntegerAttributeTemplate<int8_t> B; typedef B::BaseType T; typedef B::DocId DocId; @@ -58,53 +55,6 @@ protected: public: - /* - * Specialization of SearchContext - */ - class SingleSearchContext : public attribute::NumericRangeMatcher<T>, public attribute::SearchContext - { - private: - const Word *_wordData; - Word _valueMask; - uint32_t _valueShiftShift; - uint32_t _valueShiftMask; - uint32_t _wordShift; - - int32_t onFind(DocId docId, int32_t elementId, int32_t & weight) const override { - return find(docId, elementId, weight); - } - - int32_t onFind(DocId docId, int32_t elementId) const override { - return find(docId, elementId); - } - - bool valid() const override; - - public: - SingleSearchContext(std::unique_ptr<QueryTermSimple> qTerm, const SingleValueSmallNumericAttribute & toBeSearched); - - int32_t find(DocId docId, int32_t elemId, int32_t & weight) const { - if ( elemId != 0) return -1; - const Word &word = _wordData[docId >> _wordShift]; - uint32_t valueShift = (docId & _valueShiftMask) << _valueShiftShift; - T v = (vespalib::atomic::load_ref_relaxed(word) >> valueShift) & _valueMask; - weight = 1; - return match(v) ? 0 : -1; - } - - int32_t find(DocId docId, int32_t elemId) const { - if ( elemId != 0) return -1; - const Word &word = _wordData[docId >> _wordShift]; - uint32_t valueShift = (docId & _valueShiftMask) << _valueShiftShift; - T v = (vespalib::atomic::load_ref_relaxed(word) >> valueShift) & _valueMask; - return match(v) ? 0 : -1; - } - - Int64Range getAsIntegerTerm() const override; - - std::unique_ptr<queryeval::SearchIterator> - createFilterIterator(fef::TermFieldMatchData * matchData, bool strict) override; - }; SingleValueSmallNumericAttribute(const vespalib::string & baseFileName, const Config &c, Word valueMask, uint32_t valueShiftShift, uint32_t valueShiftMask, uint32_t wordShift); diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h index 7a3f7ee2e07..e9bde76d4ec 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.h @@ -5,7 +5,6 @@ #include <vespa/searchlib/attribute/stringbase.h> #include <vespa/searchlib/attribute/enumattribute.h> #include <vespa/searchlib/attribute/singleenumattribute.h> -#include "enumhintsearchcontext.h" namespace search { @@ -79,35 +78,6 @@ public: std::unique_ptr<attribute::SearchContext> getSearch(QueryTermSimpleUP term, const attribute::SearchContextParams & params) const override; - - class StringSingleImplSearchContext : public StringAttribute::StringSearchContext { - public: - StringSingleImplSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched) : - StringSearchContext(std::move(qTerm), toBeSearched) - { } - StringSingleImplSearchContext(StringSingleImplSearchContext&&) noexcept = default; - protected: - int32_t onFind(DocId doc, int32_t elemId, int32_t &weight) const override { - weight = 1; - return onFind(doc, elemId); - } - - int32_t onFind(DocId doc, int32_t elemId) const override { - if ( elemId != 0) return -1; - const SingleValueStringAttributeT<B> & attr(static_cast<const SingleValueStringAttributeT<B> &>(attribute())); - return isMatch(attr._enumStore.get_value(attr.acquire_enum_entry_ref(doc))) ? 0 : -1; - } - - }; - - class StringTemplSearchContext : public StringSingleImplSearchContext, - public attribute::EnumHintSearchContext - { - using AttrType = SingleValueStringAttributeT<B>; - using StringSingleImplSearchContext::queryTerm; - public: - StringTemplSearchContext(QueryTermSimpleUP qTerm, const AttrType & toBeSearched); - }; }; using SingleValueStringAttribute = SingleValueStringAttributeT<EnumAttribute<StringAttribute> >; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp index 6c99db68bd4..50ea4ef42b6 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringattribute.hpp @@ -6,6 +6,7 @@ #include "stringattribute.h" #include "singleenumattribute.hpp" #include "attributevector.hpp" +#include "single_string_enum_hint_search_context.h" #include <vespa/vespalib/text/utf8.h> #include <vespa/vespalib/text/lowercase.h> #include <vespa/searchlib/util/bufferwriter.h> @@ -40,36 +41,8 @@ std::unique_ptr<attribute::SearchContext> SingleValueStringAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams &) const { - return std::make_unique<StringTemplSearchContext>(std::move(qTerm), *this); -} - -template <typename B> -SingleValueStringAttributeT<B>::StringTemplSearchContext::StringTemplSearchContext(QueryTermSimple::UP qTerm, const AttrType & toBeSearched) : - StringSingleImplSearchContext(std::move(qTerm), toBeSearched), - EnumHintSearchContext(toBeSearched.getEnumStore().get_dictionary(), - toBeSearched.getCommittedDocIdLimit(), - toBeSearched.getStatus().getNumValues()) -{ - const EnumStore &enumStore(toBeSearched.getEnumStore()); - - this->_plsc = static_cast<attribute::IPostingListSearchContext *>(this); - if (this->valid()) { - if (this->isPrefix()) { - auto comp = enumStore.make_folded_comparator_prefix(queryTerm()->getTerm()); - lookupRange(comp, comp); - } else if (this->isRegex()) { - vespalib::string prefix(vespalib::RegexpUtil::get_prefix(this->queryTerm()->getTerm())); - auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); - lookupRange(comp, comp); - } else if (this->isFuzzy()) { - vespalib::string prefix(this->getFuzzyMatcher().getPrefix()); - auto comp = enumStore.make_folded_comparator_prefix(prefix.c_str()); - lookupRange(comp, comp); - } else { - auto comp = enumStore.make_folded_comparator(queryTerm()->getTerm()); - lookupTerm(comp); - } - } + bool cased = this->get_match_is_cased(); + return std::make_unique<attribute::SingleStringEnumHintSearchContext>(std::move(qTerm), cased, *this, &this->_enumIndices.acquire_elem_ref(0), this->_enumStore, this->getCommittedDocIdLimit(), this->getStatus().getNumValues()); } } diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h index f75926e5a60..2643f11eaf3 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.h @@ -40,10 +40,6 @@ private: using PostingMap = typename PostingParent::PostingMap; using QueryTermSimpleUP = AttributeVector::QueryTermSimpleUP; using SelfType = SingleValueStringPostingAttributeT<B>; - using StringSingleImplSearchContext = typename SingleValueStringAttributeT<B>::StringSingleImplSearchContext; - using StringSinglePostingSearchContext = attribute::StringPostingSearchContext<StringSingleImplSearchContext, - SelfType, - vespalib::btree::BTreeNoLeafData>; using ValueModifier = typename SingleValueStringAttributeT<B>::ValueModifier; using generation_t = typename SingleValueStringAttributeT<B>::generation_t; diff --git a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp index cbffe1a2662..ddbc24d2b75 100644 --- a/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp +++ b/searchlib/src/vespa/searchlib/attribute/singlestringpostattribute.hpp @@ -3,6 +3,7 @@ #pragma once #include "singlestringpostattribute.h" +#include "single_string_enum_search_context.h" #include <vespa/searchlib/query/query_term_ucs4.h> namespace search { @@ -139,10 +140,13 @@ std::unique_ptr<attribute::SearchContext> SingleValueStringPostingAttributeT<B>::getSearch(QueryTermSimpleUP qTerm, const attribute::SearchContextParams & params) const { - StringSingleImplSearchContext base_sc(std::move(qTerm), *this); - return std::make_unique<StringSinglePostingSearchContext>(std::move(base_sc), - params.useBitVector(), - *this); + using BaseSC = attribute::SingleStringEnumSearchContext; + using SC = attribute::StringPostingSearchContext<BaseSC, SelfType, vespalib::btree::BTreeNoLeafData>; + bool cased = this->get_match_is_cased(); + BaseSC base_sc(std::move(qTerm), cased, *this, &this->_enumIndices.acquire_elem_ref(0), this->_enumStore); + return std::make_unique<SC>(std::move(base_sc), + params.useBitVector(), + *this); } } // namespace search diff --git a/searchlib/src/vespa/searchlib/attribute/string_matcher.h b/searchlib/src/vespa/searchlib/attribute/string_matcher.h index 51cd3d238a6..ea4debecc0d 100644 --- a/searchlib/src/vespa/searchlib/attribute/string_matcher.h +++ b/searchlib/src/vespa/searchlib/attribute/string_matcher.h @@ -17,10 +17,11 @@ class StringMatcher private: std::unique_ptr<QueryTermUCS4> _query_term; attribute::StringSearchHelper _helper; -protected: +public: StringMatcher(std::unique_ptr<QueryTermSimple> qTerm, bool cased); StringMatcher(StringMatcher&&) noexcept; ~StringMatcher(); +protected: bool isValid() const; bool match(const char *src) const { return _helper.isMatch(src); } bool isPrefix() const { return _helper.isPrefix(); } diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp new file mode 100644 index 00000000000..aa7aa016720 --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.cpp @@ -0,0 +1,58 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#include "string_search_context.h" +#include "enumhintsearchcontext.h" +#include "enumstore.h" +#include <vespa/vespalib/util/regexp.h> +#include <vespa/searchlib/query/query_term_ucs4.h> + +namespace search::attribute { + +StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased) + : SearchContext(to_be_searched), + StringMatcher(std::move(query_term), cased) +{ +} + +StringSearchContext::StringSearchContext(const AttributeVector& to_be_searched, StringMatcher &&matcher) + : SearchContext(to_be_searched), + StringMatcher(std::move(matcher)) +{ +} + +const QueryTermUCS4* +StringSearchContext::queryTerm() const +{ + return get_query_term_ptr(); +} + +bool +StringSearchContext::valid() const +{ + return StringMatcher::isValid(); +} + +void +StringSearchContext::setup_enum_hint_sc(const EnumStoreT<const char*>& enum_store, EnumHintSearchContext& enum_hint_sc) +{ + _plsc = &enum_hint_sc; + if (valid()) { + if (isPrefix()) { + auto comp = enum_store.make_folded_comparator_prefix(queryTerm()->getTerm()); + enum_hint_sc.lookupRange(comp, comp); + } else if (isRegex()) { + vespalib::string prefix(vespalib::RegexpUtil::get_prefix(queryTerm()->getTerm())); + auto comp = enum_store.make_folded_comparator_prefix(prefix.c_str()); + enum_hint_sc.lookupRange(comp, comp); + } else if (isFuzzy()) { + vespalib::string prefix(getFuzzyMatcher().getPrefix()); + auto comp = enum_store.make_folded_comparator_prefix(prefix.c_str()); + enum_hint_sc.lookupRange(comp, comp); + } else { + auto comp = enum_store.make_folded_comparator(queryTerm()->getTerm()); + enum_hint_sc.lookupTerm(comp); + } + } +} + +} diff --git a/searchlib/src/vespa/searchlib/attribute/string_search_context.h b/searchlib/src/vespa/searchlib/attribute/string_search_context.h new file mode 100644 index 00000000000..fc9f3688a7a --- /dev/null +++ b/searchlib/src/vespa/searchlib/attribute/string_search_context.h @@ -0,0 +1,35 @@ +// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. + +#pragma once + +#include "search_context.h" +#include "string_matcher.h" + +namespace search { + +template <class EntryT> class EnumStoreT; + +} + +namespace search::attribute { + +class EnumHintSearchContext; + +/* + * StringSearchContext is an abstract base class for search contexts + * handling a query term on a string attribute vector. + */ +class StringSearchContext : public SearchContext, public StringMatcher +{ +protected: + using MatcherType = StringMatcher; +public: + StringSearchContext(const AttributeVector& to_be_searched, std::unique_ptr<QueryTermSimple> query_term, bool cased); + StringSearchContext(const AttributeVector& to_be_searched, StringMatcher&& matcher); + const QueryTermUCS4* queryTerm() const override; + bool valid() const override; + + void setup_enum_hint_sc(const EnumStoreT<const char*>& enum_store, EnumHintSearchContext& enum_hint_sc); +}; + +} diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp index 355e14bee45..b60ec269383 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.cpp +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.cpp @@ -152,29 +152,6 @@ StringAttribute::onSerializeForDescendingSort(DocId doc, void * serTo, long avai return buf.size(); } -StringAttribute::StringSearchContext::StringSearchContext(QueryTermSimple::UP qTerm, - const StringAttribute & toBeSearched) - : StringMatcher(std::move(qTerm), toBeSearched.getConfig().get_match() == Config::Match::CASED), - SearchContext(toBeSearched) -{ -} - -StringAttribute::StringSearchContext::StringSearchContext(StringSearchContext&&) noexcept = default; - -StringAttribute::StringSearchContext::~StringSearchContext() = default; - -bool -StringAttribute::StringSearchContext::valid() const -{ - return isValid(); -} - -const QueryTermUCS4 * -StringAttribute::StringSearchContext::queryTerm() const -{ - return get_query_term_ptr(); -} - uint32_t StringAttribute::clearDoc(DocId doc) { diff --git a/searchlib/src/vespa/searchlib/attribute/stringbase.h b/searchlib/src/vespa/searchlib/attribute/stringbase.h index 630d85e23c3..e5e14829118 100644 --- a/searchlib/src/vespa/searchlib/attribute/stringbase.h +++ b/searchlib/src/vespa/searchlib/attribute/stringbase.h @@ -9,6 +9,7 @@ #include "loadedenumvalue.h" #include "search_context.h" #include "string_matcher.h" +#include "string_search_context.h" namespace search { @@ -71,6 +72,8 @@ protected: bool onAddDoc(DocId doc) override; vespalib::MemoryUsage getChangeVectorMemoryUsage() const override; + + bool get_match_is_cased() const noexcept { return getConfig().get_match() == attribute::Config::Match::CASED; } private: virtual void load_posting_lists(LoadedVector& loaded); virtual void load_enum_store(LoadedVector& loaded); @@ -86,55 +89,6 @@ private: long onSerializeForAscendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; long onSerializeForDescendingSort(DocId doc, void * serTo, long available, const common::BlobConverter * bc) const override; - -protected: - class StringSearchContext : public attribute::StringMatcher, public attribute::SearchContext { - public: - StringSearchContext(QueryTermSimpleUP qTerm, const StringAttribute & toBeSearched); - StringSearchContext(StringSearchContext&&) noexcept; - ~StringSearchContext() override; - protected: - bool valid() const override; - const QueryTermUCS4 * queryTerm() const override; - bool isMatch(const char *src) const { return match(src); } - - class CollectHitCount { - public: - CollectHitCount() : _hitCount(0) { } - void addWeight(int32_t w) { - (void) w; - _hitCount++; - } - int32_t getWeight() const { return _hitCount; } - bool hasMatch() const { return _hitCount != 0; } - private: - uint32_t _hitCount; - }; - class CollectWeight { - public: - CollectWeight() : _hitCount(0), _weight(0) { } - void addWeight(int32_t w) { - _weight += w; - _hitCount++; - } - int32_t getWeight() const { return _weight; } - bool hasMatch() const { return _hitCount != 0; } - private: - uint32_t _hitCount; - int32_t _weight; - }; - - template<typename WeightedT, typename Accessor, typename Collector> - int32_t findNextMatch(vespalib::ConstArrayRef<WeightedT> w, int32_t elemId, const Accessor & ac, Collector & collector) const { - for (uint32_t i(elemId); i < w.size(); i++) { - if (isMatch(ac.get(w[i].value_ref().load_acquire()))) { - collector.addWeight(w[i].weight()); - return i; - } - } - return -1; - } - }; }; } diff --git a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp index 2307b778381..c7b81bc9da7 100644 --- a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp +++ b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.cpp @@ -105,41 +105,11 @@ AndNotSearchStrict::internalSeek(uint32_t docid) } // namespace -OptimizedAndNotForBlackListing::OptimizedAndNotForBlackListing(MultiSearch::Children children) : - AndNotSearchStrictBase(std::move(children)) -{ -} - -void OptimizedAndNotForBlackListing::initRange(uint32_t beginid, uint32_t endid) -{ - AndNotSearch::initRange(beginid, endid); - setDocId(internalSeek<false>(beginid)); -} - -bool OptimizedAndNotForBlackListing::isBlackListIterator(const SearchIterator * iterator) -{ - return dynamic_cast<const BlackListIterator *>(iterator) != 0; -} - -void OptimizedAndNotForBlackListing::doSeek(uint32_t docid) -{ - setDocId(internalSeek<true>(docid)); -} - -void OptimizedAndNotForBlackListing::doUnpack(uint32_t docid) -{ - positive()->doUnpack(docid); -} - std::unique_ptr<SearchIterator> AndNotSearch::create(ChildrenIterators children_in, bool strict) { MultiSearch::Children children = std::move(children_in); if (strict) { - if ((children.size() == 2) && OptimizedAndNotForBlackListing::isBlackListIterator(children[1].get())) { - return std::make_unique<OptimizedAndNotForBlackListing>(std::move(children)); - } else { - return std::make_unique<AndNotSearchStrict>(std::move(children)); - } + return std::make_unique<AndNotSearchStrict>(std::move(children)); } else { return SearchIterator::UP(new AndNotSearch(std::move(children))); } diff --git a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h index d65a3d9c72e..e474ab7c90c 100644 --- a/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h +++ b/searchlib/src/vespa/searchlib/queryeval/andnotsearch.h @@ -48,50 +48,4 @@ private: UP andWith(UP filter, uint32_t estimate) override; }; -/** - * This is a specialized andnot iterator you get when you have no andnot's in you query and only get the blacklist blueprint. - * This one is now constructed at getSearch() phase. However this should be better handled in the AndNotBlueprint. - */ -class OptimizedAndNotForBlackListing : public AndNotSearchStrictBase -{ -private: - // This is the actual iterator that should be produced by the documentmetastore in searchcore, but that - // will probably be changed later on. An ordinary bitvector could be even better as that would open up for more optimizations. - //typedef FilterAttributeIteratorT<SingleValueSmallNumericAttribute::SingleSearchContext> BlackListIterator; - typedef AttributeIteratorT<SingleValueSmallNumericAttribute::SingleSearchContext> BlackListIterator; -public: - OptimizedAndNotForBlackListing(MultiSearch::Children children); - static bool isBlackListIterator(const SearchIterator * iterator); - - uint32_t seekFast(uint32_t docid) { - return internalSeek<true>(docid); - } - void initRange(uint32_t beginid, uint32_t endid) override; -private: - SearchIterator * positive() { return getChildren()[0].get(); } - BlackListIterator * blackList() { return static_cast<BlackListIterator *>(getChildren()[1].get()); } - template<bool doSeekOnly> - uint32_t internalSeek(uint32_t docid) { - uint32_t curr(docid); - while (true) { - if (doSeekOnly) { - positive()->doSeek(curr); - } else { - positive()->seek(curr); - } - if ( ! positive()->isAtEnd() ) { - curr = positive()->getDocId(); - if (! blackList()->seekFast(curr)) { - return curr; - } - curr++; - } else { - return search::endDocId; - } - } - } - void doSeek(uint32_t docid) override; - void doUnpack(uint32_t docid) override; -}; - } diff --git a/vespajlib/src/test/java/ai/vespa/validation/NameTest.java b/vespajlib/src/test/java/ai/vespa/validation/NameTest.java index 0b50340870e..26a640b0ec0 100644 --- a/vespajlib/src/test/java/ai/vespa/validation/NameTest.java +++ b/vespajlib/src/test/java/ai/vespa/validation/NameTest.java @@ -1,12 +1,8 @@ // Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package ai.vespa.validation; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.nio.file.Path; - -import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; /** |