diff options
9 files changed, 155 insertions, 143 deletions
diff --git a/searchlib/src/tests/query/streaming_query_test.cpp b/searchlib/src/tests/query/streaming_query_test.cpp index 08705fa837b..0a751e96222 100644 --- a/searchlib/src/tests/query/streaming_query_test.cpp +++ b/searchlib/src/tests/query/streaming_query_test.cpp @@ -287,7 +287,10 @@ TEST(StreamingQueryTest, test_query_language) class AllowRewrite : public QueryNodeResultFactory { public: - virtual bool getRewriteFloatTerms() const override { return true; } + explicit AllowRewrite(vespalib::stringref index) noexcept : _allowedIndex(index) {} + bool getRewriteFloatTerms(vespalib::stringref index) const noexcept override { return index == _allowedIndex; } +private: + vespalib::string _allowedIndex; }; const char TERM_UNIQ = static_cast<char>(ParseItem::ITEM_TERM) | static_cast<char>(ParseItem::IF_UNIQUEID); @@ -297,12 +300,12 @@ TEST(StreamingQueryTest, e_is_not_rewritten_even_if_allowed) const char term[6] = {TERM_UNIQ, 3, 1, 'c', 1, 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(6u, stackDump.size()); - AllowRewrite allowRewrite; + AllowRewrite allowRewrite("c"); const Query q(allowRewrite, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr); - const QueryTerm & qt = static_cast<const QueryTerm &>(root); + const auto & qt = static_cast<const QueryTerm &>(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -313,12 +316,12 @@ TEST(StreamingQueryTest, onedot0e_is_not_rewritten_by_default) const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(9u, stackDump.size()); - QueryNodeResultFactory empty; + AllowRewrite empty("nix"); const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(&root) != nullptr); - const QueryTerm & qt = static_cast<const QueryTerm &>(root); + const auto & qt = static_cast<const QueryTerm &>(root); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); @@ -329,34 +332,34 @@ TEST(StreamingQueryTest, onedot0e_is_rewritten_if_allowed_too) const char term[9] = {TERM_UNIQ, 3, 1, 'c', 4, '1', '.', '0', 'e'}; vespalib::stringref stackDump(term, sizeof(term)); EXPECT_EQ(9u, stackDump.size()); - AllowRewrite empty; + AllowRewrite empty("c"); const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); EXPECT_TRUE(dynamic_cast<const EquivQueryNode *>(&root) != nullptr); - const EquivQueryNode & equiv = static_cast<const EquivQueryNode &>(root); + const auto & equiv = static_cast<const EquivQueryNode &>(root); EXPECT_EQ(2u, equiv.size()); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(equiv[0].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*equiv[0]); + const auto & qt = static_cast<const QueryTerm &>(*equiv[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1.0e"), qt.getTerm()); EXPECT_EQ(3u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast<const PhraseQueryNode *>(equiv[1].get()) != nullptr); { - const PhraseQueryNode & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); + const auto & phrase = static_cast<const PhraseQueryNode &>(*equiv[1]); EXPECT_EQ(2u, phrase.size()); EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[0].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[0]); + const auto & qt = static_cast<const QueryTerm &>(*phrase[0]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("1"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); } EXPECT_TRUE(dynamic_cast<const QueryTerm *>(phrase[1].get()) != nullptr); { - const QueryTerm & qt = static_cast<const QueryTerm &>(*phrase[1]); + const auto & qt = static_cast<const QueryTerm &>(*phrase[1]); EXPECT_EQ("c", qt.index()); EXPECT_EQ(vespalib::stringref("0e"), qt.getTerm()); EXPECT_EQ(0u, qt.uniqueId()); @@ -460,7 +463,7 @@ TEST(StreamingQueryTest, test_phrase_evaluate) terms[1]->add(1, 5, 0, 1); terms[2]->add(0, 5, 0, 1); HitList hits; - PhraseQueryNode * p = static_cast<PhraseQueryNode *>(phrases[0]); + auto * p = static_cast<PhraseQueryNode *>(phrases[0]); p->evaluateHits(hits); ASSERT_EQ(3u, hits.size()); EXPECT_EQ(hits[0].wordpos(), 2u); @@ -750,7 +753,7 @@ TEST(StreamingQueryTest, require_that_incorrectly_specified_diversity_can_be_par TEST(StreamingQueryTest, require_that_we_do_not_break_the_stack_on_bad_query) { - QueryTermSimple term("<form><iframe+	 +src=\\\"javascript:alert(1)\\\" 	;>", TermType::WORD); + QueryTermSimple term(R"(<form><iframe+	 +src=\"javascript:alert(1)\" 	;>)", TermType::WORD); EXPECT_FALSE(term.isValid()); } @@ -759,7 +762,7 @@ TEST(StreamingQueryTest, a_unhandled_sameElement_stack) const char * stack = "\022\002\026xyz_abcdefghij_xyzxyzxQ\001\vxxxxxx_name\034xxxxxx_xxxx_xxxxxxx_xxxxxxxxE\002\005delta\b<0.00393"; vespalib::stringref stackDump(stack); EXPECT_EQ(85u, stackDump.size()); - AllowRewrite empty; + AllowRewrite empty(""); const Query q(empty, stackDump); EXPECT_TRUE(q.valid()); const QueryNode & root = q.getRoot(); @@ -793,7 +796,7 @@ TEST(StreamingQueryTest, test_same_element_evaluate) vespalib::string stackDump = StackDumpCreator::create(*node); QueryNodeResultFactory empty; Query q(empty, stackDump); - SameElementQueryNode * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); + auto * sameElem = dynamic_cast<SameElementQueryNode *>(&q.getRoot()); EXPECT_TRUE(sameElem != nullptr); EXPECT_EQ("field", sameElem->getIndex()); EXPECT_EQ(3u, sameElem->size()); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp index 6b3aa7a2fd0..9484999f45a 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp +++ b/searchlib/src/vespa/searchlib/query/streaming/querynode.cpp @@ -156,7 +156,7 @@ QueryNode::Build(const QueryNode * parent, const QueryNodeResultFactory & factor qt->setFuzzyMaxEditDistance(queryRep.getFuzzyMaxEditDistance()); qt->setFuzzyPrefixLength(queryRep.getFuzzyPrefixLength()); } - if (possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms() && allowRewrite) { + if (allowRewrite && possibleFloat(*qt, ssTerm) && factory.getRewriteFloatTerms(ssIndex)) { auto phrase = std::make_unique<PhraseQueryNode>(); auto dotPos = ssTerm.find('.'); phrase->addChild(std::make_unique<QueryTerm>(factory.create(), ssTerm.substr(0, dotPos), ssIndex, TermType::WORD)); diff --git a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h index 10f3b7cbf21..d7704fb60e1 100644 --- a/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h +++ b/searchlib/src/vespa/searchlib/query/streaming/querynoderesultbase.h @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #pragma once +#include <vespa/vespalib/stllike/string.h> #include <memory> namespace search::streaming { @@ -20,7 +21,7 @@ public: class QueryNodeResultFactory { public: virtual ~QueryNodeResultFactory() = default; - virtual bool getRewriteFloatTerms() const { return false; } + virtual bool getRewriteFloatTerms(vespalib::stringref index) const noexcept { (void) index; return false; } virtual std::unique_ptr<QueryNodeResultBase> create() const { return {}; } }; } diff --git a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp index 2d138d1d336..93e35e4c6d2 100644 --- a/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp +++ b/streamingvisitors/src/tests/rank_processor/rank_processor_test.cpp @@ -40,7 +40,7 @@ protected: RankProcessorTest::RankProcessorTest() : testing::Test(), - _factory(), + _factory(nullptr), _query(), _query_wrapper() { diff --git a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h index 8c1c3771917..36176f70d1d 100644 --- a/streamingvisitors/src/vespa/searchvisitor/querytermdata.h +++ b/streamingvisitors/src/vespa/searchvisitor/querytermdata.h @@ -17,15 +17,26 @@ private: search::fef::SimpleTermData _termData; public: QueryTermData * clone() const override { return new QueryTermData(); } - search::fef::SimpleTermData &getTermData() { return _termData; } + search::fef::SimpleTermData &getTermData() noexcept { return _termData; } +}; + +class SearchMethodInfo { +public: + virtual ~SearchMethodInfo() = default; + virtual bool is_text_matching(vespalib::stringref index) const noexcept = 0; }; class QueryTermDataFactory final : public search::streaming::QueryNodeResultFactory { public: + QueryTermDataFactory(const SearchMethodInfo * searchMethodInfo) noexcept : _searchMethodInfo(searchMethodInfo) {} std::unique_ptr<search::streaming::QueryNodeResultBase> create() const override { return std::make_unique<QueryTermData>(); } - bool getRewriteFloatTerms() const override { return true; } + bool getRewriteFloatTerms(vespalib::stringref index ) const noexcept override { + return _searchMethodInfo && _searchMethodInfo->is_text_matching(index); + } +private: + const SearchMethodInfo * _searchMethodInfo; }; diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index 4d31c71c0a0..49604135afc 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -91,7 +91,7 @@ ForceWordfolderInit::ForceWordfolderInit() Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION); } -static ForceWordfolderInit _G_forceNormWordFolderInit; +static ForceWordfolderInit G_forceNormWordFolderInit; // Leftovers from FS4 protocol with limited use here. enum queryflags { @@ -238,14 +238,16 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H return {}; } -void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) +void +SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj) { auto & hitsAggr(static_cast<HitsAggregationResult &>(obj)); hitsAggr.setSummaryGenerator(_summaryGenerator); _numHitsAggregators++; } -bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const +bool +SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const { return obj.getClass().inherits(HitsAggregationResult::classId); } @@ -259,7 +261,8 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) : SearchVisitor::GroupingEntry::~GroupingEntry() = default; -void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) +void +SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank) { if (_count < _limit) { _grouping->aggregate(doc, rank); @@ -310,7 +313,21 @@ SearchVisitor::SearchVisitor(StorageComponent& component, LOG(debug, "Created SearchVisitor"); } -void SearchVisitor::init(const Parameters & params) +bool +SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept { + StringFieldIdTMap fieldIdMap; + _fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap); + for (const auto & fieldId : fieldIdMap.map()) { + auto found = _fieldSearchSpecMap.specMap().find(fieldId.second); + if ((found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method()) { + return true; + } + } + return false; +} + +void +SearchVisitor::init(const Parameters & params) { VISITOR_TRACE(6, "About to lazily init VSM adapter"); _attrMan.add(_documentIdAttributeBacking); @@ -397,7 +414,14 @@ void SearchVisitor::init(const Parameters & params) if ( params.lookup("query", queryBlob) ) { LOG(spam, "Received query blob of %zu bytes", queryBlob.size()); VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); - QueryTermDataFactory addOnFactory; + // Create mapping from field name to field id, from field id to search spec, + // and from index name to list of field ids + _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); + auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs()); + // Add extra elements to mapping from field name to field id + _fieldSearchSpecMap.buildFromConfig(additionalFields); + + QueryTermDataFactory addOnFactory(this); _query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size())); _searchBuffer->reserve(0x10000); @@ -408,19 +432,11 @@ void SearchVisitor::init(const Parameters & params) LOG(warning, "Request without query stack count"); } - std::vector<vespalib::string> additionalFields; - registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs(), additionalFields); - - StringFieldIdTMap fieldsInQuery; - setupFieldSearchers(additionalFields, fieldsInQuery); - - + StringFieldIdTMap fieldsInQuery = setupFieldSearchers(); setupScratchDocument(fieldsInQuery); - _syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery); setupAttributeVectors(); - setupAttributeVectorsForSorting(_sortSpec); _rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot()); @@ -436,7 +452,6 @@ void SearchVisitor::init(const Parameters & params) // This depends on _fieldPathMap (from setupScratchDocument), // and IQueryEnvironment (from setupRankProcessors). prepare_field_searchers(); - } else { LOG(warning, "No query received"); } @@ -529,10 +544,7 @@ SearchVisitor::PositionInserter::PositionInserter(AttributeVector & attribute, A SearchVisitor::PositionInserter::~PositionInserter() = default; void -SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content & c) -{ - (void) c; -} +SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content &) { } void SearchVisitor::PositionInserter::onStructStart(const Content & c) @@ -605,7 +617,6 @@ SearchVisitor::RankController::setupRankProcessors(Query & query, { _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile); _rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan); - LOG(debug, "Initialize rank processor"); _rankProcessor->initForRanking(wantedHitCount); // register attribute vectors needed for ranking processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields); @@ -637,8 +648,7 @@ SearchVisitor::RankController::rankMatchedDocument(uint32_t docId) { _rankProcessor->runRankProgram(docId); LOG(debug, "Rank score for matched document %u: %f", - docId, - _rankProcessor->getRankScore()); + docId, _rankProcessor->getRankScore()); if (_dumpFeatures) { _dumpProcessor->runRankProgram(docId); // we must transfer the score to this match data to make sure that the same hits @@ -718,9 +728,8 @@ SearchVisitor::SyntheticFieldsController::setup(const StringFieldIdTMap & fieldR } void -SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument & document) +SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument &) { - (void) document; } void @@ -730,10 +739,10 @@ SearchVisitor::SyntheticFieldsController::onDocumentMatch(StorageDocument & docu document.setField(_documentIdFId, std::make_unique<document::StringFieldValue>(documentId)); } -void -SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec, - std::vector<vespalib::string> & fieldList) +std::vector<vespalib::string> +SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec) { + std::vector<vespalib::string> fieldList; for (const vsm::DocsumTools::FieldSpec & spec : docsumSpec) { fieldList.push_back(spec.getOutputName()); const std::vector<vespalib::string> & inputNames = spec.getInputNames(); @@ -748,25 +757,20 @@ SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::Fiel fieldList.emplace_back("[docid]"); fieldList.emplace_back("[rank]"); fieldList.emplace_back("documentid"); + return fieldList; } -void -SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields, - StringFieldIdTMap & fieldsInQuery) +StringFieldIdTMap +SearchVisitor::setupFieldSearchers() { - // Create mapping from field name to field id, from field id to search spec, - // and from index name to list of field ids - _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); - // Add extra elements to mapping from field name to field id - _fieldSearchSpecMap.buildFromConfig(additionalFields); - // Reconfig field searchers based on the query _fieldSearchSpecMap.reconfigFromQuery(_query); // Map field name to field id for all fields in the query - _fieldSearchSpecMap.buildFieldsInQuery(_query, fieldsInQuery); + StringFieldIdTMap fieldsInQuery = _fieldSearchSpecMap.buildFieldsInQuery(_query); // Connect field names in the query to field searchers _fieldSearchSpecMap.buildSearcherMap(fieldsInQuery.map(), _fieldSearcherMap); + return fieldsInQuery; } void @@ -947,8 +951,7 @@ class SingleDocumentStore : public vsm::IDocSumCache { public: explicit SingleDocumentStore(const StorageDocument & doc) : _doc(doc) { } - const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override { - (void) docId; + const vsm::Document & getDocSum(const search::DocumentIdT &) const override { return _doc; } private: @@ -959,19 +962,12 @@ bool SearchVisitor::compatibleDocumentTypes(const document::DocumentType& typeA, const document::DocumentType& typeB) { - if (&typeA == &typeB) { - return true; - } else { - return (typeA.getName() == typeB.getName()); - } + return (&typeA == &typeB) || (typeA.getName() == typeB.getName()); } void -SearchVisitor::handleDocuments(const document::BucketId&, - DocEntryList & entries, - HitCounter& hitCounter) +SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries, HitCounter& ) { - (void) hitCounter; if (!_init_called) { init(_params); } @@ -1016,37 +1012,25 @@ SearchVisitor::handleDocument(StorageDocument & document) RankProcessor & rp = *_rankController.getRankProcessor(); vespalib::string documentId(document.docDoc().getId().getScheme().toString()); LOG(debug, "Matched document with id '%s'", documentId.c_str()); - document.setDocId(rp.getDocId()); - fillAttributeVectors(documentId, document); - _rankController.rankMatchedDocument(rp.getDocId()); - if (_shouldFillRankAttribute) { _rankAttribute.add(rp.getRankScore()); } - if (_rankController.keepMatchedDocument()) { - bool amongTheBest = _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, &document); - _syntheticFieldsController.onDocumentMatch(document, documentId); - SingleDocumentStore single(document); _summaryGenerator.setDocsumCache(single); group(document.docDoc(), rp.getRankScore(), false); - if (amongTheBest) { needToKeepDocument = true; } - } else { _hitsRejectedCount++; LOG(debug, "Do not keep document with id '%s' because rank score (%f) <= rank score drop limit (%f)", - documentId.c_str(), - rp.getRankScore(), - _rankController.getRankSetup()->getRankScoreDropLimit()); + documentId.c_str(), rp.getRankScore(), _rankController.getRankSetup()->getRankScoreDropLimit()); } } else { LOG(debug, "Did not match document with id '%s'", document.docDoc().getId().getScheme().toString().c_str()); @@ -1145,7 +1129,8 @@ SearchVisitor::fillSortBuffer() return pos; } -void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) +void +SearchVisitor::completedBucket(const document::BucketId&, HitCounter&) { LOG(debug, "Completed bucket"); } @@ -1157,7 +1142,8 @@ SearchVisitor::generate_query_result(HitCounter& counter) return std::move(_queryResult); } -void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) +void +SearchVisitor::completedVisitingInternal(HitCounter& hitCounter) { if (!_init_called) { init(_params); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index ef7a41f23a5..709564bcf02 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -8,6 +8,7 @@ #include "rankmanager.h" #include "rankprocessor.h" #include "searchenvironment.h" +#include "querytermdata.h" #include <vespa/vsm/common/docsum.h> #include <vespa/vsm/common/documenttypemapping.h> #include <vespa/vsm/common/storagedocument.h> @@ -42,7 +43,8 @@ class SearchEnvironmentSnapshot; * @brief Visitor that applies a search query to visitor data and * converts them to a QueryResultCommand. **/ -class SearchVisitor : public storage::Visitor { +class SearchVisitor : public storage::Visitor, + public SearchMethodInfo { public: SearchVisitor(storage::StorageComponent&, storage::VisitorEnvironment& vEnv, const vdslib::Parameters & params); @@ -253,19 +255,15 @@ private: * @param docsumSpec config with the field names used by the docsum setup. * @param fieldList list of field names that are built. **/ - static void registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec, - std::vector<vespalib::string> & fieldList); + static std::vector<vespalib::string> registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec); /** * Setup the field searchers used when matching the query with the stream of documents. * This includes setting up various mappings in FieldSearchSpecMap and building mapping * for fields used by the query. * - * @param additionalFields list of additional field names used when setting up the mappings. - * @param fieldsInQuery mapping from field name to field id that are built based on the query. **/ - void setupFieldSearchers(const std::vector<vespalib::string> & additionalFields, - vsm::StringFieldIdTMap & fieldsInQuery); + vsm::StringFieldIdTMap setupFieldSearchers(); /** * Prepare the field searchers for the given query. @@ -488,6 +486,7 @@ private: vsm::StringFieldIdTMapT _fieldsUnion; void setupAttributeVector(const vsm::FieldPath &fieldPath); + bool is_text_matching(vespalib::stringref index) const noexcept override; }; class SearchVisitorFactory : public storage::VisitorFactory { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index e33408a2e26..4b0efd58a56 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -28,7 +28,8 @@ namespace vsm { namespace { -void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { +void +setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { if (arg1 == "prefix") { searcher->setMatchType(FieldSearcher::PREFIX); } else if (arg1 == "substring") { @@ -44,14 +45,14 @@ void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { } -FieldSearchSpec::FieldSearchSpec() : - _id(0), - _name(), - _maxLength(0x100000), - _searcher(), - _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE), - _arg1(), - _reconfigured(false) +FieldSearchSpec::FieldSearchSpec() + : _id(0), + _name(), + _maxLength(0x100000), + _searcher(), + _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE), + _arg1(), + _reconfigured(false) { } FieldSearchSpec::~FieldSearchSpec() = default; @@ -150,7 +151,8 @@ FieldSearchSpec::reconfig(const QueryTerm & term) } } -vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f) +vespalib::asciistream & +operator <<(vespalib::asciistream & os, const FieldSearchSpec & f) { os << f._id << ' ' << f._name << ' '; if ( ! f._searcher) { @@ -171,7 +173,8 @@ namespace { const std::regex _G_array("\\[[0-9]+\\]"); } -vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & rawIndex) +vespalib::string +FieldSearchSpecMap::stripNonFields(vespalib::stringref rawIndex) { if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) { std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value); @@ -182,44 +185,48 @@ vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & raw return rawIndex; } -bool FieldSearchSpecMap::buildFieldsInQuery(const Query & query, StringFieldIdTMap & fieldsInQuery) const +void +FieldSearchSpecMap::addFieldsFromIndex(vespalib::stringref rawIndex, StringFieldIdTMap & fieldIdMap) const { + for (const auto & dtm : documentTypeMap()) { + const IndexFieldMapT & fim = dtm.second; + vespalib::string index(stripNonFields(rawIndex)); + auto fIt = fim.find(index); + if (fIt != fim.end()) { + for(FieldIdT fid : fIt->second) { + const FieldSearchSpec & spec = specMap().find(fid)->second; + LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.data(), index.c_str()); + if ((rawIndex != index) && (spec.name().find(index) == 0)) { + vespalib::string modIndex(rawIndex); + modIndex.append(spec.name().substr(index.size())); + fieldIdMap.add(modIndex, spec.id()); + } else { + fieldIdMap.add(spec.name(),spec.id()); + } + } + } else { + LOG(warning, "No valid indexes registered for index %s", rawIndex.data()); + } + } +} + +StringFieldIdTMap +FieldSearchSpecMap::buildFieldsInQuery(const Query & query) const { - bool retval(true); + StringFieldIdTMap fieldsInQuery; ConstQueryTermList qtl; query.getLeaves(qtl); for (const auto & term : qtl) { - for (const auto & dtm : documentTypeMap()) { - const IndexFieldMapT & fim = dtm.second; - vespalib::string rawIndex(term->index()); - vespalib::string index(stripNonFields(rawIndex)); - auto fIt = fim.find(index); - if (fIt != fim.end()) { - for(FieldIdT fid : fIt->second) { - const FieldSearchSpec & spec = specMap().find(fid)->second; - LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.c_str(), index.c_str()); - if ((rawIndex != index) && (spec.name().find(index) == 0)) { - vespalib::string modIndex(rawIndex); - modIndex.append(spec.name().substr(index.size())); - fieldsInQuery.add(modIndex, spec.id()); - } else { - fieldsInQuery.add(spec.name(),spec.id()); - } - } - } else { - LOG(warning, "No valid indexes registered for index %s", term->index().c_str()); - retval = false; - } - } + addFieldsFromIndex(term->index(), fieldsInQuery); } - return retval; + return fieldsInQuery; } -void FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded) +void +FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded) { - for(size_t i(0), m(otherFieldsNeeded.size()); i < m; i++) { - LOG(debug, "otherFieldsNeeded[%zd] = '%s'", i, otherFieldsNeeded[i].c_str()); - _nameIdMap.add(otherFieldsNeeded[i]); + for (const auto & i : otherFieldsNeeded) { + _nameIdMap.add(i); } } @@ -253,7 +260,8 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearch } -bool FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) +bool +FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) { bool retval(true); LOG(spam, "Parsing %zd fields", conf->fieldspec.size()); @@ -297,12 +305,14 @@ FieldSearchSpecMap::reconfigFromQuery(const Query & query) } } -bool lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b) +bool +lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b) { return a->field() < b->field(); } -void FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) +void +FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) const { fieldSearcherMap.clear(); for (const auto & entry : fieldsInQuery) { @@ -331,7 +341,8 @@ FieldSearchSpecMap::get_distance_metric(const vespalib::string& name) const return vsm::NearestNeighborFieldSearcher::distance_metric_from_string(itr->second.get_arg1()); } -vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df) +vespalib::asciistream & +operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df) { os << "DocumentTypeMap = \n"; for (const auto & dtm : df.documentTypeMap()) { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index b0154a82dae..43bb5b04481 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -23,6 +23,11 @@ public: bool valid() const { return static_cast<bool>(_searcher); } size_t maxLength() const { return _maxLength; } bool uses_nearest_neighbor_search_method() const noexcept { return _searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::NEAREST_NEIGHBOR; } + bool uses_string_search_method() const noexcept { + return (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::UTF8) || + (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::AUTOUTF8) || + (_searchMethod == VsmfieldsConfig::Fieldspec::Searchmethod::SSE2UTF8); + } const vespalib::string& get_arg1() const noexcept { return _arg1; } /** @@ -71,17 +76,13 @@ public: * Adds a [field name, field id] entry to the given mapping for each field name used in the given query. * This is achieved by mapping from query term index name -> list of field ids -> [field name, field id] pairs. **/ - bool buildFieldsInQuery(const search::streaming::Query & query, StringFieldIdTMap & fieldsInQuery) const; - - /** - * Adds a [field name, field id] entry to the given mapping for each field name in the given vector. - **/ - void buildFieldsInQuery(const std::vector<vespalib::string> & otherFieldsNeeded, StringFieldIdTMap & fieldsInQuery) const; + StringFieldIdTMap buildFieldsInQuery(const search::streaming::Query & query) const; + void addFieldsFromIndex(vespalib::stringref index, StringFieldIdTMap & fieldIdMap) const; /** * Adds a FieldSearcher object to the given field searcher map for each field name in the other map. **/ - void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap); + void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) const; const FieldSearchSpecMapT & specMap() const { return _specMap; } //const IndexFieldMapT & indexMap() const { return _documentTypeMap.begin()->second; } @@ -89,7 +90,7 @@ public: const StringFieldIdTMap & nameIdMap() const { return _nameIdMap; } friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & f); - static vespalib::string stripNonFields(const vespalib::string & rawIndex); + static vespalib::string stripNonFields(vespalib::stringref rawIndex); search::attribute::DistanceMetric get_distance_metric(const vespalib::string& name) const; private: |