diff options
Diffstat (limited to 'streamingvisitors')
4 files changed, 90 insertions, 115 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index bd22ba65816..49604135afc 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -91,7 +91,7 @@ ForceWordfolderInit::ForceWordfolderInit() Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION); } -static ForceWordfolderInit _G_forceNormWordFolderInit; +static ForceWordfolderInit G_forceNormWordFolderInit; // Leftovers from FS4 protocol with limited use here. enum queryflags { @@ -315,9 +315,15 @@ SearchVisitor::SearchVisitor(StorageComponent& component, bool SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept { - vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index); - auto found = _fieldSearchSpecMap.specMap().find(fId); - return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method(); + StringFieldIdTMap fieldIdMap; + _fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap); + for (const auto & fieldId : fieldIdMap.map()) { + auto found = _fieldSearchSpecMap.specMap().find(fieldId.second); + if ((found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method()) { + return true; + } + } + return false; } void @@ -408,10 +414,12 @@ SearchVisitor::init(const Parameters & params) if ( params.lookup("query", queryBlob) ) { LOG(spam, "Received query blob of %zu bytes", queryBlob.size()); VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size())); - // Create mapping from field name to field id, from field id to search spec, // and from index name to list of field ids _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config()); + auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs()); + // Add extra elements to mapping from field name to field id + _fieldSearchSpecMap.buildFromConfig(additionalFields); QueryTermDataFactory addOnFactory(this); _query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size())); @@ -424,18 +432,11 @@ SearchVisitor::init(const Parameters & params) LOG(warning, "Request without query stack count"); } - std::vector<vespalib::string> additionalFields; - registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs(), additionalFields); - - StringFieldIdTMap fieldsInQuery; - setupFieldSearchers(additionalFields, fieldsInQuery); - + StringFieldIdTMap fieldsInQuery = setupFieldSearchers(); setupScratchDocument(fieldsInQuery); - _syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery); setupAttributeVectors(); - setupAttributeVectorsForSorting(_sortSpec); _rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot()); @@ -451,7 +452,6 @@ SearchVisitor::init(const Parameters & params) // This depends on _fieldPathMap (from setupScratchDocument), // and IQueryEnvironment (from setupRankProcessors). prepare_field_searchers(); - } else { LOG(warning, "No query received"); } @@ -544,10 +544,7 @@ SearchVisitor::PositionInserter::PositionInserter(AttributeVector & attribute, A SearchVisitor::PositionInserter::~PositionInserter() = default; void -SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content & c) -{ - (void) c; -} +SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content &) { } void SearchVisitor::PositionInserter::onStructStart(const Content & c) @@ -620,7 +617,6 @@ SearchVisitor::RankController::setupRankProcessors(Query & query, { _rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile); _rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan); - LOG(debug, "Initialize rank processor"); _rankProcessor->initForRanking(wantedHitCount); // register attribute vectors needed for ranking processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields); @@ -652,8 +648,7 @@ SearchVisitor::RankController::rankMatchedDocument(uint32_t docId) { _rankProcessor->runRankProgram(docId); LOG(debug, "Rank score for matched document %u: %f", - docId, - _rankProcessor->getRankScore()); + docId, _rankProcessor->getRankScore()); if (_dumpFeatures) { _dumpProcessor->runRankProgram(docId); // we must transfer the score to this match data to make sure that the same hits @@ -733,9 +728,8 @@ SearchVisitor::SyntheticFieldsController::setup(const StringFieldIdTMap & fieldR } void -SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument & document) +SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument &) { - (void) document; } void @@ -745,10 +739,10 @@ SearchVisitor::SyntheticFieldsController::onDocumentMatch(StorageDocument & docu document.setField(_documentIdFId, std::make_unique<document::StringFieldValue>(documentId)); } -void -SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec, - std::vector<vespalib::string> & fieldList) +std::vector<vespalib::string> +SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec) { + std::vector<vespalib::string> fieldList; for (const vsm::DocsumTools::FieldSpec & spec : docsumSpec) { fieldList.push_back(spec.getOutputName()); const std::vector<vespalib::string> & inputNames = spec.getInputNames(); @@ -763,22 +757,20 @@ SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::Fiel fieldList.emplace_back("[docid]"); fieldList.emplace_back("[rank]"); fieldList.emplace_back("documentid"); + return fieldList; } -void -SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields, - StringFieldIdTMap & fieldsInQuery) +StringFieldIdTMap +SearchVisitor::setupFieldSearchers() { - // Add extra elements to mapping from field name to field id - _fieldSearchSpecMap.buildFromConfig(additionalFields); - // Reconfig field searchers based on the query _fieldSearchSpecMap.reconfigFromQuery(_query); // Map field name to field id for all fields in the query - _fieldSearchSpecMap.buildFieldsInQuery(_query, fieldsInQuery); + StringFieldIdTMap fieldsInQuery = _fieldSearchSpecMap.buildFieldsInQuery(_query); // Connect field names in the query to field searchers _fieldSearchSpecMap.buildSearcherMap(fieldsInQuery.map(), _fieldSearcherMap); + return fieldsInQuery; } void @@ -959,8 +951,7 @@ class SingleDocumentStore : public vsm::IDocSumCache { public: explicit SingleDocumentStore(const StorageDocument & doc) : _doc(doc) { } - const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override { - (void) docId; + const vsm::Document & getDocSum(const search::DocumentIdT &) const override { return _doc; } private: @@ -971,19 +962,12 @@ bool SearchVisitor::compatibleDocumentTypes(const document::DocumentType& typeA, const document::DocumentType& typeB) { - if (&typeA == &typeB) { - return true; - } else { - return (typeA.getName() == typeB.getName()); - } + return (&typeA == &typeB) || (typeA.getName() == typeB.getName()); } void -SearchVisitor::handleDocuments(const document::BucketId&, - DocEntryList & entries, - HitCounter& hitCounter) +SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries, HitCounter& ) { - (void) hitCounter; if (!_init_called) { init(_params); } @@ -1028,37 +1012,25 @@ SearchVisitor::handleDocument(StorageDocument & document) RankProcessor & rp = *_rankController.getRankProcessor(); vespalib::string documentId(document.docDoc().getId().getScheme().toString()); LOG(debug, "Matched document with id '%s'", documentId.c_str()); - document.setDocId(rp.getDocId()); - fillAttributeVectors(documentId, document); - _rankController.rankMatchedDocument(rp.getDocId()); - if (_shouldFillRankAttribute) { _rankAttribute.add(rp.getRankScore()); } - if (_rankController.keepMatchedDocument()) { - bool amongTheBest = _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, &document); - _syntheticFieldsController.onDocumentMatch(document, documentId); - SingleDocumentStore single(document); _summaryGenerator.setDocsumCache(single); group(document.docDoc(), rp.getRankScore(), false); - if (amongTheBest) { needToKeepDocument = true; } - } else { _hitsRejectedCount++; LOG(debug, "Do not keep document with id '%s' because rank score (%f) <= rank score drop limit (%f)", - documentId.c_str(), - rp.getRankScore(), - _rankController.getRankSetup()->getRankScoreDropLimit()); + documentId.c_str(), rp.getRankScore(), _rankController.getRankSetup()->getRankScoreDropLimit()); } } else { LOG(debug, "Did not match document with id '%s'", document.docDoc().getId().getScheme().toString().c_str()); diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h index 76b2016e2e2..709564bcf02 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h @@ -255,19 +255,15 @@ private: * @param docsumSpec config with the field names used by the docsum setup. * @param fieldList list of field names that are built. **/ - static void registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec, - std::vector<vespalib::string> & fieldList); + static std::vector<vespalib::string> registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec); /** * Setup the field searchers used when matching the query with the stream of documents. * This includes setting up various mappings in FieldSearchSpecMap and building mapping * for fields used by the query. * - * @param additionalFields list of additional field names used when setting up the mappings. - * @param fieldsInQuery mapping from field name to field id that are built based on the query. **/ - void setupFieldSearchers(const std::vector<vespalib::string> & additionalFields, - vsm::StringFieldIdTMap & fieldsInQuery); + vsm::StringFieldIdTMap setupFieldSearchers(); /** * Prepare the field searchers for the given query. diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp index e33408a2e26..4b0efd58a56 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp @@ -28,7 +28,8 @@ namespace vsm { namespace { -void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { +void +setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { if (arg1 == "prefix") { searcher->setMatchType(FieldSearcher::PREFIX); } else if (arg1 == "substring") { @@ -44,14 +45,14 @@ void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) { } -FieldSearchSpec::FieldSearchSpec() : - _id(0), - _name(), - _maxLength(0x100000), - _searcher(), - _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE), - _arg1(), - _reconfigured(false) +FieldSearchSpec::FieldSearchSpec() + : _id(0), + _name(), + _maxLength(0x100000), + _searcher(), + _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE), + _arg1(), + _reconfigured(false) { } FieldSearchSpec::~FieldSearchSpec() = default; @@ -150,7 +151,8 @@ FieldSearchSpec::reconfig(const QueryTerm & term) } } -vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f) +vespalib::asciistream & +operator <<(vespalib::asciistream & os, const FieldSearchSpec & f) { os << f._id << ' ' << f._name << ' '; if ( ! f._searcher) { @@ -171,7 +173,8 @@ namespace { const std::regex _G_array("\\[[0-9]+\\]"); } -vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & rawIndex) +vespalib::string +FieldSearchSpecMap::stripNonFields(vespalib::stringref rawIndex) { if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) { std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value); @@ -182,44 +185,48 @@ vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & raw return rawIndex; } -bool FieldSearchSpecMap::buildFieldsInQuery(const Query & query, StringFieldIdTMap & fieldsInQuery) const +void +FieldSearchSpecMap::addFieldsFromIndex(vespalib::stringref rawIndex, StringFieldIdTMap & fieldIdMap) const { + for (const auto & dtm : documentTypeMap()) { + const IndexFieldMapT & fim = dtm.second; + vespalib::string index(stripNonFields(rawIndex)); + auto fIt = fim.find(index); + if (fIt != fim.end()) { + for(FieldIdT fid : fIt->second) { + const FieldSearchSpec & spec = specMap().find(fid)->second; + LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.data(), index.c_str()); + if ((rawIndex != index) && (spec.name().find(index) == 0)) { + vespalib::string modIndex(rawIndex); + modIndex.append(spec.name().substr(index.size())); + fieldIdMap.add(modIndex, spec.id()); + } else { + fieldIdMap.add(spec.name(),spec.id()); + } + } + } else { + LOG(warning, "No valid indexes registered for index %s", rawIndex.data()); + } + } +} + +StringFieldIdTMap +FieldSearchSpecMap::buildFieldsInQuery(const Query & query) const { - bool retval(true); + StringFieldIdTMap fieldsInQuery; ConstQueryTermList qtl; query.getLeaves(qtl); for (const auto & term : qtl) { - for (const auto & dtm : documentTypeMap()) { - const IndexFieldMapT & fim = dtm.second; - vespalib::string rawIndex(term->index()); - vespalib::string index(stripNonFields(rawIndex)); - auto fIt = fim.find(index); - if (fIt != fim.end()) { - for(FieldIdT fid : fIt->second) { - const FieldSearchSpec & spec = specMap().find(fid)->second; - LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.c_str(), index.c_str()); - if ((rawIndex != index) && (spec.name().find(index) == 0)) { - vespalib::string modIndex(rawIndex); - modIndex.append(spec.name().substr(index.size())); - fieldsInQuery.add(modIndex, spec.id()); - } else { - fieldsInQuery.add(spec.name(),spec.id()); - } - } - } else { - LOG(warning, "No valid indexes registered for index %s", term->index().c_str()); - retval = false; - } - } + addFieldsFromIndex(term->index(), fieldsInQuery); } - return retval; + return fieldsInQuery; } -void FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded) +void +FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded) { - for(size_t i(0), m(otherFieldsNeeded.size()); i < m; i++) { - LOG(debug, "otherFieldsNeeded[%zd] = '%s'", i, otherFieldsNeeded[i].c_str()); - _nameIdMap.add(otherFieldsNeeded[i]); + for (const auto & i : otherFieldsNeeded) { + _nameIdMap.add(i); } } @@ -253,7 +260,8 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearch } -bool FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) +bool +FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf) { bool retval(true); LOG(spam, "Parsing %zd fields", conf->fieldspec.size()); @@ -297,12 +305,14 @@ FieldSearchSpecMap::reconfigFromQuery(const Query & query) } } -bool lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b) +bool +lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b) { return a->field() < b->field(); } -void FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) +void +FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) const { fieldSearcherMap.clear(); for (const auto & entry : fieldsInQuery) { @@ -331,7 +341,8 @@ FieldSearchSpecMap::get_distance_metric(const vespalib::string& name) const return vsm::NearestNeighborFieldSearcher::distance_metric_from_string(itr->second.get_arg1()); } -vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df) +vespalib::asciistream & +operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df) { os << "DocumentTypeMap = \n"; for (const auto & dtm : df.documentTypeMap()) { diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h index f7ca07b4dc5..43bb5b04481 100644 --- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h +++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h @@ -76,17 +76,13 @@ public: * Adds a [field name, field id] entry to the given mapping for each field name used in the given query. * This is achieved by mapping from query term index name -> list of field ids -> [field name, field id] pairs. **/ - bool buildFieldsInQuery(const search::streaming::Query & query, StringFieldIdTMap & fieldsInQuery) const; - - /** - * Adds a [field name, field id] entry to the given mapping for each field name in the given vector. - **/ - void buildFieldsInQuery(const std::vector<vespalib::string> & otherFieldsNeeded, StringFieldIdTMap & fieldsInQuery) const; + StringFieldIdTMap buildFieldsInQuery(const search::streaming::Query & query) const; + void addFieldsFromIndex(vespalib::stringref index, StringFieldIdTMap & fieldIdMap) const; /** * Adds a FieldSearcher object to the given field searcher map for each field name in the other map. **/ - void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap); + void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) const; const FieldSearchSpecMapT & specMap() const { return _specMap; } //const IndexFieldMapT & indexMap() const { return _documentTypeMap.begin()->second; } @@ -94,7 +90,7 @@ public: const StringFieldIdTMap & nameIdMap() const { return _nameIdMap; } friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & f); - static vespalib::string stripNonFields(const vespalib::string & rawIndex); + static vespalib::string stripNonFields(vespalib::stringref rawIndex); search::attribute::DistanceMetric get_distance_metric(const vespalib::string& name) const; private: |