summary refs log tree commit diff stats
path: root/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp')
-rw-r--r-- streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp 114
1 files changed, 50 insertions, 64 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index 4d31c71c0a0..49604135afc 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -91,7 +91,7 @@ ForceWordfolderInit::ForceWordfolderInit()
Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION);
}
-static ForceWordfolderInit _G_forceNormWordFolderInit;
+static ForceWordfolderInit G_forceNormWordFolderInit;
// Leftovers from FS4 protocol with limited use here.
enum queryflags {
@@ -238,14 +238,16 @@ SearchVisitor::SummaryGenerator::fillSummary(AttributeVector::DocId lid, const H
return {};
}
-void SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
+void
+SearchVisitor::HitsResultPreparator::execute(vespalib::Identifiable & obj)
{
auto & hitsAggr(static_cast<HitsAggregationResult &>(obj));
hitsAggr.setSummaryGenerator(_summaryGenerator);
_numHitsAggregators++;
}
-bool SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
+bool
+SearchVisitor::HitsResultPreparator::check(const vespalib::Identifiable & obj) const
{
return obj.getClass().inherits(HitsAggregationResult::classId);
}
@@ -259,7 +261,8 @@ SearchVisitor::GroupingEntry::GroupingEntry(Grouping * grouping) :
SearchVisitor::GroupingEntry::~GroupingEntry() = default;
-void SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
+void
+SearchVisitor::GroupingEntry::aggregate(const document::Document & doc, search::HitRank rank)
{
if (_count < _limit) {
_grouping->aggregate(doc, rank);
@@ -310,7 +313,21 @@ SearchVisitor::SearchVisitor(StorageComponent& component,
LOG(debug, "Created SearchVisitor");
}
-void SearchVisitor::init(const Parameters & params)
+bool
+SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept {
+ StringFieldIdTMap fieldIdMap;
+ _fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap);
+ for (const auto & fieldId : fieldIdMap.map()) {
+ auto found = _fieldSearchSpecMap.specMap().find(fieldId.second);
+ if ((found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+SearchVisitor::init(const Parameters & params)
{
VISITOR_TRACE(6, "About to lazily init VSM adapter");
_attrMan.add(_documentIdAttributeBacking);
@@ -397,7 +414,14 @@ void SearchVisitor::init(const Parameters & params)
if ( params.lookup("query", queryBlob) ) {
LOG(spam, "Received query blob of %zu bytes", queryBlob.size());
VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size()));
- QueryTermDataFactory addOnFactory;
+ // Create mapping from field name to field id, from field id to search spec,
+ // and from index name to list of field ids
+ _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
+ auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs());
+ // Add extra elements to mapping from field name to field id
+ _fieldSearchSpecMap.buildFromConfig(additionalFields);
+
+ QueryTermDataFactory addOnFactory(this);
_query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size()));
_searchBuffer->reserve(0x10000);
@@ -408,19 +432,11 @@ void SearchVisitor::init(const Parameters & params)
LOG(warning, "Request without query stack count");
}
- std::vector<vespalib::string> additionalFields;
- registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs(), additionalFields);
-
- StringFieldIdTMap fieldsInQuery;
- setupFieldSearchers(additionalFields, fieldsInQuery);
-
-
+ StringFieldIdTMap fieldsInQuery = setupFieldSearchers();
setupScratchDocument(fieldsInQuery);
-
_syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery);
setupAttributeVectors();
-
setupAttributeVectorsForSorting(_sortSpec);
_rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot());
@@ -436,7 +452,6 @@ void SearchVisitor::init(const Parameters & params)
// This depends on _fieldPathMap (from setupScratchDocument),
// and IQueryEnvironment (from setupRankProcessors).
prepare_field_searchers();
-
} else {
LOG(warning, "No query received");
}
@@ -529,10 +544,7 @@ SearchVisitor::PositionInserter::PositionInserter(AttributeVector & attribute, A
SearchVisitor::PositionInserter::~PositionInserter() = default;
void
-SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content & c)
-{
- (void) c;
-}
+SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content &) { }
void
SearchVisitor::PositionInserter::onStructStart(const Content & c)
@@ -605,7 +617,6 @@ SearchVisitor::RankController::setupRankProcessors(Query & query,
{
_rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile);
_rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan);
- LOG(debug, "Initialize rank processor");
_rankProcessor->initForRanking(wantedHitCount);
// register attribute vectors needed for ranking
processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields);
@@ -637,8 +648,7 @@ SearchVisitor::RankController::rankMatchedDocument(uint32_t docId)
{
_rankProcessor->runRankProgram(docId);
LOG(debug, "Rank score for matched document %u: %f",
- docId,
- _rankProcessor->getRankScore());
+ docId, _rankProcessor->getRankScore());
if (_dumpFeatures) {
_dumpProcessor->runRankProgram(docId);
// we must transfer the score to this match data to make sure that the same hits
@@ -718,9 +728,8 @@ SearchVisitor::SyntheticFieldsController::setup(const StringFieldIdTMap & fieldR
}
void
-SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument & document)
+SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument &)
{
- (void) document;
}
void
@@ -730,10 +739,10 @@ SearchVisitor::SyntheticFieldsController::onDocumentMatch(StorageDocument & docu
document.setField(_documentIdFId, std::make_unique<document::StringFieldValue>(documentId));
}
-void
-SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec,
- std::vector<vespalib::string> & fieldList)
+std::vector<vespalib::string>
+SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec)
{
+ std::vector<vespalib::string> fieldList;
for (const vsm::DocsumTools::FieldSpec & spec : docsumSpec) {
fieldList.push_back(spec.getOutputName());
const std::vector<vespalib::string> & inputNames = spec.getInputNames();
@@ -748,25 +757,20 @@ SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::Fiel
fieldList.emplace_back("[docid]");
fieldList.emplace_back("[rank]");
fieldList.emplace_back("documentid");
+ return fieldList;
}
-void
-SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
- StringFieldIdTMap & fieldsInQuery)
+StringFieldIdTMap
+SearchVisitor::setupFieldSearchers()
{
- // Create mapping from field name to field id, from field id to search spec,
- // and from index name to list of field ids
- _fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
- // Add extra elements to mapping from field name to field id
- _fieldSearchSpecMap.buildFromConfig(additionalFields);
-
// Reconfig field searchers based on the query
_fieldSearchSpecMap.reconfigFromQuery(_query);
// Map field name to field id for all fields in the query
- _fieldSearchSpecMap.buildFieldsInQuery(_query, fieldsInQuery);
+ StringFieldIdTMap fieldsInQuery = _fieldSearchSpecMap.buildFieldsInQuery(_query);
// Connect field names in the query to field searchers
_fieldSearchSpecMap.buildSearcherMap(fieldsInQuery.map(), _fieldSearcherMap);
+ return fieldsInQuery;
}
void
@@ -947,8 +951,7 @@ class SingleDocumentStore : public vsm::IDocSumCache
{
public:
explicit SingleDocumentStore(const StorageDocument & doc) : _doc(doc) { }
- const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override {
- (void) docId;
+ const vsm::Document & getDocSum(const search::DocumentIdT &) const override {
return _doc;
}
private:
@@ -959,19 +962,12 @@ bool
SearchVisitor::compatibleDocumentTypes(const document::DocumentType& typeA,
const document::DocumentType& typeB)
{
- if (&typeA == &typeB) {
- return true;
- } else {
- return (typeA.getName() == typeB.getName());
- }
+ return (&typeA == &typeB) || (typeA.getName() == typeB.getName());
}
void
-SearchVisitor::handleDocuments(const document::BucketId&,
- DocEntryList & entries,
- HitCounter& hitCounter)
+SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries, HitCounter& )
{
- (void) hitCounter;
if (!_init_called) {
init(_params);
}
@@ -1016,37 +1012,25 @@ SearchVisitor::handleDocument(StorageDocument & document)
RankProcessor & rp = *_rankController.getRankProcessor();
vespalib::string documentId(document.docDoc().getId().getScheme().toString());
LOG(debug, "Matched document with id '%s'", documentId.c_str());
-
document.setDocId(rp.getDocId());
-
fillAttributeVectors(documentId, document);
-
_rankController.rankMatchedDocument(rp.getDocId());
-
if (_shouldFillRankAttribute) {
_rankAttribute.add(rp.getRankScore());
}
-
if (_rankController.keepMatchedDocument()) {
-
bool amongTheBest = _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, &document);
-
_syntheticFieldsController.onDocumentMatch(document, documentId);
-
SingleDocumentStore single(document);
_summaryGenerator.setDocsumCache(single);
group(document.docDoc(), rp.getRankScore(), false);
-
if (amongTheBest) {
needToKeepDocument = true;
}
-
} else {
_hitsRejectedCount++;
LOG(debug, "Do not keep document with id '%s' because rank score (%f) <= rank score drop limit (%f)",
- documentId.c_str(),
- rp.getRankScore(),
- _rankController.getRankSetup()->getRankScoreDropLimit());
+ documentId.c_str(), rp.getRankScore(), _rankController.getRankSetup()->getRankScoreDropLimit());
}
} else {
LOG(debug, "Did not match document with id '%s'", document.docDoc().getId().getScheme().toString().c_str());
@@ -1145,7 +1129,8 @@ SearchVisitor::fillSortBuffer()
return pos;
}
-void SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
+void
+SearchVisitor::completedBucket(const document::BucketId&, HitCounter&)
{
LOG(debug, "Completed bucket");
}
@@ -1157,7 +1142,8 @@ SearchVisitor::generate_query_result(HitCounter& counter)
return std::move(_queryResult);
}
-void SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
+void
+SearchVisitor::completedVisitingInternal(HitCounter& hitCounter)
{
if (!_init_called) {
init(_params);