summaryrefslogtreecommitdiffstats
path: root/streamingvisitors
diff options
context:
space:
mode:
authorHenning Baldersheim <balder@yahoo-inc.com>2024-01-03 13:14:28 +0000
committerHenning Baldersheim <balder@yahoo-inc.com>2024-01-03 13:14:28 +0000
commit26481d66f602014a9e9945d0ed10c3efc9a2f572 (patch)
treea99545a42d026df09b5f16ddcfe2aa168870da5d /streamingvisitors
parent8144d3fbc2b07921a0468f4956342ce9cb50de3f (diff)
- Must resolve index and check all fields if any require text matching.
- Make methods const if possible. - Return results instead of modifying a reference. - Various code unification.
Diffstat (limited to 'streamingvisitors')
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp86
-rw-r--r--streamingvisitors/src/vespa/searchvisitor/searchvisitor.h8
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp99
-rw-r--r--streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h12
4 files changed, 90 insertions, 115 deletions
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
index bd22ba65816..49604135afc 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp
@@ -91,7 +91,7 @@ ForceWordfolderInit::ForceWordfolderInit()
Fast_NormalizeWordFolder::DO_MULTICHAR_EXPANSION);
}
-static ForceWordfolderInit _G_forceNormWordFolderInit;
+static ForceWordfolderInit G_forceNormWordFolderInit;
// Leftovers from FS4 protocol with limited use here.
enum queryflags {
@@ -315,9 +315,15 @@ SearchVisitor::SearchVisitor(StorageComponent& component,
bool
SearchVisitor::is_text_matching(vespalib::stringref index) const noexcept {
- vsm::FieldIdT fId = _fieldSearchSpecMap.nameIdMap().fieldNo(index);
- auto found = _fieldSearchSpecMap.specMap().find(fId);
- return (found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method();
+ StringFieldIdTMap fieldIdMap;
+ _fieldSearchSpecMap.addFieldsFromIndex(index, fieldIdMap);
+ for (const auto & fieldId : fieldIdMap.map()) {
+ auto found = _fieldSearchSpecMap.specMap().find(fieldId.second);
+ if ((found != _fieldSearchSpecMap.specMap().end()) && found->second.uses_string_search_method()) {
+ return true;
+ }
+ }
+ return false;
}
void
@@ -408,10 +414,12 @@ SearchVisitor::init(const Parameters & params)
if ( params.lookup("query", queryBlob) ) {
LOG(spam, "Received query blob of %zu bytes", queryBlob.size());
VISITOR_TRACE(9, vespalib::make_string("Setting up for query blob of %zu bytes", queryBlob.size()));
-
// Create mapping from field name to field id, from field id to search spec,
// and from index name to list of field ids
_fieldSearchSpecMap.buildFromConfig(_env->get_vsm_fields_config());
+ auto additionalFields = registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs());
+ // Add extra elements to mapping from field name to field id
+ _fieldSearchSpecMap.buildFromConfig(additionalFields);
QueryTermDataFactory addOnFactory(this);
_query = Query(addOnFactory, vespalib::stringref(queryBlob.data(), queryBlob.size()));
@@ -424,18 +432,11 @@ SearchVisitor::init(const Parameters & params)
LOG(warning, "Request without query stack count");
}
- std::vector<vespalib::string> additionalFields;
- registerAdditionalFields(_env->get_docsum_tools()->getFieldSpecs(), additionalFields);
-
- StringFieldIdTMap fieldsInQuery;
- setupFieldSearchers(additionalFields, fieldsInQuery);
-
+ StringFieldIdTMap fieldsInQuery = setupFieldSearchers();
setupScratchDocument(fieldsInQuery);
-
_syntheticFieldsController.setup(_fieldSearchSpecMap.nameIdMap(), fieldsInQuery);
setupAttributeVectors();
-
setupAttributeVectorsForSorting(_sortSpec);
_rankController.setRankManagerSnapshot(_env->get_rank_manager_snapshot());
@@ -451,7 +452,6 @@ SearchVisitor::init(const Parameters & params)
// This depends on _fieldPathMap (from setupScratchDocument),
// and IQueryEnvironment (from setupRankProcessors).
prepare_field_searchers();
-
} else {
LOG(warning, "No query received");
}
@@ -544,10 +544,7 @@ SearchVisitor::PositionInserter::PositionInserter(AttributeVector & attribute, A
SearchVisitor::PositionInserter::~PositionInserter() = default;
void
-SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content & c)
-{
- (void) c;
-}
+SearchVisitor::PositionInserter::onPrimitive(uint32_t, const Content &) { }
void
SearchVisitor::PositionInserter::onStructStart(const Content & c)
@@ -620,7 +617,6 @@ SearchVisitor::RankController::setupRankProcessors(Query & query,
{
_rankSetup = &_rankManagerSnapshot->getRankSetup(_rankProfile);
_rankProcessor = std::make_unique<RankProcessor>(_rankManagerSnapshot, _rankProfile, query, location, _queryProperties, &attrMan);
- LOG(debug, "Initialize rank processor");
_rankProcessor->initForRanking(wantedHitCount);
// register attribute vectors needed for ranking
processAccessedAttributes(_rankProcessor->get_real_query_env(), true, attrMan, attributeFields);
@@ -652,8 +648,7 @@ SearchVisitor::RankController::rankMatchedDocument(uint32_t docId)
{
_rankProcessor->runRankProgram(docId);
LOG(debug, "Rank score for matched document %u: %f",
- docId,
- _rankProcessor->getRankScore());
+ docId, _rankProcessor->getRankScore());
if (_dumpFeatures) {
_dumpProcessor->runRankProgram(docId);
// we must transfer the score to this match data to make sure that the same hits
@@ -733,9 +728,8 @@ SearchVisitor::SyntheticFieldsController::setup(const StringFieldIdTMap & fieldR
}
void
-SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument & document)
+SearchVisitor::SyntheticFieldsController::onDocument(StorageDocument &)
{
- (void) document;
}
void
@@ -745,10 +739,10 @@ SearchVisitor::SyntheticFieldsController::onDocumentMatch(StorageDocument & docu
document.setField(_documentIdFId, std::make_unique<document::StringFieldValue>(documentId));
}
-void
-SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec,
- std::vector<vespalib::string> & fieldList)
+std::vector<vespalib::string>
+SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec)
{
+ std::vector<vespalib::string> fieldList;
for (const vsm::DocsumTools::FieldSpec & spec : docsumSpec) {
fieldList.push_back(spec.getOutputName());
const std::vector<vespalib::string> & inputNames = spec.getInputNames();
@@ -763,22 +757,20 @@ SearchVisitor::registerAdditionalFields(const std::vector<vsm::DocsumTools::Fiel
fieldList.emplace_back("[docid]");
fieldList.emplace_back("[rank]");
fieldList.emplace_back("documentid");
+ return fieldList;
}
-void
-SearchVisitor::setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
- StringFieldIdTMap & fieldsInQuery)
+StringFieldIdTMap
+SearchVisitor::setupFieldSearchers()
{
- // Add extra elements to mapping from field name to field id
- _fieldSearchSpecMap.buildFromConfig(additionalFields);
-
// Reconfig field searchers based on the query
_fieldSearchSpecMap.reconfigFromQuery(_query);
// Map field name to field id for all fields in the query
- _fieldSearchSpecMap.buildFieldsInQuery(_query, fieldsInQuery);
+ StringFieldIdTMap fieldsInQuery = _fieldSearchSpecMap.buildFieldsInQuery(_query);
// Connect field names in the query to field searchers
_fieldSearchSpecMap.buildSearcherMap(fieldsInQuery.map(), _fieldSearcherMap);
+ return fieldsInQuery;
}
void
@@ -959,8 +951,7 @@ class SingleDocumentStore : public vsm::IDocSumCache
{
public:
explicit SingleDocumentStore(const StorageDocument & doc) : _doc(doc) { }
- const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override {
- (void) docId;
+ const vsm::Document & getDocSum(const search::DocumentIdT &) const override {
return _doc;
}
private:
@@ -971,19 +962,12 @@ bool
SearchVisitor::compatibleDocumentTypes(const document::DocumentType& typeA,
const document::DocumentType& typeB)
{
- if (&typeA == &typeB) {
- return true;
- } else {
- return (typeA.getName() == typeB.getName());
- }
+ return (&typeA == &typeB) || (typeA.getName() == typeB.getName());
}
void
-SearchVisitor::handleDocuments(const document::BucketId&,
- DocEntryList & entries,
- HitCounter& hitCounter)
+SearchVisitor::handleDocuments(const document::BucketId&, DocEntryList & entries, HitCounter& )
{
- (void) hitCounter;
if (!_init_called) {
init(_params);
}
@@ -1028,37 +1012,25 @@ SearchVisitor::handleDocument(StorageDocument & document)
RankProcessor & rp = *_rankController.getRankProcessor();
vespalib::string documentId(document.docDoc().getId().getScheme().toString());
LOG(debug, "Matched document with id '%s'", documentId.c_str());
-
document.setDocId(rp.getDocId());
-
fillAttributeVectors(documentId, document);
-
_rankController.rankMatchedDocument(rp.getDocId());
-
if (_shouldFillRankAttribute) {
_rankAttribute.add(rp.getRankScore());
}
-
if (_rankController.keepMatchedDocument()) {
-
bool amongTheBest = _rankController.collectMatchedDocument(!_sortList.empty(), *this, _tmpSortBuffer, &document);
-
_syntheticFieldsController.onDocumentMatch(document, documentId);
-
SingleDocumentStore single(document);
_summaryGenerator.setDocsumCache(single);
group(document.docDoc(), rp.getRankScore(), false);
-
if (amongTheBest) {
needToKeepDocument = true;
}
-
} else {
_hitsRejectedCount++;
LOG(debug, "Do not keep document with id '%s' because rank score (%f) <= rank score drop limit (%f)",
- documentId.c_str(),
- rp.getRankScore(),
- _rankController.getRankSetup()->getRankScoreDropLimit());
+ documentId.c_str(), rp.getRankScore(), _rankController.getRankSetup()->getRankScoreDropLimit());
}
} else {
LOG(debug, "Did not match document with id '%s'", document.docDoc().getId().getScheme().toString().c_str());
diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
index 76b2016e2e2..709564bcf02 100644
--- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
+++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.h
@@ -255,19 +255,15 @@ private:
* @param docsumSpec config with the field names used by the docsum setup.
* @param fieldList list of field names that are built.
**/
- static void registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec,
- std::vector<vespalib::string> & fieldList);
+ static std::vector<vespalib::string> registerAdditionalFields(const std::vector<vsm::DocsumTools::FieldSpec> & docsumSpec);
/**
* Setup the field searchers used when matching the query with the stream of documents.
* This includes setting up various mappings in FieldSearchSpecMap and building mapping
* for fields used by the query.
*
- * @param additionalFields list of additional field names used when setting up the mappings.
- * @param fieldsInQuery mapping from field name to field id that are built based on the query.
**/
- void setupFieldSearchers(const std::vector<vespalib::string> & additionalFields,
- vsm::StringFieldIdTMap & fieldsInQuery);
+ vsm::StringFieldIdTMap setupFieldSearchers();
/**
* Prepare the field searchers for the given query.
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
index e33408a2e26..4b0efd58a56 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.cpp
@@ -28,7 +28,8 @@ namespace vsm {
namespace {
-void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
+void
+setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
if (arg1 == "prefix") {
searcher->setMatchType(FieldSearcher::PREFIX);
} else if (arg1 == "substring") {
@@ -44,14 +45,14 @@ void setMatchType(FieldSearcherContainer & searcher, vespalib::stringref arg1) {
}
-FieldSearchSpec::FieldSearchSpec() :
- _id(0),
- _name(),
- _maxLength(0x100000),
- _searcher(),
- _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE),
- _arg1(),
- _reconfigured(false)
+FieldSearchSpec::FieldSearchSpec()
+ : _id(0),
+ _name(),
+ _maxLength(0x100000),
+ _searcher(),
+ _searchMethod(VsmfieldsConfig::Fieldspec::Searchmethod::NONE),
+ _arg1(),
+ _reconfigured(false)
{
}
FieldSearchSpec::~FieldSearchSpec() = default;
@@ -150,7 +151,8 @@ FieldSearchSpec::reconfig(const QueryTerm & term)
}
}
-vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpec & f)
+vespalib::asciistream &
+operator <<(vespalib::asciistream & os, const FieldSearchSpec & f)
{
os << f._id << ' ' << f._name << ' ';
if ( ! f._searcher) {
@@ -171,7 +173,8 @@ namespace {
const std::regex _G_array("\\[[0-9]+\\]");
}
-vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & rawIndex)
+vespalib::string
+FieldSearchSpecMap::stripNonFields(vespalib::stringref rawIndex)
{
if ((rawIndex.find('[') != vespalib::string::npos) || (rawIndex.find('{') != vespalib::string::npos)) {
std::string index = std::regex_replace(std::string(rawIndex), _G_map1, _G_value);
@@ -182,44 +185,48 @@ vespalib::string FieldSearchSpecMap::stripNonFields(const vespalib::string & raw
return rawIndex;
}
-bool FieldSearchSpecMap::buildFieldsInQuery(const Query & query, StringFieldIdTMap & fieldsInQuery) const
+void
+FieldSearchSpecMap::addFieldsFromIndex(vespalib::stringref rawIndex, StringFieldIdTMap & fieldIdMap) const {
+ for (const auto & dtm : documentTypeMap()) {
+ const IndexFieldMapT & fim = dtm.second;
+ vespalib::string index(stripNonFields(rawIndex));
+ auto fIt = fim.find(index);
+ if (fIt != fim.end()) {
+ for(FieldIdT fid : fIt->second) {
+ const FieldSearchSpec & spec = specMap().find(fid)->second;
+ LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.data(), index.c_str());
+ if ((rawIndex != index) && (spec.name().find(index) == 0)) {
+ vespalib::string modIndex(rawIndex);
+ modIndex.append(spec.name().substr(index.size()));
+ fieldIdMap.add(modIndex, spec.id());
+ } else {
+ fieldIdMap.add(spec.name(),spec.id());
+ }
+ }
+ } else {
+ LOG(warning, "No valid indexes registered for index %s", rawIndex.data());
+ }
+ }
+}
+
+StringFieldIdTMap
+FieldSearchSpecMap::buildFieldsInQuery(const Query & query) const
{
- bool retval(true);
+ StringFieldIdTMap fieldsInQuery;
ConstQueryTermList qtl;
query.getLeaves(qtl);
for (const auto & term : qtl) {
- for (const auto & dtm : documentTypeMap()) {
- const IndexFieldMapT & fim = dtm.second;
- vespalib::string rawIndex(term->index());
- vespalib::string index(stripNonFields(rawIndex));
- auto fIt = fim.find(index);
- if (fIt != fim.end()) {
- for(FieldIdT fid : fIt->second) {
- const FieldSearchSpec & spec = specMap().find(fid)->second;
- LOG(debug, "buildFieldsInQuery = rawIndex='%s', index='%s'", rawIndex.c_str(), index.c_str());
- if ((rawIndex != index) && (spec.name().find(index) == 0)) {
- vespalib::string modIndex(rawIndex);
- modIndex.append(spec.name().substr(index.size()));
- fieldsInQuery.add(modIndex, spec.id());
- } else {
- fieldsInQuery.add(spec.name(),spec.id());
- }
- }
- } else {
- LOG(warning, "No valid indexes registered for index %s", term->index().c_str());
- retval = false;
- }
- }
+ addFieldsFromIndex(term->index(), fieldsInQuery);
}
- return retval;
+ return fieldsInQuery;
}
-void FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded)
+void
+FieldSearchSpecMap::buildFromConfig(const std::vector<vespalib::string> & otherFieldsNeeded)
{
- for(size_t i(0), m(otherFieldsNeeded.size()); i < m; i++) {
- LOG(debug, "otherFieldsNeeded[%zd] = '%s'", i, otherFieldsNeeded[i].c_str());
- _nameIdMap.add(otherFieldsNeeded[i]);
+ for (const auto & i : otherFieldsNeeded) {
+ _nameIdMap.add(i);
}
}
@@ -253,7 +260,8 @@ buildFieldSet(const VsmfieldsConfig::Documenttype::Index & ci, const FieldSearch
}
-bool FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
+bool
+FieldSearchSpecMap::buildFromConfig(const VsmfieldsHandle & conf)
{
bool retval(true);
LOG(spam, "Parsing %zd fields", conf->fieldspec.size());
@@ -297,12 +305,14 @@ FieldSearchSpecMap::reconfigFromQuery(const Query & query)
}
}
-bool lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b)
+bool
+lesserField(const FieldSearcherContainer & a, const FieldSearcherContainer & b)
{
return a->field() < b->field();
}
-void FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap)
+void
+FieldSearchSpecMap::buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) const
{
fieldSearcherMap.clear();
for (const auto & entry : fieldsInQuery) {
@@ -331,7 +341,8 @@ FieldSearchSpecMap::get_distance_metric(const vespalib::string& name) const
return vsm::NearestNeighborFieldSearcher::distance_metric_from_string(itr->second.get_arg1());
}
-vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df)
+vespalib::asciistream &
+operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & df)
{
os << "DocumentTypeMap = \n";
for (const auto & dtm : df.documentTypeMap()) {
diff --git a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
index f7ca07b4dc5..43bb5b04481 100644
--- a/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
+++ b/streamingvisitors/src/vespa/vsm/vsm/fieldsearchspec.h
@@ -76,17 +76,13 @@ public:
* Adds a [field name, field id] entry to the given mapping for each field name used in the given query.
* This is achieved by mapping from query term index name -> list of field ids -> [field name, field id] pairs.
**/
- bool buildFieldsInQuery(const search::streaming::Query & query, StringFieldIdTMap & fieldsInQuery) const;
-
- /**
- * Adds a [field name, field id] entry to the given mapping for each field name in the given vector.
- **/
- void buildFieldsInQuery(const std::vector<vespalib::string> & otherFieldsNeeded, StringFieldIdTMap & fieldsInQuery) const;
+ StringFieldIdTMap buildFieldsInQuery(const search::streaming::Query & query) const;
+ void addFieldsFromIndex(vespalib::stringref index, StringFieldIdTMap & fieldIdMap) const;
/**
* Adds a FieldSearcher object to the given field searcher map for each field name in the other map.
**/
- void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap);
+ void buildSearcherMap(const StringFieldIdTMapT & fieldsInQuery, FieldIdTSearcherMap & fieldSearcherMap) const;
const FieldSearchSpecMapT & specMap() const { return _specMap; }
//const IndexFieldMapT & indexMap() const { return _documentTypeMap.begin()->second; }
@@ -94,7 +90,7 @@ public:
const StringFieldIdTMap & nameIdMap() const { return _nameIdMap; }
friend vespalib::asciistream & operator <<(vespalib::asciistream & os, const FieldSearchSpecMap & f);
- static vespalib::string stripNonFields(const vespalib::string & rawIndex);
+ static vespalib::string stripNonFields(vespalib::stringref rawIndex);
search::attribute::DistanceMetric get_distance_metric(const vespalib::string& name) const;
private: