diff options
13 files changed, 35 insertions, 70 deletions
diff --git a/searchlib/src/tests/fef/fef_test.cpp b/searchlib/src/tests/fef/fef_test.cpp index b3107e57fae..56ea31a601b 100644 --- a/searchlib/src/tests/fef/fef_test.cpp +++ b/searchlib/src/tests/fef/fef_test.cpp @@ -36,7 +36,6 @@ Test::testLayout() EXPECT_EQUAL(mdl.allocFeature(), 2u); MatchData::UP md = mdl.createMatchData(); - EXPECT_EQUAL(TermFieldMatchData::invalidId(), md->getDocId()); EXPECT_EQUAL(md->getNumTermFields(), 3u); EXPECT_EQUAL(md->getNumFeatures(), 3u); TermFieldMatchData *t0 = md->resolveTermField(0); diff --git a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp index b6da6f9f5e3..b70b78d8d28 100644 --- a/searchlib/src/tests/fef/rank_program/rank_program_test.cpp +++ b/searchlib/src/tests/fef/rank_program/rank_program_test.cpp @@ -107,13 +107,6 @@ struct MySetup { } }; -TEST_F("require that match data docid is set by run", MySetup()) { - f1.compile(); - EXPECT_NOT_EQUAL(1u, f1.program.match_data().getDocId()); - f1.run(); - EXPECT_EQUAL(1u, f1.program.match_data().getDocId()); -} - TEST_F("require that simple program works", MySetup()) { EXPECT_EQUAL(15.0, f1.add("mysum(value(10),ivalue(5))").compile().run().get()); EXPECT_EQUAL(3u, f1.program.num_executors()); diff --git a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp index 26a02d38adf..be55d64c182 100644 --- a/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp +++ b/searchlib/src/tests/fef/termfieldmodel/termfieldmodel_test.cpp @@ -120,7 +120,6 @@ void testGenerate(State &state) { // fresh unpacked data - state.md->setDocId(10); state.f3->reset(10); { TermFieldMatchDataPosition pos; @@ -154,9 +153,9 @@ void testGenerate(State &state) { } void testAnalyze(State &state) { - EXPECT_EQUAL(state.md->getDocId(), state.f3->getDocId()); - EXPECT_NOT_EQUAL(state.md->getDocId(), state.f5->getDocId()); - EXPECT_EQUAL(state.md->getDocId(), state.f7->getDocId()); + EXPECT_EQUAL(10u, state.f3->getDocId()); + EXPECT_NOT_EQUAL(10u, state.f5->getDocId()); + EXPECT_EQUAL(10u, state.f7->getDocId()); FieldPositionsIterator it = state.f3->getIterator(); EXPECT_EQUAL(20u, it.getFieldLength()); diff --git a/searchlib/src/vespa/searchlib/fef/matchdata.cpp b/searchlib/src/vespa/searchlib/fef/matchdata.cpp index 898707e2fed..7782d50b9c8 100644 --- a/searchlib/src/vespa/searchlib/fef/matchdata.cpp +++ b/searchlib/src/vespa/searchlib/fef/matchdata.cpp @@ -7,8 +7,7 @@ namespace search { namespace fef { MatchData::MatchData(const Params &cparams) - : _docid(TermFieldMatchData::invalidId()), - _termFields(cparams.numTermFields()), + : _termFields(cparams.numTermFields()), _features(cparams.numFeatures()), _feature_is_object(cparams.numFeatures(), false), _termwise_limit(1.0) diff --git a/searchlib/src/vespa/searchlib/fef/matchdata.h b/searchlib/src/vespa/searchlib/fef/matchdata.h index 1db181cac78..86bbdf05262 100644 --- a/searchlib/src/vespa/searchlib/fef/matchdata.h +++ b/searchlib/src/vespa/searchlib/fef/matchdata.h @@ -20,7 +20,6 @@ namespace fef { class MatchData { private: - uint32_t _docid; std::vector<TermFieldMatchData> _termFields; std::vector<NumberOrObject> _features; std::vector<bool> _feature_is_object; @@ -86,25 +85,6 @@ public: void set_termwise_limit(double value) { _termwise_limit = value; } /** - * Set the document id for this match object. This method is - * invoked by the parallel query evaluation driver code during - * term data unpacking. - * - * @param docid docid for this match data - **/ - void setDocId(uint32_t docid) { _docid = docid; } - - /** - * Obtain the document id for this match data. This may be used to - * check if we have term match data for the document we are - * processing or not. Also, it will be used when merging hits from - * the heap back into the full result set. - * - * @return document id for this match data - **/ - uint32_t getDocId() const { return _docid; } - - /** * Obtain the number of term fields allocated in this match data * structure. * diff --git a/searchlib/src/vespa/searchlib/fef/rank_program.h b/searchlib/src/vespa/searchlib/fef/rank_program.h index d5e2fbb88cc..52d92d7d965 100644 --- a/searchlib/src/vespa/searchlib/fef/rank_program.h +++ b/searchlib/src/vespa/searchlib/fef/rank_program.h @@ -114,8 +114,6 @@ public: * @param docid the document we are ranking **/ void run(uint32_t docid) { - MatchData &md = match_data(); - md.setDocId(docid); for (FeatureExecutor *executor: _program) { executor->execute(docid); } diff --git a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp index 2324198d302..96c1a183f06 100644 --- a/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/matchdatabuilder.cpp @@ -23,11 +23,10 @@ MatchDataBuilder::MatchDataBuilder(QueryEnvironment &queryEnv, MatchData &data) _index(), _match() { - // reset all match data objects and set docId to 'endId' (aka -1) + // reset all match data objects. for (TermFieldHandle handle = 0; handle < _data.getNumTermFields(); ++handle) { _data.resolveTermField(handle)->reset(TermFieldMatchData::invalidId()); } - _data.setDocId(TermFieldMatchData::invalidId()); } TermFieldMatchData * @@ -119,8 +118,6 @@ MatchDataBuilder::setWeight(const vespalib::string &fieldName, uint32_t termId, bool MatchDataBuilder::apply(uint32_t docId) { - _data.setDocId(docId); - // For each term, do for (TermMap::const_iterator term_iter = _match.begin(); term_iter != _match.end(); ++term_iter) diff --git a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp index e362c9f83fe..bed3ae8bc36 100644 --- a/streamingvisitors/src/tests/hitcollector/hitcollector.cpp +++ b/streamingvisitors/src/tests/hitcollector/hitcollector.cpp @@ -71,8 +71,7 @@ HitCollectorTest::addHit(HitCollector &hc, uint32_t docId, double score, const c StorageDocument::SP sdoc(new StorageDocument(std::move(doc))); ASSERT_TRUE(sdoc->valid()); MatchData md(MatchData::params()); - md.setDocId(docId); - hc.addHit(sdoc, md, score, sortData, sortDataSize); + hc.addHit(sdoc, docId, md, score, sortData, sortDataSize); } void @@ -237,11 +236,9 @@ public: _fooValue(), _barValue() {} - virtual const search::fef::MatchData &run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override { - _matchData.setDocId(docid); + virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &) override { _fooValue.as_number = docid + 10; _barValue.as_number = docid + 30; - return _matchData; } FeatureResolver get_resolver() { diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp index 09fa3fd5639..6cb49a77ed2 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.cpp @@ -32,17 +32,17 @@ HitCollector::getDocSum(const search::DocumentIdT & docId) const } bool -HitCollector::addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data, double score) +HitCollector::addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, double score) { - Hit h(doc, data, score); + Hit h(doc, docId, data, score); return addHit(h); } bool -HitCollector::addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data, +HitCollector::addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, double score, const void * sortData, size_t sortDataLen) { - Hit h(doc, data, score, sortData, sortDataLen); + Hit h(doc, docId, data, score, sortData, sortDataLen); return addHit(h); } @@ -139,8 +139,8 @@ HitCollector::getFeatureSet(IRankProgram &rankProgram, } FeatureSet::SP retval = FeatureSet::SP(new FeatureSet(names, _hits.size())); for (HitVector::iterator it(_hits.begin()), mt(_hits.end()); it != mt; ++it) { - const MatchData &matchData = rankProgram.run(it->getDocId(), it->getMatchData()); - uint32_t docId = matchData.getDocId(); + rankProgram.run(it->getDocId(), it->getMatchData()); + uint32_t docId = it->getDocId(); search::feature_t * f = retval->getFeaturesByIndex(retval->addDocId(docId)); for (uint32_t j = 0; j < names.size(); ++j) { f[j] = *resolver.resolve_number(j); diff --git a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h index fc43e748a6d..ea4a20c187b 100644 --- a/streamingvisitors/src/vespa/searchvisitor/hitcollector.h +++ b/streamingvisitors/src/vespa/searchvisitor/hitcollector.h @@ -22,9 +22,9 @@ private: class Hit { public: - Hit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & matchData, + Hit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score, const void * sortData, size_t sortDataLen) : - _docid(matchData.getDocId()), + _docid(docId), _score(score), _document(doc), _matchData(), @@ -35,8 +35,8 @@ private: _matchData.emplace_back(*matchData.resolveTermField(handle)); } } - Hit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & matchData, double score) - : Hit(doc, matchData, score, nullptr, 0) {} + Hit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & matchData, double score) + : Hit(doc, docId, matchData, score, nullptr, 0) {} search::DocumentIdT getDocId() const { return _docid; } const vsm::StorageDocument::SP & getDocument() const { return _document; } const std::vector<search::fef::TermFieldMatchData> &getMatchData() const { return _matchData; } @@ -87,7 +87,7 @@ public: struct IRankProgram { virtual ~IRankProgram() {} - virtual const search::fef::MatchData &run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) = 0; + virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) = 0; }; HitCollector(size_t wantedHits); @@ -104,7 +104,7 @@ public: * @param data The match data for the hit. * @return true if the document was added to the heap **/ - bool addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data, double score); + bool addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, double score); /** * Adds a hit to this hit collector. @@ -118,7 +118,7 @@ public: * @param sortDataLen The length of the sortdata. * @return true if the document was added to the heap **/ - bool addHit(const vsm::StorageDocument::SP & doc, const search::fef::MatchData & data, + bool addHit(const vsm::StorageDocument::SP & doc, uint32_t docId, const search::fef::MatchData & data, double score, const void * sortData, size_t sortDataLen); /** diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp index d6870d485d5..6a028f15719 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.cpp @@ -139,6 +139,7 @@ RankProcessor::RankProcessor(RankManager::Snapshot::SP snapshot, _queryEnv(location, snapshot->getIndexEnvironment(rankProfile), queryProperties, attrMgr), _mdLayout(), _rankProgram(), + _docId(TermFieldMatchData::invalidId()), _score(0.0), _summaryProgram(), _rankScorePtr(nullptr), @@ -188,11 +189,10 @@ private: public: RankProgramWrapper(RankProgram &rankProgram) : _rankProgram(rankProgram) {} - virtual const MatchData &run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) override { + virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) override { // Prepare the match data object used by the rank program with earlier unpacked match data. copyTermFieldMatchData(matchData, _rankProgram.match_data()); _rankProgram.run(docid); - return _rankProgram.match_data(); } }; @@ -221,7 +221,7 @@ void RankProcessor::unpackMatchData(uint32_t docId) { MatchData &matchData = _rankProgram->match_data(); - matchData.setDocId(docId); + _docId = docId; unpackMatchData(matchData); } @@ -263,8 +263,8 @@ RankProcessor::unpackMatchData(MatchData &matchData) tmd = matchData.resolveTermField(tfd->getHandle()); tmd->setFieldId(fieldId); // reset field match data, but only once per docId - if (tmd->getDocId() != matchData.getDocId()) { - tmd->reset(matchData.getDocId()); + if (tmd->getDocId() != _docId) { + tmd->reset(_docId); } } // find fieldLen for new field diff --git a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h index 20dd3f5fb88..04b8e798298 100644 --- a/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h +++ b/streamingvisitors/src/vespa/searchvisitor/rankprocessor.h @@ -30,6 +30,7 @@ private: QueryEnvironment _queryEnv; search::fef::MatchDataLayout _mdLayout; search::fef::RankProgram::UP _rankProgram; + uint32_t _docId; double _score; search::fef::RankProgram::UP _summaryProgram; const search::feature_t *_rankScorePtr; @@ -69,6 +70,7 @@ public: void setRankScore(double score) { _score = score; } double getRankScore() const { return _score; } HitCollector & getHitCollector() { return *_hitCollector; } + uint32_t getDocId() const { return _docId; } }; } // namespace storage diff --git a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp index ff54a506468..cf63c8ef1d1 100644 --- a/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp +++ b/streamingvisitors/src/vespa/searchvisitor/searchvisitor.cpp @@ -536,7 +536,7 @@ SearchVisitor::RankController::rankMatchedDocument(uint32_t docId) { _rankProcessor->runRankProgram(docId); LOG(debug, "Rank score for matched document %u: %f", - _rankProcessor->getMatchData().getDocId(), + docId, _rankProcessor->getRankScore()); if (_dumpFeatures) { _dumpProcessor->runRankProgram(docId); @@ -560,18 +560,19 @@ SearchVisitor::RankController::collectMatchedDocument(bool hasSorting, const vsm::StorageDocument::SP & document) { bool amongTheBest(false); + uint32_t docId = _rankProcessor->getDocId(); if (!hasSorting) { - amongTheBest = _rankProcessor->getHitCollector().addHit(document, _rankProcessor->getMatchData(), _rankProcessor->getRankScore()); + amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(), _rankProcessor->getRankScore()); if (amongTheBest && _dumpFeatures) { - _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore()); + _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore()); } } else { size_t pos = visitor.fillSortBuffer(); LOG(spam, "SortBlob is %ld bytes", pos); - amongTheBest = _rankProcessor->getHitCollector().addHit(document, _rankProcessor->getMatchData(), _rankProcessor->getRankScore(), + amongTheBest = _rankProcessor->getHitCollector().addHit(document, docId, _rankProcessor->getMatchData(), _rankProcessor->getRankScore(), &tmpSortBuffer[0], pos); if (amongTheBest && _dumpFeatures) { - _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore(), + _dumpProcessor->getHitCollector().addHit(vsm::StorageDocument::SP(NULL), docId, _dumpProcessor->getMatchData(), _dumpProcessor->getRankScore(), &tmpSortBuffer[0], pos); } } @@ -950,11 +951,11 @@ SearchVisitor::handleDocument(const vsm::StorageDocument::SP & document) vespalib::string documentId(document->docDoc().getId().getScheme().toString()); LOG(debug, "Matched document with id '%s'", documentId.c_str()); - document->setDocId(rp.getMatchData().getDocId()); + document->setDocId(rp.getDocId()); fillAttributeVectors(documentId, *document); - _rankController.rankMatchedDocument(rp.getMatchData().getDocId()); + _rankController.rankMatchedDocument(rp.getDocId()); if (_shouldFillRankAttribute) { _rankAttribute.add(rp.getRankScore()); |