aboutsummaryrefslogtreecommitdiffstats
path: root/streamingvisitors/src/vespa/searchvisitor/hitcollector.h
blob: 07418b85c75636fe2f14512d81fb656b8e93b412 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include <vespa/searchlib/common/stringmap.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <vespa/vdslib/container/searchresult.h>
#include <vespa/vsm/common/docsum.h>
#include <vespa/vsm/common/storagedocument.h>
#include <vespa/vespalib/stllike/string.h>
#include <vespa/vespalib/util/featureset.h>

// Forward declaration: this header only refers to FeatureResolver by const reference
// (see getFeatureSet() / get_match_features()), so the full definition is not needed here.
namespace search { namespace fef { class FeatureResolver; } }

namespace streaming {

/**
 * This class is used to store hits and MatchData objects for the m best hits.
 *
 * It also implements vsm::IDocSumCache, so the documents of the stored hits
 * can be looked up by docId through getDocSum().
 **/
class HitCollector : public vsm::IDocSumCache
{
private:
    /**
     * A single stored hit: docId, rank score, a non-owning pointer to the
     * document, per-term-field match data and an optional sort blob.
     * Hits are move-only (copying is deleted).
     **/
    class Hit
    {
    public:
        Hit(const vsm::StorageDocument * doc, uint32_t docId, const search::fef::MatchData & matchData,
            double score, const void * sortData, size_t sortDataLen);
        /** Convenience constructor for a hit without sort data (delegates with nullptr / 0). */
        Hit(const vsm::StorageDocument * doc, uint32_t docId, const search::fef::MatchData & matchData, double score)
            : Hit(doc, docId, matchData, score, nullptr, 0)
        { }
        ~Hit();
        Hit(const Hit &) = delete;
        Hit & operator = (const Hit &) = delete;
        Hit(Hit && rhs) = default;
        Hit & operator = (Hit && rhs) = default;
        search::DocumentIdT getDocId() const { return _docid; }
        /** Dereferences the stored document pointer; must not be called for a hit added with a nullptr document. */
        const vsm::StorageDocument & getDocument() const { return *_document; }
        const std::vector<search::fef::TermFieldMatchData> &getMatchData() const { return _matchData; }
        search::feature_t getRankScore() const { return _score; }
        const vespalib::string & getSortBlob() const { return _sortBlob; }
        /** Orders hits by ascending docId. */
        bool operator < (const Hit & b) const { return getDocId() < b.getDocId(); }
        /**
         * Three-way comparison on docId.
         *
         * The ids are compared explicitly instead of being subtracted: docIds are
         * unsigned 32-bit values, so their difference converted to int gets the
         * wrong sign when the ids are more than INT_MAX apart, which makes the
         * ordering non-transitive.
         *
         * @return -1, 0 or 1 when this docId is lower than, equal to or higher than b's.
         **/
        int cmpDocId(const Hit & b) const {
            const search::DocumentIdT lhs = getDocId();
            const search::DocumentIdT rhs = b.getDocId();
            return (lhs < rhs) ? -1 : ((lhs > rhs) ? 1 : 0);
        }
        /**
         * Three-way comparison on rank: the hit with the higher rank score orders
         * first (negative result); equal scores are tie-broken on ascending docId.
         **/
        int cmpRank(const Hit & b) const {
            return (getRankScore() > b.getRankScore()) ?
                -1 : ((getRankScore() < b.getRankScore()) ? 1 : cmpDocId(b));
        }
        /**
         * Three-way comparison on the sort blob; equal blobs are tie-broken on
         * ascending docId.
         **/
        int cmpSort(const Hit & b) const {
            int diff = _sortBlob.compare(b._sortBlob.c_str(), b._sortBlob.size());
            return (diff == 0) ? cmpDocId(b) : diff;
        }
        /** Less-than functor based on cmpRank(). */
        class RankComparator {
        public:
            RankComparator() {}
            bool operator() (const Hit & lhs, const Hit & rhs) const {
                return lhs.cmpRank(rhs) < 0;
            }
        };
        /** Less-than functor based on cmpSort(). */
        class SortComparator {
        public:
            SortComparator() {}
            bool operator() (const Hit & lhs, const Hit & rhs) const {
                return lhs.cmpSort(rhs) < 0;
            }
        };

    private:
        uint32_t _docid;
        double _score;
        const vsm::StorageDocument * _document; // not owned; may be nullptr (see HitCollector::addHit)
        std::vector<search::fef::TermFieldMatchData> _matchData;
        vespalib::string _sortBlob;
    };
    using HitVector = std::vector<Hit>;
    HitVector _hits;
    bool      _sortedByDocId; // flag for whether the hit vector is sorted on docId

    void sortByDocId();
    // NOTE(review): const and takes the hit by const reference, so this presumably only
    // decides whether the hit qualifies for the heap - confirm against the implementation.
    bool addHitToHeap(const Hit & hit) const;
    bool addHit(Hit && hit);

public:
    using UP = std::unique_ptr<HitCollector>;

    /**
     * Interface for something that can be run for a given docid with the
     * match data stored for that hit (used by getFeatureSet() and
     * get_match_features()).
     **/
    struct IRankProgram {
        virtual ~IRankProgram() {}
        virtual void run(uint32_t docid, const std::vector<search::fef::TermFieldMatchData> &matchData) = 0;
    };

    /**
     * @param wantedHits presumably the number of best hits to keep (the 'm' in the
     *                   class description) - confirm against the implementation.
     **/
    HitCollector(size_t wantedHits);

    virtual const vsm::Document & getDocSum(const search::DocumentIdT & docId) const override;

    /**
     * Adds a hit to this hit collector.
     * Make sure that the hits are added in increasing local docId order.
     * If you add a nullptr document you should not use getDocSum() or fillSearchResult(),
     * as these functions expect valid documents.
     *
     * @param doc   The document that is a hit. Must be kept alive on the outside.
     * @param docId The local docId of the hit.
     * @param data  The match data for the hit.
     * @param score The rank score of the hit.
     * @return true if the document was added to the heap
     **/
    bool addHit(const vsm::StorageDocument * doc, uint32_t docId, const search::fef::MatchData & data, double score);

    /**
     * Adds a hit to this hit collector.
     * Make sure that the hits are added in increasing local docId order.
     * If you add a nullptr document you should not use getDocSum() or fillSearchResult(),
     * as these functions expect valid documents.
     *
     * @param doc   The document that is a hit. Must be kept alive on the outside.
     * @param docId The local docId of the hit.
     * @param data  The match data for the hit.
     * @param score The rank score of the hit.
     * @param sortData The buffer of the sortdata.
     * @param sortDataLen The length of the sortdata.
     * @return true if the document was added to the heap
     **/
    bool addHit(const vsm::StorageDocument * doc, uint32_t docId, const search::fef::MatchData & data,
                double score, const void * sortData, size_t sortDataLen);

    /**
     * Fills the given search result with the m best hits from the hit heap.
     * Invoking this method will destroy the heap property of the hit heap.
     *
     * The first overload additionally takes match feature values by rvalue
     * reference to be passed along with the search result.
     **/
    void fillSearchResult(vdslib::SearchResult & searchResult, vespalib::FeatureValues&& match_features);
    void fillSearchResult(vdslib::SearchResult & searchResult);

    /**
     * Extract features from the hits stored in the hit heap.
     * Invoking this method will destroy the heap property of the hit heap.
     * Note that this method will calculate any additional features.
     *
     * @return features for all hits on the heap.
     * @param rankProgram the rank program used to calculate all features.
     * @param resolver   feature resolver, gives feature names and values
     * @param feature_rename_map string-to-string map used when naming the features;
     *                           presumably maps original feature names to output names - confirm.
     **/
    vespalib::FeatureSet::SP getFeatureSet(IRankProgram &rankProgram,
                                           const search::fef::FeatureResolver &resolver,
                                           const search::StringStringMap &feature_rename_map);

    /**
     * Same inputs as getFeatureSet(), but returns the result as vespalib::FeatureValues.
     * NOTE(review): presumably has the same effect on the hit heap as getFeatureSet() - confirm.
     **/
    vespalib::FeatureValues get_match_features(IRankProgram& rank_program,
                                               const search::fef::FeatureResolver& resolver,
                                               const search::StringStringMap& feature_rename_map);
};

} // namespace streaming