aboutsummaryrefslogtreecommitdiffstats
path: root/searchlib/src/vespa/searchlib/query/streaming/queryterm.hpp
blob: bf10d02e6bc3d75dd22b333091ea8ff9465d5ae8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.

#pragma once

#include "queryterm.h"
#include <vespa/searchlib/fef/iindexenvironment.h>
#include <vespa/searchlib/fef/itermdata.h>
#include <vespa/searchlib/fef/matchdata.h>
#include <algorithm>
#include <limits>


namespace search::streaming {

namespace {

// Clamps a 32-bit value to the largest value representable in 16 bits.
// Values that already fit are returned unchanged.
uint16_t
cap_16_bits(uint32_t value)
{
    constexpr uint32_t limit = std::numeric_limits<uint16_t>::max();
    return static_cast<uint16_t>((value > limit) ? limit : value);
}

// Returns the field length recorded in the term's field info for field_id.
// When field_id is outside the range reported by getFieldInfoSize(), the
// FieldPositionsIterator::UNKNOWN_LENGTH value is returned instead.
uint32_t
extract_field_length(const QueryTerm& term, uint32_t field_id)
{
    if (field_id >= term.getFieldInfoSize()) {
        return search::fef::FieldPositionsIterator::UNKNOWN_LENGTH;
    }
    return term.getFieldInfo(field_id).getFieldLength();
}

// Stores field length and number of occurrences on the given term field
// match data. Both values are capped to 16 bits before being stored.
void
set_interleaved_features(search::fef::TermFieldMatchData& tmd, uint32_t field_length, uint32_t num_occs)
{
    const uint16_t capped_field_length = cap_16_bits(field_length);
    const uint16_t capped_num_occs = cap_16_bits(num_occs);
    tmd.setFieldLength(capped_field_length);
    tmd.setNumOccs(capped_num_occs);
}

}

/**
 * Unpacks the hits in hit_list into the term field match data of match_data.
 *
 * Hits are processed in order. Each time the field id of a hit differs from
 * the previous one, the term field match data for the new field is looked up
 * via td, and is reset for docid if it was not already positioned there.
 * A position is appended for every hit unless the term (term_filter) or the
 * field (as reported by index_env) is a filter. Interleaved features (field
 * length and number of occurrences) are written when a run of hits for a
 * field ends, if the term field match data asks for them.
 *
 * @param docid       document id the hits belong to
 * @param td          term data used to look up per-field handles
 * @param match_data  match data holding the term field match data to fill
 * @param hit_list    hits to unpack; nothing is done when it is empty
 * @param fl_term     term used to find field lengths when HitListType is HitList
 * @param term_filter true to suppress positions for all fields
 * @param index_env   index environment used to check whether a field is a filter
 */
template <typename HitListType>
void
QueryTerm::unpack_match_data_helper(uint32_t docid, const fef::ITermData& td, fef::MatchData& match_data, const HitListType& hit_list, const QueryTerm& fl_term, bool term_filter, const fef::IIndexEnvironment& index_env)
{
    (void) fl_term; // only referenced in one of the 'if constexpr' branches below
    if (hit_list.empty()) {
        return; // only unpack if we have a hit
    }

    uint32_t current_field_id = std::numeric_limits<uint32_t>::max();
    uint32_t current_field_length = 0;
    uint32_t occurrences = 0;
    bool skip_positions = false;
    search::fef::TermFieldMatchData *current_tmd = nullptr;

    // Writes the interleaved features gathered for the current field, if any are wanted.
    auto flush_interleaved_features = [&]() {
        if ((current_tmd != nullptr) && current_tmd->needs_interleaved_features()) {
            set_interleaved_features(*current_tmd, current_field_length, occurrences);
        }
    };

    // Optimized for hit lists that deliver all hits for a single field in one chunk.
    // NOTE(review): the occurrence count restarts for every run of equal field ids, so
    // if hits for one field are not contiguous the interleaved features reflect only
    // the last run - confirm that hit lists are always grouped by field.
    for (const auto& hit : hit_list) {
        const uint32_t field_id = hit.field_id();
        if (field_id != current_field_id) {
            // finish the previous field before switching
            flush_interleaved_features();
            current_tmd = nullptr; // back to notfound/unknown until the lookup below succeeds
            occurrences = 0;

            auto field = index_env.getField(field_id);
            skip_positions = term_filter || (field != nullptr && field->isFilter());

            // set up for the new field that had a hit
            const search::fef::ITermFieldData *field_data = td.lookupField(field_id);
            if (field_data != nullptr) {
                current_tmd = match_data.resolveTermField(field_data->getHandle());
                current_tmd->setFieldId(field_id);
                // reset field match data, but only once per docId
                if (current_tmd->getDocId() != docid) {
                    current_tmd->reset(docid);
                }
            }
            current_field_id = field_id;
            if constexpr (std::is_same_v<HitList, HitListType>) {
                current_field_length = extract_field_length(fl_term, field_id);
            } else {
                current_field_length = hit.get_field_length();
            }
        }
        ++occurrences;
        if (!skip_positions && (current_tmd != nullptr)) {
            search::fef::TermFieldMatchDataPosition position(hit.element_id(), hit.position(),
                                                             hit.element_weight(), hit.element_length());
            current_tmd->appendPosition(position);
        }
    }
    // finish the last field seen
    flush_interleaved_features();
}

}