blob: 0b99ac06997b1f55290def858314827b6440e16d (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
|
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once
#include "iqueryenvironment.h"
#include "simpletermdata.h"
namespace search::fef {
/**
* This class is used together with PhraseSplitter to split all phrase
* terms in a query environment into separate terms. A new TermData
* object is created for each term that a phrase is split into, and
* these objects are managed by this class. Unmodified single terms are
* served from the query environment and match data.
*
* Use this class together with the PhraseSplitter class if you want to
* handle a phrase term the same way as single terms.
**/
class PhraseSplitterQueryEnv : public IQueryEnvironment
{
private:
    // One entry in the renumbered term list exposed through getTerm().
    struct TermIdx {
        uint32_t idx;      // index into either the wrapped query environment or our vector of TermData objects
        bool     splitted; // true: idx refers to our vector of TermData objects; false: to the wrapped query environment
        TermIdx(uint32_t index, bool isSplit)
            : idx(index),
              splitted(isSplit)
        {}
    };

public:
    // Bookkeeping for an original phrase term.
    struct PhraseTerm {
        const ITermData & term;        // the original phrase term
        uint32_t          idx;         // index into the vector of our TermData objects
        TermFieldHandle   orig_handle; // NOTE(review): presumably the phrase's own term field handle - confirm in the implementation
        PhraseTerm(const ITermData & phrase, uint32_t index, uint32_t handle)
            : term(phrase),
              idx(index),
              orig_handle(handle)
        {}
    };

    // Plain record exposed through get_copy_info().
    // NOTE(review): presumably pairs a handle of an original phrase with the
    // handle of one of its split terms, plus that term's position inside the
    // phrase - confirm against the implementation file.
    struct HowToCopy {
        TermFieldHandle orig_handle;
        TermFieldHandle split_handle;
        uint32_t        offsetInPhrase;
    };

private:
    const IQueryEnvironment    &_queryEnv;     // the wrapped query environment; all non-term queries are forwarded to it
    std::vector<SimpleTermData> _terms;        // terms created by splitting phrases
    std::vector<HowToCopy>      _copyInfo;
    std::vector<TermIdx>        _termIdxMap;   // renumbering of terms
    TermFieldHandle             _maxHandle;    // the largest among original term field handles
    TermFieldHandle             _skipHandles;  // how many handles to skip
    uint32_t                    _field_id;
    std::vector<PhraseTerm>     _phrase_terms; // data about original phrase terms

    void considerTerm(uint32_t termIdx, const ITermData &term, uint32_t fieldId);

public:
    /**
     * Create a phrase splitter based on the given query environment.
     *
     * @param queryEnv the query environment to wrap.
     * @param fieldId the field where we need to split phrases
     **/
    PhraseSplitterQueryEnv(const IQueryEnvironment & queryEnv, uint32_t fieldId);
    ~PhraseSplitterQueryEnv();

    /**
     * Number of terms exposed by this environment, i.e. the number of
     * entries in the renumbered term list.
     **/
    uint32_t getNumTerms() const override {
        return static_cast<uint32_t>(_termIdxMap.size());
    }

    /**
     * Look up a term by its renumbered index.
     *
     * @param idx index into the renumbered term list.
     * @return the locally managed TermData object when the entry is marked
     *         as split, otherwise whatever the wrapped query environment
     *         returns for the mapped index; nullptr when idx is out of range.
     **/
    const ITermData * getTerm(uint32_t idx) const override {
        if (idx < _termIdxMap.size()) {
            const TermIdx & entry = _termIdxMap[idx];
            if (entry.splitted) {
                return &_terms[entry.idx];
            }
            return _queryEnv.getTerm(entry.idx);
        }
        return nullptr;
    }

    // The remaining IQueryEnvironment overrides forward directly to the wrapped environment.
    const Properties & getProperties() const override {
        return _queryEnv.getProperties();
    }
    GeoLocationSpecPtrs getAllLocations() const override {
        return _queryEnv.getAllLocations();
    }
    const attribute::IAttributeContext & getAttributeContext() const override {
        return _queryEnv.getAttributeContext();
    }
    double get_average_field_length(const vespalib::string &field_name) const override {
        return _queryEnv.get_average_field_length(field_name);
    }
    const IIndexEnvironment & getIndexEnvironment() const override {
        return _queryEnv.getIndexEnvironment();
    }

    // Accessor methods used by PhraseSplitter
    TermFieldHandle get_skip_handles() const {
        return _skipHandles;
    }
    uint32_t get_num_phrase_split_terms() const {
        return static_cast<uint32_t>(_terms.size());
    }
    uint32_t get_field_id() const {
        return _field_id;
    }
    const std::vector<HowToCopy>& get_copy_info() const {
        return _copyInfo;
    }
    const std::vector<PhraseTerm>& get_phrase_terms() const {
        return _phrase_terms;
    }
};
}
|