aboutsummaryrefslogtreecommitdiffstats
path: root/searchsummary/src/vespa/juniper/matchobject.h
blob: 5ab90c1a61e9c13b4be4f564f2df042c95be5b82 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#pragma once

#include "queryhandle.h"
#include "querynode.h"
#include "hashbase.h"
#include <vespa/fastlib/text/unicodeutil.h>
#include "reducematcher.h"
#include "ITokenProcessor.h"

// File-scope shorthands for juniper::Result and ITokenProcessor::Token.
// Because they are declared in a header outside any namespace, they are
// visible to every translation unit that includes this file.
using Result = juniper::Result;
using Token = ITokenProcessor::Token;

// Reverse length order, longest match first - needed to allow the matcher to
// match on the most explicit matches before the more implicit ones.
// Quick hack for setting up the match object (which depends on <=).
//
struct QueryTermLengthComparator
{
    /**
     * Compares two query terms by their `len` member.
     *
     * The operator is const-qualified so the comparator can also be invoked
     * through a const instance or a const reference; it holds no state.
     *
     * @param m1 first term; dereferenced unconditionally, so must not be null
     * @param m2 second term; dereferenced unconditionally, so must not be null
     * @return true when m1->len is less than or equal to m2->len
     *
     * NOTE(review): equal lengths compare true in both directions, so this
     * is not a strict weak ordering and must not be handed to std::sort and
     * friends. Presumably Fast_HashTable relies on the non-strict `<=` -
     * confirm in hashbase.h before changing it.
     */
    inline bool operator()(QueryTerm* m1, QueryTerm* m2) const
    {
        return m1->len <= m2->len;
    }
};

// Fast_HashTable instantiation over ucs4_t and QueryTerm* that uses
// QueryTermLengthComparator as its comparator policy.
// NOTE(review): the 0x20 argument is presumably the table size - confirm
// against the Fast_HashTable declaration in hashbase.h.
using queryterm_hashtable =
    Fast_HashTable<ucs4_t, QueryTerm*, 0x20, QueryTermLengthComparator>;

// Iterator type used by MatchObject (exposed there as MatchObject::iterator
// and filled in by MatchObject::Match) to walk QueryTerm candidates.
// Only declarations are visible in this header; the traversal semantics of
// current()/next()/first_match() live in the implementation file.
class match_iterator
{
public:
    // Binds the iterator to a match object and a result handle.
    // NOTE(review): neither pointer appears to be owned by the iterator -
    // confirm against the constructor definition.
    match_iterator(MatchObject* mo, Result* rhandle);

    // Non-copyable. Explicitly deleted so that an accidental copy is rejected
    // at compile time with a clear diagnostic instead of surfacing as an
    // access or link error.
    match_iterator(const match_iterator&) = delete;
    match_iterator& operator=(const match_iterator&) = delete;

    QueryTerm* current();
    QueryTerm* next();
    QueryTerm* first_match(Token& token);
private:
    QueryTerm* first();
    QueryTerm* next_reduce_match();
    queryterm_hashtable& _table;
    queryterm_hashtable::element* _el;
public:
    // Result handle passed to the constructor; intentionally left public
    // because code outside this class may read it directly.
    Result* _rhandle;
private:
    // Data member order is kept as-is: it determines initialization order.
    bool _reductions;
    const std::vector<QueryTerm*>* _reduce_matches;
    std::vector<QueryTerm*>::const_iterator _reduce_matches_it;
    MatchObject* _mo;
    size_t _len, _stem_min, _stemext;
    const ucs4_t* _term;
};


// MatchObject encapsulates the data structures necessary to map from a query word to a
// unique index + options for this query.
// A MatchObject keeps no state for a particular document,
// so it can be reused for later results for
// the same query/language combination.

class MatchObject
{
public:
    // Constructor for the default match object.
    // Resumes ownership of query
    MatchObject(QueryExpr* query, bool has_reductions);

    // Constructor for language specific extensions:
    // Creates a duplicate of query
    MatchObject(QueryExpr* query, bool has_reductions, uint32_t langid);

    ~MatchObject();

    using iterator = match_iterator;

    /** Check if the given string matches any query term in the MatchObject
     * @param an iterator that will be updated to iterate over all matching query terms
     * @param term the term to match
     * @param len the length of the term
     * @param options tell if match was exact/pre/post etc.
     * @return true if a match was found (and the iterator points to the first element)
     */
    bool Match(iterator& mi, Token& token, unsigned& options);

    inline QueryTerm* Term(int idx) { return _qt[idx]; }

    inline size_t TermCount() { return _qt.size(); }
    inline size_t NontermCount() { return _nonterms.size(); }
    inline int MaxArity() { return _max_arity; }

    inline bool HasConstraints() { return (_query ? (_query->_options & X_CONSTR) : false); }
    inline bool UsesValid() { return (_query ? (_query->_options & X_CHKVAL) : false); }

    inline QueryExpr* Query() { return _query; }
    inline bool HasReductions() { return _has_reductions; }

    // internal use only..
    void add_queryterm(QueryTerm* term);
    void add_nonterm(QueryNode* n);
    void add_reduction_term(QueryTerm* term, juniper::Rewriter*);
private:
    friend class match_iterator;
    QueryExpr* _query;
    std::vector<QueryTerm*> _qt; // fast lookup by index
    std::vector<QueryNode*> _nonterms;
    bool _match_overlap;
    int _max_arity;
    bool _has_reductions; // query contains terms that reqs reduction of tokens before matching
    queryterm_hashtable _qt_byname; // fast lookup by name
    juniper::ReduceMatcher _reduce_matchers;

    MatchObject(MatchObject &);
    MatchObject &operator=(MatchObject &);
};