aboutsummaryrefslogtreecommitdiffstats
path: root/searchsummary/src/vespa/juniper/querynode.h
blob: 5ff7310e7520f10afe4349d9e87a5c4fbeacbadb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/* $Id$ */

#pragma once

#include <string>
#include <vector>
#include <vespa/fastlib/text/unicodeutil.h>
#include "querymodifier.h"

/** The internal query data structure used by the matching engine
 *  in Matcher.h
 */


// Option bit definitions:
#define X_ORDERED   0x1  // PHRASE and WITHIN operator have the ordered property
#define X_LIMIT     0x2  // NEAR and WITHIN operators have the limit property
#define X_EXACT     0x4  // PHRASE and descendants have this property
#define X_COMPLETE  0x8  // All keywords must be present (NEAR/PHRASE/WITHIN)
#define X_AND       0x10  // threshold must be recomputed when complete - AND semantics
#define X_OR        0x20  // threshold must be recomputed when complete + OR semantics
#define X_ANY       0x40  // threshold must be recomputed when complete + ANY semantics
#define X_CONSTR   0x100  // Bit telling if this subquery has constraints applied somewhere
#define X_CHKVAL   0x200  // Bit set if validity of keyword occurrences must be checked
#define X_NOT      0x400  // Limit has opposite sign (eliminate below: NOT_WITHIN semantics)
#define X_PREFIX  0x1000  // This is a prefix search (valid on terms only)
#define X_POSTFIX 0x2000  // This is a postfix search (valid on terms only)
#define X_WILD    0x4000  // This is a wildcard search (valid on terms only)
#define X_ONLY_1  0x8000  // Tell simplifier to delete all childs but #1 (RANK/ANDNOT)
#define X_SPECIALTOKEN  0x10000  // This is a special token (valid on terms only)

class QueryNode;
class QueryTerm;

using querynode_vector = std::vector<QueryNode*>;


// Support slightly extended visitor pattern for QueryExpr nodes..

class IQueryExprVisitor
{
public:
    virtual ~IQueryExprVisitor() {}

    // Visit before visiting subnodes
    virtual void VisitQueryNode(QueryNode*) = 0;
    // visit after visiting subnodes - default: do nothing
    virtual void RevisitQueryNode(QueryNode*);

    virtual void VisitQueryTerm(QueryTerm*) = 0;
};


/** Base class for query expressions in Juniper */
class QueryExpr
{
public:
    explicit QueryExpr(int weight, int arity);
    explicit QueryExpr(QueryExpr* e);

    /** Add a child to the end of the list of children for this node.
     * @param child A pointer to a child node to add or NULL to denote that
     *    we have eliminated a child from this node - to trigger an arity update
     * @return A pointer to this node if more children are needed or else nearest
     *   parent that needs more children
     */
    virtual QueryNode* AddChild(QueryExpr* child) = 0;
    virtual ~QueryExpr();
    virtual int Limit() = 0;
    virtual void Dump(std::string&) = 0;
    virtual bool StackComplete() = 0;
    virtual void ComputeThreshold();
    virtual QueryNode* AsNode() = 0;
    virtual QueryTerm* AsTerm() = 0;
    virtual bool Complex() = 0;

    virtual void Accept(IQueryExprVisitor& v) = 0;

    virtual int MaxArity() { return 0; }

    inline bool HasConstraints() { return _options & X_CONSTR; }
    inline bool UsesValid() { return _options & X_CHKVAL; }
    inline bool HasLimit() { return _options & X_LIMIT; }
    inline bool Exact() { return _options & X_EXACT; }

    int _options;       // Applied options (bitmap) for this node
    int _weight;        // Weight of this term by parent - if 0: weight is sum of children
    int _arity;         // Arity of this query subexpression (may get decremented..)
    QueryNode* _parent; // Pointer to parent or NULL if this is the root of the query
    int _childno;       // Position number within parent's children (0 if no parents)

private:
    QueryExpr(QueryExpr &);
    QueryExpr &operator=(QueryExpr &);
};


/** Internal node of a query
 */
class QueryNode : public QueryExpr
{
public:
    // Create a new node with arity children
    QueryNode(int arity, int threshold, int weight = 0);

    // Create a copy of the node n wrt. arity etc, but without adding any children..
    explicit QueryNode(QueryNode* n);

    ~QueryNode();
    QueryNode* AddChild(QueryExpr* child) override;
    int Limit() override;
    bool Complete() const { return _arity == _nchild; }
    void Dump(std::string&) override;
    bool StackComplete() override;
    void ComputeThreshold() override;
    QueryNode* AsNode() override;
    QueryTerm* AsTerm() override;
    bool Complex() override;
    int MaxArity() override;

    void Accept(IQueryExprVisitor& v) override;

    // return true if a match for n should lead to creation of a new candidate node
    // corresponding to this query tree node:
    bool AcceptsInitially(QueryExpr* n);

    int _threshold;       // Threshold for this expression node to be considered complete
    int _limit;           // NEAR/WITHIN limit if X_LIMIT option set

    /* Pointer to an array of length _arity of pointers to
     * subqueries associated with this query */
    QueryExpr** _children;
    int _nchild;   // end pointer (fill level) of _children
    int _node_idx; // Index (position) of this nonterminal within table of all nonterminals

private:
    QueryNode(QueryNode &);
    QueryNode &operator=(QueryNode &);
};


/** Terminal node of a query
 */
class QueryTerm : public QueryExpr
{
public:
    QueryTerm(const char* t, int length, int ix, int weight = 100);
    explicit QueryTerm(QueryTerm* const);
    ~QueryTerm();
    int Limit() override;
    QueryNode* AddChild(QueryExpr* child) override;
    void Dump(std::string&) override;
    bool StackComplete() override;
    QueryNode* AsNode() override;
    QueryTerm* AsTerm() override;
    bool Complex() override;

    void Accept(IQueryExprVisitor& v) override;
    inline const char* term() { return _rep; }
    inline const ucs4_t* ucs4_term() { return _ucs4_term; }
    inline bool is_prefix() { return _options & X_PREFIX; }
    inline bool is_wildcard() { return _options & X_WILD; }
    inline bool isSpecialToken() { return _options & X_SPECIALTOKEN; }

    size_t len;
    size_t ucs4_len;
    int total_match_cnt;
    int exact_match_cnt;
    int idx;
    juniper::Rewriter* rewriter;
    juniper::string_matcher* reduce_matcher;
private:
    char* _rep;
    ucs4_t* _ucs4_term;

    QueryTerm(QueryTerm &);
    QueryTerm &operator=(QueryTerm &);
};


/** Modify the given stack by eliminating unnecessary internal nodes
 *  with arity 1 or non-terms with arity 0
 */
void SimplifyStack(QueryExpr*& orig_stack);