summaryrefslogtreecommitdiffstats
path: root/searchsummary/src/vespa/juniper/rpinterface.h
blob: ee1f4e3a3d880f404c85bf449cb2b09098cbc774 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
/* $Id$ */

#pragma once

#include "IJuniperProperties.h"
#include "rewriter.h"
#include <memory>

/** @file rpinterface.h This file is the main include file for the advanced
 *    result processing interface to Juniper. The complete set of new interfaces
 *    to Juniper as of Juniper v.2.x.x is contained in the juniper namespace.
 *    This file together with query.h is the result processing part of these interfaces.
 *    The other part is the indexing/document processing interface with main include file
 *    dpinterface.h
 */

/** This define will be changed only in case of backward incompatible
 * API changes - we use 2 initially to avoid confusion with Juniper 1.0.x..
 */
#define JUNIPER_RP_ABI_VERSION 3

/* Changes to this version number indicate minor interface additions
 * where the original interface is kept unchanged. Can be used to test for features.
 */
#define JUNIPER_RP_API_MINOR_VERSION 1

class Fast_WordFolder;

/** This is the new query/document provider interface to Juniper as of Juniper 2.0.x
 *  It replaces the Juniper 1.0.x interface on the result processing side, previously
 *  defined by simpledynsum.h .
 *  While the old interface (simpledynsum.h) is kept for backward compatibility, it is
 *  deprecated as it allows less flexibility and thus provides lower quality teasers.
 */
namespace juniper
{

/** Version tag generated from Makefile/configure system */
extern const char* version_tag;

/** Opaque object encapsulating a default configuration set for Juniper.
 *  Multiple such configurations can co-exist, for instance to allow different
 *  summary fields to use different teaser configurations.
 *  Note that in addition to this (relatively static) configuration set,
 *  configuration parameters can be overridden on a per query basis by means of the
 *  juniperoptions query parameter. See the Juniper 2.x.x documentation for details.
 */
class Config;

/** Opaque object encapsulating state associated with a particular query
 */
class QueryHandle;

/** Opaque object encapsulating the result of a partial or full Juniper
 *  analysis of a document.
 */
class Result;

/** Type that is only forward declared in this header. A Juniper instance
 *  holds one through a unique_ptr and hands it out via getModifier().
 */
class QueryModifier;

/** Abstract interface giving read access to a summary generated by Juniper.
 */
class Summary
{
public:
    virtual ~Summary() = default;

    /** @return the textual representation of the generated summary. */
    virtual const char* Text() const = 0;

    /** @return the length reported for the summary by the implementation. */
    virtual size_t Length() const = 0;
};

/** Main handle to the Juniper result processing subsystem.
 *  An instance keeps pointers to a property source and a word folder, plus a
 *  QueryModifier that it owns through a unique_ptr.
 */
class Juniper {
public:
    /** Smart pointer aliases for Juniper instances. */
    using UP = std::unique_ptr<Juniper>;
    using SP = std::shared_ptr<Juniper>;

    /** Initialize the Juniper subsystem.
     * @param props Pointer to the object holding all available configuration
     *   property values for the Juniper parameters.
     * @param wordfolder Pointer to a custom wordfolder object to use. If
     *   NULL, a default wordfolder will be maintained by Juniper if necessary.
     * @param api_version Version check parameter. It should always be left at
     *   its default value to ensure binary backward compatibility between
     *   versions.
     *
     * NOTE(review): earlier documentation states that errors during
     * initialization or config object creation are reported to a log object
     * with status ELOG_CRITICAL; no log object is passed to this constructor,
     * so confirm where such errors actually end up.
     */
    Juniper(IJuniperProperties* props,
            Fast_WordFolder* wordfolder,
            int api_version = JUNIPER_RP_ABI_VERSION);

    /** Deinitialize the Juniper subsystem, releasing all remaining resources
     *  associated with Juniper (the reverse of initialization).
     *  All Result objects are assumed to have been released beforehand.
     */
    ~Juniper();

    /** @return the word folder referenced by this instance. */
    Fast_WordFolder& getWordFolder() { return *_wordfolder; }

    /** @return the property source referenced by this instance. */
    IJuniperProperties& getProp() { return *_props; }

    /** @return the query modifier held by this instance. */
    QueryModifier& getModifier() { return *_modifier; }

    /** Create a result processing configuration of Juniper for subsequent use.
     * @param config_name A symbolic prefix to be used in the fsearch
     *  configuration file (fsearchrc/fsearch.addon*). The default value
     *  reflects the Juniper 1.x.x usage where Juniper configuration variables
     *  are supplied as "juniper.dynsum.length value" pairs.
     *  If a configuration object gets a config name of "mysummaryfield" and
     *  "mysummaryfield.dynsum.length" exists as a property in the config file,
     *  that value is used; otherwise the default "juniper.dynsum.length"
     *  value is used.
     * @return a unique pointer to the Config for subsequent reference if
     *  initialization is done, a null pointer if an error occurred.
     */
    std::unique_ptr<Config> CreateConfig(const char* config_name = "juniper");

    /** Allocate a query handle for the given query, to be used in subsequent
     *  calls to Analyse for different hits. Performs the necessary per query
     *  processing for Juniper.
     * @param query The query to start result processing for.
     * @param juniperoptions The value of the special juniperoption URL
     *   parameter provided for this search. Juniper parses it to support
     *   optional behaviour such as user customization of teaser parameters,
     *   selective enabling of Juniper debugging/tracing features, and Juniper
     *   extensions to the query language.
     * @return A unique pointer to a QueryHandle.
     */
    std::unique_ptr<QueryHandle> CreateQueryHandle(const IQuery& query,
                                                   const char* juniperoptions);

    /** Add a rewriter for all terms that are prefixed with the given index.
     *  When Juniper encounters a term in the query tagged with this index,
     *  it assumes that the term has been subject to expansion, and applies
     *  the rewriter to all terms in all analysed documents before matching
     *  with the query.
     * @param index_name The index whose terms the rewriter applies to.
     * @param rewriter The rewriter to add.
     * @param for_query, for_document Presumably select whether the rewriter
     *   is used on the query side and/or the document side - TODO confirm
     *   against the implementation.
     */
    void AddRewriter(const char* index_name,
                     IRewriter* rewriter,
                     bool for_query,
                     bool for_document);

    /** Flush the registered rewriters. Mostly for testing: makes it possible
     *  to start each test with clean sheets.
     */
    void FlushRewriters();

private:
    IJuniperProperties*            _props;
    Fast_WordFolder*               _wordfolder;
    std::unique_ptr<QueryModifier> _modifier;
};

/** This function defines an equality relation over Juniper configs.
 *  @param conf1 One of the two Config objects to compare.
 *  @param conf2 The other Config object.
 *  @return true if a result handle previously acquired through use of one
 *     of the Config objects can be reused (typically to produce a
 *     differently looking teaser) with the other Config object.
 *     This is the case if the two config objects only differ in the teaser
 *     parameters (e.g. those named *.dynsum.*)
 */
bool AnalyseCompatible(Config* conf1, Config* conf2);

/** Perform initial content analysis on a query/content pair.
 *  Note that the content may either be a simple UTF-8 encoded string or a
 *  more advanced representation including document structure elements, as provided
 *  by the Juniper document processing interface (see dpinterface.h)
 * @param config The parameter configuration to use for the analysis
 * @param query The query, represented by a QueryHandle to base the analysis on.
 *    (previously generated by CreateQueryHandle)
 * @param docsum A pointer to the document summary to be analysed.
 * @param docsum_len The length in bytes of the document summary, including
 *    any meta information.
 * @param docid A 32 bit number uniquely identifying the document to be analysed
 * @param langid A unique 32 bit id representing the language which
 *    this document summary is to be analysed in context of.
 * @return A unique pointer to a Result
 */
std::unique_ptr<Result> Analyse(const Config& config, QueryHandle& query,
                const char* docsum, size_t docsum_len,
                uint32_t docid,
                uint32_t langid);

/** Get the computed relevancy of the processed content from the result.
 *  @param result_handle The result to retrieve the relevancy from.
 *  @return The relevancy (proximitymetric) of the processed content.
 */
long GetRelevancy(Result& result_handle);

/** Generate a teaser based on the provided analysis result
 *  @param result_handle a handle obtained by a previous call to Analyse
 *  @param alt_config An optional alternate config to use for this teaser generation.
 *    The purpose of alt_config is to allow generation of multiple teasers
 *    based on the same content and analysis. Defaults to a null pointer,
 *    meaning no alternate config is supplied.
 *  @return The generated Teaser object. This object is valid until result_handle is deleted.
 */
Summary* GetTeaser(Result& result_handle, const Config* alt_config = nullptr);

/** Retrieve log information based on the previous calls to this result handle.
 *  Note that for the log to be complete, the juniper log override entry in
 *  the summary field map must be placed after any other juniper override directives.
 * @param result_handle a handle obtained by a previous call to Analyse.
 * @return a summary description containing the Juniper log as a text field
 *   if any log information is available, or else an empty summary.
 *   This object is valid until result_handle is deleted.
 */
Summary* GetLog(Result& result_handle);

} // end namespace juniper