aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/test/java/com/yahoo/prelude/searcher/test/JuniperSearcherTestCase.java
blob: e064f8f2ba081effebd2ebb36eaa99a4b004bcb2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
// Copyright Yahoo. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.searcher.test;

import static org.junit.jupiter.api.Assertions.*;

import com.yahoo.component.ComponentId;
import com.yahoo.component.chain.Chain;
import com.yahoo.container.QrSearchersConfig;
import com.yahoo.data.access.simple.Value;
import com.yahoo.prelude.Index;
import com.yahoo.prelude.IndexFacts;
import com.yahoo.prelude.IndexModel;
import com.yahoo.prelude.SearchDefinition;
import com.yahoo.search.Query;
import com.yahoo.search.Result;
import com.yahoo.prelude.fastsearch.FastHit;
import com.yahoo.search.Searcher;
import com.yahoo.search.result.Hit;
import com.yahoo.search.result.Relevance;
import com.yahoo.search.searchchain.testutil.DocumentSourceSearcher;
import com.yahoo.prelude.searcher.JuniperSearcher;
import com.yahoo.search.searchchain.Execution;
import org.junit.jupiter.api.Test;

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Tests juniper highlighting, i.e. that {@link JuniperSearcher} turns the raw markup of a dynamic
 * summary field into {@code <hi>}/{@code <sep />} tags (or strips it when bolding is turned off).
 *
 * As exercised by the expectations below, the raw markup consists of U+001F (toggles highlighting),
 * U+001E (separator between summary fragments) and the interlinear annotation characters
 * U+FFF9/U+FFFA/U+FFFB wrapping an "original/stemmed" pair of which only the original is kept.
 *
 * @author Steinar Knutsen
 */
public class JuniperSearcherTestCase {

    /** Name of the summary field every test stores its content in and reads its result from. */
    private static final String TEASER_FIELD = "dynteaser";

    /** The query used when bolding is left at its default. */
    private static final String BOLDING_QUERY = "?query=12";

    /** The query used when bolding is explicitly turned off. */
    private static final String NO_BOLDING_QUERY = "?query=12&bolding=false";

    /**
     * Creates a search chain which always returns a result with one hit containing the given information.
     * The canned result is registered for both the bolding and the non-bolding query.
     *
     * @param sdName the search definition type of the returned hit
     * @param content the content of the "dynteaser" field of the returned hit
     * @return a chain with a JuniperSearcher in front of the document source holding the canned results
     */
    private Chain<Searcher> createSearchChain(String sdName, Object content) {
        JuniperSearcher searcher = new JuniperSearcher(new ComponentId("test"),
                                                       new QrSearchersConfig(new QrSearchersConfig.Builder()));

        DocumentSourceSearcher docsource = new DocumentSourceSearcher();
        addResult(new Query(BOLDING_QUERY), sdName, content, docsource);
        addResult(new Query(NO_BOLDING_QUERY), sdName, content, docsource);
        return new Chain<>(searcher, docsource);
    }

    /**
     * Registers, for the given query, a result with a single hit in the given document source.
     *
     * @param query the query the result is returned for
     * @param sdName the search definition type of the hit
     * @param content the content of the "dynteaser" field of the hit
     * @param docsource the document source to add the result to
     */
    private void addResult(Query query, String sdName, Object content, DocumentSourceSearcher docsource) {
        FastHit hit = new FastHit();
        hit.setId("http://abc.html");
        hit.setRelevance(new Relevance(1));
        hit.setField(Hit.SDDOCNAME_FIELD, sdName);
        hit.setField(TEASER_FIELD, content);

        Result result = new Result(query);
        result.hits().add(hit);
        docsource.addResult(query, result);
    }

    /** Creates a result of the search definition "one", with bolding on */
    private Result createResult(Object content) {
        return createResult("one", content, true);
    }

    /** Creates a result of the given search definition, with bolding on */
    private Result createResult(String sdName, String content) {
        return createResult(sdName, content, true);
    }

    /**
     * Searches and fills through a chain returning one hit with the given content.
     *
     * @param sdName the search definition type of the returned hit
     * @param content the content of the "dynteaser" field of the returned hit
     * @param bolding whether to use the default query (true) or the one with bolding=false (false)
     * @return the filled result
     */
    private Result createResult(String sdName, Object content, boolean bolding) {
        Chain<Searcher> chain = createSearchChain(sdName, content);
        Query query = new Query(bolding ? BOLDING_QUERY : NO_BOLDING_QUERY);
        Execution execution = createExecution(chain);
        Result result = execution.search(query);
        execution.fill(result);
        return result;
    }

    /** Creates an execution of the given chain whose index facts know the search definitions "one" and "two" */
    private Execution createExecution(Chain<Searcher> chain) {
        Map<String, List<String>> clusters = new LinkedHashMap<>();
        Collection<SearchDefinition> searchDefs = List.of(createSearchDefinitionOne(), createSearchDefinitionTwo());
        IndexModel indexModel = new IndexModel(clusters, searchDefs);
        return new Execution(chain, Execution.Context.createContextStub(new IndexFacts(indexModel)));
    }

    /**
     * Creates the search definition "one", having the indexes "dynteaser" and "otherteaser"
     * marked as dynamic summaries and "bigteaser" marked as a highlight summary.
     */
    private SearchDefinition createSearchDefinitionOne() {
        SearchDefinition one = new SearchDefinition("one");

        Index dynteaser = new Index(TEASER_FIELD);
        dynteaser.setDynamicSummary(true);
        one.addIndex(dynteaser);

        Index bigteaser = new Index("bigteaser");
        // The highlight flag belongs to bigteaser itself, not to the previously created dynteaser index
        bigteaser.setHighlightSummary(true);
        one.addIndex(bigteaser);

        Index otherteaser = new Index("otherteaser");
        otherteaser.setDynamicSummary(true);
        one.addIndex(otherteaser);

        return one;
    }

    /** Creates the search definition "two", which has no indexes and hence no fields to rewrite */
    private SearchDefinition createSearchDefinitionTwo() {
        return new SearchDefinition("two");
    }

    /** Asserts that the result has exactly one hit and that its "dynteaser" field renders as the expected string */
    private static void assertTeaser(String expected, Result result) {
        assertEquals(1, result.getHitCount());
        assertEquals(expected, result.hits().get(0).getField(TEASER_FIELD).toString());
    }

    @Test
    void testFieldRewriting() {
        assertTeaser("<hi>XYZ</hi><sep />QWE<hi>JKL</hi>ASD&",
                     createResult("\u001FXYZ\u001F\u001EQWE\u001FJKL\u001FASD&"));
        // Content without any markup passes through unchanged
        assertTeaser("a&b&c", createResult("a&b&c"));
    }

    @Test
    void test_field_rewriting_for_array_of_string_field() {
        var content = new Value.ArrayValue()
                .add("\u001Faaa\u001F\u001Ebbb\u001Fccc\u001Fddd")
                .add("\u001Feee\u001F\u001Efff\u001Fggg\u001Fhhh");
        // Each array element is rewritten on its own
        assertTeaser("[\"<hi>aaa</hi><sep />bbb<hi>ccc</hi>ddd\",\"<hi>eee</hi><sep />fff<hi>ggg</hi>hhh\"]",
                     createResult(content));
    }

    @Test
    void testNoRewritingDueToSearchDefinition() {
        // "two" declares no dynamic summary index, so the raw markup must be left as is
        assertTeaser("\u001FXYZ\u001F\u001EQWE\u001FJKL\u001FASD&",
                     createResult("two", "\u001FXYZ\u001F\u001EQWE\u001FJKL\u001FASD&"));
        // Stay on "two" here as well; using "one" would just repeat testFieldRewriting
        assertTeaser("a&b&c", createResult("two", "a&b&c"));
    }

    @Test
    void testBoldingEquals() {
        // The canned results are keyed on the query, so these two must not be considered equal
        assertNotEquals(new Query(BOLDING_QUERY), new Query(NO_BOLDING_QUERY));
    }

    @Test
    void testUnboldedRewriting() {
        // Without bolding the highlight markers are dropped and separators become "..."
        assertTeaser("XYZ...QWEJKLASD&",
                     createResult("one", "\u001FXYZ\u001F\u001EQWE\u001FJKL\u001FASD&", false));
    }

    @Test
    void testAnnotatedSummaryFields() {
        // The same annotated input is rendered both with and without bolding
        String annotated = "\uFFF9Feeding\uFFFAfeed\uFFFB \u001F\uFFF9documents\uFFFAdocument\uFFFB\u001F into Vespa \uFFF9is\uFFFAbe\u001Eincrement of a set of \u001F\uFFF9documents\uFFFAdocument\uFFFB\u001F fed into Vespa \uFFF9is\u001Efloat in XML when \u001Fdocument\u001F attribute \uFFF9is\uFFFAbe\uFFFB int\u001E";
        assertTeaser("Feeding <hi>documents</hi> into Vespa is<sep />increment of a set of <hi>documents</hi> fed into Vespa <sep />float in XML when <hi>document</hi> attribute is int<sep />",
                     createResult(annotated));
        assertTeaser("Feeding documents into Vespa is...increment of a set of documents fed into Vespa ...float in XML when document attribute is int...",
                     createResult("one", annotated, false));

        // The remaining cases cut annotations off at fragment borders in various ways
        assertTeaser("<sep />common the term is within the set of <hi>documents</hi>. Hence, unusual terms or phrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be <sep />",
                     createResult("\u001ecommon the term \uFFF9is\uFFFAbe\uFFFB within the set of \u001f\uFFF9documents\uFFFAdocument\uFFFB\u001f. Hence, unusual \uFFF9terms\uFFFAterm\uFFFB or \uFFF9phrases\uFFFAphrase\u001eadded\uFFFAadd\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9given\u001e"));

        assertTeaser("<sep />is within the set of <hi>documents</hi>. phrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be given<sep />",
                     createResult("\u001e\uFFF9is\uFFFAbe\uFFFB within the set of \u001f\uFFF9documents\uFFFAdocument\uFFFB\u001f. \uFFF9phrases\uFFFAphrase\uFFFB\u001E\uFFFAadd\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9given\uFFFA\u001e"));

        assertTeaser("<sep /> within the set of <hi>documents</hi>. phrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be <sep />",
                     createResult("\u001eis\uFFFAbe\uFFFB within the set of \u001f\uFFF9documents\uFFFAdocument\uFFFB\u001f. \uFFF9phrases\uFFFAphrase\u001Eadd\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9given\u001e"));

        assertTeaser("<sep /> within the set of <hi>documents</hi>. phrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be <sep />",
                     createResult("\u001e\uFFFAbe\uFFFB within the set of \u001f\uFFF9documents\uFFFAdocument\uFFFB\u001f. \uFFF9phrases\uFFFA\u001E\uFFFA\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9\u001e"));

        assertTeaser("<sep /> within the set of <hi>documents</hi>phrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be <sep />",
                     createResult("\u001e\uFFFAbe\uFFFB within the set of \u001f\uFFF9documents\uFFFAdocument\uFFFB\u001f\uFFF9phrases\uFFFA\u001E\uFFFA\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9\u001e"));

        assertTeaser("<sep /> within the set of documentsphrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be <sep />",
                     createResult("\u001e\uFFFAbe\uFFFB within the set of \uFFF9documents\uFFFAdocument\uFFFB\uFFF9phrases\uFFFA\u001E\uFFFA\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9\u001e"));
    }

    @Test
    void testThatIncompleteAnnotationWithHighlightIsIgnored() {
        // Look at bug 5707026 for details.
        assertTeaser("of<sep />! Angels <hi>Yahoo</hi>! Angles is<sep />",
                     createResult("of\u001e\u001fyahoo\u001f\uFFFB! \uFFF9Angels\uFFFAangels\uFFFB \uFFF9\u001fYahoo\u001f\uFFFA\u001fyahoo\u001f\uFFFB! \uFFF9Angles\uFFFAangels\uFFFB \uFFF9is\uFFFAbe\u001e"));

        assertTeaser("<sep />! News RSS <hi>Y</hi>!<sep />",
                     createResult("\u001e\u001fY\u001f\uFFFA\u001fy\u001f\uFFFB! \uFFF9News\uFFFAnews\uFFFB \uFFF9RSS\uFFFArss\uFFFB \uFFF9\u001fY\u001f\uFFFA\u001fy\u001f\uFFFB!\u001e"));
    }

    @Test
    void testThatIncompleteAnnotationWithHighlightAtTheBeginningIsIgnored() {
        // The leading annotation lacks its start; progressively less of it is present in each case
        String expected = "<sep /> Original<sep />";
        assertTeaser(expected,
                     createResult("\u001e\u001fIncomplete\uFFFAincomplete\uFFFB\u001f \uFFF9Original\uFFFAstemmed\uFFFB\u001e"));
        assertTeaser(expected,
                     createResult("\u001e\u001f\uFFFAincomplete\uFFFB\u001f \uFFF9Original\uFFFAstemmed\uFFFB\u001e"));
        assertTeaser(expected,
                     createResult("\u001e\u001fincomplete\uFFFB\u001f \uFFF9Original\uFFFAstemmed\uFFFB\u001e"));
    }

    @Test
    void testThatIncompleteAnnotationWithHighlightAtTheEndIsIgnored() {
        // The trailing annotation lacks its end; progressively less of it is present in each case
        String expected = "<sep />Original <sep />";
        assertTeaser(expected,
                     createResult("\u001e\uFFF9Original\uFFFAstemmed\uFFFB \u001f\uFFF9Incomplete\uFFFAincomplete\u001f\u001e"));
        assertTeaser(expected,
                     createResult("\u001e\uFFF9Original\uFFFAstemmed\uFFFB \u001f\uFFF9Incomplete\uFFFA\u001f\u001e"));
        assertTeaser(expected,
                     createResult("\u001e\uFFF9Original\uFFFAstemmed\uFFFB \u001f\uFFF9Incomplete\u001f\u001e"));
    }

    @Test
    void testExplicitTwoPhase() {
        Chain<Searcher> searchChain = createSearchChain("one", "\u001e\uFFFAbe\uFFFB within the set of \u001f\uFFF9documents\uFFFAdocument\uFFFB\u001f. \uFFF9phrases\uFFFA\u001E\uFFFA\uFFFB to as a remedy). Each of the \u001fdocument\u001f \uFFF9fields\uFFFAfield\uFFFB in a catalog can be \uFFF9\u001e");
        Query q = new Query(BOLDING_QUERY);

        // After the search phase alone the hit is present, but its summary field is not yet available
        Result check = createExecution(searchChain).search(q);
        assertEquals(1, check.getHitCount());
        assertNull(check.hits().get(0).getField(TEASER_FIELD));

        // Filling makes the (rewritten) field appear
        createExecution(searchChain).fill(check);
        assertTeaser("<sep /> within the set of <hi>documents</hi>. phrases<sep /> to as a remedy). Each of the <hi>document</hi> fields in a catalog can be <sep />",
                     check);
    }

    @Test
    void testCompoundWordsBolding() {
        // Two adjacent highlighted parts are merged into a single highlighted word
        assertTeaser("<sep />Test <hi>kommunikationsfehler</hi>",
                     createResult("\u001eTest \u001fkommunikations\u001f\u001ffehler\u001f"));
    }

}