aboutsummaryrefslogtreecommitdiffstats
path: root/container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java
blob: 7e36ed5719dd33343a7b012503d6947216fad294 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.prelude.query;

import com.yahoo.compress.IntegerCompressor;
import com.yahoo.prelude.query.textualrepresentation.Discloser;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.function.BiConsumer;
import java.util.function.Function;

import static java.util.Objects.requireNonNull;

/**
 * A term which contains a set of numerical ranges; a match with any of these indicates a match.
 * <p>
 * This is a stricter version of an {@link OrItem} containing multiple {@link RangeItem}s, where all
 * ranges must be specified against the same field, or pair of fields, and where all the left boundaries,
 * and all the right boundaries, share whether they are {@link Limit#INCLUSIVE} or {@link Limit#EXCLUSIVE}.
 * Furthermore, all overlapping ranges are joined, such that the set of ranges this represents is
 * guaranteed to be non-overlapping, and thus, sorted order of start and end boundaries is the same,
 * which allows efficient filtering of document points or ranges by whether they are included in any of
 * the ranges contained in this term. Note that touching ranges, like [3, 5) and [5, 7), will be joined
 * unless this item is declared to have exclusive boundaries at both ends, or if one of the items is contained
 * within the other, e.g., (3, 5) and (5, 5) (see below for explanation of weirdness).
 * <p>
 * In the absence of a range type in the schema definition, all ranges (and points) contained in documents
 * are taken to be inclusive at both ends. However, intersection of range starts from documents and range
 * ends from the query is inclusive, i.e., matching on equality, iff. both boundaries are inclusive, and
 * likewise for intersection between range starts from query and range ends from documents. It is therefore
 * possible to achieve any matching by choosing inclusiveness for the query ranges appropriately.
 * For the case where document ranges are to be treated as exclusive, and the query has single points, this
 * becomes weird, since the ranges [1, 1), (1, 1] and (1, 1) are all logically empty, but this still works :)
 * <p>
 * Unless ranges are added in ascending start order, the implementation lazily sorts and merges ranges,
 * when a representation of the item is required. This is typically when the query is serialized and sent
 * to the backend, or when trace information is written, or {@link #toString()} is called on the item.
 * Adding ranges in ascending order is much faster than not; ascending order here has the rather lax
 * requirement that each added interval is not completely before the current last interval.
 *
 * @author jonmv
 */
public class MultiRangeItem<Type extends Number> extends MultiTermItem {

    public enum Limit {
        /** Points match the boundaries of ranges which are inclusive. */
        INCLUSIVE,
        /** Points do not match the boundaries of ranges which are exclusive. */
        EXCLUSIVE;
    }

    /** Type of numbers stored used to describe the ranges stored in a {@link MultiRangeItem}. */
    public static class NumberType<Type extends Number> {

        public static final NumberType<Integer> INTEGER = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, ranges ->
                switch (IntegerCompressor.compressionMode(ranges.get(0).start, ranges.get(ranges.size() - 1).end)) {
                    case NONE -> NumberEncoder.UNIFORM_INTEGER;
                    case COMPRESSED -> NumberEncoder.COMPRESSED_INTEGER;
                    case COMPRESSED_POSITIVE -> NumberEncoder.COMPRESSED_POSITIVE_INTEGER;
                });
        public static final NumberType<Long>    LONG    = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, __ -> NumberEncoder.UNIFORM_LONG);
        public static final NumberType<Double>  DOUBLE  = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, __ -> NumberEncoder.UNIFORM_DOUBLE);

        private final Comparator<Type> comparator;
        private final Function<List<Range<Type>>, NumberEncoder<Type>> encoder;

        private NumberType(Comparator<Type> comparator, Function<List<Range<Type>>, NumberEncoder<Type>> encoder) {
            this.comparator = comparator;
            this.encoder = encoder;
        }

        NumberEncoder<Type> encoderFor(List<Range<Type>> ranges) {
            return encoder.apply(ranges);
        }

    }

    static class Range<Type extends Number> {

        final Type start;
        final Type end;

        Range(Type start, Type end) {
            this.start = start;
            this.end = end;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) return true;
            if (o == null || getClass() != o.getClass()) return false;
            Range<?> range = (Range<?>) o;
            return start.equals(range.start) && end.equals(range.end);
        }

        @Override
        public int hashCode() {
            return Objects.hash(start, end);
        }

        @Override
        public String toString() {
            return "(" + start + ", " + end + ")";
        }

    }

    private static class NumberEncoder<Type extends Number> {

        static NumberEncoder<Integer> UNIFORM_INTEGER             = new NumberEncoder<>(0, ByteBuffer::putInt);
        static NumberEncoder<Long>    UNIFORM_LONG                = new NumberEncoder<>(1, ByteBuffer::putLong);
        static NumberEncoder<Double>  UNIFORM_DOUBLE              = new NumberEncoder<>(2, ByteBuffer::putDouble);
        static NumberEncoder<Integer> COMPRESSED_INTEGER          = new NumberEncoder<>(3, (b, n) -> IntegerCompressor.putCompressedNumber(n, b));
        static NumberEncoder<Integer> COMPRESSED_POSITIVE_INTEGER = new NumberEncoder<>(4, (b, n) -> IntegerCompressor.putCompressedPositiveNumber(n, b));

        final byte id; // 5 bits
        final BiConsumer<ByteBuffer, Type> serializer;

        NumberEncoder(int id, BiConsumer<ByteBuffer, Type> serializer) {
            this.id = (byte) id;
            this.serializer = serializer;
        }

    }

    private final NumberType<Type> type;
    private final String startIndex;
    private final String endIndex;
    private final boolean startInclusive;
    private final boolean endInclusive;
    private List<Range<Type>> ranges = new ArrayList<>();
    private boolean sorted = true;
    private NumberEncoder<Type> encoder;

    /** A multi range item with ranges to intersect with ranges given by the pair of index names. */
    private MultiRangeItem(NumberType<Type> type, String startIndex, Limit startInclusive, String endIndex, Limit endInclusive) {
        if (startIndex.isBlank()) throw new IllegalArgumentException("start index name must be non-blank");
        if (endIndex.isBlank()) throw new IllegalArgumentException("end index name must be non-blank");
        this.type = requireNonNull(type);
        this.startIndex = startIndex;
        this.endIndex = endIndex;
        this.startInclusive = startInclusive == requireNonNull(Limit.INCLUSIVE);
        this.endInclusive = endInclusive == requireNonNull(Limit.INCLUSIVE);
    }

    /** A multi range item to intersect with the ranges defined by the two given index names. */
    public static <Type extends Number> MultiRangeItem<Type> overRanges(NumberType<Type> type,
                                                                        String startIndex, Limit startInclusive,
                                                                        String endIndex, Limit endInclusive) {
        return new MultiRangeItem<>(type, startIndex, startInclusive, endIndex, endInclusive);
    }

    /** A multi range item to contain the points defined by the single given index name. */
    public static <Type extends Number> MultiRangeItem<Type> overPoints(NumberType<Type> type, String index,
                                                                        Limit startInclusive, Limit endInclusive) {
        return overRanges(type, index, startInclusive, index, endInclusive);
    }

    /** Adds the given point to this item. More efficient when each added point is not completely before the current last one. */
    public MultiRangeItem<Type> addPoint(Type point) {
        return addRange(point, point);
    }

    /** Adds the given range to this item. More efficient when each added range is not completely before the current last one. */
    public MultiRangeItem<Type> addRange(Type start, Type end) {
        if (Double.isNaN(start.doubleValue()))
            throw new IllegalArgumentException("range start cannot be NaN");
        if (Double.isNaN(end.doubleValue()))
            throw new IllegalArgumentException("range end cannot be NaN");
        if (type.comparator.compare(start, end) > 0)
            throw new IllegalArgumentException("ranges must satisfy start <= end, but got " + start + " > " + end);

        Range<Type> range = new Range<>(start, end);
        // If the list of ranges is still sorted, we try to keep it sorted.
        if (sorted && ! ranges.isEmpty()) {
            // If the new range is not completely before the current last range, we can prune, and keep a sorted list.
            Range<Type> last = ranges.get(ranges.size() - 1);
            sorted = overlap(range, last);
            if (sorted) {
                // Pop all ranges that overlap with the new range ...
                Range<Type> popped = range;
                for (int i = ranges.size(); i-- > 0; )
                    if (overlap(ranges.get(i), range))
                        popped = ranges.remove(i);
                    else
                        break;
                // ... then add a new range, possibly with start and end from the popped and last ranges.
                ranges.add(popped == range ? range : new Range<>(min(popped.start, range.start), max(last.end, range.end)));
                return this;
            }
        }
        ranges.add(range);
        return this;
    }

    /**
     * Assumption: first.start <= second.start
     * Then there is overlap if
     * first.end  > second.start
     * first.end == second.start AND either of
     *   ranges are inclusive at either end, OR
     *   either range is a (logically empty) point (a, a), since this dominated by (or equal to) the other range.
     */
    private boolean overlap(Range<Type> first, Range<Type> second) {
        int comparison = type.comparator.compare(first.end, second.start);
        return   comparison  > 0
              || comparison == 0 && (startInclusive || endInclusive || first.start.equals(first.end) || second.start.equals(second.end));
    }

    private Type min(Type a, Type b) {
        return type.comparator.compare(a, b) <= 0 ? a : b;
    }

    private Type max(Type a, Type b) {
        return type.comparator.compare(a, b) >= 0 ? a : b;
    }

    List<Range<Type>> sortedRanges() {
        if (sorted)
            return ranges;

        ranges.sort((r1, r2) -> type.comparator.compare(r1.start, r2.start));

        List<Range<Type>> pruned = new ArrayList<>();
        Range<Type> start = ranges.get(0), end = start;
        for (int i = 1; i < ranges.size(); i++) {
            Range<Type> range = ranges.get(i);
            if (overlap(end, range)) {
                end = type.comparator.compare(end.end, range.end) < 0 ? range : end;
            }
            else {
                pruned.add(start == end ? start : new Range<>(start.start, end.end));
                start = end = range;
            }
        }
        pruned.add(start == end ? start : new Range<>(start.start, end.end));

        sorted = true;
        return ranges = pruned;
    }

    @Override
    public void setIndexName(String index) {
        throw new UnsupportedOperationException("index cannot be changed for " + MultiRangeItem.class.getSimpleName());
    }

    @Override
    OperatorType operatorType() {
        return OperatorType.OR;
    }

    @Override
    TermType termType() {
        return TermType.RANGES;
    }

    @Override
    int terms() {
        return sortedRanges().size();
    }

    @Override
    void encodeBlueprint(ByteBuffer buffer) {
        boolean sameIndex = startIndex.equals(endIndex);
        byte metadata = 0;
        if (sameIndex)      metadata |= 0b00000001;
        if (startInclusive) metadata |= 0b00000010;
        if (endInclusive)   metadata |= 0b00000100;

        encoder = type.encoderFor(sortedRanges());
        metadata |= (byte)(encoder.id << 3);

        buffer.put(metadata);
        putString(startIndex, buffer);
        if ( ! sameIndex) putString(endIndex, buffer);
    }

    @Override
    void encodeTerms(ByteBuffer buffer) {
        for (Range<Type> range : sortedRanges()) {
            encoder.serializer.accept(buffer, range.start);
            encoder.serializer.accept(buffer, range.end);
        }
    }

    @Override
    protected void appendBodyString(StringBuilder buffer) {
        asCompositeItem().appendBodyString(buffer);
    }

    @Override
    public void disclose(Discloser discloser) {
        asCompositeItem().disclose(discloser);
    }

    @Override
    @SuppressWarnings("unchecked")
    public Item clone() {
        MultiRangeItem<Type> clone = (MultiRangeItem<Type>) super.clone();
        clone.ranges = new ArrayList<>(ranges);
        return clone;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        if ( ! super.equals(o)) return false;
        MultiRangeItem<?> that = (MultiRangeItem<?>) o;
        return    type == that.type
               && startInclusive == that.startInclusive
               && endInclusive == that.endInclusive
               && startIndex.equals(that.startIndex)
               && endIndex.equals(that.endIndex)
               && sortedRanges().equals(that.sortedRanges());
    }

    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), type, startIndex, endIndex, startInclusive, endInclusive, sortedRanges());
    }

    @Override
    Item asCompositeItem() {
        OrItem root = new OrItem();
        if (startIndex.equals(endIndex)) {
            for (Range<Type> range : sortedRanges()) {
                root.addItem(new IntItem(new com.yahoo.prelude.query.Limit(range.start, startInclusive),
                                         new com.yahoo.prelude.query.Limit(range.end, endInclusive),
                                         startIndex));
            }
        }
        else {
            for (Range<Type> range : sortedRanges()) {
                AndItem pair = new AndItem();
                pair.addItem(new IntItem(new com.yahoo.prelude.query.Limit(range.start, startInclusive),
                                         com.yahoo.prelude.query.Limit.POSITIVE_INFINITY,
                                         startIndex));
                pair.addItem(new IntItem(com.yahoo.prelude.query.Limit.NEGATIVE_INFINITY,
                                         new com.yahoo.prelude.query.Limit(range.end, endInclusive),
                                         endIndex));
                root.addItem(pair);
            }
        }
        return root;
    }

}