// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.query; import com.yahoo.compress.IntegerCompressor; import com.yahoo.prelude.query.textualrepresentation.Discloser; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Comparator; import java.util.List; import java.util.Objects; import java.util.function.BiConsumer; import java.util.function.Function; import static java.util.Objects.requireNonNull; /** * A term which contains a set of numerical ranges; a match with any of these indicates a match. *

* This is a stricter version of an {@link OrItem} containing multiple {@link RangeItem}s, where all * ranges must be specified against the same field, or pair of fields, and where all the left boundaries, * and all the right boundaries, share whether they are {@link Limit#INCLUSIVE} or {@link Limit#EXCLUSIVE}. * Furthermore, all overlapping ranges are joined, such that the set of ranges this represents is * guaranteed to be non-overlapping, and thus, sorted order of start and end boundaries is the same, * which allows efficient filtering of document points or ranges by whether they are included in any of * the ranges contained in this term. Note that touching ranges, like [3, 5) and [5, 7), will be joined * unless this item is declared to have exclusive boundaries at both ends, or if one of the items is contained * within the other, e.g., (3, 5) and (5, 5) (see below for explanation of weirdness). *

* In the absence of a range type in the schema definition, all ranges (and points) contained in documents * are taken to be inclusive at both ends. However, intersection of range starts from documents and range * ends from the query is inclusive, i.e., matching on equality, iff. both boundaries are inclusive, and * likewise for intersection between range starts from query and range ends from documents. It is therefore * possible to achieve any matching by choosing inclusiveness for the query ranges appropriately. * For the case where document ranges are to be treated as exclusive, and the query has single points, this * becomes weird, since the ranges [1, 1), (1, 1] and (1, 1) are all logically empty, but this still works :) *

* Unless ranges are added in ascending start order, the implementation lazily sorts and merges ranges, * when a representation of the item is required. This is typically when the query is serialized and sent * to the backend, or when trace information is written, or {@link #toString()} is called on the item. * Adding ranges in ascending order is much faster than not; ascending order here has the rather lax * requirement that each added interval is not completely before the current last interval. * * @author jonmv */ public class MultiRangeItem extends MultiTermItem { public enum Limit { /** Points match the boundaries of ranges which are inclusive. */ INCLUSIVE, /** Points do not match the boundaries of ranges which are exclusive. */ EXCLUSIVE; } /** Type of numbers stored used to describe the ranges stored in a {@link MultiRangeItem}. */ public static class NumberType { public static final NumberType INTEGER = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, ranges -> switch (IntegerCompressor.compressionMode(ranges.get(0).start, ranges.get(ranges.size() - 1).end)) { case NONE -> NumberEncoder.UNIFORM_INTEGER; case COMPRESSED -> NumberEncoder.COMPRESSED_INTEGER; case COMPRESSED_POSITIVE -> NumberEncoder.COMPRESSED_POSITIVE_INTEGER; }); public static final NumberType LONG = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, __ -> NumberEncoder.UNIFORM_LONG); public static final NumberType DOUBLE = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, __ -> NumberEncoder.UNIFORM_DOUBLE); private final Comparator comparator; private final Function>, NumberEncoder> encoder; private NumberType(Comparator comparator, Function>, NumberEncoder> encoder) { this.comparator = comparator; this.encoder = encoder; } NumberEncoder encoderFor(List> ranges) { return encoder.apply(ranges); } } static class Range { final Type start; final Type end; Range(Type start, Type end) { this.start = start; this.end = end; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Range range = (Range) o; return start.equals(range.start) && end.equals(range.end); } @Override public int hashCode() { return Objects.hash(start, end); } @Override public String toString() { return "(" + start + ", " + end + ")"; } } private static class NumberEncoder { static NumberEncoder UNIFORM_INTEGER = new NumberEncoder<>(0, ByteBuffer::putInt); static NumberEncoder UNIFORM_LONG = new NumberEncoder<>(1, ByteBuffer::putLong); static NumberEncoder UNIFORM_DOUBLE = new NumberEncoder<>(2, ByteBuffer::putDouble); static NumberEncoder COMPRESSED_INTEGER = new NumberEncoder<>(3, (b, n) -> IntegerCompressor.putCompressedNumber(n, b)); static NumberEncoder COMPRESSED_POSITIVE_INTEGER = new NumberEncoder<>(4, (b, n) -> IntegerCompressor.putCompressedPositiveNumber(n, b)); final byte id; // 5 bits final BiConsumer serializer; NumberEncoder(int id, BiConsumer serializer) { this.id = (byte) id; this.serializer = serializer; } } private final NumberType type; private final String startIndex; private final String endIndex; private final boolean startInclusive; private final boolean endInclusive; private List> ranges = new ArrayList<>(); private boolean sorted = true; private NumberEncoder encoder; /** A multi range item with ranges to intersect with ranges given by the pair of index names. */ private MultiRangeItem(NumberType type, String startIndex, Limit startInclusive, String endIndex, Limit endInclusive) { if (startIndex.isBlank()) throw new IllegalArgumentException("start index name must be non-blank"); if (endIndex.isBlank()) throw new IllegalArgumentException("end index name must be non-blank"); this.type = requireNonNull(type); this.startIndex = startIndex; this.endIndex = endIndex; this.startInclusive = startInclusive == requireNonNull(Limit.INCLUSIVE); this.endInclusive = endInclusive == requireNonNull(Limit.INCLUSIVE); } /** A multi range item to intersect with the ranges defined by the two given index names. */ public static MultiRangeItem overRanges(NumberType type, String startIndex, Limit startInclusive, String endIndex, Limit endInclusive) { return new MultiRangeItem<>(type, startIndex, startInclusive, endIndex, endInclusive); } /** A multi range item to contain the points defined by the single given index name. */ public static MultiRangeItem overPoints(NumberType type, String index, Limit startInclusive, Limit endInclusive) { return overRanges(type, index, startInclusive, index, endInclusive); } /** Adds the given point to this item. More efficient when each added point is not completely before the current last one. */ public MultiRangeItem addPoint(Type point) { return addRange(point, point); } /** Adds the given range to this item. More efficient when each added range is not completely before the current last one. */ public MultiRangeItem addRange(Type start, Type end) { if (Double.isNaN(start.doubleValue())) throw new IllegalArgumentException("range start cannot be NaN"); if (Double.isNaN(end.doubleValue())) throw new IllegalArgumentException("range end cannot be NaN"); if (type.comparator.compare(start, end) > 0) throw new IllegalArgumentException("ranges must satisfy start <= end, but got " + start + " > " + end); Range range = new Range<>(start, end); // If the list of ranges is still sorted, we try to keep it sorted. if (sorted && ! ranges.isEmpty()) { // If the new range is not completely before the current last range, we can prune, and keep a sorted list. Range last = ranges.get(ranges.size() - 1); sorted = overlap(range, last); if (sorted) { // Pop all ranges that overlap with the new range ... Range popped = range; for (int i = ranges.size(); i-- > 0; ) if (overlap(ranges.get(i), range)) popped = ranges.remove(i); else break; // ... then add a new range, possibly with start and end from the popped and last ranges. ranges.add(popped == range ? range : new Range<>(min(popped.start, range.start), max(last.end, range.end))); return this; } } ranges.add(range); return this; } /** * Assumption: first.start <= second.start * Then there is overlap if * first.end > second.start * first.end == second.start AND either of * ranges are inclusive at either end, OR * either range is a (logically empty) point (a, a), since this dominated by (or equal to) the other range. */ private boolean overlap(Range first, Range second) { int comparison = type.comparator.compare(first.end, second.start); return comparison > 0 || comparison == 0 && (startInclusive || endInclusive || first.start.equals(first.end) || second.start.equals(second.end)); } private Type min(Type a, Type b) { return type.comparator.compare(a, b) <= 0 ? a : b; } private Type max(Type a, Type b) { return type.comparator.compare(a, b) >= 0 ? a : b; } List> sortedRanges() { if (sorted) return ranges; ranges.sort((r1, r2) -> type.comparator.compare(r1.start, r2.start)); List> pruned = new ArrayList<>(); Range start = ranges.get(0), end = start; for (int i = 1; i < ranges.size(); i++) { Range range = ranges.get(i); if (overlap(end, range)) { end = type.comparator.compare(end.end, range.end) < 0 ? range : end; } else { pruned.add(start == end ? start : new Range<>(start.start, end.end)); start = end = range; } } pruned.add(start == end ? start : new Range<>(start.start, end.end)); sorted = true; return ranges = pruned; } @Override public void setIndexName(String index) { throw new UnsupportedOperationException("index cannot be changed for " + MultiRangeItem.class.getSimpleName()); } @Override OperatorType operatorType() { return OperatorType.OR; } @Override TermType termType() { return TermType.RANGES; } @Override int terms() { return sortedRanges().size(); } @Override void encodeBlueprint(ByteBuffer buffer) { boolean sameIndex = startIndex.equals(endIndex); byte metadata = 0; if (sameIndex) metadata |= 0b00000001; if (startInclusive) metadata |= 0b00000010; if (endInclusive) metadata |= 0b00000100; encoder = type.encoderFor(sortedRanges()); metadata |= (byte)(encoder.id << 3); buffer.put(metadata); putString(startIndex, buffer); if ( ! sameIndex) putString(endIndex, buffer); } @Override void encodeTerms(ByteBuffer buffer) { for (Range range : sortedRanges()) { encoder.serializer.accept(buffer, range.start); encoder.serializer.accept(buffer, range.end); } } @Override protected void appendBodyString(StringBuilder buffer) { asCompositeItem().appendBodyString(buffer); } @Override public void disclose(Discloser discloser) { asCompositeItem().disclose(discloser); } @Override @SuppressWarnings("unchecked") public Item clone() { MultiRangeItem clone = (MultiRangeItem) super.clone(); clone.ranges = new ArrayList<>(ranges); return clone; } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; if ( ! super.equals(o)) return false; MultiRangeItem that = (MultiRangeItem) o; return type == that.type && startInclusive == that.startInclusive && endInclusive == that.endInclusive && startIndex.equals(that.startIndex) && endIndex.equals(that.endIndex) && sortedRanges().equals(that.sortedRanges()); } @Override public int hashCode() { return Objects.hash(super.hashCode(), type, startIndex, endIndex, startInclusive, endInclusive, sortedRanges()); } @Override Item asCompositeItem() { OrItem root = new OrItem(); if (startIndex.equals(endIndex)) { for (Range range : sortedRanges()) { root.addItem(new IntItem(new com.yahoo.prelude.query.Limit(range.start, startInclusive), new com.yahoo.prelude.query.Limit(range.end, endInclusive), startIndex)); } } else { for (Range range : sortedRanges()) { AndItem pair = new AndItem(); pair.addItem(new IntItem(new com.yahoo.prelude.query.Limit(range.start, startInclusive), com.yahoo.prelude.query.Limit.POSITIVE_INFINITY, startIndex)); pair.addItem(new IntItem(com.yahoo.prelude.query.Limit.NEGATIVE_INFINITY, new com.yahoo.prelude.query.Limit(range.end, endInclusive), endIndex)); root.addItem(pair); } } return root; } }