diff options
author | jonmv <venstad@gmail.com> | 2022-10-25 11:56:49 +0200 |
---|---|---|
committer | jonmv <venstad@gmail.com> | 2022-10-26 12:07:18 +0200 |
commit | 4961b091bad43c3ac5809b1229ff473dc5663fed (patch) | |
tree | 780708fab260bc31bafc7fd047df827062b05281 /container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java | |
parent | 731208d1aef2d3fb107b9f4fbfc4a733f8569841 (diff) |
Separate handling of different number types
Diffstat (limited to 'container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java')
-rw-r--r-- | container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java | 167 |
1 files changed, 106 insertions, 61 deletions
diff --git a/container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java b/container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java index 27696b85da3..dfd26d5df6d 100644 --- a/container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java +++ b/container-search/src/main/java/com/yahoo/prelude/query/MultiRangeItem.java @@ -1,22 +1,18 @@ // Copyright Verizon Media. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. package com.yahoo.prelude.query; +import com.yahoo.compress.IntegerCompressor; import com.yahoo.prelude.query.textualrepresentation.Discloser; import java.nio.ByteBuffer; -import java.util.AbstractMap; -import java.util.AbstractSet; import java.util.ArrayList; import java.util.Comparator; -import java.util.Iterator; +import java.util.ConcurrentModificationException; import java.util.List; -import java.util.Map; import java.util.Objects; -import java.util.Set; -import java.util.SortedMap; -import java.util.TreeMap; +import java.util.function.BiConsumer; +import java.util.function.Function; -import static java.util.Comparator.comparingDouble; import static java.util.Objects.requireNonNull; /** @@ -36,7 +32,7 @@ import static java.util.Objects.requireNonNull; * are taken to be inclusive at both ends. However, intersection of range starts from documents and range * ends from the query is inclusive, i.e., matching on equality, iff. both boundaries are inclusive, and * likewise for intersection between range starts from query and range ends from documents. It is therefore - * possible to achieve any matching by choosing inclusiveness for the query ranges properly. + * possible to achieve any matching by choosing inclusiveness for the query ranges appropriately. * For the case where document ranges are to be treated as exclusive, and the query has single points, this * becomes weird, since the ranges [1, 1), (1, 1] and (1, 1) are all logically empty, but this still works :) * <p> @@ -46,11 +42,9 @@ import static java.util.Objects.requireNonNull; * Adding ranges in ascending order is much faster than not; ascending order here has the rather lax * requirement that each added interval is not completely before the current last interval. * - * todo: cover all three cases of single index + ranges, dual index + ranges, dual index + points - * * @author jonmv */ -public class MultiRangeItem extends MultiTermItem { +public class MultiRangeItem<Type extends Number> extends MultiTermItem { public enum Limit { /** Points match the boundaries of ranges which are inclusive. */ @@ -59,12 +53,38 @@ public class MultiRangeItem extends MultiTermItem { EXCLUSIVE; } - static class Range { + /** Type of numbers stored used to describe the ranges stored in a {@link MultiRangeItem}. */ + public static class NumberType<Type extends Number> { + + public static final NumberType<Integer> INTEGER = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, ranges -> + switch (IntegerCompressor.compressionMode(ranges.get(0).start, ranges.get(ranges.size() - 1).end)) { + case NONE -> NumberEncoder.UNIFORM_INTEGER; + case COMPRESSED -> NumberEncoder.COMPRESSED_INTEGER; + case COMPRESSED_POSITIVE -> NumberEncoder.COMPRESSED_POSITIVE_INTEGER; + }); + public static final NumberType<Long> LONG = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, __ -> NumberEncoder.UNIFORM_LONG); + public static final NumberType<Double> DOUBLE = new NumberType<>((a, b) -> a < b ? -1 : a > b ? 1 : 0, __ -> NumberEncoder.UNIFORM_DOUBLE); + + private final Comparator<Type> comparator; + private final Function<List<Range<Type>>, NumberEncoder<Type>> encoder; + + private NumberType(Comparator<Type> comparator, Function<List<Range<Type>>, NumberEncoder<Type>> encoder) { + this.comparator = comparator; + this.encoder = encoder; + } + + NumberEncoder<Type> encoderFor(List<Range<Type>> ranges) { + return encoder.apply(ranges); + } + + } + + static class Range<Type extends Number> { - final Number start; - final Number end; + final Type start; + final Type end; - Range(Number start, Number end) { + Range(Type start, Type end) { this.start = start; this.end = end; } @@ -73,7 +93,7 @@ public class MultiRangeItem extends MultiTermItem { public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - Range range = (Range) o; + Range<?> range = (Range<?>) o; return start.equals(range.start) && end.equals(range.end); } @@ -89,70 +109,87 @@ public class MultiRangeItem extends MultiTermItem { } - static final Comparator<Number> comparator = (a, b) -> { - double u = a.doubleValue(), v = b.doubleValue(); - return u < v ? -1 : u > v ? 1 : 0; - }; + private static class NumberEncoder<Type extends Number> { + static NumberEncoder<Integer> UNIFORM_INTEGER = new NumberEncoder<>(0, ByteBuffer::putInt); + static NumberEncoder<Long> UNIFORM_LONG = new NumberEncoder<>(1, ByteBuffer::putLong); + static NumberEncoder<Double> UNIFORM_DOUBLE = new NumberEncoder<>(2, ByteBuffer::putDouble); + static NumberEncoder<Integer> COMPRESSED_INTEGER = new NumberEncoder<>(3, (b, n) -> IntegerCompressor.putCompressedNumber(n, b)); + static NumberEncoder<Integer> COMPRESSED_POSITIVE_INTEGER = new NumberEncoder<>(4, (b, n) -> IntegerCompressor.putCompressedPositiveNumber(n, b)); + + final byte id; // 5 bits + final BiConsumer<ByteBuffer, Type> serializer; + + NumberEncoder(int id, BiConsumer<ByteBuffer, Type> serializer) { + this.id = (byte) id; + this.serializer = serializer; + } + + } + + private final NumberType<Type> type; private final String startIndex; private final String endIndex; private final boolean startInclusive; private final boolean endInclusive; - private List<Range> ranges = new ArrayList<>(); + private List<Range<Type>> ranges = new ArrayList<>(); private boolean sorted = true; - + private NumberEncoder<Type> encoder; /** A multi range item with ranges to intersect with ranges given by the pair of index names. */ - private MultiRangeItem(String startIndex, Limit startInclusive, - String endIndex, Limit endInclusive) { + private MultiRangeItem(NumberType<Type> type, String startIndex, Limit startInclusive, String endIndex, Limit endInclusive) { if (startIndex.isBlank()) throw new IllegalArgumentException("start index name must be non-blank"); if (endIndex.isBlank()) throw new IllegalArgumentException("end index name must be non-blank"); + this.type = requireNonNull(type); this.startIndex = startIndex; this.endIndex = endIndex; this.startInclusive = startInclusive == requireNonNull(Limit.INCLUSIVE); this.endInclusive = endInclusive == requireNonNull(Limit.INCLUSIVE); } - /** A multi range item with the given ranges to intersect with the ranges defined by the two given index names. */ - public static MultiRangeItem overRanges(String startIndex, Limit startInclusive, String endIndex, Limit endInclusive) { - return new MultiRangeItem(startIndex, startInclusive, endIndex, endInclusive); + /** A multi range item to intersect with the ranges defined by the two given index names. */ + public static <Type extends Number> MultiRangeItem<Type> overRanges(NumberType<Type> type, + String startIndex, Limit startInclusive, + String endIndex, Limit endInclusive) { + return new MultiRangeItem<>(type, startIndex, startInclusive, endIndex, endInclusive); } - /** A multi range item with ranges to contain the points defined by the single given index name. */ - public static MultiRangeItem overPoints(String index, Limit startInclusive, Limit endInclusive) { - return overRanges(index, startInclusive, index, endInclusive); + /** A multi range item to contain the points defined by the single given index name. */ + public static <Type extends Number> MultiRangeItem<Type> overPoints(NumberType<Type> type, String index, + Limit startInclusive, Limit endInclusive) { + return overRanges(type, index, startInclusive, index, endInclusive); } /** Adds the given point to this item. More efficient when each added point is not completely before the current last one. */ - public MultiRangeItem addPoint(Number point) { + public MultiRangeItem<Type> addPoint(Type point) { return addRange(point, point); } /** Adds the given range to this item. More efficient when each added range is not completely before the current last one. */ - public MultiRangeItem addRange(Number start, Number end) { + public MultiRangeItem<Type> addRange(Type start, Type end) { if (Double.isNaN(start.doubleValue())) throw new IllegalArgumentException("range start cannot be NaN"); if (Double.isNaN(end.doubleValue())) throw new IllegalArgumentException("range end cannot be NaN"); - if (comparator.compare(start, end) > 0) + if (type.comparator.compare(start, end) > 0) throw new IllegalArgumentException("ranges must satisfy start <= end, but got " + start + " > " + end); - Range range = new Range(start, end); + Range<Type> range = new Range<>(start, end); // If the list of ranges is still sorted, we try to keep it sorted. if (sorted && ! ranges.isEmpty()) { // If the new range is not completely before the current last range, we can prune, and keep a sorted list. - Range last = ranges.get(ranges.size() - 1); + Range<Type> last = ranges.get(ranges.size() - 1); sorted = overlap(range.end, last.start); if (sorted) { // Pop all ranges that overlap with the new range ... - Range popped = range; + Range<Type> popped = range; for (int i = ranges.size(); i-- > 0; ) if (overlap(ranges.get(i).end, range.start)) popped = ranges.remove(i); else break; // ... then add a new range, possibly with start and end from the popped and last ranges. - ranges.add(popped == range ? range : new Range(min(popped.start, range.start), max(last.end, range.end))); + ranges.add(popped == range ? range : new Range<>(min(popped.start, range.start), max(last.end, range.end))); return this; } } @@ -165,40 +202,42 @@ public class MultiRangeItem extends MultiTermItem { return comparison > 0 || comparison == 0 && startInclusive | endInclusive; } - private Number min(Number a, Number b) { - return comparator.compare(a, b) <= 0 ? a : b; + private Type min(Type a, Type b) { + return type.comparator.compare(a, b) <= 0 ? a : b; } - private Number max(Number a, Number b) { - return comparator.compare(a, b) >= 0 ? a : b; + private Type max(Type a, Type b) { + return type.comparator.compare(a, b) >= 0 ? a : b; } - List<Range> sortedRanges() { + List<Range<Type>> sortedRanges() { if (sorted) return ranges; - ranges.sort(comparingDouble(range -> range.start.doubleValue())); + ranges.sort((r1, r2) -> type.comparator.compare(r1.start, r2.start)); - List<Range> pruned = new ArrayList<>(); - Range start = ranges.get(0), end = start; + List<Range<Type>> pruned = new ArrayList<>(); + Range<Type> start = ranges.get(0), end = start; for (int i = 1; i < ranges.size(); i++) { - Range range = ranges.get(i); + Range<Type> range = ranges.get(i); if (overlap(end.end, range.start)) { - end = comparator.compare(end.end, range.end) < 0 ? range : end; + end = type.comparator.compare(end.end, range.end) < 0 ? range : end; } else { - pruned.add(start == end ? start : new Range(start.start, end.end)); + pruned.add(start == end ? start : new Range<>(start.start, end.end)); start = end = range; } } - pruned.add(start == end ? start : new Range(start.start, end.end)); + pruned.add(start == end ? start : new Range<>(start.start, end.end)); sorted = true; return ranges = pruned; } @Override - public void setIndexName(String index) { } // This makes no sense :< + public void setIndexName(String index) { + throw new UnsupportedOperationException("index cannot be changed for " + MultiRangeItem.class.getSimpleName()); + } @Override OperatorType operatorType() { @@ -222,6 +261,10 @@ public class MultiRangeItem extends MultiTermItem { if (sameIndex) metadata |= 0b00000001; if (startInclusive) metadata |= 0b00000010; if (endInclusive) metadata |= 0b00000100; + + encoder = type.encoderFor(sortedRanges()); + metadata |= encoder.id << 3; + buffer.put(metadata); putString(startIndex, buffer); if ( ! sameIndex) putString(endIndex, buffer); @@ -229,9 +272,9 @@ public class MultiRangeItem extends MultiTermItem { @Override void encodeTerms(ByteBuffer buffer) { - for (Range range : sortedRanges()) { - buffer.putDouble(range.start.doubleValue()); - buffer.putDouble(range.end.doubleValue()); + for (Range<Type> range : sortedRanges()) { + encoder.serializer.accept(buffer, range.start); + encoder.serializer.accept(buffer, range.end); } } @@ -246,8 +289,9 @@ public class MultiRangeItem extends MultiTermItem { } @Override + @SuppressWarnings("unchecked") public Item clone() { - MultiRangeItem clone = (MultiRangeItem) super.clone(); + MultiRangeItem<Type> clone = (MultiRangeItem<Type>) super.clone(); clone.ranges = new ArrayList<>(ranges); return clone; } @@ -257,8 +301,9 @@ public class MultiRangeItem extends MultiTermItem { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; if ( ! super.equals(o)) return false; - MultiRangeItem that = (MultiRangeItem) o; - return startInclusive == that.startInclusive + MultiRangeItem<?> that = (MultiRangeItem<?>) o; + return type == that.type + && startInclusive == that.startInclusive && endInclusive == that.endInclusive && startIndex.equals(that.startIndex) && endIndex.equals(that.endIndex) @@ -267,20 +312,20 @@ public class MultiRangeItem extends MultiTermItem { @Override public int hashCode() { - return Objects.hash(super.hashCode(), startIndex, endIndex, startInclusive, endInclusive, sortedRanges()); + return Objects.hash(super.hashCode(), type, startIndex, endIndex, startInclusive, endInclusive, sortedRanges()); } Item asCompositeItem() { OrItem root = new OrItem(); if (startIndex.equals(endIndex)) { - for (Range range : sortedRanges()) { + for (Range<Type> range : sortedRanges()) { root.addItem(new IntItem(new com.yahoo.prelude.query.Limit(range.start, startInclusive), new com.yahoo.prelude.query.Limit(range.end, endInclusive), startIndex)); } } else { - for (Range range : sortedRanges()) { + for (Range<Type> range : sortedRanges()) { AndItem pair = new AndItem(); pair.addItem(new IntItem(new com.yahoo.prelude.query.Limit(range.start, startInclusive), com.yahoo.prelude.query.Limit.POSITIVE_INFINITY, @@ -294,4 +339,4 @@ public class MultiRangeItem extends MultiTermItem { return root; } -}
\ No newline at end of file +} |